Merge branch 'plotInHistory' into 'develop'

History sensitive plots See merge request !317

Merge branch 'plotInHistory' into 'develop'
History sensitive plots See merge request !317
dc8532f5 · Peter Lünenschloß · 26b52fb6 · dc13d638 · dc8532f5 · dc8532f5
Commit dc8532f5 authored 3 years ago by Peter Lünenschloß
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,4 @@ PyWavelets==1.1.1
 scikit-learn==1.0
 scipy==1.7.1
 typing_extensions==3.10.0.2
+seaborn==0.11.2
--- a/saqc/core/modules/tools.py
+++ b/saqc/core/modules/tools.py
@@ -6,6 +6,7 @@ from typing import Optional
 from typing_extensions import Literal

 import saqc
+import numpy as np
 from saqc.lib.types import FreqString


@@ -37,10 +38,12 @@ class Tools:
        path: Optional[str] = None,
        max_gap: Optional[FreqString] = None,
        stats: bool = False,
-        plot_kwargs: Optional[dict] = None,
-        fig_kwargs: Optional[dict] = None,
+        history: Optional[Literal["valid", "complete"]] = "valid",
+        xscope: Optional[slice] = None,
+        phaseplot: Optional[str] = None,
        stats_dict: Optional[dict] = None,
        store_kwargs: Optional[dict] = None,
+        to_mask: Optional[float] = np.inf,
        **kwargs,
    ) -> saqc.SaQC:
        return self._defer("plot", locals())
--- a/saqc/funcs/tools.py
+++ b/saqc/funcs/tools.py
@@ -251,18 +251,20 @@ def plot(
    path: Optional[str] = None,
    max_gap: Optional[FreqString] = None,
    stats: bool = False,
-    plot_kwargs: Optional[dict] = None,
-    fig_kwargs: Optional[dict] = None,
+    history: Optional[Literal["valid", "complete", "clear"]] = "valid",
+    xscope: Optional[slice] = None,
+    phaseplot: Optional[str] = None,
    stats_dict: Optional[dict] = None,
    store_kwargs: Optional[dict] = None,
+    to_mask: Optional[float] = np.inf,
    **kwargs,
 ):
    """
    Stores or shows a figure object, containing data graph with flag marks for field.

-    There are two modes, 'interactive' and 'store' mode, wich is determind via the
+    There are two modes, 'interactive' and 'store', which are determined through the
    ``save_path`` keyword. In interactive mode (default) the plot is shown at runtime
-    and the execution stops until the plot window is closed manually by a user. In
+    and the program execution stops until the plot window is closed manually. In
    store mode the generated plot is stored to disk and no manually interaction is
    needed.

@@ -286,31 +288,26 @@ def plot(
    max_gap : str, default None
        If None, all the points in the data will be connected, resulting in long linear
        lines, where continous chunks of data is missing. Nans in the data get dropped
-        before plotting. If an Offset string is passed, only points that have a distance
+        before plotting. If an offset string is passed, only points that have a distance
        below `max_gap` get connected via the plotting line.

    stats : bool, default False
        Whether to include statistics table in plot.

-    plot_kwargs : dict, default None
-        Keyword arguments controlling plot generation. Will be passed on to the
-        ``Matplotlib.axes.Axes.set()`` property batch setter for the axes showing the
-        data plot. The most relevant of those properties might be "ylabel", "title" and
-        "ylim". In Addition, following options are available:
-
-        * {'slice': s} property, that determines a chunk of the data to be plotted /
-            processed. `s` can be anything, that is a valid argument to the
-            ``pandas.Series.__getitem__`` method.
-        * {'history': str}
-            * str="all": All the flags are plotted with colored dots, refering to the
-                tests they originate from
-            * str="valid": - same as 'all' - but only plots those flags, that are not
-                removed by later tests
-
-    fig_kwargs : dict, default None
-        Keyword arguments controlling figure generation. In interactive mode,
-        ``None`` defaults to ``{"figsize": (16, 9)}`` to ensure a proper figure size
-        in store-mode.
+    history : {"valid", "complete", "clear", None}, default "valid"
+        Discriminate the plotted flags with respect to the tests they originate from.
+        * "valid" - Only plot those flags, that do not get altered or "unflagged" by subsequent tests. Only list tests
+          in the legend, that actually contributed flags to the overall result.
+        * "complete" - plot all the flags set and list all the tests ran on a variable. Suitable for debugging/tracking.
+        * "clear" - clear plot from all the flagged values
+        * None - just plot the resulting flags for one variable, without any historical meta information.
+
+    xscope : slice or Offset, default None
+        Parameter, that determines a chunk of the data to be plotted /
+        processed. `xscope` can be anything, that is a valid argument to the ``pandas.Series.__getitem__`` method.
+
+    phaseplot : str or None, default None
+        If a string is passed, plot ``field`` in the phase space it forms together with the Variable ``phaseplot``.

    store_kwargs : dict, default {}
        Keywords to be passed on to the ``matplotlib.pyplot.savefig`` method, handling
@@ -349,6 +346,11 @@ def plot(
    >>> func = lambda x, y, z: round((x.isna().sum()) / len(x), 2)
    """
    interactive = path is None
+    level = kwargs.get("flag", BAD)
+
+    if to_mask < np.inf:
+        data = data.copy()
+        data.loc[flags[field] >= to_mask, field] = np.nan

    if store_kwargs is None:
        store_kwargs = {}
@@ -358,19 +360,17 @@ def plot(

    else:
        mpl.use("Agg")
-        # ensure a proper size in stored plot
-        if fig_kwargs is None:
-            fig_kwargs = {"figsize": (16, 9)}

    fig = makeFig(
        data=data,
        field=field,
        flags=flags,
-        level=kwargs.get("flag", BAD),
+        level=level,
        max_gap=max_gap,
        stats=stats,
-        plot_kwargs=plot_kwargs,
-        fig_kwargs=fig_kwargs,
+        history=history,
+        xscope=xscope,
+        phaseplot=phaseplot,
        stats_dict=stats_dict,
    )


--- a/saqc/lib/plotting.py
+++ b/saqc/lib/plotting.py
@@ -2,10 +2,14 @@
 # -*- coding: utf-8 -*-

 from typing import Optional
-
+from typing_extensions import Literal
+from saqc.lib.tools import toSequence
 import pandas as pd
+import numpy as np
 import matplotlib as mpl
-
+import itertools
+import matplotlib.pyplot as plt
+import seaborn
 from saqc.constants import *
 from saqc.core import Flags
 from saqc.lib.types import DiosLikeT, FreqString
@@ -19,6 +23,18 @@ STATSDICT = {
    "flagged percentage": lambda x, y, z: round(((y >= z).sum()) / len(x), 2),
 }

+PLOT_KWARGS = {"alpha": 0.8, "linewidth": 1}
+AX_KWARGS = {}
+FIG_KWARGS = {"figsize": (16, 9)}
+SCATTER_KWARGS = {
+    "marker": ["s", "D", "^", "o"],
+    "color": seaborn.color_palette("bright"),
+    "alpha": 0.7,
+    "zorder": 10,
+    "edgecolors": "black",
+    "s": 70,
+}
+

 def makeFig(
    data: DiosLikeT,
@@ -27,8 +43,9 @@ def makeFig(
    level: float,
    max_gap: Optional[FreqString] = None,
    stats: bool = False,
-    plot_kwargs: Optional[dict] = None,
-    fig_kwargs: Optional[dict] = None,
+    history: Optional[Literal["valid", "complete"]] = "valid",
+    xscope: Optional[slice] = None,
+    phaseplot: Optional[str] = None,
    stats_dict: Optional[dict] = None,
 ):
    """
@@ -57,24 +74,19 @@ def makeFig(
    stats : bool, default False
        Whether to include statistics table in plot.

-    plot_kwargs : dict, default None
-        Keyword arguments controlling plot generation. Will be passed on to the
-        ``Matplotlib.axes.Axes.set()`` property batch setter for the axes showing the
-        data plot. The most relevant of those properties might be "ylabel",
-        "title" and "ylim".
-        In Addition, following options are available:
-
-        * {'slice': s} property, that determines a chunk of the data to be plotted /
-            processed. `s` can be anything,
-            that is a valid argument to the ``pandas.Series.__getitem__`` method.
-        * {'history': str}
-            * str="all": All the flags are plotted with colored dots, refering to the
-                tests they originate from
-            * str="valid": - same as 'all' - but only plots those flags, that are not
-                removed by later tests
-    fig_kwargs : dict, default None
-        Keyword arguments controlling figure generation. None defaults to
-        {"figsize": (16, 9)}
+    history : {"valid", "complete", "clear", None}, default "valid"
+        Discriminate the plotted flags with respect to the tests they originate from.
+        * "valid" - Only plot those flags, that do not get altered or "unflagged" by subsequent tests. Only list tests
+          in the legend, that actually contributed flags to the overall result.
+        * "complete" - plot all the flags set and list all the tests ran on a variable. Suitable for debugging/tracking.
+        * "clear" - clear plot from all the flagged values
+        * None - just plot the resulting flags for one variable, without any historical meta information.
+
+    xscope : slice or Offset, default None
+        Parameter, that determines a chunk of the data to be plotted /
+        processed. `xscope` can be anything, that is a valid argument to the ``pandas.Series.__getitem__`` method.
+
+    phaseplot : str or None, default None

    stats_dict: dict, default None
        (Only relevant if `stats`=True).
@@ -111,33 +123,45 @@ def makeFig(

    >>> func = lambda x, y, z: round((x.isna().sum()) / len(x), 2)
    """
-    if plot_kwargs is None:
-        plot_kwargs = {"history": False}
-    if fig_kwargs is None:
-        fig_kwargs = {}
+
    if stats_dict is None:
        stats_dict = {}

    # data retrieval
    d = data[field]
    # data slicing:
-    s = plot_kwargs.pop("slice", slice(None))
-    d = d[s]
-    flags_vals = flags[field][s]
-    flags_hist = flags.history[field].hist.loc[s]
+    xscope = xscope or slice(xscope)
+    d = d[xscope]
+    flags_vals = flags[field][xscope]
+    flags_hist = flags.history[field].hist.loc[xscope]
+    flags_meta = flags.history[field].meta
    if stats:
        stats_dict.update(STATSDICT)
        stats_dict = _evalStatsDict(stats_dict, d, flags_vals, level)

    na_mask = d.isna()
    d = d[~na_mask]
+    if phaseplot:
+        flags_vals = flags_vals.copy()
+        flags_hist = flags_hist.copy()
+        phase_index = data[phaseplot][xscope].values
+        phase_index_d = phase_index[~na_mask]
+        na_mask.index = phase_index
+        d.index = phase_index_d
+        flags_vals.index = phase_index
+        flags_hist.index = phase_index
+        plot_kwargs = {**PLOT_KWARGS, **{"marker": "o", "linewidth": 0}}
+        ax_kwargs = {**{"xlabel": phaseplot, "ylabel": d.name}, **AX_KWARGS}
+    else:
+        plot_kwargs = PLOT_KWARGS
+        ax_kwargs = AX_KWARGS

    # insert nans between values mutually spaced > max_gap
-    if max_gap:
+    if max_gap and not d.empty:
        d = _insertBlockingNaNs(d, max_gap)

    # figure composition
-    fig = mpl.pyplot.figure(constrained_layout=True, **fig_kwargs)
+    fig = mpl.pyplot.figure(constrained_layout=True, **FIG_KWARGS)
    grid = fig.add_gridspec()
    if stats:
        plot_gs, tab_gs = grid[0].subgridspec(ncols=2, nrows=1, width_ratios=[5, 1])
@@ -147,7 +171,19 @@ def makeFig(
    else:
        ax = fig.add_subplot(grid[0])

-    _plotVarWithFlags(ax, d, flags_vals, flags_hist, level, plot_kwargs, na_mask)
+    _plotVarWithFlags(
+        ax,
+        d,
+        flags_vals,
+        flags_hist,
+        flags_meta,
+        history,
+        level,
+        na_mask,
+        plot_kwargs,
+        ax_kwargs,
+        SCATTER_KWARGS,
+    )
    return fig


@@ -173,34 +209,75 @@ def _plotStatsTable(ax, stats_dict):
    tab_obj.set_fontsize(10)


-def _plotVarWithFlags(ax, datser, flags_vals, flags_hist, level, plot_kwargs, na_mask):
+def _plotVarWithFlags(
+    ax,
+    datser,
+    flags_vals,
+    flags_hist,
+    flags_meta,
+    history,
+    level,
+    na_mask,
+    plot_kwargs,
+    ax_kwargs,
+    scatter_kwargs,
+):
+    scatter_kwargs = scatter_kwargs.copy()
    ax.set_title(datser.name)
-    ax.plot(datser)
-    history = plot_kwargs.pop("history", False)
-    ax.set(**plot_kwargs)
+    ax.plot(datser, color="black", **plot_kwargs)
+    ax.set(**ax_kwargs)
+    shape_cycle = scatter_kwargs.get("marker", "o")
+    shape_cycle = itertools.cycle(toSequence(shape_cycle))
+    color_cycle = scatter_kwargs.get(
+        "color", plt.rcParams["axes.prop_cycle"].by_key()["color"]
+    )
+    color_cycle = itertools.cycle(toSequence(color_cycle))
    if history:
        for i in flags_hist.columns:
-            if history == "all":
-                _plotFlags(
-                    ax,
-                    datser,
-                    flags_hist[i],
-                    na_mask,
-                    level,
-                    {"label": "test " + str(i)},
+            # catch empty but existing history case (flags_meta={})
+            if len(flags_meta[i]) == 0:
+                continue
+            label = (
+                flags_meta[i].get("label", None) or flags_meta[i]["func"].split(".")[-1]
+            )
+            scatter_kwargs.update({"label": label})
+            flags_i = flags_hist[i].astype(float)
+            if history == "complete":
+                scatter_kwargs.update(
+                    {"color": next(color_cycle), "marker": next(shape_cycle)}
                )
+                _plotFlags(ax, datser, flags_i, na_mask, level, scatter_kwargs)
            if history == "valid":
+                # only plot those flags, that do not get altered later on:
+                mask = flags_i.eq(flags_vals)
+                flags_i[~mask] = np.nan
+                # Skip plot, if the test had no effect on the overall flagging result. This avoids
+                # legend overflow
+                if ~(flags_i >= level).any():
+                    continue
+
+                # Also skip plot, if all flagged values are np.nans (to catch flag missing and masked results mainly)
+                temp_i = datser.index.join(flags_i.index, how="inner")
+                if datser[temp_i][flags_i[temp_i].notna()].isna().all() or (
+                    "flagMissing" in flags_meta[i]["func"]
+                ):
+                    continue
+
+                scatter_kwargs.update(
+                    {"color": next(color_cycle), "marker": next(shape_cycle)}
+                )
                _plotFlags(
                    ax,
                    datser,
-                    flags_hist[i].combine(flags_vals, min),
+                    flags_i,
                    na_mask,
                    level,
-                    {"label": "test " + str(i)},
+                    scatter_kwargs,
                )
        ax.legend()
    else:
-        _plotFlags(ax, datser, flags_vals, na_mask, level, {"color": "r"})
+        scatter_kwargs.update({"color": next(color_cycle), "marker": next(shape_cycle)})
+        _plotFlags(ax, datser, flags_vals, na_mask, level, scatter_kwargs)


 def _plotFlags(ax, datser, flags, na_mask, level, scatter_kwargs):

--- a/tests/funcs/test_tools.py
+++ b/tests/funcs/test_tools.py
@@ -27,10 +27,8 @@ def test_makeFig():
    dummy_path = ""

    d_saqc = d_saqc.plot(field="data", path="")
+    d_saqc = d_saqc.plot(field="data", path=dummy_path, history="valid", stats=True)
+    d_saqc = d_saqc.plot(field="data", path=dummy_path, history="complete")
    d_saqc = d_saqc.plot(
-        field="data", path=dummy_path, plot_kwargs={"history": "valid"}, stats=True
-    )
-    d_saqc = d_saqc.plot(field="data", path=dummy_path, plot_kwargs={"history": "all"})
-    d_saqc = d_saqc.plot(
-        field="data", path=dummy_path, plot_kwargs={"slice": "2000-10"}, stats=True
+        field="data", path=dummy_path, ax_kwargs={"ylim": "2000-10"}, stats=True
    )