Skip to content
Snippets Groups Projects
Commit dc8532f5 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

Merge branch 'plotInHistory' into 'develop'

History sensitive plots

See merge request !317
parents 26b52fb6 dc13d638
No related branches found
No related tags found
2 merge requests!370Release 2.0,!317History sensitive plots
Pipeline #50865 passed with stage
in 1 minute and 18 seconds
......@@ -14,3 +14,4 @@ PyWavelets==1.1.1
scikit-learn==1.0
scipy==1.7.1
typing_extensions==3.10.0.2
seaborn==0.11.2
......@@ -6,6 +6,7 @@ from typing import Optional
from typing_extensions import Literal
import saqc
import numpy as np
from saqc.lib.types import FreqString
......@@ -37,10 +38,12 @@ class Tools:
path: Optional[str] = None,
max_gap: Optional[FreqString] = None,
stats: bool = False,
plot_kwargs: Optional[dict] = None,
fig_kwargs: Optional[dict] = None,
history: Optional[Literal["valid", "complete"]] = "valid",
xscope: Optional[slice] = None,
phaseplot: Optional[str] = None,
stats_dict: Optional[dict] = None,
store_kwargs: Optional[dict] = None,
to_mask: Optional[float] = np.inf,
**kwargs,
) -> saqc.SaQC:
return self._defer("plot", locals())
......@@ -251,18 +251,20 @@ def plot(
path: Optional[str] = None,
max_gap: Optional[FreqString] = None,
stats: bool = False,
plot_kwargs: Optional[dict] = None,
fig_kwargs: Optional[dict] = None,
history: Optional[Literal["valid", "complete", "clear"]] = "valid",
xscope: Optional[slice] = None,
phaseplot: Optional[str] = None,
stats_dict: Optional[dict] = None,
store_kwargs: Optional[dict] = None,
to_mask: Optional[float] = np.inf,
**kwargs,
):
"""
Stores or shows a figure object, containing data graph with flag marks for field.
There are two modes, 'interactive' and 'store' mode, wich is determind via the
There are two modes, 'interactive' and 'store', which are determined by the
``path`` keyword. In interactive mode (default) the plot is shown at runtime
and the execution stops until the plot window is closed manually by a user. In
and the program execution stops until the plot window is closed manually. In
store mode the generated plot is stored to disk and no manual interaction is
needed.
......@@ -286,31 +288,26 @@ def plot(
max_gap : str, default None
If None, all the points in the data will be connected, resulting in long straight
lines wherever continuous chunks of data are missing. NaNs in the data get dropped
before plotting. If an Offset string is passed, only points that have a distance
before plotting. If an offset string is passed, only points that have a distance
below `max_gap` get connected via the plotting line.
stats : bool, default False
Whether to include statistics table in plot.
plot_kwargs : dict, default None
Keyword arguments controlling plot generation. Will be passed on to the
``Matplotlib.axes.Axes.set()`` property batch setter for the axes showing the
data plot. The most relevant of those properties might be "ylabel", "title" and
"ylim". In Addition, following options are available:
* {'slice': s} property, that determines a chunk of the data to be plotted /
processed. `s` can be anything, that is a valid argument to the
``pandas.Series.__getitem__`` method.
* {'history': str}
* str="all": All the flags are plotted with colored dots, refering to the
tests they originate from
* str="valid": - same as 'all' - but only plots those flags, that are not
removed by later tests
fig_kwargs : dict, default None
Keyword arguments controlling figure generation. In interactive mode,
``None`` defaults to ``{"figsize": (16, 9)}`` to ensure a proper figure size
in store-mode.
history : {"valid", "complete", "clear", None}, default "valid"
Discriminate the plotted flags with respect to the tests they originate from.
* "valid" - Only plot those flags that do not get altered or "unflagged" by subsequent tests. Only list tests
in the legend that actually contributed flags to the overall result.
* "complete" - Plot all the flags set and list all the tests run on a variable. Suitable for debugging/tracking.
* "clear" - Clear the plot of all flagged values.
* None - Just plot the resulting flags for one variable, without any historical meta information.
xscope : slice or Offset, default None
Parameter, that determines a chunk of the data to be plotted /
processed. `xscope` can be anything, that is a valid argument to the ``pandas.Series.__getitem__`` method.
phaseplot : str or None, default None
If a string is passed, plot ``field`` in the phase space it forms together with the Variable ``phaseplot``.
store_kwargs : dict, default {}
Keywords to be passed on to the ``matplotlib.pyplot.savefig`` method, handling
......@@ -349,6 +346,11 @@ def plot(
>>> func = lambda x, y, z: round((x.isna().sum()) / len(x), 2)
"""
interactive = path is None
level = kwargs.get("flag", BAD)
if to_mask < np.inf:
data = data.copy()
data.loc[flags[field] >= to_mask, field] = np.nan
if store_kwargs is None:
store_kwargs = {}
......@@ -358,19 +360,17 @@ def plot(
else:
mpl.use("Agg")
# ensure a proper size in stored plot
if fig_kwargs is None:
fig_kwargs = {"figsize": (16, 9)}
fig = makeFig(
data=data,
field=field,
flags=flags,
level=kwargs.get("flag", BAD),
level=level,
max_gap=max_gap,
stats=stats,
plot_kwargs=plot_kwargs,
fig_kwargs=fig_kwargs,
history=history,
xscope=xscope,
phaseplot=phaseplot,
stats_dict=stats_dict,
)
......
......@@ -2,10 +2,14 @@
# -*- coding: utf-8 -*-
from typing import Optional
from typing_extensions import Literal
from saqc.lib.tools import toSequence
import pandas as pd
import numpy as np
import matplotlib as mpl
import itertools
import matplotlib.pyplot as plt
import seaborn
from saqc.constants import *
from saqc.core import Flags
from saqc.lib.types import DiosLikeT, FreqString
......@@ -19,6 +23,18 @@ STATSDICT = {
"flagged percentage": lambda x, y, z: round(((y >= z).sum()) / len(x), 2),
}
PLOT_KWARGS = {"alpha": 0.8, "linewidth": 1}
AX_KWARGS = {}
FIG_KWARGS = {"figsize": (16, 9)}
SCATTER_KWARGS = {
"marker": ["s", "D", "^", "o"],
"color": seaborn.color_palette("bright"),
"alpha": 0.7,
"zorder": 10,
"edgecolors": "black",
"s": 70,
}
def makeFig(
data: DiosLikeT,
......@@ -27,8 +43,9 @@ def makeFig(
level: float,
max_gap: Optional[FreqString] = None,
stats: bool = False,
plot_kwargs: Optional[dict] = None,
fig_kwargs: Optional[dict] = None,
history: Optional[Literal["valid", "complete"]] = "valid",
xscope: Optional[slice] = None,
phaseplot: Optional[str] = None,
stats_dict: Optional[dict] = None,
):
"""
......@@ -57,24 +74,19 @@ def makeFig(
stats : bool, default False
Whether to include statistics table in plot.
plot_kwargs : dict, default None
Keyword arguments controlling plot generation. Will be passed on to the
``Matplotlib.axes.Axes.set()`` property batch setter for the axes showing the
data plot. The most relevant of those properties might be "ylabel",
"title" and "ylim".
In Addition, following options are available:
* {'slice': s} property, that determines a chunk of the data to be plotted /
processed. `s` can be anything,
that is a valid argument to the ``pandas.Series.__getitem__`` method.
* {'history': str}
* str="all": All the flags are plotted with colored dots, refering to the
tests they originate from
* str="valid": - same as 'all' - but only plots those flags, that are not
removed by later tests
fig_kwargs : dict, default None
Keyword arguments controlling figure generation. None defaults to
{"figsize": (16, 9)}
history : {"valid", "complete", "clear", None}, default "valid"
Discriminate the plotted flags with respect to the tests they originate from.
* "valid" - Only plot those flags that do not get altered or "unflagged" by subsequent tests. Only list tests
in the legend that actually contributed flags to the overall result.
* "complete" - Plot all the flags set and list all the tests run on a variable. Suitable for debugging/tracking.
* "clear" - Clear the plot of all flagged values.
* None - Just plot the resulting flags for one variable, without any historical meta information.
xscope : slice or Offset, default None
Parameter that determines a chunk of the data to be plotted /
processed. `xscope` can be anything that is a valid argument to the ``pandas.Series.__getitem__`` method.
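phaseplot : str or None, default None
If a string is passed, plot ``field`` in the phase space it forms together with the variable ``phaseplot``.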
stats_dict: dict, default None
(Only relevant if `stats`=True).
......@@ -111,33 +123,45 @@ def makeFig(
>>> func = lambda x, y, z: round((x.isna().sum()) / len(x), 2)
"""
if plot_kwargs is None:
plot_kwargs = {"history": False}
if fig_kwargs is None:
fig_kwargs = {}
if stats_dict is None:
stats_dict = {}
# data retrieval
d = data[field]
# data slicing:
s = plot_kwargs.pop("slice", slice(None))
d = d[s]
flags_vals = flags[field][s]
flags_hist = flags.history[field].hist.loc[s]
xscope = xscope or slice(xscope)
d = d[xscope]
flags_vals = flags[field][xscope]
flags_hist = flags.history[field].hist.loc[xscope]
flags_meta = flags.history[field].meta
if stats:
stats_dict.update(STATSDICT)
stats_dict = _evalStatsDict(stats_dict, d, flags_vals, level)
na_mask = d.isna()
d = d[~na_mask]
if phaseplot:
flags_vals = flags_vals.copy()
flags_hist = flags_hist.copy()
phase_index = data[phaseplot][xscope].values
phase_index_d = phase_index[~na_mask]
na_mask.index = phase_index
d.index = phase_index_d
flags_vals.index = phase_index
flags_hist.index = phase_index
plot_kwargs = {**PLOT_KWARGS, **{"marker": "o", "linewidth": 0}}
ax_kwargs = {**{"xlabel": phaseplot, "ylabel": d.name}, **AX_KWARGS}
else:
plot_kwargs = PLOT_KWARGS
ax_kwargs = AX_KWARGS
# insert nans between values mutually spaced > max_gap
if max_gap:
if max_gap and not d.empty:
d = _insertBlockingNaNs(d, max_gap)
# figure composition
fig = mpl.pyplot.figure(constrained_layout=True, **fig_kwargs)
fig = mpl.pyplot.figure(constrained_layout=True, **FIG_KWARGS)
grid = fig.add_gridspec()
if stats:
plot_gs, tab_gs = grid[0].subgridspec(ncols=2, nrows=1, width_ratios=[5, 1])
......@@ -147,7 +171,19 @@ def makeFig(
else:
ax = fig.add_subplot(grid[0])
_plotVarWithFlags(ax, d, flags_vals, flags_hist, level, plot_kwargs, na_mask)
_plotVarWithFlags(
ax,
d,
flags_vals,
flags_hist,
flags_meta,
history,
level,
na_mask,
plot_kwargs,
ax_kwargs,
SCATTER_KWARGS,
)
return fig
......@@ -173,34 +209,75 @@ def _plotStatsTable(ax, stats_dict):
tab_obj.set_fontsize(10)
def _plotVarWithFlags(ax, datser, flags_vals, flags_hist, level, plot_kwargs, na_mask):
def _plotVarWithFlags(
ax,
datser,
flags_vals,
flags_hist,
flags_meta,
history,
level,
na_mask,
plot_kwargs,
ax_kwargs,
scatter_kwargs,
):
scatter_kwargs = scatter_kwargs.copy()
ax.set_title(datser.name)
ax.plot(datser)
history = plot_kwargs.pop("history", False)
ax.set(**plot_kwargs)
ax.plot(datser, color="black", **plot_kwargs)
ax.set(**ax_kwargs)
shape_cycle = scatter_kwargs.get("marker", "o")
shape_cycle = itertools.cycle(toSequence(shape_cycle))
color_cycle = scatter_kwargs.get(
"color", plt.rcParams["axes.prop_cycle"].by_key()["color"]
)
color_cycle = itertools.cycle(toSequence(color_cycle))
if history:
for i in flags_hist.columns:
if history == "all":
_plotFlags(
ax,
datser,
flags_hist[i],
na_mask,
level,
{"label": "test " + str(i)},
# catch empty but existing history case (flags_meta={})
if len(flags_meta[i]) == 0:
continue
label = (
flags_meta[i].get("label", None) or flags_meta[i]["func"].split(".")[-1]
)
scatter_kwargs.update({"label": label})
flags_i = flags_hist[i].astype(float)
if history == "complete":
scatter_kwargs.update(
{"color": next(color_cycle), "marker": next(shape_cycle)}
)
_plotFlags(ax, datser, flags_i, na_mask, level, scatter_kwargs)
if history == "valid":
# only plot those flags, that do not get altered later on:
mask = flags_i.eq(flags_vals)
flags_i[~mask] = np.nan
# Skip plot if the test did not have any effect on the overall flagging result. This avoids
# legend overflow
if ~(flags_i >= level).any():
continue
# Also skip plot, if all flagged values are np.nans (to catch flag missing and masked results mainly)
temp_i = datser.index.join(flags_i.index, how="inner")
if datser[temp_i][flags_i[temp_i].notna()].isna().all() or (
"flagMissing" in flags_meta[i]["func"]
):
continue
scatter_kwargs.update(
{"color": next(color_cycle), "marker": next(shape_cycle)}
)
_plotFlags(
ax,
datser,
flags_hist[i].combine(flags_vals, min),
flags_i,
na_mask,
level,
{"label": "test " + str(i)},
scatter_kwargs,
)
ax.legend()
else:
_plotFlags(ax, datser, flags_vals, na_mask, level, {"color": "r"})
scatter_kwargs.update({"color": next(color_cycle), "marker": next(shape_cycle)})
_plotFlags(ax, datser, flags_vals, na_mask, level, scatter_kwargs)
def _plotFlags(ax, datser, flags, na_mask, level, scatter_kwargs):
......
......@@ -27,10 +27,8 @@ def test_makeFig():
dummy_path = ""
d_saqc = d_saqc.plot(field="data", path="")
d_saqc = d_saqc.plot(field="data", path=dummy_path, history="valid", stats=True)
d_saqc = d_saqc.plot(field="data", path=dummy_path, history="complete")
d_saqc = d_saqc.plot(
field="data", path=dummy_path, plot_kwargs={"history": "valid"}, stats=True
)
d_saqc = d_saqc.plot(field="data", path=dummy_path, plot_kwargs={"history": "all"})
d_saqc = d_saqc.plot(
field="data", path=dummy_path, plot_kwargs={"slice": "2000-10"}, stats=True
field="data", path=dummy_path, ax_kwargs={"ylim": "2000-10"}, stats=True
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment