diff --git a/saqc/core/core.py b/saqc/core/core.py index c21782f8c110a13693936bed10d08c906423bbc8..6c8d33dc77ea2ee84bd35b3cc03ae098514c6eaf 100644 --- a/saqc/core/core.py +++ b/saqc/core/core.py @@ -21,6 +21,8 @@ def _collectVariables(meta, data): for idx, configrow in meta.iterrows(): varname = configrow[Fields.VARNAME] assign = configrow[Fields.ASSIGN] + if varname in flags: + continue if varname in data: flags.append(varname) elif varname not in flags and assign is True: @@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra meta = config[config.columns.difference(tests.columns)] # # prepapre the flags - # varnames = collectVariables(meta, data) - # fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames)) - # flags = fresh if flags is None else flags.join(fresh) - if flags is None: - flag_cols = _collectVariables(meta, data) - flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols)) - else: - flagger = flagger.initFlags(flags=flags) + varnames = _collectVariables(meta, data) + fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames)) + flagger = fresh if flags is None else flags._flags.join(fresh._flags) + # if flags is None: + # flag_cols = _collectVariables(meta, data) + # flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols)) + # else: + # flagger = flagger.initFlags(flags=flags) # this checks comes late, but the compiling of the user-test need fully prepared flags checkConfig(config, data, flagger, nodata) @@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra continue flagger = flagger.setFlagger(flagger_chunk_result) - # plotHook(dchunk, fchunk, ffchunk, varname, configrow[Fields.PLOT], flag_test, flagger) - # plotAllHook(data, flags, flagger) + + plotHook(dchunk, flagger_chunk, flagger_chunk_result, varname, configrow[Fields.PLOT], flag_test) + + plotAllHook(data, flagger) return data, flagger diff --git a/saqc/lib/plotting.py b/saqc/lib/plotting.py index ed394d4e58515a5cec3b608cc5a1dc13e4b7efae..3ea8dea32475651bcbfaf9f12347c873b55b404e 100644 --- a/saqc/lib/plotting.py +++ b/saqc/lib/plotting.py @@ -2,37 +2,43 @@ # -*- coding: utf-8 -*- # TODO: use the logging module +import logging +import pandas as pd +import numpy as np from warnings import warn __plotvars = [] +_colors = dict(unflagged='silver', good='seagreen', bad='firebrick', suspicious='gold') -def plotAllHook(data, flags, flagger): + +def plotAllHook(data, flagger): if len(__plotvars) > 1: - _plot(data, flags, True, __plotvars, flagger) + _plot(data, flagger, True, __plotvars) -def plotHook(data, old, new, varname, do_plot, flag_test, flagger): +def plotHook(data, old, new, varname, do_plot, flag_test, plot_nans=True): + # old/new: flagger if do_plot: __plotvars.append(varname) # cannot use getFlags here, because if a flag was set (e.g. with force) the # flag may be the same, but any additional row (e.g. comment-field) would differ - mask = (old[varname] == new[varname]).any(axis=1) - _plot(data, new, mask, varname, flagger, title=flag_test) + mask = (old._flags[varname] != new._flags[varname]).any(axis=1) + _plot(data, new, mask, varname, title=flag_test, plot_nans=plot_nans) def _plot( data, - flags, + flagger, flagmask, varname, - flagger, interactive_backend=True, title="Data Plot", - show_nans=True, + plot_nans=True, ): + # todo: try catch warn (once) return # only import if plotting is requested by the user import matplotlib as mpl @@ -47,22 +53,19 @@ def _plot( # needed for datetime conversion from pandas.plotting import register_matplotlib_converters - register_matplotlib_converters() if not isinstance(varname, (list, set)): varname = [varname] varname = set(varname) - # filter out variables to which no data is associated + # filter out variables to which no data is associated (e.g. freshly assigned vars) tmp = [] for var in varname: if var in data.columns: tmp.append(var) else: - warn( - f"Cannot plot column '{var}' that is not present in data.", UserWarning - ) + logging.warning(f"Cannot plot column '{var}', because it is not present in data.") if not tmp: return varname = tmp @@ -72,62 +75,84 @@ def _plot( fig, axes = plt.subplots(plots, 1, sharex=True) axes[0].set_title(title) for i, v in enumerate(varname): - _plotQflags(data, v, flagger, flagmask, axes[i], show_nans) + _plotByQualtyFlag(data, v, flagger, flagmask, axes[i], plot_nans) else: fig, ax = plt.subplots() plt.title(title) - _plotQflags(data, varname.pop(), flagger, flagmask, ax, show_nans) + _plotByQualtyFlag(data, varname.pop(), flagger, flagmask, ax, plot_nans) - plt.xlabel("time") # dummy plot for the label `missing` see plot_vline for more info plt.plot([], [], ":", color="silver", label="missing data") + + plt.xlabel("time") plt.legend() + if interactive_backend: plt.show() -def _plotQflags(data, varname, flagger, flagmask, ax, show_nans): +def _plotByQualtyFlag(data, varname, flagger, flagmask, ax, plot_nans): ax.set_ylabel(varname) x = data.index y = data[varname] - ax.plot(x, y, "-", markersize=1, color="silver") - # plot all data in silver (NaNs as vertical lines) + # base plot: show all(!) data ax.plot(x, y, "-", color="silver", label="data") - flagged = flagger.isFlagged(varname) - if show_nans: - nans = y.isna() - idx = y.index[nans & ~flagged] - _plotVline(ax, idx, color="silver") - - # plot all data (and nans) that are already flagged in black - ax.plot(x[flagged], y[flagged], ".", color="black", label="flagged by other test") - if show_nans: - idx = y.index[nans & flagged & ~flagmask] - _plotVline(ax, idx, color="black") - - -# # plot flags in the color corresponding to the flag -# # BAD red, GOOD green, all in between aka SUSPISIOUS in yellow -# <<<<<<< HEAD -# for i, f in enumerate(flagger.categories): -# if i == 0: -# continue -# flagged = flagger.isFlagged(varname, flag=f, comparator='==') & flagmask -# ======= -# bads = flagger.isFlagged(flags, varname, flag=flagger.BAD, comparator='==') & flagmask -# good = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='==') & flagmask -# susp = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='>') & flagmask & ~bads -# flaglist = [flagger.GOOD, flagger.BAD, 'Suspicious'] -# for f, flagged in zip(flaglist, [good, bads, susp]): -# >>>>>>> master -# label = f"flag: {f}" -# color = _getColor(f, flagger) -# ax.plot(x[flagged], y[flagged], '.', color=color, label=label) -# if show_nans: -# idx = y.index[nans & flagged] -# _plotVline(ax, idx, color=color) + + # ANY OLD FLAG + # plot all(!) data that are already flagged in black + flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>=') + oldflags = flagged & ~flagmask + ax.plot(x[oldflags], y[oldflags], ".", color="black", label="flagged by other test") + if plot_nans: + _plot_nans(y[oldflags], 'black', ax) + + # now we just want to show data that was flagged + if flagmask is not True: + x = x[flagmask] + y = y[flagmask] + flagger = flagger.getFlagger(varname, flagmask) + + if x.empty: + return + + suspicious = pd.Series(data=np.ones(len(y), dtype=bool), index=y.index) + # flag by categories + + # plot UNFLAGGED (only nans are needed) + flag, color = flagger.UNFLAGGED, _colors['unflagged'] + flagged = flagger.isFlagged(varname, flag=flag, comparator='==') + ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}") + if plot_nans: + _plot_nans(y[flagged], color, ax) + + # plot GOOD + flag, color = flagger.GOOD, _colors['good'] + flagged = flagger.isFlagged(varname, flag=flag, comparator='==') + ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}") + if plot_nans: + _plot_nans(y[flagged], color, ax) + + # plot BAD + flag, color = flagger.BAD, _colors['bad'] + flagged = flagger.isFlagged(varname, flag=flag, comparator='==') + ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}") + if plot_nans: + _plot_nans(y[flagged], color, ax) + + # plot SUSPICIOS + color = _colors['suspicious'] + flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>') + flagged &= flagger.isFlagged(varname, flag=flagger.BAD, comparator='<') + ax.plot(x[flagged], y[flagged], '.', color=color, label=f"{flagger.GOOD} < flag < {flagger.BAD}") + if plot_nans: + _plot_nans(y[flagged], color, ax) + + +def _plot_nans(y, color, ax): + nans = y.isna() + _plotVline(ax, y[nans].index, color=color) def _plotVline(plt, points, color="blue"): @@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"): for point in points: plt.axvline(point, color=color, linestyle=":") - -def _getColor(flag, flagger): - if flag == flagger.UNFLAGGED: - return "silver" - elif flag == flagger.GOOD: - return "green" - elif flag == flagger.BAD: - return "red" - else: - # suspicios - return "yellow"