Skip to content
Snippets Groups Projects
Commit 2fdf7c3d authored by Bert Palm's avatar Bert Palm 🎇
Browse files

plotting fixed

parent a3f34ea5
No related branches found
No related tags found
No related merge requests found
...@@ -21,6 +21,8 @@ def _collectVariables(meta, data): ...@@ -21,6 +21,8 @@ def _collectVariables(meta, data):
for idx, configrow in meta.iterrows(): for idx, configrow in meta.iterrows():
varname = configrow[Fields.VARNAME] varname = configrow[Fields.VARNAME]
assign = configrow[Fields.ASSIGN] assign = configrow[Fields.ASSIGN]
if varname in flags:
continue
if varname in data: if varname in data:
flags.append(varname) flags.append(varname)
elif varname not in flags and assign is True: elif varname not in flags and assign is True:
...@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra ...@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
meta = config[config.columns.difference(tests.columns)] meta = config[config.columns.difference(tests.columns)]
# # prepare the flags # # prepare the flags
# varnames = collectVariables(meta, data) varnames = _collectVariables(meta, data)
# fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames)) fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames))
# flags = fresh if flags is None else flags.join(fresh) flagger = fresh if flags is None else flags._flags.join(fresh._flags)
if flags is None: # if flags is None:
flag_cols = _collectVariables(meta, data) # flag_cols = _collectVariables(meta, data)
flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols)) # flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols))
else: # else:
flagger = flagger.initFlags(flags=flags) # flagger = flagger.initFlags(flags=flags)
# this check comes late, but compiling the user tests needs fully prepared flags # this check comes late, but compiling the user tests needs fully prepared flags
checkConfig(config, data, flagger, nodata) checkConfig(config, data, flagger, nodata)
...@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra ...@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
continue continue
flagger = flagger.setFlagger(flagger_chunk_result) flagger = flagger.setFlagger(flagger_chunk_result)
# plotHook(dchunk, fchunk, ffchunk, varname, configrow[Fields.PLOT], flag_test, flagger)
# plotAllHook(data, flags, flagger) plotHook(dchunk, flagger_chunk, flagger_chunk_result, varname, configrow[Fields.PLOT], flag_test)
plotAllHook(data, flagger)
return data, flagger return data, flagger
......
...@@ -2,37 +2,43 @@ ...@@ -2,37 +2,43 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# TODO: use the logging module # TODO: use the logging module
import logging
import pandas as pd
import numpy as np
from warnings import warn from warnings import warn
__plotvars = [] __plotvars = []
_colors = dict(unflagged='silver', good='seagreen', bad='firebrick', suspicious='gold')
def plotAllHook(data, flags, flagger):
def plotAllHook(data, flagger):
if len(__plotvars) > 1: if len(__plotvars) > 1:
_plot(data, flags, True, __plotvars, flagger) _plot(data, flagger, True, __plotvars)
def plotHook(data, old, new, varname, do_plot, flag_test, flagger): def plotHook(data, old, new, varname, do_plot, flag_test, plot_nans=True):
# old/new: flagger
if do_plot: if do_plot:
__plotvars.append(varname) __plotvars.append(varname)
# cannot use getFlags here, because if a flag was set (e.g. with force) the # cannot use getFlags here, because if a flag was set (e.g. with force) the
# flag may be the same, but any additional row (e.g. comment-field) would differ # flag may be the same, but any additional row (e.g. comment-field) would differ
mask = (old[varname] == new[varname]).any(axis=1) mask = (old._flags[varname] != new._flags[varname]).any(axis=1)
_plot(data, new, mask, varname, flagger, title=flag_test) _plot(data, new, mask, varname, title=flag_test, plot_nans=plot_nans)
def _plot( def _plot(
data, data,
flags, flagger,
flagmask, flagmask,
varname, varname,
flagger,
interactive_backend=True, interactive_backend=True,
title="Data Plot", title="Data Plot",
show_nans=True, plot_nans=True,
): ):
# todo: try catch warn (once) return
# only import if plotting is requested by the user # only import if plotting is requested by the user
import matplotlib as mpl import matplotlib as mpl
...@@ -47,22 +53,19 @@ def _plot( ...@@ -47,22 +53,19 @@ def _plot(
# needed for datetime conversion # needed for datetime conversion
from pandas.plotting import register_matplotlib_converters from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters() register_matplotlib_converters()
if not isinstance(varname, (list, set)): if not isinstance(varname, (list, set)):
varname = [varname] varname = [varname]
varname = set(varname) varname = set(varname)
# filter out variables to which no data is associated # filter out variables to which no data is associated (e.g. freshly assigned vars)
tmp = [] tmp = []
for var in varname: for var in varname:
if var in data.columns: if var in data.columns:
tmp.append(var) tmp.append(var)
else: else:
warn( logging.warning(f"Cannot plot column '{var}', because it is not present in data.")
f"Cannot plot column '{var}' that is not present in data.", UserWarning
)
if not tmp: if not tmp:
return return
varname = tmp varname = tmp
...@@ -72,62 +75,84 @@ def _plot( ...@@ -72,62 +75,84 @@ def _plot(
fig, axes = plt.subplots(plots, 1, sharex=True) fig, axes = plt.subplots(plots, 1, sharex=True)
axes[0].set_title(title) axes[0].set_title(title)
for i, v in enumerate(varname): for i, v in enumerate(varname):
_plotQflags(data, v, flagger, flagmask, axes[i], show_nans) _plotByQualtyFlag(data, v, flagger, flagmask, axes[i], plot_nans)
else: else:
fig, ax = plt.subplots() fig, ax = plt.subplots()
plt.title(title) plt.title(title)
_plotQflags(data, varname.pop(), flagger, flagmask, ax, show_nans) _plotByQualtyFlag(data, varname.pop(), flagger, flagmask, ax, plot_nans)
plt.xlabel("time")
# dummy plot for the label `missing` see plot_vline for more info # dummy plot for the label `missing` see plot_vline for more info
plt.plot([], [], ":", color="silver", label="missing data") plt.plot([], [], ":", color="silver", label="missing data")
plt.xlabel("time")
plt.legend() plt.legend()
if interactive_backend: if interactive_backend:
plt.show() plt.show()
def _plotQflags(data, varname, flagger, flagmask, ax, show_nans): def _plotByQualtyFlag(data, varname, flagger, flagmask, ax, plot_nans):
ax.set_ylabel(varname) ax.set_ylabel(varname)
x = data.index x = data.index
y = data[varname] y = data[varname]
ax.plot(x, y, "-", markersize=1, color="silver")
# plot all data in silver (NaNs as vertical lines) # base plot: show all(!) data
ax.plot(x, y, "-", color="silver", label="data") ax.plot(x, y, "-", color="silver", label="data")
flagged = flagger.isFlagged(varname)
if show_nans: # ANY OLD FLAG
nans = y.isna() # plot all(!) data that are already flagged in black
idx = y.index[nans & ~flagged] flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>=')
_plotVline(ax, idx, color="silver") oldflags = flagged & ~flagmask
ax.plot(x[oldflags], y[oldflags], ".", color="black", label="flagged by other test")
# plot all data (and nans) that are already flagged in black if plot_nans:
ax.plot(x[flagged], y[flagged], ".", color="black", label="flagged by other test") _plot_nans(y[oldflags], 'black', ax)
if show_nans:
idx = y.index[nans & flagged & ~flagmask] # now we just want to show data that was flagged
_plotVline(ax, idx, color="black") if flagmask is not True:
x = x[flagmask]
y = y[flagmask]
# # plot flags in the color corresponding to the flag flagger = flagger.getFlagger(varname, flagmask)
# # BAD red, GOOD green, all in between aka SUSPISIOUS in yellow
# <<<<<<< HEAD if x.empty:
# for i, f in enumerate(flagger.categories): return
# if i == 0:
# continue suspicious = pd.Series(data=np.ones(len(y), dtype=bool), index=y.index)
# flagged = flagger.isFlagged(varname, flag=f, comparator='==') & flagmask # flag by categories
# =======
# bads = flagger.isFlagged(flags, varname, flag=flagger.BAD, comparator='==') & flagmask # plot UNFLAGGED (only nans are needed)
# good = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='==') & flagmask flag, color = flagger.UNFLAGGED, _colors['unflagged']
# susp = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='>') & flagmask & ~bads flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
# flaglist = [flagger.GOOD, flagger.BAD, 'Suspicious'] ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
# for f, flagged in zip(flaglist, [good, bads, susp]): if plot_nans:
# >>>>>>> master _plot_nans(y[flagged], color, ax)
# label = f"flag: {f}"
# color = _getColor(f, flagger) # plot GOOD
# ax.plot(x[flagged], y[flagged], '.', color=color, label=label) flag, color = flagger.GOOD, _colors['good']
# if show_nans: flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
# idx = y.index[nans & flagged] ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
# _plotVline(ax, idx, color=color) if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot BAD
flag, color = flagger.BAD, _colors['bad']
flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot SUSPICIOUS
color = _colors['suspicious']
flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>')
flagged &= flagger.isFlagged(varname, flag=flagger.BAD, comparator='<')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"{flagger.GOOD} < flag < {flagger.BAD}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
def _plot_nans(y, color, ax):
    """Mark every missing value of `y` on `ax` with a vertical line.

    The index labels at which `y` is NaN are handed to `_plotVline`,
    which draws one line per label in the given `color`.
    """
    missing = y.loc[y.isna()]
    _plotVline(ax, missing.index, color=color)
def _plotVline(plt, points, color="blue"): def _plotVline(plt, points, color="blue"):
...@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"): ...@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"):
for point in points: for point in points:
plt.axvline(point, color=color, linestyle=":") plt.axvline(point, color=color, linestyle=":")
def _getColor(flag, flagger):
if flag == flagger.UNFLAGGED:
return "silver"
elif flag == flagger.GOOD:
return "green"
elif flag == flagger.BAD:
return "red"
else:
# suspicios
return "yellow"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment