Skip to content
Snippets Groups Projects
Commit e91368f7 authored by David Schäfer's avatar David Schäfer
Browse files

Merge branch 'master' of https://git.ufz.de/rdm/saqc

parents 976a238a 2fdf7c3d
No related branches found
No related tags found
No related merge requests found
...@@ -21,6 +21,8 @@ def _collectVariables(meta, data): ...@@ -21,6 +21,8 @@ def _collectVariables(meta, data):
for idx, configrow in meta.iterrows(): for idx, configrow in meta.iterrows():
varname = configrow[Fields.VARNAME] varname = configrow[Fields.VARNAME]
assign = configrow[Fields.ASSIGN] assign = configrow[Fields.ASSIGN]
if varname in flags:
continue
if varname in data: if varname in data:
flags.append(varname) flags.append(varname)
elif varname not in flags and assign is True: elif varname not in flags and assign is True:
...@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra ...@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
meta = config[config.columns.difference(tests.columns)] meta = config[config.columns.difference(tests.columns)]
# # prepapre the flags # # prepapre the flags
# varnames = collectVariables(meta, data) varnames = _collectVariables(meta, data)
# fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames)) fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames))
# flags = fresh if flags is None else flags.join(fresh) flagger = fresh if flags is None else flags._flags.join(fresh._flags)
if flags is None: # if flags is None:
flag_cols = _collectVariables(meta, data) # flag_cols = _collectVariables(meta, data)
flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols)) # flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols))
else: # else:
flagger = flagger.initFlags(flags=flags) # flagger = flagger.initFlags(flags=flags)
# this checks comes late, but the compiling of the user-test need fully prepared flags # this checks comes late, but the compiling of the user-test need fully prepared flags
checkConfig(config, data, flagger, nodata) checkConfig(config, data, flagger, nodata)
...@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra ...@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
continue continue
flagger = flagger.setFlagger(flagger_chunk_result) flagger = flagger.setFlagger(flagger_chunk_result)
# plotHook(dchunk, fchunk, ffchunk, varname, configrow[Fields.PLOT], flag_test, flagger)
# plotAllHook(data, flags, flagger) plotHook(dchunk, flagger_chunk, flagger_chunk_result, varname, configrow[Fields.PLOT], flag_test)
plotAllHook(data, flagger)
return data, flagger return data, flagger
......
...@@ -14,6 +14,7 @@ from saqc.lib.tools import toSequence, assertScalar, assertDataFrame ...@@ -14,6 +14,7 @@ from saqc.lib.tools import toSequence, assertScalar, assertDataFrame
COMPARATOR_MAP = { COMPARATOR_MAP = {
"!=": op.ne,
"==": op.eq, "==": op.eq,
">=": op.ge, ">=": op.ge,
">": op.gt, ">": op.gt,
......
...@@ -69,6 +69,9 @@ def harmWrapper(heap={}): ...@@ -69,6 +69,9 @@ def harmWrapper(heap={}):
data, flagger = flagMissing( data, flagger = flagMissing(
data, field, flagger, nodata=data_missing_value, **kwargs data, field, flagger, nodata=data_missing_value, **kwargs
) )
# and dropped for harmonization:
if drop_flags is not None:
drop_flags.append(flagger.BAD)
# before sending the current flags and data frame to the future (for backtracking reasons), we clear it # before sending the current flags and data frame to the future (for backtracking reasons), we clear it
# from merge-nans that just resulted from harmonization of other variables! # from merge-nans that just resulted from harmonization of other variables!
......
...@@ -129,7 +129,7 @@ def flagSoilMoistureBySoilFrost( ...@@ -129,7 +129,7 @@ def flagSoilMoistureBySoilFrost(
""" """
# retrieve reference series # retrieve reference series
refseries = data[soil_temp_reference] refseries = data[soil_temp_reference].copy()
ref_use = flagger.isFlagged( ref_use = flagger.isFlagged(
soil_temp_reference, flag=flagger.GOOD, comparator="==" soil_temp_reference, flag=flagger.GOOD, comparator="=="
) | flagger.isFlagged(soil_temp_reference, flag=flagger.UNFLAGGED, comparator="==") ) | flagger.isFlagged(soil_temp_reference, flag=flagger.UNFLAGGED, comparator="==")
...@@ -137,35 +137,14 @@ def flagSoilMoistureBySoilFrost( ...@@ -137,35 +137,14 @@ def flagSoilMoistureBySoilFrost(
refseries = refseries[ref_use.values] refseries = refseries[ref_use.values]
# drop nan values from reference series, since those are values you dont want to refer to. # drop nan values from reference series, since those are values you dont want to refer to.
refseries = refseries.dropna() refseries = refseries.dropna()
# skip further processing if reference series is empty: # skip further processing if reference series is empty:
if refseries.empty: if refseries.empty:
return data, flagger return data, flagger
# wrap around df.index.get_loc method, to catch key error in case of empty tolerance window: refseries = refseries.reindex(data[field].dropna().index, method="nearest", tolerance=tolerated_deviation)
def _checkNearestForFrost(ref_date, ref_series, tolerance, check_level): refseries = refseries[refseries < frost_level].index
try: flagger = flagger.setFlags(field, refseries, **kwargs)
# if there is no reference value within tolerance margin, following line will raise key error and
# trigger the exception
ref_pos = ref_series.index.get_loc(
ref_date, method="nearest", tolerance=tolerance
)
except KeyError:
# since test is not applicable: make no change to flag state
return False
# if reference value index is available, return comparison result (to determine flag)
return ref_series[ref_pos] <= check_level
# make temporal frame holding date index, since df.apply cant access index
temp_frame = pd.Series(data.index)
# get flagging mask ("True" denotes "bad"="test succesfull")
mask = temp_frame.apply(
_checkNearestForFrost, args=(refseries, tolerated_deviation, frost_level)
)
# apply calculated flags
flagger = flagger.setFlags(field, mask.values, **kwargs)
return data, flagger return data, flagger
...@@ -236,9 +215,8 @@ def flagSoilMoistureByPrecipitationEvents( ...@@ -236,9 +215,8 @@ def flagSoilMoistureByPrecipitationEvents(
:param ignore_missing: :param ignore_missing:
""" """
dataseries, moist_rate = retrieveTrustworthyOriginal(data, field, flagger)
# data harmonized: refseries, ref_rate = retrieveTrustworthyOriginal(data, prec_reference, flagger) dataseries, moist_rate = retrieveTrustworthyOriginal(data, field, flagger)
# data not hamronized: # data not hamronized:
refseries = data[prec_reference].dropna() refseries = data[prec_reference].dropna()
...@@ -249,7 +227,7 @@ def flagSoilMoistureByPrecipitationEvents( ...@@ -249,7 +227,7 @@ def flagSoilMoistureByPrecipitationEvents(
return data, flagger return data, flagger
refseries = refseries.reindex(refseries.index.join(dataseries.index, how='outer')) refseries = refseries.reindex(refseries.index.join(dataseries.index, how='outer'))
# get 24 h prec. monitor (this makes last-24h-rainfall-evaluation independent from preceeding entries) # get 24 h prec. monitor
prec_count = refseries.rolling(window="1D").sum() prec_count = refseries.rolling(window="1D").sum()
# exclude data not signifying a raise:: # exclude data not signifying a raise::
if raise_reference is None: if raise_reference is None:
......
...@@ -2,37 +2,43 @@ ...@@ -2,37 +2,43 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# TODO: use the logging module # TODO: use the logging module
import logging
import pandas as pd
import numpy as np
from warnings import warn from warnings import warn
__plotvars = [] __plotvars = []
_colors = dict(unflagged='silver', good='seagreen', bad='firebrick', suspicious='gold')
def plotAllHook(data, flags, flagger):
def plotAllHook(data, flagger):
if len(__plotvars) > 1: if len(__plotvars) > 1:
_plot(data, flags, True, __plotvars, flagger) _plot(data, flagger, True, __plotvars)
def plotHook(data, old, new, varname, do_plot, flag_test, flagger): def plotHook(data, old, new, varname, do_plot, flag_test, plot_nans=True):
# old/new: flagger
if do_plot: if do_plot:
__plotvars.append(varname) __plotvars.append(varname)
# cannot use getFlags here, because if a flag was set (e.g. with force) the # cannot use getFlags here, because if a flag was set (e.g. with force) the
# flag may be the same, but any additional row (e.g. comment-field) would differ # flag may be the same, but any additional row (e.g. comment-field) would differ
mask = (old[varname] == new[varname]).any(axis=1) mask = (old._flags[varname] != new._flags[varname]).any(axis=1)
_plot(data, new, mask, varname, flagger, title=flag_test) _plot(data, new, mask, varname, title=flag_test, plot_nans=plot_nans)
def _plot( def _plot(
data, data,
flags, flagger,
flagmask, flagmask,
varname, varname,
flagger,
interactive_backend=True, interactive_backend=True,
title="Data Plot", title="Data Plot",
show_nans=True, plot_nans=True,
): ):
# todo: try catch warn (once) return
# only import if plotting is requested by the user # only import if plotting is requested by the user
import matplotlib as mpl import matplotlib as mpl
...@@ -47,22 +53,19 @@ def _plot( ...@@ -47,22 +53,19 @@ def _plot(
# needed for datetime conversion # needed for datetime conversion
from pandas.plotting import register_matplotlib_converters from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters() register_matplotlib_converters()
if not isinstance(varname, (list, set)): if not isinstance(varname, (list, set)):
varname = [varname] varname = [varname]
varname = set(varname) varname = set(varname)
# filter out variables to which no data is associated # filter out variables to which no data is associated (e.g. freshly assigned vars)
tmp = [] tmp = []
for var in varname: for var in varname:
if var in data.columns: if var in data.columns:
tmp.append(var) tmp.append(var)
else: else:
warn( logging.warning(f"Cannot plot column '{var}', because it is not present in data.")
f"Cannot plot column '{var}' that is not present in data.", UserWarning
)
if not tmp: if not tmp:
return return
varname = tmp varname = tmp
...@@ -72,62 +75,84 @@ def _plot( ...@@ -72,62 +75,84 @@ def _plot(
fig, axes = plt.subplots(plots, 1, sharex=True) fig, axes = plt.subplots(plots, 1, sharex=True)
axes[0].set_title(title) axes[0].set_title(title)
for i, v in enumerate(varname): for i, v in enumerate(varname):
_plotQflags(data, v, flagger, flagmask, axes[i], show_nans) _plotByQualtyFlag(data, v, flagger, flagmask, axes[i], plot_nans)
else: else:
fig, ax = plt.subplots() fig, ax = plt.subplots()
plt.title(title) plt.title(title)
_plotQflags(data, varname.pop(), flagger, flagmask, ax, show_nans) _plotByQualtyFlag(data, varname.pop(), flagger, flagmask, ax, plot_nans)
plt.xlabel("time")
# dummy plot for the label `missing` see plot_vline for more info # dummy plot for the label `missing` see plot_vline for more info
plt.plot([], [], ":", color="silver", label="missing data") plt.plot([], [], ":", color="silver", label="missing data")
plt.xlabel("time")
plt.legend() plt.legend()
if interactive_backend: if interactive_backend:
plt.show() plt.show()
def _plotQflags(data, varname, flagger, flagmask, ax, show_nans): def _plotByQualtyFlag(data, varname, flagger, flagmask, ax, plot_nans):
ax.set_ylabel(varname) ax.set_ylabel(varname)
x = data.index x = data.index
y = data[varname] y = data[varname]
ax.plot(x, y, "-", markersize=1, color="silver")
# plot all data in silver (NaNs as vertical lines) # base plot: show all(!) data
ax.plot(x, y, "-", color="silver", label="data") ax.plot(x, y, "-", color="silver", label="data")
flagged = flagger.isFlagged(varname)
if show_nans: # ANY OLD FLAG
nans = y.isna() # plot all(!) data that are already flagged in black
idx = y.index[nans & ~flagged] flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>=')
_plotVline(ax, idx, color="silver") oldflags = flagged & ~flagmask
ax.plot(x[oldflags], y[oldflags], ".", color="black", label="flagged by other test")
# plot all data (and nans) that are already flagged in black if plot_nans:
ax.plot(x[flagged], y[flagged], ".", color="black", label="flagged by other test") _plot_nans(y[oldflags], 'black', ax)
if show_nans:
idx = y.index[nans & flagged & ~flagmask] # now we just want to show data that was flagged
_plotVline(ax, idx, color="black") if flagmask is not True:
x = x[flagmask]
y = y[flagmask]
# # plot flags in the color corresponding to the flag flagger = flagger.getFlagger(varname, flagmask)
# # BAD red, GOOD green, all in between aka SUSPISIOUS in yellow
# <<<<<<< HEAD if x.empty:
# for i, f in enumerate(flagger.categories): return
# if i == 0:
# continue suspicious = pd.Series(data=np.ones(len(y), dtype=bool), index=y.index)
# flagged = flagger.isFlagged(varname, flag=f, comparator='==') & flagmask # flag by categories
# =======
# bads = flagger.isFlagged(flags, varname, flag=flagger.BAD, comparator='==') & flagmask # plot UNFLAGGED (only nans are needed)
# good = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='==') & flagmask flag, color = flagger.UNFLAGGED, _colors['unflagged']
# susp = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='>') & flagmask & ~bads flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
# flaglist = [flagger.GOOD, flagger.BAD, 'Suspicious'] ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
# for f, flagged in zip(flaglist, [good, bads, susp]): if plot_nans:
# >>>>>>> master _plot_nans(y[flagged], color, ax)
# label = f"flag: {f}"
# color = _getColor(f, flagger) # plot GOOD
# ax.plot(x[flagged], y[flagged], '.', color=color, label=label) flag, color = flagger.GOOD, _colors['good']
# if show_nans: flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
# idx = y.index[nans & flagged] ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
# _plotVline(ax, idx, color=color) if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot BAD
flag, color = flagger.BAD, _colors['bad']
flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot SUSPICIOS
color = _colors['suspicious']
flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>')
flagged &= flagger.isFlagged(varname, flag=flagger.BAD, comparator='<')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"{flagger.GOOD} < flag < {flagger.BAD}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
def _plot_nans(y, color, ax):
    """Draw a vertical marker line at every position where `y` is NaN.

    :param y: data to inspect; must offer `.isna()`, boolean-mask indexing
        and `.index` (presumably a pandas Series -- verify against callers)
    :param color: color handed through to `_plotVline`
    :param ax: plotting target handed through to `_plotVline`
    """
    # select the missing entries and pass only their index labels on
    _plotVline(ax, y[y.isna()].index, color=color)
def _plotVline(plt, points, color="blue"): def _plotVline(plt, points, color="blue"):
...@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"): ...@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"):
for point in points: for point in points:
plt.axvline(point, color=color, linestyle=":") plt.axvline(point, color=color, linestyle=":")
def _getColor(flag, flagger):
    """Map a flag value to the color name used for plotting it.

    :param flag: the flag value to look up
    :param flagger: object exposing the `UNFLAGGED`, `GOOD` and `BAD` flag values
    :return: "silver" for UNFLAGGED, "green" for GOOD, "red" for BAD and
        "yellow" for any other (suspicious) flag
    """
    # checked in this order; attributes are read lazily, one per comparison
    lookup = (("UNFLAGGED", "silver"), ("GOOD", "green"), ("BAD", "red"))
    for attr_name, color_name in lookup:
        if flag == getattr(flagger, attr_name):
            return color_name
    # suspicious: matches none of the three named flag levels
    return "yellow"
...@@ -29,10 +29,10 @@ def test_flagSoilMoistureBySoilFrost(flagger): ...@@ -29,10 +29,10 @@ def test_flagSoilMoistureBySoilFrost(flagger):
data, flagger_result = flagSoilMoistureBySoilFrost( data, flagger_result = flagSoilMoistureBySoilFrost(
data, "soil_moisture", flagger, "soil_temperature" data, "soil_moisture", flagger, "soil_temperature"
) )
flag_assertion = np.arange(18, 37) flag_assertion = np.arange(19, 37)
flag_result = flagger_result.getFlags("soil_moisture") # .iloc[:, 0] flag_result = flagger_result.getFlags("soil_moisture") # .iloc[:, 0]
test_sum = (flag_result[flag_assertion] == flagger.BAD).sum() assert (flag_result[flag_assertion] == flagger.BAD).all()
assert test_sum == len(flag_assertion)
@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("flagger", TESTFLAGGER)
...@@ -60,4 +60,4 @@ def test_flagSoilMoisturePrecipitationEvents(flagger): ...@@ -60,4 +60,4 @@ def test_flagSoilMoisturePrecipitationEvents(flagger):
if __name__ == "__main__": if __name__ == "__main__":
flagger = TESTFLAGGER[2] flagger = TESTFLAGGER[2]
test_flagSoilMoisturePrecipitationEvents(flagger) test_flagSoilMoistureBySoilFrost(flagger)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment