Skip to content
Snippets Groups Projects
Commit e91368f7 authored by David Schäfer's avatar David Schäfer
Browse files

Merge branch 'master' of https://git.ufz.de/rdm/saqc

parents 976a238a 2fdf7c3d
No related branches found
No related tags found
No related merge requests found
......@@ -21,6 +21,8 @@ def _collectVariables(meta, data):
for idx, configrow in meta.iterrows():
varname = configrow[Fields.VARNAME]
assign = configrow[Fields.ASSIGN]
if varname in flags:
continue
if varname in data:
flags.append(varname)
elif varname not in flags and assign is True:
......@@ -73,14 +75,14 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
meta = config[config.columns.difference(tests.columns)]
# # prepare the flags
# varnames = collectVariables(meta, data)
# fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames))
# flags = fresh if flags is None else flags.join(fresh)
if flags is None:
flag_cols = _collectVariables(meta, data)
flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols))
else:
flagger = flagger.initFlags(flags=flags)
varnames = _collectVariables(meta, data)
fresh = flagger.initFlags(pd.DataFrame(index=data.index, columns=varnames))
flagger = fresh if flags is None else flags._flags.join(fresh._flags)
# if flags is None:
# flag_cols = _collectVariables(meta, data)
# flagger = flagger.initFlags(pd.DataFrame(index=data.index, columns=flag_cols))
# else:
# flagger = flagger.initFlags(flags=flags)
# this check comes late, but compiling the user tests needs fully prepared flags
checkConfig(config, data, flagger, nodata)
......@@ -129,8 +131,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan, error_policy="ra
continue
flagger = flagger.setFlagger(flagger_chunk_result)
# plotHook(dchunk, fchunk, ffchunk, varname, configrow[Fields.PLOT], flag_test, flagger)
# plotAllHook(data, flags, flagger)
plotHook(dchunk, flagger_chunk, flagger_chunk_result, varname, configrow[Fields.PLOT], flag_test)
plotAllHook(data, flagger)
return data, flagger
......
......@@ -14,6 +14,7 @@ from saqc.lib.tools import toSequence, assertScalar, assertDataFrame
COMPARATOR_MAP = {
"!=": op.ne,
"==": op.eq,
">=": op.ge,
">": op.gt,
......
......@@ -69,6 +69,9 @@ def harmWrapper(heap={}):
data, flagger = flagMissing(
data, field, flagger, nodata=data_missing_value, **kwargs
)
# and dropped for harmonization:
if drop_flags is not None:
drop_flags.append(flagger.BAD)
# before sending the current flags and data frame to the future (for backtracking reasons), we clear it
# from merge-nans that just resulted from harmonization of other variables!
......
......@@ -129,7 +129,7 @@ def flagSoilMoistureBySoilFrost(
"""
# retrieve reference series
refseries = data[soil_temp_reference]
refseries = data[soil_temp_reference].copy()
ref_use = flagger.isFlagged(
soil_temp_reference, flag=flagger.GOOD, comparator="=="
) | flagger.isFlagged(soil_temp_reference, flag=flagger.UNFLAGGED, comparator="==")
......@@ -137,35 +137,14 @@ def flagSoilMoistureBySoilFrost(
refseries = refseries[ref_use.values]
# drop nan values from reference series, since those are values you don't want to refer to.
refseries = refseries.dropna()
# skip further processing if reference series is empty:
if refseries.empty:
return data, flagger
# wrap around df.index.get_loc method, to catch key error in case of empty tolerance window:
def _checkNearestForFrost(ref_date, ref_series, tolerance, check_level):
try:
# if there is no reference value within tolerance margin, following line will raise key error and
# trigger the exception
ref_pos = ref_series.index.get_loc(
ref_date, method="nearest", tolerance=tolerance
)
except KeyError:
# since test is not applicable: make no change to flag state
return False
# if reference value index is available, return comparison result (to determine flag)
return ref_series[ref_pos] <= check_level
# make temporary frame holding date index, since df.apply can't access index
temp_frame = pd.Series(data.index)
# get flagging mask ("True" denotes "bad"="test successful")
mask = temp_frame.apply(
_checkNearestForFrost, args=(refseries, tolerated_deviation, frost_level)
)
# apply calculated flags
flagger = flagger.setFlags(field, mask.values, **kwargs)
refseries = refseries.reindex(data[field].dropna().index, method="nearest", tolerance=tolerated_deviation)
refseries = refseries[refseries < frost_level].index
flagger = flagger.setFlags(field, refseries, **kwargs)
return data, flagger
......@@ -236,9 +215,8 @@ def flagSoilMoistureByPrecipitationEvents(
:param ignore_missing:
"""
dataseries, moist_rate = retrieveTrustworthyOriginal(data, field, flagger)
# data harmonized: refseries, ref_rate = retrieveTrustworthyOriginal(data, prec_reference, flagger)
dataseries, moist_rate = retrieveTrustworthyOriginal(data, field, flagger)
# data not harmonized:
refseries = data[prec_reference].dropna()
......@@ -249,7 +227,7 @@ def flagSoilMoistureByPrecipitationEvents(
return data, flagger
refseries = refseries.reindex(refseries.index.join(dataseries.index, how='outer'))
# get 24 h prec. monitor (this makes last-24h-rainfall-evaluation independent from preceding entries)
# get 24 h prec. monitor
prec_count = refseries.rolling(window="1D").sum()
# exclude data not signifying a raise::
if raise_reference is None:
......
......@@ -2,37 +2,43 @@
# -*- coding: utf-8 -*-
# TODO: use the logging module
import logging
import pandas as pd
import numpy as np
from warnings import warn
__plotvars = []
_colors = dict(unflagged='silver', good='seagreen', bad='firebrick', suspicious='gold')
def plotAllHook(data, flags, flagger):
def plotAllHook(data, flagger):
if len(__plotvars) > 1:
_plot(data, flags, True, __plotvars, flagger)
_plot(data, flagger, True, __plotvars)
def plotHook(data, old, new, varname, do_plot, flag_test, flagger):
def plotHook(data, old, new, varname, do_plot, flag_test, plot_nans=True):
# old/new: flagger
if do_plot:
__plotvars.append(varname)
# cannot use getFlags here, because if a flag was set (e.g. with force) the
# flag may be the same, but any additional row (e.g. comment-field) would differ
mask = (old[varname] == new[varname]).any(axis=1)
_plot(data, new, mask, varname, flagger, title=flag_test)
mask = (old._flags[varname] != new._flags[varname]).any(axis=1)
_plot(data, new, mask, varname, title=flag_test, plot_nans=plot_nans)
def _plot(
data,
flags,
flagger,
flagmask,
varname,
flagger,
interactive_backend=True,
title="Data Plot",
show_nans=True,
plot_nans=True,
):
# todo: try catch warn (once) return
# only import if plotting is requested by the user
import matplotlib as mpl
......@@ -47,22 +53,19 @@ def _plot(
# needed for datetime conversion
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
if not isinstance(varname, (list, set)):
varname = [varname]
varname = set(varname)
# filter out variables to which no data is associated
# filter out variables to which no data is associated (e.g. freshly assigned vars)
tmp = []
for var in varname:
if var in data.columns:
tmp.append(var)
else:
warn(
f"Cannot plot column '{var}' that is not present in data.", UserWarning
)
logging.warning(f"Cannot plot column '{var}', because it is not present in data.")
if not tmp:
return
varname = tmp
......@@ -72,62 +75,84 @@ def _plot(
fig, axes = plt.subplots(plots, 1, sharex=True)
axes[0].set_title(title)
for i, v in enumerate(varname):
_plotQflags(data, v, flagger, flagmask, axes[i], show_nans)
_plotByQualtyFlag(data, v, flagger, flagmask, axes[i], plot_nans)
else:
fig, ax = plt.subplots()
plt.title(title)
_plotQflags(data, varname.pop(), flagger, flagmask, ax, show_nans)
_plotByQualtyFlag(data, varname.pop(), flagger, flagmask, ax, plot_nans)
plt.xlabel("time")
# dummy plot for the label `missing`, see _plotVline for more info
plt.plot([], [], ":", color="silver", label="missing data")
plt.xlabel("time")
plt.legend()
if interactive_backend:
plt.show()
def _plotQflags(data, varname, flagger, flagmask, ax, show_nans):
def _plotByQualtyFlag(data, varname, flagger, flagmask, ax, plot_nans):
ax.set_ylabel(varname)
x = data.index
y = data[varname]
ax.plot(x, y, "-", markersize=1, color="silver")
# plot all data in silver (NaNs as vertical lines)
# base plot: show all(!) data
ax.plot(x, y, "-", color="silver", label="data")
flagged = flagger.isFlagged(varname)
if show_nans:
nans = y.isna()
idx = y.index[nans & ~flagged]
_plotVline(ax, idx, color="silver")
# plot all data (and nans) that are already flagged in black
ax.plot(x[flagged], y[flagged], ".", color="black", label="flagged by other test")
if show_nans:
idx = y.index[nans & flagged & ~flagmask]
_plotVline(ax, idx, color="black")
# # plot flags in the color corresponding to the flag
# # BAD red, GOOD green, all in between aka SUSPICIOUS in yellow
# <<<<<<< HEAD
# for i, f in enumerate(flagger.categories):
# if i == 0:
# continue
# flagged = flagger.isFlagged(varname, flag=f, comparator='==') & flagmask
# =======
# bads = flagger.isFlagged(flags, varname, flag=flagger.BAD, comparator='==') & flagmask
# good = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='==') & flagmask
# susp = flagger.isFlagged(flags, varname, flag=flagger.GOOD, comparator='>') & flagmask & ~bads
# flaglist = [flagger.GOOD, flagger.BAD, 'Suspicious']
# for f, flagged in zip(flaglist, [good, bads, susp]):
# >>>>>>> master
# label = f"flag: {f}"
# color = _getColor(f, flagger)
# ax.plot(x[flagged], y[flagged], '.', color=color, label=label)
# if show_nans:
# idx = y.index[nans & flagged]
# _plotVline(ax, idx, color=color)
# ANY OLD FLAG
# plot all(!) data that are already flagged in black
flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>=')
oldflags = flagged & ~flagmask
ax.plot(x[oldflags], y[oldflags], ".", color="black", label="flagged by other test")
if plot_nans:
_plot_nans(y[oldflags], 'black', ax)
# now we just want to show data that was flagged
if flagmask is not True:
x = x[flagmask]
y = y[flagmask]
flagger = flagger.getFlagger(varname, flagmask)
if x.empty:
return
suspicious = pd.Series(data=np.ones(len(y), dtype=bool), index=y.index)
# flag by categories
# plot UNFLAGGED (only nans are needed)
flag, color = flagger.UNFLAGGED, _colors['unflagged']
flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot GOOD
flag, color = flagger.GOOD, _colors['good']
flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot BAD
flag, color = flagger.BAD, _colors['bad']
flagged = flagger.isFlagged(varname, flag=flag, comparator='==')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"flag: {flag}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
# plot SUSPICIOUS
color = _colors['suspicious']
flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator='>')
flagged &= flagger.isFlagged(varname, flag=flagger.BAD, comparator='<')
ax.plot(x[flagged], y[flagged], '.', color=color, label=f"{flagger.GOOD} < flag < {flagger.BAD}")
if plot_nans:
_plot_nans(y[flagged], color, ax)
def _plot_nans(y, color, ax):
    """Mark the missing values of `y` on `ax`.

    :param y: series whose NaN entries are marked
    :param color: color forwarded to `_plotVline`
    :param ax: drawing target forwarded to `_plotVline`
    """
    # select the index labels of all NaN entries and delegate the drawing
    missing_idx = y.index[y.isna().values]
    _plotVline(ax, missing_idx, color=color)
def _plotVline(plt, points, color="blue"):
......@@ -137,14 +162,3 @@ def _plotVline(plt, points, color="blue"):
for point in points:
plt.axvline(point, color=color, linestyle=":")
def _getColor(flag, flagger):
    """Return the color name used to plot `flag`.

    :param flag: the flag value to look up
    :param flagger: object providing the `UNFLAGGED`, `GOOD` and `BAD` flags
    :return: "silver", "green" or "red" for the three named flags,
        "yellow" for any other (suspicious) flag
    """
    # the attributes are looked up lazily and in priority order, so the first match wins
    named_flags = (("UNFLAGGED", "silver"), ("GOOD", "green"), ("BAD", "red"))
    for attr, color in named_flags:
        if flag == getattr(flagger, attr):
            return color
    # everything else counts as suspicious
    return "yellow"
......@@ -29,10 +29,10 @@ def test_flagSoilMoistureBySoilFrost(flagger):
data, flagger_result = flagSoilMoistureBySoilFrost(
data, "soil_moisture", flagger, "soil_temperature"
)
flag_assertion = np.arange(18, 37)
flag_assertion = np.arange(19, 37)
flag_result = flagger_result.getFlags("soil_moisture") # .iloc[:, 0]
test_sum = (flag_result[flag_assertion] == flagger.BAD).sum()
assert test_sum == len(flag_assertion)
assert (flag_result[flag_assertion] == flagger.BAD).all()
@pytest.mark.parametrize("flagger", TESTFLAGGER)
......@@ -60,4 +60,4 @@ def test_flagSoilMoisturePrecipitationEvents(flagger):
if __name__ == "__main__":
flagger = TESTFLAGGER[2]
test_flagSoilMoisturePrecipitationEvents(flagger)
test_flagSoilMoistureBySoilFrost(flagger)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment