Skip to content
Snippets Groups Projects
Commit ededc635 authored by David Schäfer
Browse files

bringing the plot functionality back and changes to the default meta headers

parent ba1c656d
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,7 @@ class Fields:
START = "start_date"
END = "end_date"
ASSIGN = "assign"
FLAGS = "check*"
TESTS = "test*"
PLOT = "plot"
......
......@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
from .config import Fields, Params
from .config import Fields
from .evaluator import evalExpression
from ..lib.plotting import plot
from ..lib.tools import setup
......@@ -34,7 +34,8 @@ def collectVariables(meta, flagger, data, flags):
"""
# NOTE: get to know every variable from meta
for idx, configrow in meta.iterrows():
varname, _, _, assign = configrow
varname = configrow[Fields.VARNAME]
assign = configrow[Fields.ASSIGN]
if varname not in flags and \
(varname in data or varname not in data and assign is True):
col_flags = flagger.initFlags(pd.DataFrame(index=data.index,
......@@ -48,11 +49,8 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan):
setup()
meta = prepareMeta(readMeta(metafname), data)
# NOTE: split meta into the test and some 'meta' data
fields = [Fields.VARNAME, Fields.START, Fields.END, Fields.ASSIGN]
tests = meta[meta.columns.to_series().filter(regex=Fields.FLAGS)]
meta = meta[fields]
plotvars = []
tests = meta[meta.columns.to_series().filter(regex=Fields.TESTS)]
meta = meta[meta.columns.difference(tests.columns)]
# NOTE: prep the flags
if flags is None:
......@@ -69,7 +67,10 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan):
if testcol.dropna().empty:
continue
for idx, (varname, start_date, end_date, _) in meta.iterrows():
for idx, configrow in meta.iterrows():
varname = configrow[Fields.VARNAME]
start_date = configrow[Fields.START]
end_date = configrow[Fields.END]
flag_test = testcol[idx]
if pd.isnull(flag_test):
......@@ -92,10 +93,12 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan):
data.loc[start_date:end_date] = dchunk
flags.loc[start_date:end_date] = fchunk.squeeze()
# NOTE: this method should be removed
flagger.nextTest()
# plot all together
if len(plotvars) > 1:
plotvars = meta[meta[Fields.PLOT]][Fields.VARNAME].tolist()
if plotvars:
plot(data, flags, True, plotvars, flagger)
return data, flags
......@@ -110,20 +113,25 @@ def prepareMeta(meta, data):
# everything else from data
# no dates given, fall back to the available index range
if Fields.START not in meta:
meta = meta.assign(**{Fields.START: np.nan})
if Fields.END not in meta:
meta = meta.assign(**{Fields.END: np.nan})
meta = meta.fillna(
{Fields.END: data.index.max(),
Fields.START: data.index.min()})
if Fields.ASSIGN not in meta:
meta = meta.assign(**{Fields.ASSIGN: False})
# rows without a variables name don't help much
meta = meta.dropna(subset=[Fields.VARNAME])
for field in [Fields.VARNAME, Fields.TESTS, Fields.START, Fields.END, Fields.ASSIGN, Fields.PLOT]:
if field not in meta:
meta = meta.assign(**{field: np.nan})
meta = meta.fillna({
Fields.VARNAME: np.nan,
Fields.TESTS: np.nan,
Fields.START: data.index.min(),
Fields.END: data.index.max(),
Fields.ASSIGN: False,
Fields.PLOT: False,
})
if meta[Fields.VARNAME].isna().any():
raise TypeError(f"columns {Fields.VARNAME} is needed")
tests = meta.filter(regex=Fields.TESTS)
if tests.isna().all(axis=1).any():
raise TypeError("at least one test must be given")
dtype = np.datetime64 if isinstance(data.index, pd.DatetimeIndex) else int
......
......@@ -3,7 +3,6 @@
import pytest
import pandas as pd
import numpy as np
from saqc.funcs import register, flagRange
from saqc.core.core import runner
......@@ -35,9 +34,9 @@ def test_temporalPartitioning(data, flagger):
split_date = data.index[len(data.index)//2]
metadict = [
{F.VARNAME: var1, "check": "flagAll()"},
{F.VARNAME: var2, "check": "flagAll()", F.END: split_date},
{F.VARNAME: var3, "check": "flagAll()", F.START: split_date},
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_date},
{F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_date},
]
meta_file, meta_frame = initMetaDict(metadict, data)
......@@ -58,9 +57,9 @@ def test_positionalPartitioning(data, flagger):
split_index = int(len(data.index)//2)
metadict = [
{F.VARNAME: var1, "check": "flagAll()"},
{F.VARNAME: var2, "check": "flagAll()", F.END: split_index},
{F.VARNAME: var3, "check": "flagAll()", F.START: split_index},
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_index},
{F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_index},
]
meta_file, meta_frame = initMetaDict(metadict, data)
......@@ -82,7 +81,7 @@ def test_missingConfig(data, flagger):
"""
var1, var2, *_ = data.columns
metadict = [{F.VARNAME: var1, "check": "flagAll()"}]
metadict = [{F.VARNAME: var1, F.TESTS: "flagAll()"}]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data)
......@@ -100,8 +99,8 @@ def test_missingVariable(flagger):
var, *_ = data.columns
metadict = [
{F.VARNAME: var, "check": "flagAll()"},
{F.VARNAME: "empty", "check": "flagAll()"},
{F.VARNAME: var, F.TESTS: "flagAll()"},
{F.VARNAME: "empty", F.TESTS: "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
......@@ -121,8 +120,8 @@ def test_assignVariable(flagger):
var2 = "empty"
metadict = [
{F.VARNAME: var1, F.ASSIGN: False, "check": "flagAll()"},
{F.VARNAME: var2, F.ASSIGN: True, "check": "flagAll()"},
{F.VARNAME: var1, F.ASSIGN: False, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.ASSIGN: True, F.TESTS: "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
......@@ -148,8 +147,8 @@ def test_dtypes(data, flagger):
var1, var2, *_ = data.columns
metadict = [
{F.VARNAME: var1, "check": "flagAll()"},
{F.VARNAME: var2, "check": "flagAll()"},
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, "test": "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data, flags)
......@@ -157,22 +156,57 @@ def test_dtypes(data, flagger):
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_plotting(flagger):
""" Test if the plotting code runs. does not show any plot.
Note:
This test is ignored if matplotlib is not available on the test-system
def test_plotting(data, flagger):
"""
Test if the plotting code runs, does not show any plot.
NOTE:
This test is ignored if matplotlib is not available on the test-system
"""
pytest.importorskip("matplotlib", reason="requires matplotlib")
field = 'testdata'
index = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
data = pd.DataFrame(data={field: np.linspace(0, index.size - 1, index.size)}, index=index)
field, *_ = data.columns
flags = flagger.initFlags(data)
_, flagged = flagRange(data, flags, field, flagger, min=10, max=90, flag=flagger.BAD)
_, flagged = flagRange(data, flagged, field, flagger, min=40, max=60, flag=flagger.GOOD)
mask = flagger.getFlags(flags[field]) != flagger.getFlags(flagged[field])
plot(data, flagged, mask, field, flagger, interactive_backend=False)
def test_configReader():
meta = """
var1|2012-01-01|
"""
def test_configReader(data):
    """
    Check the meta frame built from single-row configurations.

    For every configuration the first row of the frame returned by
    `initMetaDict` must equal the configuration itself, merged over the
    expected fallback values for start, end, assign and plot.
    """
    first, second, third, *_ = data.columns
    midpoint = data.index[len(data.index) // 2]

    # values expected for every field a configuration leaves out
    fallback = {
        F.START: data.index.min(),
        F.END: data.index.max(),
        F.ASSIGN: False,
        F.PLOT: False,
    }

    configs = [
        {F.VARNAME: first, F.START: midpoint, F.TESTS: "flagAll()", F.PLOT: True},
        {F.VARNAME: second, F.TESTS: "flagAll()", F.PLOT: False},
        {F.VARNAME: third, F.END: midpoint, F.TESTS: "flagAll()", F.ASSIGN: True},
        {F.VARNAME: third, F.TESTS: "flagAll()"},
    ]

    for config in configs:
        _, meta_frame = initMetaDict([config], data)
        first_row = meta_frame.iloc[0]
        result = dict(zip(meta_frame.columns, first_row))

        # explicitly given fields take precedence over the fallbacks
        expected = dict(fallback)
        expected.update(config)
        assert result == expected
def test_configReaderExcpetion(data):
    """
    Check that incomplete configurations are rejected.

    Every configuration below lacks the variable name, a test, or both;
    passing it to `initMetaDict` is expected to raise a `TypeError`.
    """
    _first, second, third, *_ = data.columns
    midpoint = data.index[len(data.index) // 2]

    nothing_given = {}
    varname_missing = {F.TESTS: "flagAll()"}
    test_missing = {F.VARNAME: second}
    test_missing_with_extras = {F.VARNAME: third, F.END: midpoint, F.ASSIGN: True}

    invalid_configs = (
        nothing_given,
        varname_missing,
        test_missing,
        test_missing_with_extras,
    )
    for config in invalid_configs:
        with pytest.raises(TypeError):
            initMetaDict([config], data)
......@@ -21,10 +21,10 @@ def test_basic():
metadata = [
{Fields.VARNAME: var1,
"check_1": f"generic(func=this < {var1mean}, flag='DOUBTFUL')",
"check_2": f"range(min=10, max=20, comment='saqc')"},
"test_1": f"generic(func=this < {var1mean}, flag='DOUBTFUL')",
"test_2": f"range(min=10, max=20, comment='saqc')"},
{Fields.VARNAME: var2,
"check_1": f"generic(func=this > {var1mean}, cause='error')"}
"test_1": f"generic(func=this > {var1mean}, cause='error')"}
]
meta_file, _ = initMetaDict(metadata, data)
......@@ -53,9 +53,9 @@ def test_flagOrder():
metadata = [
{Fields.VARNAME: var,
"check": f"generic(func=this > mean(this), flag='{flagger.BAD}')"},
"test": f"generic(func=this > mean(this), flag='{flagger.BAD}')"},
{Fields.VARNAME: var,
"check": f"generic(func=this >= min(this), flag='{flagger.GOOD}')"},
"test": f"generic(func=this >= min(this), flag='{flagger.GOOD}')"},
]
meta_file, _ = initMetaDict(metadata, data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment