From b6349c99e3f767fbc71b7fff3fd0eb8dd4fa773e Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Mon, 22 Mar 2021 16:38:48 +0100 Subject: [PATCH] improved harm tests --- tests/common.py | 46 ++++++++++++++ tests/funcs/test_harm_funcs.py | 112 ++++++++++++++++++++++----------- 2 files changed, 121 insertions(+), 37 deletions(-) diff --git a/tests/common.py b/tests/common.py index 3e70ff349..e225b6410 100644 --- a/tests/common.py +++ b/tests/common.py @@ -42,3 +42,49 @@ def writeIO(content): return f +def checkDataFlaggerInvariants(data, flagger, field, identical=True): + """ + Check all invariants that must hold at any point for + * field + * data + * flagger + * data[field] + * flagger[field] + * data[field].index + * flagger[field].index + * between data and flagger + * between data[field] and flagger[field] + + Parameters + ---------- + data : dios.DictOfSeries + data container + flagger : Flags + flags container + field : str + the field in question + identical : bool, default True + whether to check indexes of data and flagger to be + identical (True, default) or just for equality. 
+ """ + assert isinstance(data, dios.DictOfSeries) + assert isinstance(flagger, Flagger) + + # all columns in data are in flagger + assert data.columns.difference(flagger.columns).empty + + # ------------------------------------------------------------------------ + # below here, we just check on and with field + # ------------------------------------------------------------------------ + assert field in data + assert field in flagger + + assert flagger[field].dtype == float + + # `pd.Index.identical` also check index attributes like `freq` + if identical: + assert data[field].index.identical(flagger[field].index) + else: + assert data[field].index.equals(flagger[field].index) + + diff --git a/tests/funcs/test_harm_funcs.py b/tests/funcs/test_harm_funcs.py index 3b1ec42c6..7cc99a7cf 100644 --- a/tests/funcs/test_harm_funcs.py +++ b/tests/funcs/test_harm_funcs.py @@ -16,6 +16,8 @@ from saqc.funcs.resampling import ( mapToOriginal, ) +from tests.common import checkDataFlaggerInvariants + @pytest.fixture def data(): @@ -33,6 +35,74 @@ def data(): return data +@pytest.mark.parametrize('func, kws', [ + ('linear', dict()), + ('shift', dict(method="nshift")), + ('interpolate', dict(method="spline")), + ('aggregate', dict(value_func=np.nansum, method="nagg")), +]) +def test_wrapper(data, func, kws): + field = 'data' + freq = "15min" + flagger = initFlagsLike(data) + + import saqc + func = getattr(saqc.funcs, func) + data, flagger = func(data, field, flagger, freq, **kws) + + # check minimal requirements + checkDataFlaggerInvariants(data, flagger, field) + assert data[field].index.freq == pd.Timedelta(freq) + + +@pytest.mark.parametrize("method", ["time", "polynomial"]) +def test_gridInterpolation(data, method): + freq = "15min" + field = 'data' + data = data[field] + data = (data * np.sin(data)).append(data.shift(1, "2h")).shift(1, "3s") + data = dios.DictOfSeries(data) + flagger = initFlagsLike(data) + + # we are just testing if the interpolation gets passed to the series 
without causing an error: + res = interpolate(data, field, flagger, freq, method=method, downcast_interpolation=True) + + if method == "polynomial": + res = interpolate(data, field, flagger, freq, order=2, method=method, downcast_interpolation=True) + res = interpolate(data, field, flagger, freq, order=10, method=method, downcast_interpolation=True) + + # check minimal requirements + rdata, rflagger = res + checkDataFlaggerInvariants(rdata, rflagger, field, identical=False) + assert rdata[field].index.freq == pd.Timedelta(freq) + + +@pytest.mark.parametrize('func, kws', [ + ('linear', dict()), + ('shift', dict(method="nshift")), + ('interpolate', dict(method="spline")), + ('aggregate', dict(value_func=np.nansum, method="nagg")), +]) +def test_flagsSurviveReshaping(reshaper): + """ + flagging -> reshaping -> test (flags also was reshaped correctly) + """ + pass + + +def test_flagsSurviveInverseReshaping(): + """ + inverse reshaping -> flagging -> test (flags also was reshaped correctly)""" + pass + + +def test_flagsSurviveBackprojection(): + """ + flagging -> reshaping -> inverse reshaping -> test (flags == original-flags) + """ + pass + + @pytest.mark.parametrize("reshaper", ["nshift", "fshift", "bshift", "nagg", "bagg", "fagg", "interpolation"]) def test_harmSingleVarIntermediateFlagging(data, reshaper): flagger = initFlagsLike(data) @@ -96,6 +166,7 @@ def test_harmSingleVarIntermediateFlagging(data, reshaper): def test_harmSingleVarInterpolationAgg(data, params, expected): flagger = initFlagsLike(data) field = 'data' + pre_data = data.copy() pre_flaggger = flagger.copy() method, freq = params @@ -111,14 +182,14 @@ def test_harmSingleVarInterpolationAgg(data, params, expected): @pytest.mark.parametrize( 'params, expected', [ - (("fshift", "15Min"), pd.Series(data=[np.nan, -37.5, -25.0, 0.0, 37.5, 50.0], index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"))), - (("fshift", "30Min"), pd.Series(data=[np.nan, -37.5, 0.0, 50.0], 
index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"))), (("bshift", "15Min"), pd.Series(data=[-50.0, -37.5, -25.0, 12.5, 37.5, 50.0], index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"))), - (("bshift", "30Min"), pd.Series(data=[-50.0, -37.5, 12.5, 50.0], index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"))), + (("fshift", "15Min"), pd.Series(data=[np.nan, -37.5, -25.0, 0.0, 37.5, 50.0], index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"))), (("nshift", "15min"), pd.Series(data=[np.nan, -37.5, -25.0, 12.5, 37.5, 50.0], index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"))), + (("bshift", "30Min"), pd.Series(data=[-50.0, -37.5, 12.5, 50.0], index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"))), + (("fshift", "30Min"), pd.Series(data=[np.nan, -37.5, 0.0, 50.0], index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"))), (("nshift", "30min"), pd.Series(data=[np.nan, -37.5, 12.5, 50.0], index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"))), ]) -def test_harmSingleVarInterpolationShift(data, params, expected): +def test_harmSingleVarInterpolationShift(data, params, expected): flagger = initFlagsLike(data) field = 'data' pre_data = data.copy() @@ -133,36 +204,3 @@ def test_harmSingleVarInterpolationShift(data, params, expected): assert flagger_deharm[field].equals(pre_flagger[field]) -@pytest.mark.parametrize("method", ["time", "polynomial"]) -def test_gridInterpolation(data, method): - freq = "15min" - field = 'data' - data = data[field] - data = (data * np.sin(data)).append(data.shift(1, "2h")).shift(1, "3s") - data = dios.DictOfSeries(data) - flagger = initFlagsLike(data) - - # we are just testing if the interpolation gets passed to the series without causing an error: - interpolate(data, field, flagger, freq, method=method, downcast_interpolation=True) - 
- if method == "polynomial": - interpolate(data, field, flagger, freq, order=2, method=method, downcast_interpolation=True) - interpolate(data, field, flagger, freq, order=10, method=method, downcast_interpolation=True) - - -@pytest.mark.parametrize('func, kws', [ - ('linear', dict(to_drop=None)), - ('shift', dict(method="nshift", to_drop=None)), - ('interpolate', dict(method="spline")), - ('aggregate', dict(value_func=np.nansum, method="nagg", to_drop=None)), -]) -def test_wrapper(data, func, kws): - # we are only testing, whether the wrappers do pass processing: - field = 'data' - freq = "15min" - flagger = initFlagsLike(data) - - import saqc - func = getattr(saqc.funcs, func) - func(data, field, flagger, freq, **kws) - -- GitLab