From d2785dae7567d0616efbf9022bb69fa4daa8705b Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Thu, 4 Mar 2021 02:11:42 +0100
Subject: [PATCH] fixed flagger syntax in most tests

---
 test/common.py                              |  23 +--
 test/core/test_core.py                      |  52 +++---
 .../{test_core_new.py => test_creation.py}  |   0
 test/core/test_masking.py                   |   3 +-
 test/core/test_reader.py                    |  15 +-
 test/flagger/test_dmpflagger.py             |   4 +-
 test/flagger/test_positionalflagger.py      |   3 +-
 test/funcs/conftest.py                      |  92 +++++-----
 test/funcs/test_constants_detection.py      |  24 ++-
 test/funcs/test_functions.py                | 165 ++++++++++--------
 test/funcs/test_generic_api_functions.py    |  46 ++---
 test/funcs/test_generic_config_functions.py |  69 ++++----
 test/funcs/test_pattern_rec.py              |  34 ++--
 test/funcs/test_spikes_detection.py         |  61 +++----
 14 files changed, 288 insertions(+), 303 deletions(-)
 rename test/core/{test_core_new.py => test_creation.py} (100%)

diff --git a/test/common.py b/test/common.py
index e07cc5cfb..f774cd5ed 100644
--- a/test/common.py
+++ b/test/common.py
@@ -24,29 +24,22 @@ from hypothesis.strategies._internal.types import _global_type_lookup
 
 from dios import DictOfSeries
 
+from saqc.common import *
 from saqc.core.register import FUNC_MAP
 from saqc.core.lib import SaQCFunction
 from saqc.lib.types import FreqString, ColumnName, IntegerWindow
-from saqc.flagger import (
-    CategoricalFlagger,
-    SimpleFlagger,
-    DmpFlagger,
-)
+from saqc.flagger import Flagger, initFlagsLike
 
 TESTNODATA = (np.nan, -9999)
-
-
-TESTFLAGGER = (
-    CategoricalFlagger(["NIL", "GOOD", "BAD"]),
-    SimpleFlagger(),
-    DmpFlagger(),
-)
+TESTFLAGGER = (Flagger(),)
 
 
 def flagAll(data, field, flagger, **kwargs):
     # NOTE: remember to rename flag -> flag_values
-    return data, flagger.setFlags(field=field, flag=flagger.BAD)
+    flagger = flagger.copy()
+    flagger[:, field] = BAD
+    return data, flagger
 
 
 def initData(cols=2, start_date="2017-01-01", end_date="2017-12-31", freq=None, rows=None):
@@ -125,10 +118,10 @@ def flaggers(draw, data):
     initialize a flagger and set some flags
     """
     # flagger = draw(sampled_from(TESTFLAGGER)).initFlags(data)
-    flagger = draw(sampled_from([SimpleFlagger()])).initFlags(data)
+    flagger = initFlagsLike(data)
     for col, srs in data.items():
         loc_st = lists(sampled_from(sorted(srs.index)), unique=True, max_size=len(srs)-1)
-        flagger = flagger.setFlags(field=col, loc=draw(loc_st))
+        flagger[draw(loc_st), col] = BAD
 
     return flagger
 
diff --git a/test/core/test_core.py b/test/core/test_core.py
index d409eb6a8..5527f2ee2 100644
--- a/test/core/test_core.py
+++ b/test/core/test_core.py
@@ -8,10 +8,12 @@ import numpy as np
 
 import pandas as pd
 
-from saqc import SaQC, register
+from saqc.common import *
+from saqc.flagger import Flagger, initFlagsLike
 from saqc.funcs import flagRange
 from saqc.lib import plotting as splot
 from test.common import initData, TESTFLAGGER, flagAll
+from saqc import SaQC, register
 
 
 # no logging output needed here
@@ -31,13 +33,12 @@ def data():
 
 
 @pytest.fixture
-def flags(flagger, data, optional):
+def flags(data, optional):
     if not optional:
-        return flagger.initFlags(data[data.columns[::2]])._flags
+        return initFlagsLike(data[data.columns[::2]]).toDios()
 
 
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_errorHandling(data, flagger):
+def test_errorHandling(data):
 
     @register(masking='field')
     def raisingFunc(data, field, flagger, **kwargs):
@@ -47,18 +48,17 @@
 
     for policy in ["ignore", "warn"]:
         # NOTE: should not fail, that's all we are testing here
-        SaQC(flagger, data, 
error_policy=policy).raisingFunc(var1).getResult() + SaQC(data, error_policy=policy).raisingFunc(var1).getResult() with pytest.raises(TypeError): - SaQC(flagger, data, error_policy='raise').raisingFunc(var1).getResult() + SaQC(data, error_policy='raise').raisingFunc(var1).getResult() -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_duplicatedVariable(flagger): +def test_duplicatedVariable(): data = initData(1) var1 = data.columns[0] - pdata, pflags = SaQC(flagger, data).flagtools.flagDummy(var1).getResult() + pdata, pflags = SaQC(data).flagtools.flagDummy(var1).getResult() if isinstance(pflags.columns, pd.MultiIndex): cols = pflags.columns.get_level_values(0).drop_duplicates() @@ -67,8 +67,7 @@ def test_duplicatedVariable(flagger): assert (pflags.columns == [var1]).all() -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_sourceTarget(flagger): +def test_sourceTarget(): """ test implicit assignments """ @@ -76,32 +75,29 @@ def test_sourceTarget(flagger): var1 = data.columns[0] target = "new" - pdata, pflagger = SaQC(flagger, data).flagAll(field=var1, target=target).getResult(raw=True) - pflags = pflagger.isFlagged() + pdata, pflagger = SaQC(data).flagAll(field=var1, target=target).getResult(raw=True) assert (pdata[var1] == pdata[target]).all(axis=None) - assert (pflags[var1] == False).all(axis=None) - assert (pflags[target] == True).all(axis=None) + assert all(pflagger[var1] == UNFLAGGED) + assert all(pflagger[target] > UNFLAGGED) -@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("optional", OPTIONAL) -def test_dtypes(data, flagger, flags): +def test_dtypes(data, flags): """ Test if the categorical dtype is preserved through the core functionality """ - flagger = flagger.initFlags(data) - flags = flagger.getFlags() + flagger = initFlagsLike(data) + flags = flagger.toDios() var1, var2 = data.columns[:2] - pdata, pflagger = SaQC(flagger, data, flags=flags).flagAll(var1).flagAll(var2).getResult(raw=True) + pdata, pflagger = SaQC(data, flags=flags).flagAll(var1).flagAll(var2).getResult(raw=True) - pflags = pflagger.getFlags() - assert dict(flags.dtypes) == dict(pflags.dtypes) + for c in pflagger.columns: + assert pflagger[c].dtype == flagger[c].dtype -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_plotting(data, flagger): +def test_plotting(data): """ Test if the plotting code runs, does not show any plot. 
@@ -110,9 +106,9 @@ def test_plotting(data, flagger): """ pytest.importorskip("matplotlib", reason="requires matplotlib") field, *_ = data.columns - flagger = flagger.initFlags(data) - _, flagger_range = flagRange(data, field, flagger, min=10, max=90, flag=flagger.BAD) - data_new, flagger_range = flagRange(data, field, flagger_range, min=40, max=60, flag=flagger.GOOD) + flagger = initFlagsLike(data) + _, flagger_range = flagRange(data, field, flagger, min=10, max=90, flag=BAD) + data_new, flagger_range = flagRange(data, field, flagger_range, min=40, max=60, flag=DOUBT) splot._interactive = False splot._plotSingleVariable(data, data_new, flagger, flagger_range, sources=[], targets=[data_new.columns[0]]) splot._plotMultipleVariables(data, data_new, flagger, flagger_range, targets=data_new.columns) diff --git a/test/core/test_core_new.py b/test/core/test_creation.py similarity index 100% rename from test/core/test_core_new.py rename to test/core/test_creation.py diff --git a/test/core/test_masking.py b/test/core/test_masking.py index 4d285eabf..48207621e 100644 --- a/test/core/test_masking.py +++ b/test/core/test_masking.py @@ -12,7 +12,8 @@ from hypothesis.strategies import ( sampled_from, ) -from saqc.core.core import _maskData, _unmaskData +from saqc.common import * +from saqc.core.register import _maskData, _unmaskData from test.common import dataFieldFlagger, MAX_EXAMPLES diff --git a/test/core/test_reader.py b/test/core/test_reader.py index ef5b1c841..ce8438eff 100644 --- a/test/core/test_reader.py +++ b/test/core/test_reader.py @@ -12,7 +12,6 @@ from saqc.core.config import Fields as F from test.common import initData, writeIO from saqc.core.core import SaQC -from saqc.flagger import SimpleFlagger from saqc.core.register import FUNC_MAP, register @@ -29,7 +28,7 @@ def test_packagedConfig(): data_path = path / "data.csv" data = pd.read_csv(data_path, index_col=0, parse_dates=True,) - saqc = SaQC(SimpleFlagger(), dios.DictOfSeries(data)).readConfig(config_path) + saqc = SaQC(dios.DictOfSeries(data)).readConfig(config_path) saqc.getResult() @@ -46,7 +45,7 @@ def test_variableRegex(data): for regex, expected in tests: fobj = writeIO(header + "\n" + f"{regex} ; flagtools.flagDummy()") - saqc = SaQC(SimpleFlagger(), data).readConfig(fobj) + saqc = SaQC(data).readConfig(fobj) expansion = saqc._expandFields(saqc._to_call[0][0], saqc._to_call[0][2], data.columns) result = [s.field for s, _ in expansion] assert np.all(result == expected) @@ -60,7 +59,7 @@ def test_inlineComments(data): {F.VARNAME} ; {F.TEST} ; {F.PLOT} pre2 ; flagtools.flagDummy() # test ; False # test """ - saqc = SaQC(SimpleFlagger(), data).readConfig(writeIO(config)) + saqc = SaQC(data).readConfig(writeIO(config)) _, control, func = saqc._to_call[0] assert control.plot is False assert func.func == FUNC_MAP["flagtools.flagDummy"].func @@ -78,7 +77,7 @@ def test_configReaderLineNumbers(data): SM1 ; flagtools.flagDummy() """ - saqc = SaQC(SimpleFlagger(), data).readConfig(writeIO(config)) + saqc = SaQC(data).readConfig(writeIO(config)) result = [c.lineno for _, c, _ in saqc._to_call] expected = [3, 4, 5, 9] assert result == expected @@ -100,7 +99,7 @@ def test_configFile(data): SM1;flagtools.flagDummy() """ - SaQC(SimpleFlagger(), data).readConfig(writeIO(config)) + SaQC(data).readConfig(writeIO(config)) def test_configChecks(data): @@ -122,7 +121,7 @@ def test_configChecks(data): for test, expected in tests: fobj = writeIO(header + "\n" + test) with pytest.raises(expected): - SaQC(SimpleFlagger(), 
data).readConfig(fobj).getResult() + SaQC(data).readConfig(fobj).getResult() def test_supportedArguments(data): @@ -151,4 +150,4 @@ def test_supportedArguments(data): for test in tests: fobj = writeIO(header + "\n" + test) - SaQC(SimpleFlagger(), data).readConfig(fobj) + SaQC(data).readConfig(fobj) diff --git a/test/flagger/test_dmpflagger.py b/test/flagger/test_dmpflagger.py index b1a9c1b73..677f54cbe 100644 --- a/test/flagger/test_dmpflagger.py +++ b/test/flagger/test_dmpflagger.py @@ -8,7 +8,9 @@ import pandas as pd import pytest from test.common import initData -from saqc.flagger import DmpFlagger + +DmpFlagger = NotImplemented +pytest.skip("DmpFlagger is deprecated.", allow_module_level=True) @pytest.fixture diff --git a/test/flagger/test_positionalflagger.py b/test/flagger/test_positionalflagger.py index 9875a7c74..45506a070 100644 --- a/test/flagger/test_positionalflagger.py +++ b/test/flagger/test_positionalflagger.py @@ -6,8 +6,9 @@ import pytest import numpy as np from test.common import initData -from saqc.flagger import PositionalFlagger +PositionalFlagger = NotImplemented +pytest.skip("PositionalFlagger is deprecated.", allow_module_level=True) @pytest.fixture def data(): diff --git a/test/funcs/conftest.py b/test/funcs/conftest.py index 1fd4685e6..abecdd3f2 100644 --- a/test/funcs/conftest.py +++ b/test/funcs/conftest.py @@ -16,7 +16,6 @@ def char_dict(): } - @pytest.fixture def course_1(char_dict): # MONOTONOUSLY ASCENDING/DESCENDING @@ -24,23 +23,22 @@ def course_1(char_dict): # the resulting drop/raise per value equals: (peak_level - initial_level) / (0.5*(periods-2)) # periods number better be even! def fix_funk( - freq="10min", - periods=10, - initial_level=0, - peak_level=10, - initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), - char_dict=char_dict, - name='data' + freq="10min", + periods=10, + initial_level=0, + peak_level=10, + initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), + char_dict=char_dict, + name='data' ): - t_index = pd.date_range(initial_index, freq=freq, periods=periods) left = np.linspace(initial_level, peak_level, int(np.floor(len(t_index) / 2))) right = np.linspace(peak_level, initial_level, int(np.ceil(len(t_index) / 2))) s = pd.Series(np.append(left, right), index=t_index) - char_dict["raise"] = s.index[1 : int(np.floor(len(t_index) / 2))] - char_dict["drop"] = s.index[int(np.floor(len(t_index) / 2) + 1) :] - char_dict["peak"] = s.index[int(np.floor(len(t_index) / 2)) - 1 : int(np.floor(len(t_index) / 2)) + 1] + char_dict["raise"] = s.index[1: int(np.floor(len(t_index) / 2))] + char_dict["drop"] = s.index[int(np.floor(len(t_index) / 2) + 1):] + char_dict["peak"] = s.index[int(np.floor(len(t_index) / 2)) - 1: int(np.floor(len(t_index) / 2)) + 1] data = DictOfSeries(data=s, columns=[name]) return data, char_dict @@ -55,13 +53,13 @@ def course_2(char_dict): # one "anomalous" or "outlierish" value of magnitude "out_val" at position "periods/2" # number of periods better be even! 
def fix_funk( - freq="10min", - periods=10, - initial_level=0, - final_level=2, - out_val=5, - initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), - char_dict=char_dict, + freq="10min", + periods=10, + initial_level=0, + final_level=2, + out_val=5, + initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), + char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) data = np.linspace(initial_level, final_level, int(np.floor(len(t_index)))) @@ -88,21 +86,18 @@ def course_test(char_dict): # Test function for pattern detection - same as test pattern for first three values, than constant function def fix_funk(freq='1 D', initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), out_val=5, char_dict=char_dict): - t_index = pd.date_range(initial_index, freq=freq, periods=100) data = pd.Series(data=0, index=t_index) data.iloc[2] = out_val data.iloc[3] = out_val - data = DictOfSeries(data=data, columns=['data']) return data, char_dict return fix_funk - @pytest.fixture def course_3(char_dict): # CROWD IN A PIT/CROWD ON A SUMMIT @@ -113,15 +108,15 @@ def course_3(char_dict): # number of periods better be even! # chrowd_size * crowd_spacing better be less then freq[minutes]. def fix_funk( - freq="10min", - periods=10, - initial_level=0, - final_level=2, - out_val=-5, - initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), - char_dict=char_dict, - crowd_size=5, - crowd_spacing=1, + freq="10min", + periods=10, + initial_level=0, + final_level=2, + out_val=-5, + initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), + char_dict=char_dict, + crowd_size=5, + crowd_spacing=1, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) @@ -158,19 +153,18 @@ def course_4(char_dict): # of periods better be even! def fix_funk( - freq="10min", - periods=10, - base_level=0, - out_val=5, - initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), - char_dict=char_dict, + freq="10min", + periods=10, + base_level=0, + out_val=5, + initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), + char_dict=char_dict, ): - t_index = pd.date_range(initial_index, freq=freq, periods=periods) data = pd.Series(data=base_level, index=t_index) - data[int(len(t_index) / 2) :: 2] = out_val - char_dict["raise"] = t_index[int(len(t_index) / 2) :: 2] - char_dict["return"] = t_index[int((len(t_index) / 2) + 1) :: 2] + data[int(len(t_index) / 2):: 2] = out_val + char_dict["raise"] = t_index[int(len(t_index) / 2):: 2] + char_dict["return"] = t_index[int((len(t_index) / 2) + 1):: 2] data = DictOfSeries(data=data, columns=["data"]) return data, char_dict @@ -187,13 +181,13 @@ def course_5(char_dict): # periods better be greater 5 def fix_funk( - freq="10min", - periods=10, - nan_slice=slice(0, None, 5), - initial_level=0, - final_level=10, - initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), - char_dict=char_dict, + freq="10min", + periods=10, + nan_slice=slice(0, None, 5), + initial_level=0, + final_level=10, + initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), + char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) values = np.linspace(initial_level, final_level, periods) @@ -205,5 +199,3 @@ def course_5(char_dict): return data, char_dict return fix_funk - - diff --git a/test/funcs/test_constants_detection.py b/test/funcs/test_constants_detection.py index 75dab02ae..b7cabb50e 100644 --- a/test/funcs/test_constants_detection.py +++ b/test/funcs/test_constants_detection.py @@ -6,7 +6,7 @@ import numpy as np from saqc.funcs.constants import flagConstants, flagByVariance -from test.common import TESTFLAGGER, 
initData +from test.common import initData, initFlagsLike, BAD @pytest.fixture @@ -16,23 +16,21 @@ def data(): return constants_data -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_constants_flagBasic(data, flagger): +def test_constants_flagBasic(data): expected = np.arange(5, 22) field, *_ = data.columns - flagger = flagger.initFlags(data) - data, flagger_result = flagConstants(data, field, flagger, window="15Min", thresh=0.1, ) - flags = flagger_result.getFlags(field) - assert np.all(flags[expected] == flagger.BAD) + flagger = initFlagsLike(data) + data, flagger_result = flagConstants(data, field, flagger, window="15Min", thresh=0.1, flag=BAD) + flags = flagger_result[field] + assert np.all(flags[expected] == BAD) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_constants_flagVarianceBased(data, flagger): +def test_constants_flagVarianceBased(data): expected = np.arange(5, 25) field, *_ = data.columns - flagger = flagger.initFlags(data) - data, flagger_result1 = flagByVariance(data, field, flagger, window="1h") + flagger = initFlagsLike(data) + data, flagger_result1 = flagByVariance(data, field, flagger, window="1h", flag=BAD) - flag_result1 = flagger_result1.getFlags(field) - test_sum = (flag_result1[expected] == flagger.BAD).sum() + flag_result1 = flagger_result1[field] + test_sum = (flag_result1[expected] == BAD).sum() assert test_sum == len(expected) diff --git a/test/funcs/test_functions.py b/test/funcs/test_functions.py index a47331cd0..2a466df14 100644 --- a/test/funcs/test_functions.py +++ b/test/funcs/test_functions.py @@ -6,6 +6,8 @@ import pandas as pd import numpy as np import dios +from saqc.common import * +from saqc.flagger import Flagger, initFlagsLike from saqc.funcs.drift import flagDriftFromNorm, flagDriftFromReference, flagDriftFromScaledNorm from saqc.funcs.outliers import flagCrossStatistic, flagRange from saqc.funcs.flagtools import flagManual, forceFlags, clearFlags @@ -15,8 +17,6 @@ from saqc.funcs.breaks import flagIsolated from test.common import initData, TESTFLAGGER - - @pytest.fixture def data(): return initData(cols=1, start_date="2016-01-01", end_date="2018-12-31", freq="1D") @@ -27,89 +27,88 @@ def field(data): return data.columns[0] -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagRange(data, field, flagger): +def test_flagRange(data, field): min, max = 10, 90 - flagger = flagger.initFlags(data) - data, flagger = flagRange(data, field, flagger, min=min, max=max) - flagged = flagger.isFlagged(field) + flagger = initFlagsLike(data) + data, flagger = flagRange(data, field, flagger, min=min, max=max, flag=BAD) + flagged = flagger[field] > UNFLAGGED expected = (data[field] < min) | (data[field] > max) - assert (flagged == expected).all() + assert all(flagged == expected) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagSesonalRange(data, field, flagger): +def test_flagSesonalRange(data, field): # prepare data.iloc[::2] = 0 data.iloc[1::2] = 50 nyears = len(data[field].index.year.unique()) tests = [ - ({"min": 1, "max": 100, "startmonth": 7, "startday": 1, "endmonth": 8, "endday": 31,}, 31 * 2 * nyears // 2,), - ({"min": 1, "max": 100, "startmonth": 12, "startday": 16, "endmonth": 1, "endday": 15,}, 31 * nyears // 2 + 1,), + ({"min": 1, "max": 100, "startmonth": 7, "startday": 1, "endmonth": 8, "endday": 31, }, 31 * 2 * nyears // 2,), + ( + {"min": 1, "max": 100, "startmonth": 12, "startday": 16, "endmonth": 1, "endday": 15, }, 31 * nyears // 2 + 1,), ] for test, expected in tests: - flagger = 
flagger.initFlags(data) + flagger = initFlagsLike(data) newfield = f"{field}_masked" start = f"{test['startmonth']:02}-{test['startday']:02}T00:00:00" end = f"{test['endmonth']:02}-{test['endday']:02}T00:00:00" data, flagger = copy(data, field, flagger, field + "_masked") - data, flagger = mask(data, newfield, flagger, mode='periodic', period_start=start, period_end=end, - include_bounds=True) - data, flagger = flagRange(data, newfield, flagger, min=test['min'], max=test['max']) - data, flagger = reindexFlags(data, field, flagger, method='match', source=newfield) + data, flagger = mask( + data, newfield, flagger, + mode='periodic', period_start=start, period_end=end, include_bounds=True, flag=BAD + ) + data, flagger = flagRange(data, newfield, flagger, min=test['min'], max=test['max'], flag=BAD) + data, flagger = reindexFlags(data, field, flagger, method='match', source=newfield, flag=BAD) data, flagger = drop(data, newfield, flagger) - flagged = flagger.isFlagged(field) + flagged = flagger[field] > UNFLAGGED assert flagged.sum() == expected -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_clearFlags(data, field, flagger): - flagger = flagger.initFlags(data) - flags_orig = flagger.getFlags() - flags_set = flagger.setFlags(field, flag=flagger.BAD).getFlags() +def test_clearFlags(data, field): + flagger = initFlagsLike(data) + flagger[:, field] = BAD + assert all(flagger[field] == BAD) + _, flagger = clearFlags(data, field, flagger) - flags_cleared = flagger.getFlags() - assert (flags_orig != flags_set).all(None) - assert (flags_orig == flags_cleared).all(None) + assert all(flagger[field] == UNFLAGGED) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_forceFlags(data, flagger): - flagger = flagger.initFlags(data) - field, *_ = data.columns - flags_orig = flagger.setFlags(field).getFlags(field) - _, flagger = forceFlags(data, field, flagger, flag=flagger.GOOD) - flags_forced = flagger.getFlags(field) - assert np.all(flags_orig != flags_forced) +def test_forceFlags(data, field): + flagger = initFlagsLike(data) + flagger[:, field] = BAD + assert all(flagger[field] == BAD) + _, flagger = forceFlags(data, field, flagger, flag=DOUBT) + assert all(flagger[field] == DOUBT) + + +# todo: @luenensc: i dont get the test -- palmb +def test_flagIsolated(data, field): + flagger = initFlagsLike(data) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagIsolated(data, flagger): - field = data.columns[0] data.iloc[1:3, 0] = np.nan data.iloc[4:5, 0] = np.nan data.iloc[11:13, 0] = np.nan data.iloc[15:17, 0] = np.nan - flagger = flagger.initFlags(data) + s = data[field].iloc[5:6] - flagger = flagger.setFlags(field, loc=s) + flagger[s.index, field] = BAD - _, flagger_result = flagIsolated(data, field, flagger, group_window="1D", gap_window="2.1D") + _, flagger_result = flagIsolated(data, field, flagger, group_window="1D", gap_window="2.1D", flag=BAD) - assert flagger_result.isFlagged(field)[slice(3, 6, 2)].all() + assert flagger_result[field][slice(3, 6, 2)].all() data, flagger_result = flagIsolated( - data, field, flagger_result, group_window="2D", gap_window="2.1D", continuation_range="1.1D", + data, field, flagger_result, + group_window="2D", gap_window="2.1D", continuation_range="1.1D", flag=BAD ) - assert flagger_result.isFlagged(field)[[3, 5, 13, 14]].all() + assert flagger_result[field][[3, 5, 13, 14]].all() -@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")]) -def test_flagCrossScoring(dat, flagger): +def 
test_flagCrossScoring(dat): data1, characteristics = dat(initial_level=0, final_level=0, out_val=0) data2, characteristics = dat(initial_level=0, final_level=0, out_val=10) field = "dummy" @@ -118,17 +117,15 @@ def test_flagCrossScoring(dat, flagger): s1 = pd.Series(data=s1.values, index=s1.index) s2 = pd.Series(data=s2.values, index=s1.index) data = dios.DictOfSeries([s1, s2], columns=["data1", "data2"]) - flagger = flagger.initFlags(data) - _, flagger_result = flagCrossStatistic(data, field, flagger, fields=fields, thresh=3, cross_stat=np.mean) + flagger = initFlagsLike(data) + _, flagger_result = flagCrossStatistic(data, field, flagger, fields=fields, thresh=3, cross_stat=np.mean, flag=BAD) for field in fields: - isflagged = flagger_result.isFlagged(field) + isflagged = flagger_result[field] > UNFLAGGED assert isflagged[characteristics["raise"]].all() -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagManual(data, flagger): - field = data.columns[0] - flagger = flagger.initFlags(data) +def test_flagManual(data, field): + flagger = initFlagsLike(data) args = data, field, flagger dat = data[field] @@ -139,20 +136,20 @@ def test_flagManual(data, flagger): shrinked = mdata.loc[index_exp.union(mdata.iloc[[1, 2, 3, 4, 600, 601]].index)] kwargs_list = [ - dict(mdata=mdata, mflag="a", method="plain"), - dict(mdata=mdata.to_list(), mflag="a", method="plain"), - dict(mdata=mdata, mflag="a", method="ontime"), - dict(mdata=shrinked, mflag="a", method="ontime"), + dict(mdata=mdata, mflag="a", method="plain", flag=BAD), + dict(mdata=mdata.to_list(), mflag="a", method="plain", flag=BAD), + dict(mdata=mdata, mflag="a", method="ontime", flag=BAD), + dict(mdata=shrinked, mflag="a", method="ontime", flag=BAD), ] for kw in kwargs_list: _, fl = flagManual(*args, **kw) - isflagged = fl.isFlagged(field) + isflagged = fl[field] > UNFLAGGED assert isflagged[isflagged].index.equals(index_exp) # flag not exist in mdata - _, fl = flagManual(*args, mdata=mdata, mflag="i do not exist", method="ontime") - isflagged = fl.isFlagged(field) + _, fl = flagManual(*args, mdata=mdata, mflag="i do not exist", method="ontime", flag=BAD) + isflagged = fl[field] > UNFLAGGED assert isflagged[isflagged].index.equals(pd.DatetimeIndex([])) # check right-open / ffill @@ -179,9 +176,10 @@ def test_flagManual(data, flagger): expected.loc[dat.index[-1]] = 1 expected = expected.astype(bool) - _, fl = flagManual(*args, mdata=mdata, mflag=1, method="right-open") - isflagged = fl.isFlagged(field) + _, fl = flagManual(*args, mdata=mdata, mflag=1, method="right-open", flag=BAD) + isflagged = fl[field] > UNFLAGGED last = expected.index[0] + for curr in expected.index[1:]: expected_value = mdata[last] # datetime slicing is inclusive ! @@ -194,10 +192,11 @@ def test_flagManual(data, flagger): # check left-open / bfill expected.loc[dat.index[-1]] = 0 # this time the last is False - _, fl = flagManual(*args, mdata=mdata, mflag=1, method="left-open") - isflagged = fl.isFlagged(field) + _, fl = flagManual(*args, mdata=mdata, mflag=1, method="left-open", flag=BAD) + isflagged = fl[field] > UNFLAGGED last = expected.index[0] assert isflagged[last] == expected[last] + for curr in expected.index[1:]: expected_value = mdata[curr] # datetime slicing is inclusive ! 
@@ -206,24 +205,40 @@ def test_flagManual(data, flagger): assert (chunk == expected_value).all() last = curr -@pytest.mark.parametrize("flagger", TESTFLAGGER) + @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_1")]) -def test_flagDriftFromNormal(dat, flagger): +def test_flagDriftFromNormal(dat): data = dat(periods=200, peak_level=5, name='d1')[0] data['d2'] = dat(periods=200, peak_level=10, name='d2')[0]['d2'] data['d3'] = dat(periods=200, peak_level=100, name='d3')[0]['d3'] data['d4'] = 3 + 4 * data['d1'] data['d5'] = 3 + 4 * data['d1'] - flagger = flagger.initFlags(data) - data_norm, flagger_norm = flagDriftFromNorm(data, 'dummy', flagger, ['d1', 'd2', 'd3'], segment_freq="200min", - norm_spread=5) + flagger = initFlagsLike(data) + data_norm, flagger_norm = flagDriftFromNorm( + data, 'dummy', flagger, + ['d1', 'd2', 'd3'], + segment_freq="200min", + norm_spread=5, + flag=BAD, + ) - data_ref, flagger_ref = flagDriftFromReference(data, 'd1', flagger, ['d1', 'd2', 'd3'], segment_freq="3D", - thresh=20) + data_ref, flagger_ref = flagDriftFromReference( + data, 'd1', flagger, + ['d1', 'd2', 'd3'], + segment_freq="3D", + thresh=20, + flag=BAD, + ) - data_scale, flagger_scale = flagDriftFromScaledNorm(data, 'dummy', flagger, ['d1', 'd3'], ['d4', 'd5'], segment_freq="3D", - thresh=20, norm_spread=5) - assert flagger_norm.isFlagged()['d3'].all() - assert flagger_ref.isFlagged()['d3'].all() - assert flagger_scale.isFlagged()['d3'].all() + data_scale, flagger_scale = flagDriftFromScaledNorm( + data, 'dummy', flagger, + ['d1', 'd3'], ['d4', 'd5'], + segment_freq="3D", + thresh=20, + norm_spread=5, + flag=BAD, + ) + assert all(flagger_norm['d3'] > UNFLAGGED) + assert all(flagger_ref['d3'] > UNFLAGGED) + assert all(flagger_scale['d3'] > UNFLAGGED) diff --git a/test/funcs/test_generic_api_functions.py b/test/funcs/test_generic_api_functions.py index 8d200034f..950dbfd7f 100644 --- a/test/funcs/test_generic_api_functions.py +++ b/test/funcs/test_generic_api_functions.py @@ -6,16 +6,12 @@ import pytest import numpy as np import pandas as pd -from dios import DictOfSeries - -from test.common import TESTFLAGGER, TESTNODATA, initData, writeIO, flagAll -from saqc.core.visitor import ConfigFunctionParser -from saqc.core.config import Fields as F +from saqc.common import * from saqc.core.register import register -from saqc import SaQC, SimpleFlagger -from saqc.funcs.generic import _execGeneric +from saqc import SaQC from saqc.funcs.tools import mask +from test.common import initData, flagAll register(masking='field')(flagAll) @@ -25,38 +21,34 @@ def data(): return initData() -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_addFieldFlagGeneric(data, flagger): - saqc = SaQC(data=data, flagger=flagger) +def test_addFieldFlagGeneric(data): + saqc = SaQC(data=data) - data, flags = saqc.generic.flag( - "tmp1", - func=lambda var1: pd.Series(False, index=data[var1.name].index) - ).getResult() - assert "tmp1" in flags.columns and "tmp1" not in data + func = lambda var1: pd.Series(False, index=data[var1.name].index) + data, flagger = saqc.generic.flag("tmp1", func, flag=BAD).getResult() + assert "tmp1" in flagger.columns and "tmp1" not in data -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_addFieldProcGeneric(data, flagger): - saqc = SaQC(data=data, flagger=flagger) +def test_addFieldProcGeneric(data): + saqc = SaQC(data=data) - data, flagger = saqc.generic.process("tmp1", func=lambda: pd.Series([])).getResult(raw=True) + func = lambda: pd.Series([]) + data, flagger = 
saqc.generic.process("tmp1", func, flag=BAD ).getResult(raw=True) assert "tmp1" in data.columns and data["tmp1"].empty - data, flagger = saqc.generic.process("tmp2", func=lambda var1, var2: var1 + var2).getResult() + func = lambda var1, var2: var1 + var2 + data, flagger = saqc.generic.process("tmp2", func, flag=BAD).getResult() assert "tmp2" in data.columns and (data["tmp2"] == data["var1"] + data["var2"]).all(axis=None) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_mask(data, flagger): - - saqc = SaQC(data=data, flagger=flagger) +def test_mask(data): + saqc = SaQC(data=data) data_org = data.copy(deep=True) mean = data["var1"] / 2 - data, _ = saqc.generic.process("var1", lambda var1: mask(var1 < mean)).getResult() + data, _ = saqc.generic.process("var1", lambda var1: mask(var1 < mean), flag=BAD).getResult() assert ((data["var1"].isna()) == (data_org["var1"] < 10) & data_org["var1"].isna()).all(axis=None) - data, flags = saqc.generic.process("tmp", lambda var1: mask(var1 < mean)).getResult() - assert ("tmp" in data.columns) and ("tmp" in flags.columns) + data, flagger = saqc.generic.process("tmp", lambda var1: mask(var1 < mean), flag=BAD).getResult() + assert ("tmp" in data.columns) and ("tmp" in flagger.columns) assert ((data["tmp"].isna()) == (data_org["var1"] < 10) & data_org["var1"].isna()).all(axis=None) diff --git a/test/funcs/test_generic_config_functions.py b/test/funcs/test_generic_config_functions.py index 2a1e8a14c..7677c3c27 100644 --- a/test/funcs/test_generic_config_functions.py +++ b/test/funcs/test_generic_config_functions.py @@ -10,10 +10,13 @@ import pandas as pd from dios import DictOfSeries from test.common import TESTFLAGGER, TESTNODATA, initData, writeIO + +from saqc.common import * +from saqc.flagger import Flagger, initFlagsLike from saqc.core.visitor import ConfigFunctionParser from saqc.core.config import Fields as F from saqc.core.register import register -from saqc import SaQC, SimpleFlagger +from saqc import SaQC from saqc.funcs.generic import _execGeneric @@ -68,13 +71,12 @@ def test_syntaxError(flagger): _compileGeneric(f"flag(func={test})", flagger) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_typeError(flagger): - +def test_typeError(): """ test that forbidden constructs actually throw an error TODO: find a few more cases or get rid of the test """ + flagger = Flagger() # : think about cases that should be forbidden tests = ("lambda x: x * 2",) @@ -84,9 +86,8 @@ def test_typeError(flagger): _compileGeneric(f"generic.flag(func={test})", flagger) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_comparisonOperators(data, flagger): - flagger = flagger.initFlags(data) +def test_comparisonOperators(data): + flagger = initFlagsLike(data) var1, var2, *_ = data.columns this = var1 @@ -107,7 +108,7 @@ def test_comparisonOperators(data, flagger): @pytest.mark.parametrize("flagger", TESTFLAGGER) def test_arithmeticOperators(data, flagger): - flagger = flagger.initFlags(data) + flagger = initFlagsLike(data) var1, *_ = data.columns this = data[var1] @@ -128,7 +129,7 @@ def test_arithmeticOperators(data, flagger): @pytest.mark.parametrize("flagger", TESTFLAGGER) def test_nonReduncingBuiltins(data, flagger): - flagger = flagger.initFlags(data) + flagger = initFlagsLike(data) var1, *_ = data.columns this = var1 mean = data[var1].mean() @@ -151,7 +152,7 @@ def test_nonReduncingBuiltins(data, flagger): def test_reduncingBuiltins(data, flagger, nodata): data.loc[::4] = nodata - flagger = flagger.initFlags(data) + flagger = 
initFlagsLike(data) var1 = data.columns[0] this = data.iloc[:, 0] @@ -195,7 +196,7 @@ def test_bitOps(data, flagger, nodata): var1, var2, *_ = data.columns this = var1 - flagger = flagger.initFlags(data) + flagger = initFlagsLike(data) tests = [ ("~(this > mean(this))", ~(data[this] > np.nanmean(data[this]))), @@ -209,19 +210,18 @@ def test_bitOps(data, flagger, nodata): assert np.all(result == expected) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_isflagged(data, flagger): +def test_isflagged(data): var1, var2, *_ = data.columns - - flagger = flagger.initFlags(data).setFlags(var1, loc=data[var1].index[::2], flag=flagger.BAD) + flagger = initFlagsLike(data) + flagger[data[var1].index[::2], var1] = BAD tests = [ - (f"isflagged({var1})", flagger.isFlagged(var1)), - (f"isflagged({var1}, flag=BAD)", flagger.isFlagged(var1, flag=flagger.BAD, comparator=">=")), - (f"isflagged({var1}, UNFLAGGED, '==')", flagger.isFlagged(var1, flag=flagger.UNFLAGGED, comparator="==")), - (f"~isflagged({var2})", ~flagger.isFlagged(var2)), - (f"~({var2}>999) & (~isflagged({var2}))", ~(data[var2] > 999) & (~flagger.isFlagged(var2))), + (f"isflagged({var1})", flagger[var1] > UNFLAGGED), + (f"isflagged({var1}, flag=BAD)", flagger[var1] >= BAD), + (f"isflagged({var1}, UNFLAGGED, '==')", flagger[var1] == UNFLAGGED), + (f"~isflagged({var2})", ~(flagger[var2] > UNFLAGGED)), + (f"~({var2}>999) & (~isflagged({var2}))", ~(data[var2] > 999) & ~(flagger[var2] > UNFLAGGED)), ] for test, expected in tests: @@ -230,8 +230,7 @@ def test_isflagged(data, flagger): assert np.all(result == expected) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_variableAssignments(data, flagger): +def test_variableAssignments(data): var1, var2, *_ = data.columns config = f""" @@ -241,18 +240,17 @@ def test_variableAssignments(data, flagger): """ fobj = writeIO(config) - saqc = SaQC(flagger, data).readConfig(fobj) + saqc = SaQC(data).readConfig(fobj) result_data, result_flagger = saqc.getResult(raw=True) assert set(result_data.columns) == set(data.columns) | { "dummy1", } - assert set(result_flagger.getFlags().columns) == set(data.columns) | {"dummy1", "dummy2"} + assert set(result_flagger.columns) == set(data.columns) | {"dummy1", "dummy2"} @pytest.mark.xfail(strict=True) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_processMultiple(data_diff, flagger): +def test_processMultiple(data_diff): var1, var2, *_ = data_diff.columns config = f""" @@ -262,9 +260,9 @@ def test_processMultiple(data_diff, flagger): """ fobj = writeIO(config) - saqc = SaQC(flagger, data_diff).readConfig(fobj) + saqc = SaQC(data_diff).readConfig(fobj) result_data, result_flagger = saqc.getResult() - assert len(result_data["dummy"]) == len(result_flagger.getFlags("dummy")) + assert len(result_data["dummy"]) == len(result_flagger["dummy"]) def test_callableArgumentsUnary(data): @@ -274,9 +272,8 @@ def test_callableArgumentsUnary(data): @register(masking='field') def testFuncUnary(data, field, flagger, func, **kwargs): data[field] = data[field].rolling(window=window).apply(func) - return data, flagger.initFlags(data=data) + return data, initFlagsLike(data) - flagger = SimpleFlagger() var = data.columns[0] config = f""" @@ -291,22 +288,20 @@ def test_callableArgumentsUnary(data): for (name, func) in tests: fobj = writeIO(config.format(name)) - result_config, _ = SaQC(flagger, data).readConfig(fobj).getResult() - result_api, _ = SaQC(flagger, data).testFuncUnary(var, func=func).getResult() + result_config, _ = 
SaQC(data).readConfig(fobj).getResult() + result_api, _ = SaQC(data).testFuncUnary(var, func=func).getResult() expected = data[var].rolling(window=window).apply(func) assert (result_config[var].dropna() == expected.dropna()).all(axis=None) assert (result_api[var].dropna() == expected.dropna()).all(axis=None) def test_callableArgumentsBinary(data): - - flagger = SimpleFlagger() var1, var2 = data.columns[:2] @register(masking='field') def testFuncBinary(data, field, flagger, func, **kwargs): data[field] = func(data[var1], data[var2]) - return data, flagger.initFlags(data=data) + return data, initFlagsLike(data) config = f""" {F.VARNAME} ; {F.TEST} @@ -320,8 +315,8 @@ def test_callableArgumentsBinary(data): for (name, func) in tests: fobj = writeIO(config.format(name)) - result_config, _ = SaQC(flagger, data).readConfig(fobj).getResult() - result_api, _ = SaQC(flagger, data).testFuncBinary(var1, func=func).getResult() + result_config, _ = SaQC(data).readConfig(fobj).getResult() + result_api, _ = SaQC(data).testFuncBinary(var1, func=func).getResult() expected = func(data[var1], data[var2]) assert (result_config[var1].dropna() == expected.dropna()).all(axis=None) assert (result_api[var1].dropna() == expected.dropna()).all(axis=None) diff --git a/test/funcs/test_pattern_rec.py b/test/funcs/test_pattern_rec.py index 75f3f4e4c..0763a82f0 100644 --- a/test/funcs/test_pattern_rec.py +++ b/test/funcs/test_pattern_rec.py @@ -7,8 +7,10 @@ import pandas as pd from dios import dios +from saqc.common import * +from saqc.flagger import Flagger, initFlagsLike from saqc.funcs.pattern import * -from test.common import initData, TESTFLAGGER +from test.common import initData @pytest.fixture @@ -21,33 +23,31 @@ def field(data): return data.columns[0] -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagPattern_wavelet(flagger): - +@pytest.mark.skip(reason='faulty implementation - will get fixed by GL-MR191') +def test_flagPattern_wavelet(): data = pd.Series(0, index=pd.date_range(start="2000", end='2001', freq='1d')) data.iloc[2:4] = 7 pattern = data.iloc[1:6] data = dios.DictOfSeries(dict(data=data, pattern_data=pattern)) + flagger = initFlagsLike(data, name='data') + data, flagger = flagPatternByDTW(data, "data", flagger, ref_field="pattern_data", flag=BAD) - flagger = flagger.initFlags(data) - data, flagger = flagPatternByDTW(data, "data", flagger, ref_field="pattern_data") - assert (flagger.isFlagged("data")[1:6]).all() - assert (flagger.isFlagged("data")[:1]).any() - assert (flagger.isFlagged("data")[7:]).any() - + assert all(flagger["data"][1:6]) + assert any(flagger["data"][:1]) + assert any(flagger["data"][7:]) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagPattern_dtw(flagger): +@pytest.mark.skip(reason='faulty implementation - will get fixed by GL-MR191') +def test_flagPattern_dtw(): data = pd.Series(0, index=pd.date_range(start="2000", end='2001', freq='1d')) data.iloc[2:4] = 7 pattern = data.iloc[1:6] data = dios.DictOfSeries(dict(data=data, pattern_data=pattern)) + flagger = initFlagsLike(data, name='data') + data, flagger = flagPatternByWavelet(data, "data", flagger, ref_field="pattern_data", flag=BAD) - flagger = flagger.initFlags(data) - data, flagger = flagPatternByWavelet(data, "data", flagger, ref_field="pattern_data") - assert (flagger.isFlagged("data")[1:6]).all() - assert (flagger.isFlagged("data")[:1]).any() - assert (flagger.isFlagged("data")[7:]).any() + assert all(flagger["data"][1:6]) + assert any(flagger["data"][:1]) + assert any(flagger["data"][7:]) diff 
--git a/test/funcs/test_spikes_detection.py b/test/funcs/test_spikes_detection.py index da8683479..be38370e3 100644 --- a/test/funcs/test_spikes_detection.py +++ b/test/funcs/test_spikes_detection.py @@ -16,6 +16,8 @@ from saqc.funcs.outliers import ( ) from test.common import TESTFLAGGER +from saqc.common import * +from saqc.flagger import Flagger, initFlagsLike @pytest.fixture(scope="module") @@ -28,30 +30,27 @@ def spiky_data(): return dios.DictOfSeries(s), flag_assertion -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagMad(spiky_data, flagger): +def test_flagMad(spiky_data): data = spiky_data[0] field, *_ = data.columns - flagger = flagger.initFlags(data) - data, flagger_result = flagMAD(data, field, flagger, "1H") - flag_result = flagger_result.getFlags(field) - test_sum = (flag_result[spiky_data[1]] == flagger.BAD).sum() + flagger = initFlagsLike(data) + data, flagger_result = flagMAD(data, field, flagger, "1H", flag=BAD) + flag_result = flagger_result[field] + test_sum = (flag_result[spiky_data[1]] == BAD).sum() assert test_sum == len(spiky_data[1]) -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_flagSpikesBasic(spiky_data, flagger): +def test_flagSpikesBasic(spiky_data): data = spiky_data[0] field, *_ = data.columns - flagger = flagger.initFlags(data) - data, flagger_result = flagOffset(data, field, flagger, thresh=60, tolerance=10, window="20min") - flag_result = flagger_result.getFlags(field) - test_sum = (flag_result[spiky_data[1]] == flagger.BAD).sum() + flagger = initFlagsLike(data) + data, flagger_result = flagOffset(data, field, flagger, thresh=60, tolerance=10, window="20min", flag=BAD) + flag_result = flagger_result[field] + test_sum = (flag_result[spiky_data[1]] == BAD).sum() assert test_sum == len(spiky_data[1]) # see test/functs/conftest.py for the 'course_N' -@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize( "dat", [ @@ -61,22 +60,22 @@ def test_flagSpikesBasic(spiky_data, flagger): pytest.lazy_fixture("course_4"), ], ) -def test_flagSpikesLimitRaise(dat, flagger): +def test_flagSpikesLimitRaise(dat): data, characteristics = dat() field, *_ = data.columns - flagger = flagger.initFlags(data) + flagger = initFlagsLike(data) _, flagger_result = flagRaise( - data, field, flagger, thresh=2, intended_freq="10min", raise_window="20min", numba_boost=False + data, field, flagger, + thresh=2, intended_freq="10min", raise_window="20min", numba_boost=False, flag=BAD ) - assert flagger_result.isFlagged(field)[characteristics["raise"]].all() - assert not flagger_result.isFlagged(field)[characteristics["return"]].any() - assert not flagger_result.isFlagged(field)[characteristics["drop"]].any() + assert np.all(flagger_result[field][characteristics["raise"]] > UNFLAGGED) + assert not np.any(flagger_result[field][characteristics["return"]] > UNFLAGGED) + assert not np.any(flagger_result[field][characteristics["drop"]] > UNFLAGGED) # see test/functs/conftest.py for the 'course_N' -@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_3")]) -def test_flagMultivarScores(dat, flagger): +def test_flagMultivarScores(dat): data1, characteristics = dat(periods=1000, initial_level=5, final_level=15, out_val=50) data2, characteristics = dat(periods=1000, initial_level=20, final_level=1, out_val=30) field = "dummy" @@ -85,24 +84,26 @@ def test_flagMultivarScores(dat, flagger): s1 = pd.Series(data=s1.values, index=s1.index) s2 = pd.Series(data=s2.values, index=s1.index) data = 
dios.DictOfSeries([s1, s2], columns=["data1", "data2"]) - flagger = flagger.initFlags(data) + flagger = initFlagsLike(data) _, flagger_result = flagMVScores( - data, field, flagger, fields=fields, trafo=np.log, iter_start=0.95, n_neighbors=10 + data, field, flagger, fields=fields, trafo=np.log, iter_start=0.95, n_neighbors=10, flag=BAD ) for field in fields: - isflagged = flagger_result.isFlagged(field) + isflagged = flagger_result[field] > UNFLAGGED assert isflagged[characteristics["raise"]].all() assert not isflagged[characteristics["return"]].any() assert not isflagged[characteristics["drop"]].any() -@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_3")]) -def test_grubbs(dat, flagger): +def test_grubbs(dat): data, char_dict = dat( - freq="10min", periods=45, initial_level=0, final_level=0, crowd_size=1, crowd_spacing=3, out_val=-10 + freq="10min", periods=45, + initial_level=0, final_level=0, + crowd_size=1, crowd_spacing=3, + out_val=-10, ) - flagger = flagger.initFlags(data) - data, result_flagger = flagByGrubbs(data, "data", flagger, winsz=20, min_periods=15) - assert result_flagger.isFlagged("data")[char_dict["drop"]].all() + flagger = initFlagsLike(data) + data, result_flagger = flagByGrubbs(data, "data", flagger, winsz=20, min_periods=15, flag=BAD) + assert np.all(result_flagger["data"][char_dict["drop"]] > UNFLAGGED) -- GitLab
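
The migration performed above follows one pattern throughout: flagger.initFlags(data) becomes initFlagsLike(data), flagger.setFlags(field, ...) becomes item assignment, and flagger.isFlagged(field) becomes a comparison against the UNFLAGGED constant. The sketch below condenses that pattern. It is a minimal sketch, assuming only what the patch itself imports (BAD and UNFLAGGED via saqc.common, initFlagsLike from saqc.flagger); the series values and the column name "var1" are purely illustrative.

import numpy as np
import pandas as pd
from dios import DictOfSeries

from saqc.common import BAD, UNFLAGGED
from saqc.flagger import initFlagsLike

# a one-column DictOfSeries, mirroring the fixtures used in the tests
s = pd.Series(np.arange(10, dtype=float),
              index=pd.date_range("2021-01-01", periods=10, freq="1D"))
data = DictOfSeries(data=s, columns=["var1"])

# old API (removed by this patch):
#     flagger = SimpleFlagger().initFlags(data)
#     flagger = flagger.setFlags("var1", flag=flagger.BAD)
#     flagged = flagger.isFlagged("var1")

# new API (used throughout this patch):
flagger = initFlagsLike(data)           # one flag column per data column
flagger[:, "var1"] = BAD                # setFlags() becomes plain __setitem__
flagged = flagger["var1"] > UNFLAGGED   # isFlagged() becomes a comparison
assert flagged.all()

Partial assignments work the same way, as in the hypothesis strategy in test/common.py: indexing with a row selection, e.g. flagger[some_index, "var1"] = BAD, flags only the selected timestamps.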