From 42520f9688874ef35caebb1c1cc943ed9328db1d Mon Sep 17 00:00:00 2001
From: David Schaefer <david.schaefer@ufz.de>
Date: Fri, 12 Apr 2019 12:15:33 +0200
Subject: [PATCH] improved the test situation (a bit)

---
Review notes (this section is ignored by git am):
* Line structure restored from a whitespace-collapsed copy of this patch;
  blank context lines were re-inserted so that every hunk matches the
  line counts given in its header.
* test/common.py: initData() now forwards start_date/end_date/freq to
  pd.date_range() instead of repeating the default values as literals.
* TODO: test_core.py and test_generic.py still import initData from
  .testfuncs, which this patch deletes; they need to import from .common.

 test/common.py         |  14 ++++
 test/test_core.py      | 176 ++++++++++++++++++++++++-----------------
 test/test_evaluator.py |   6 +-
 test/test_generic.py   |  30 +++++--
 test/testfuncs.py      |  14 ----
 5 files changed, 147 insertions(+), 93 deletions(-)
 create mode 100644 test/common.py
 delete mode 100644 test/testfuncs.py

diff --git a/test/common.py b/test/common.py
new file mode 100644
index 000000000..d4bd05aea
--- /dev/null
+++ b/test/common.py
@@ -0,0 +1,14 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pandas as pd
+
+
+def initData(cols=2, start_date="2017-01-01", end_date="2017-12-31", freq="1h"):
+    dates = pd.date_range(start=start_date, end=end_date, freq=freq)
+    data = {}
+    dummy = np.arange(len(dates))
+    for col in range(1, cols+1):
+        data[f"var{col}"] = dummy*(col)
+    return pd.DataFrame(data, index=dates)
diff --git a/test/test_core.py b/test/test_core.py
index 171143f71..397132eef 100644
--- a/test/test_core.py
+++ b/test/test_core.py
@@ -5,102 +5,136 @@ import pytest
 import numpy as np
 import pandas as pd
 
-from core import runner, flagNext
-from funcs.functions import flagGeneric, Params
+from core import runner, flagNext, prepareMeta
 from config import Fields
-from flagger import SimpleFlagger, DmpFlagger
+from flagger import SimpleFlagger, DmpFlagger, PositionalFlagger
 from .testfuncs import initData
 
 
-def initMeta(data):
-    dates = data.index
-    variables = data.columns
-    randg = np.random.randint
-    start_dates = [dates[randg(0, (len(dates)//2)-1)] for _ in variables]
-    end_dates = [dates[randg(len(dates)//2, len(dates) - 1 )] for _ in variables]
-    tests = ["generic, {func: abs(this) + 1 > 0}"] * len(variables)
-    return pd.DataFrame({Fields.VARNAME: data.columns,
-                         Fields.STARTDATE: start_dates,
-                         Fields.ENDDATE: end_dates,
-                         Fields.FLAGS: tests})
+TESTFLAGGERS = [
+    SimpleFlagger(), DmpFlagger(), # PositionalFlagger()
+]
 
 
-def test_temporalPartitioning():
+@pytest.mark.parametrize("flagger", TESTFLAGGERS)
+def test_temporalPartitioning(flagger):
+    """
+    Check if the time span in meta is respected
+    """
+    data = initData(3)
+    var1, var2, var3, *_ = data.columns
+    split_date = data.index[len(data.index)//2]
+    tests = ["range, {min: -2, max: -1}",
+             "generic, {func: this <= sum(this)}",
+             "generic, {func: this <= sum(this)}"]
+
+    meta = prepareMeta(
+        pd.DataFrame(
+            {Fields.VARNAME: [var1, var2, var3],
+             Fields.STARTDATE: [None, None, split_date],
+             Fields.ENDDATE: [None, split_date, None],
+             Fields.FLAGS: tests}),
+        data)
 
-    data = initData()
-    meta = initMeta(data)
-    flagger = SimpleFlagger()
     pdata, pflags = runner(meta, flagger, data)
 
     fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE]
     for _, row in meta.iterrows():
         vname, start_date, end_date = row[fields]
-        fchunk = pflags[vname].dropna()
+        fchunk = pflags.loc[flagger.isFlagged(pflags[vname]), vname]
         assert fchunk.index.min() == start_date, "different start dates"
         assert fchunk.index.max() == end_date, "different end dates"
 
 
-def test_flagNextFill():
-    flagger = SimpleFlagger()
-    data = initData().iloc[:, 1]
-    flags = flagger.emptyFlags(data)
-
-    idx = [0, 1, 2]
-    flags.iloc[idx] = flagger.setFlag(flags.iloc[idx])
-
-    n = 4
-    fflags = flagNext(flagger, flags.copy(), 4)
-    result_idx = np.unique(np.where(pd.notnull(fflags))[0])
-    expected_idx = np.arange(min(idx), max(idx) + n + 1)
-    assert (result_idx == expected_idx).all()
-
+@pytest.mark.parametrize("flagger", TESTFLAGGERS)
+def test_missingConfig(flagger):
+    """
+    Test if variables available in the dataset but not the config
+    are handled correctly, i.e. are ignored
+    """
+    data = initData(2)
+    var1, var2, *_ = data.columns
+    meta = prepareMeta(
+        pd.DataFrame(
+            {Fields.VARNAME: [var1],
+             Fields.FLAGS: ["range, {min: -9999, max: 9999}"]}),
+        data)
 
-def test_flagNextOverwrite():
-    flagger = SimpleFlagger()
-    data = initData().iloc[:, 0]
-    flags = flagger.emptyFlags(data)
+    pdata, pflags = runner(meta, flagger, data)
+    assert var1 in pdata and var2 not in pflags
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGERS)
+def test_missingVariable(flagger):
+    """
+    Test if variables available in the config but not dataset
+    are handled correctly, i.e. are ignored
+    """
+    data = initData(1)
+    var, *_ = data.columns
+    meta = prepareMeta(
+        pd.DataFrame(
+            {Fields.VARNAME: [var, "empty"],
+             Fields.FLAGS: ["range, {min: -9999, max: 9999}",
+                            "range, {min: -9999, max: 9999}"]}),
+        data)
 
-    flags.iloc[0::3] = flagger.setFlag(flags.iloc[0::3], 1)
-    flags.iloc[2::3] = flagger.setFlag(flags.iloc[2::3], 2)
+    pdata, pflags = runner(meta, flagger, data)
+    assert (data.columns == [var]).all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGERS)
+def test_assignVariable(flagger):
+    """
+    Test the assign keyword, a variable present in the configuration, but not
+    dataset will be added to output flags
+    """
+    data = initData(1)
+    var1, *_ = data.columns
+    var2 = "empty"
+    meta = prepareMeta(
+        pd.DataFrame(
+            {Fields.VARNAME: [var1, var2],
+             Fields.FLAGS: ["range, {min: -9999, max: 9999}",
+                            f"generic, {{func: isflagged({var2}), assign: True}}"]}),
+        data)
 
-    fflags = flagNext(flagger, flags.copy().iloc[:], 4)
-    assert ((fflags.values[pd.isnull(flags)] == 1).all(axis=None))
+    pdata, pflags = runner(meta, flagger, data)
+    if isinstance(pflags.columns, pd.MultiIndex):
+        cols = (pflags
+                .columns.get_level_values(0)
+                .drop_duplicates())
+        assert (cols == [var1, var2]).all()
+    else:
+        assert (pflags.columns == [var1, var2]).all()
 
 
-def test_flagNextMulticolumn():
-    flagger = DmpFlagger()
-    data = initData().iloc[:, 0]
-    flags = flagger.emptyFlags(data)
-    var, *_ = flags.columns.get_level_values(0)
 
-    flags.loc[data.index[0::3], var] = flagger.setFlag(
-        flags.loc[data.index[0::3], var], "DOUBTFUL")
+@pytest.mark.parametrize("flagger", TESTFLAGGERS)
+def test_flagNext(flagger):
+    """
+    Test if the flagNext functionality works as expected
+    """
+    data = initData().iloc[:, 1]
+    flags = flagger.initFlags(data)
 
-    flags.loc[data.index[2::3], var] = flagger.setFlag(
-        flags.loc[data.index[2::3], var], "BAD")
+    idx = [0, 1, 2]
+    flags.iloc[idx] = flagger.setFlag(flags.iloc[idx])
 
+    n = 4
     fflags = flagNext(flagger, flags.copy(), 4)
-    assert ((fflags.values[pd.isnull(flags)] == 1).all(axis=None))
-
-
-def test_flagGenericFailure():
-    flagger = SimpleFlagger()
-    data = initData()
-    flags = flagger.emptyFlags(data)
-    var1, var2, *_ = data.columns
-
-    # expression does not return a result of identical shape
-    with pytest.raises(TypeError):
-        flagGeneric(data, flags, var2, flagger, **{Params.FUNC: f"sum({var1})"})
-
-    # expression does not return a boolean result
-    with pytest.raises(TypeError):
-        flagGeneric(data, flags, var2, flagger, **{Params.FUNC: f"{var1}"})
+    result_idx = np.unique(np.where(flagger.isFlagged(fflags))[0])
+    expected_idx = np.arange(min(idx), max(idx) + n + 1)
+    assert (result_idx == expected_idx).all()
 
 
 if __name__ == "__main__":
-    test_temporalPartitioning()
-    test_flagNextFill()
-    test_flagNextOverwrite()
-    test_flagNextMulticolumn()
-    test_flagGenericFailure()
+
+    # NOTE: PositionalFlagger is currently broken, going to fix it when needed
+    # for flagger in [SimpleFlagger, PositionalFlagger, DmpFlagger]:
+    for flagger in [SimpleFlagger(), DmpFlagger()]:
+        test_temporalPartitioning(flagger)
+        test_flagNext(flagger)
+        test_missingConfig(flagger)
+        test_missingVariable(flagger)
+        test_assignVariable(flagger)
diff --git a/test/test_evaluator.py b/test/test_evaluator.py
index 80d3608fa..da8ac5550 100644
--- a/test/test_evaluator.py
+++ b/test/test_evaluator.py
@@ -12,7 +12,7 @@ from dsl import evalExpression
 def test_evaluationBool():
     data = initData()
     flagger = SimpleFlagger()
-    flags = flagger.emptyFlags(data, 0)
+    flags = flagger.initFlags(data, 0)
     var1, var2, *_ = data.columns
 
     tests = [
@@ -37,7 +37,7 @@ def test_evaluationBool():
 def test_missingIdentifier():
     data = initData()
     flagger = SimpleFlagger()
-    flags = flagger.emptyFlags(data)
+    flags = flagger.initFlags(data)
     tests = ["func(var2) < 5", "var3 != NODATA"]
     for test in tests:
         with pytest.raises(NameError):
@@ -47,7 +47,7 @@ def test_missingIdentifier():
 def test_flagPropagation():
     data = initData()
     flagger = SimpleFlagger()
-    flags = flagger.emptyFlags(data, 0)
+    flags = flagger.initFlags(data, 0)
     flags.iloc[::5] = flagger.setFlag(flags.iloc[::5])
 
     var1, var2, *_ = data.columns
diff --git a/test/test_generic.py b/test/test_generic.py
index dce728523..fd95ca937 100644
--- a/test/test_generic.py
+++ b/test/test_generic.py
@@ -3,11 +3,13 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 from .testfuncs import initData
 
 from dsl import evalExpression
 from flagger import SimpleFlagger
+from funcs.functions import flagGeneric, Params
 
 
 def test_ismissing():
@@ -19,7 +21,7 @@ def test_ismissing():
     data.iloc[(len(data)//2)+1:, 0] = nodata
 
     flagger = SimpleFlagger()
-    flags = flagger.emptyFlags(data)
+    flags = flagger.initFlags(data)
 
     var1, var2, *_ = data.columns
 
@@ -37,7 +39,7 @@ def test_isflagged():
 
     flagger = SimpleFlagger()
     data = initData()
-    flags = flagger.emptyFlags(data, 0)
+    flags = flagger.initFlags(data, 0)
     var1, var2, *_ = data.columns
 
     flags.iloc[::2, 0] = flagger.setFlag(flags.iloc[::2, 0])
@@ -51,11 +53,11 @@ def test_isflagged():
     assert (flagged == idx).all
 
 
-def test_isflaggedNonstandard():
+def test_isflaggedArgument():
 
     flagger = SimpleFlagger()
     data = initData()
-    flags = flagger.emptyFlags(data, 0)
+    flags = flagger.initFlags(data, 0)
     var1, var2, *_ = data.columns
 
     flags.iloc[::2, 0] = flagger.setFlag(flags.iloc[::2, 0], -9)
@@ -69,7 +71,25 @@ def test_isflaggedNonstandard():
     assert (flagged == idx).all
 
 
+def test_flagFailure():
+    flagger = SimpleFlagger()
+    data = initData()
+    flags = flagger.initFlags(data)
+    var1, var2, *_ = data.columns
+
+    # expression does not return a result of identical shape
+    with pytest.raises(TypeError):
+        flagGeneric(data, flags, var2, flagger,
+                    **{Params.FUNC: f"sum({var1})"})
+
+    # need a test for missing variables
+    with pytest.raises(NameError):
+        flagGeneric(data, flags, var2, flagger,
+                    **{Params.FUNC: f"sum({var1 + 'x'})"})
+
+
 if __name__ == "__main__":
     test_ismissing()
     test_isflagged()
-    test_isflaggedNonstandard()
+    test_isflaggedArgument()
+    test_flagFailure()
diff --git a/test/testfuncs.py b/test/testfuncs.py
deleted file mode 100644
index 3e60913bf..000000000
--- a/test/testfuncs.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import numpy as np
-import pandas as pd
-
-
-def initData(start_date="2017-01-01", end_date="2017-12-31", freq="1h"):
-    dates = pd.date_range(start="2017-01-01", end="2017-12-31", freq="1h")
-    data = pd.DataFrame(
-        data={"var1": np.arange(len(dates)),
-              "var2": np.arange(len(dates), len(dates)*2)},
-        index=dates)
-    return data
-- 
GitLab