From b08789b9ec22e3caf2d6104f91e6248b9e622db1 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Tue, 15 Aug 2023 17:20:46 +0200 Subject: [PATCH] use string config instead of writeIO magic --- docs/documentation/GenericFunctions.rst | 66 +++------ tests/common.py | 7 - tests/core/test_reader.py | 54 ++++--- tests/funcs/test_generic_config_functions.py | 140 +++++++++---------- 4 files changed, 112 insertions(+), 155 deletions(-) diff --git a/docs/documentation/GenericFunctions.rst b/docs/documentation/GenericFunctions.rst index 74e7b2e8c..67f008dcc 100644 --- a/docs/documentation/GenericFunctions.rst +++ b/docs/documentation/GenericFunctions.rst @@ -51,8 +51,7 @@ dummy dataset, to lead us through the following code snippets: .. testsetup:: python - from saqc import fromConfig - from tests.common import writeIO + from saqc.parsing.reader import _ConfigReader as ConfigReader .. testcode:: python @@ -116,16 +115,13 @@ Simple constraints .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;------------------------ ... x ; flagGeneric(func=x < 30) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc1.flags #doctest:+NORMALIZE_WHITESPACE True @@ -177,16 +173,13 @@ Cross variable constraints .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;------------------------------------ ... x ; flagGeneric(field="y", func=y > 30) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc2.flags #doctest:+NORMALIZE_WHITESPACE True @@ -241,16 +234,13 @@ need to be put in parentheses. .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;-------------------------------------------------------- ... x ; flagGeneric(field=["y", "z"], func=(y > 30) & (z < 50)) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc3.flags #doctest:+NORMALIZE_WHITESPACE True @@ -293,16 +283,13 @@ Arithmetics .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;------------------------------------------------------- ... x ; flagGeneric(field=["x", "y", "z"], func=x > (y + z)/2) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc4.flags #doctest:+NORMALIZE_WHITESPACE True @@ -351,16 +338,13 @@ Special functions .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;--------------------------------------------------- ... x ; flagGeneric(field=["x", "z"], func=x > std(z) * 2) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc5.flags #doctest:+NORMALIZE_WHITESPACE True @@ -402,17 +386,14 @@ Special functions .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;------------------------------------------ ... y ; flagRange(min=10, max=60) ... x ; flagGeneric(field="y", func=isflagged(y)) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc6.flags #doctest:+NORMALIZE_WHITESPACE True @@ -481,16 +462,13 @@ Let's consider the following dataset: .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;--------------------------------------------------------------- ... meas ; flagGeneric(field=["fan", "volt"], func=(x == 0) | (y < 12.0)) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc7.flags #doctest:+NORMALIZE_WHITESPACE True @@ -533,8 +511,7 @@ But we could also quality check our independent variables first and than leverag .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;-------------------------------------------------------------------------- @@ -543,9 +520,7 @@ But we could also quality check our independent variables first and than leverag ... volt ; flagGeneric(func=volt < 12.0) ... meas ; flagGeneric(field=["fan", "volt"], func=isflagged(fan) | isflagged(volt)) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.flags == qc8.flags #doctest:+NORMALIZE_WHITESPACE True @@ -634,16 +609,13 @@ variables in a given dataset. We start with dummy data again: .. doctest:: python :hide: - >>> tmp = fromConfig( - ... writeIO( + >>> tmp = ConfigReader(data).readString( ... """ ... varname ; test ... #-------;------------------------------------------------------ ... mean ; processGeneric(field=["x", "y", "z"], func=(x+y+z)/2) ... """ - ... ), - ... data - ... ) + ... ).run() >>> tmp.data == qc1.data #doctest:+NORMALIZE_WHITESPACE True diff --git a/tests/common.py b/tests/common.py index cdf418c23..fd78476eb 100644 --- a/tests/common.py +++ b/tests/common.py @@ -42,13 +42,6 @@ def dummyHistory(hist: pd.DataFrame = None, meta: list = None): return createHistoryFromData(hist, meta, copy=True) -def writeIO(content): - f = io.StringIO() - f.write(content) - f.seek(0) - return f - - def checkInvariants(data, flags, field, identical=True): """ Check all invariants that must hold at any point for diff --git a/tests/core/test_reader.py b/tests/core/test_reader.py index 9234d0e48..599811d90 100644 --- a/tests/core/test_reader.py +++ b/tests/core/test_reader.py @@ -12,8 +12,8 @@ import pytest from saqc.core import DictOfSeries, Flags, SaQC, flagging from saqc.exceptions import ParsingError from saqc.parsing.environ import ENVIRONMENT -from saqc.parsing.reader import fromConfig, readFile -from tests.common import initData, writeIO +from saqc.parsing.reader import _ConfigReader +from tests.common import initData @pytest.fixture @@ -41,8 +41,9 @@ def test_variableRegex(data): ] for regex, expected in tests: - fobj = writeIO(header + "\n" + f"{regex} ; {function}()") - saqc = fromConfig(fobj, data=data) + cr = _ConfigReader(data) + cr.readString(header + "\n" + f"{regex} ; {function}()") + saqc = cr.run() result = getTestedVariables(saqc._flags, function) assert np.all(result == expected) @@ -50,9 +51,10 @@ def test_variableRegex(data): ("var[12]", []), # not quoted -> not a regex ] for regex, expected in tests: - fobj = writeIO(header + "\n" + f"{regex} ; {function}()") + cr = _ConfigReader(data=data) + cr.readString(header + "\n" + f"{regex} ; {function}()") with pytest.warns(RuntimeWarning): - saqc = fromConfig(fobj, data=data) + saqc = cr.run() result = getTestedVariables(saqc._flags, function) assert np.all(result == expected) @@ -67,7 +69,7 @@ def test_inlineComments(data): var1 ; flagDummy() # test """ - saqc = fromConfig(writeIO(config), data) + saqc = _ConfigReader(data).readString(config).run() func = saqc._flags.history["var1"].meta[0]["func"] assert func == "flagDummy" @@ -84,9 +86,9 @@ def test_configReaderLineNumbers(): SM1 ; flagDummy() """ - planned = readFile(writeIO(config)) + planned = _ConfigReader().readString(config) expected = [4, 5, 6, 10] - assert (planned.index == expected).all() + assert (planned.config.index == expected).all() @pytest.mark.filterwarnings("ignore::RuntimeWarning") @@ -105,7 +107,8 @@ def test_configFile(data): SM1;flagDummy() """ - fromConfig(writeIO(config), data) + c = _ConfigReader().readString(config).config + assert len(c) == 4 @pytest.mark.parametrize( @@ -124,12 +127,15 @@ def test_configChecks(data, test, expected): return data, flags header = f"varname;test" - fobj = writeIO(header + "\n" + test) + cr = _ConfigReader(data).readString(header + "\n" + test) with pytest.raises(expected): - fromConfig(fobj, data=data) + cr.run() -def test_supportedArguments(data): +@pytest.mark.parametrize( + "kwarg", ["NAN", "'a string'", "5", "5.5", "-5", "True", "sum([1, 2, 3])"] +) +def test_supportedArguments(data, kwarg): # test if the following function arguments # are supported (i.e. parsing does not fail) @@ -141,21 +147,8 @@ def test_supportedArguments(data): return saqc var1 = data.columns[0] - - header = f"varname;test" - tests = [ - f"{var1};func(kwarg=NAN)", - f"{var1};func(kwarg='str')", - f"{var1};func(kwarg=5)", - f"{var1};func(kwarg=5.5)", - f"{var1};func(kwarg=-5)", - f"{var1};func(kwarg=True)", - f"{var1};func(kwarg=sum([1, 2, 3]))", - ] - - for test in tests: - fobj = writeIO(header + "\n" + test) - fromConfig(fobj, data) + conf = f"varname;test" + "\n" + f"{var1};func(kwarg={kwarg})" + _ConfigReader(data).readString(conf).run() @pytest.mark.parametrize( @@ -172,5 +165,6 @@ def test_funtionArguments(data, func_string): {data.columns[0]} ; testFunction(func={func_string}) {data.columns[0]} ; testFunction(func="{func_string}") """ - - fromConfig(writeIO(config), data) + cr = _ConfigReader(data) + cr.readString(config) + cr.run() diff --git a/tests/funcs/test_generic_config_functions.py b/tests/funcs/test_generic_config_functions.py index 6b29d9d61..f19342a39 100644 --- a/tests/funcs/test_generic_config_functions.py +++ b/tests/funcs/test_generic_config_functions.py @@ -15,9 +15,9 @@ import pytest from saqc import BAD, UNFLAGGED, SaQC from saqc.core import DictOfSeries, Flags, initFlagsLike, register from saqc.funcs.generic import _execGeneric -from saqc.parsing.reader import fromConfig +from saqc.parsing.reader import _ConfigReader from saqc.parsing.visitor import ConfigFunctionParser -from tests.common import initData, writeIO +from tests.common import initData @pytest.fixture @@ -46,69 +46,65 @@ def _compileGeneric(expr): return kwargs["func"] -def test_syntaxError(): - tests = [ +@pytest.mark.parametrize( + "expr", + [ "range(x=5", "rangex=5)", "range[x=5]" "range{x=5}" "int->float(x=4)" "int*float(x=4)", - ] + ], +) +def test_syntaxError(expr): + with pytest.raises(SyntaxError): + _compileGeneric(f"flag(func={expr})") - for test in tests: - with pytest.raises(SyntaxError): - _compileGeneric(f"flag(func={test})") - -def test_typeError(): +# TODO: think about cases that should be forbidden +@pytest.mark.parametrize("expr", ["lambda x: x * 2"]) +def test_typeError(expr): """ test that forbidden constructs actually throw an error """ - - # TODO: think about cases that should be forbidden - tests = ("lambda x: x * 2",) - - for test in tests: - with pytest.raises(TypeError): - _compileGeneric(f"flagGeneric(func={test})") - - -def test_comparisonOperators(data): - var1, var2, *_ = data.columns + with pytest.raises(TypeError): + _compileGeneric(f"flagGeneric(func={expr})") + + +@pytest.mark.parametrize( + "fields,expr,expected", + [ + (["var1"], "x > 100", 'data["var1"] > 100'), + (["var2"], "10 >= y", '10 >= data["var2"]'), + (["var2"], f"y < 100", 'data["var2"] < 100'), + (["var1", "var2"], "x <= y", 'data["var1"] <= data["var2"]'), + (["var1", "var2"], "x == y", 'data["var1"] == data["var2"]'), + (["var1", "var2"], "x != y", 'data["var1"] != data["var2"]'), + ], +) +def test_comparisonOperators(data, fields, expr, expected): + expected = eval(expected) flags = initFlagsLike(data) - - tests = [ - (["var1"], "x > 100", data[var1] > 100), - (["var2"], "10 >= y", 10 >= data[var2]), - (["var2"], f"y < 100", data[var2] < 100), - (["var1", "var2"], "x <= y", data[var1] <= data[var2]), - (["var1", "var2"], "x == y", data[var1] == data[var2]), - (["var1", "var2"], "x != y", data[var1] != data[var2]), - ] - - for field, test, expected in tests: - func = _compileGeneric(f"flagGeneric(func={test})") - result = _execGeneric(Flags({f: flags[f] for f in field}), data[field], func) - assert (result == expected).all(axis=None) - - -def test_arithmeticOperators(data): - var1, *_ = data.columns - - data = data[var1] - flags = Flags({var1: pd.Series(UNFLAGGED, index=data.index)}) - - tests = [ - ("var1 + 100 > 110", data + 100 > 110), - ("var1 - 100 > 0", data - 100 > 0), - ("var1 * 100 > 200", data * 100 > 200), - ("var1 / 100 > .1", data / 100 > 0.1), - ("var1 % 2 == 1", data % 2 == 1), - ("var1 ** 2 == 0", data**2 == 0), - ] - - for test, expected in tests: - func = _compileGeneric(f"processGeneric(func={test})") - result = _execGeneric(flags, data, func) - assert (result == expected).all(axis=None) + func = _compileGeneric(f"flagGeneric(func={expr})") + result = _execGeneric(Flags({f: flags[f] for f in fields}), data[fields], func) + assert (result == expected).all(axis=None) + + +@pytest.mark.parametrize( + "expr,expected", + [ + ("var1 + 100 > 110", 'data["var1"] + 100 > 110'), + ("var1 - 100 > 0", 'data["var1"] - 100 > 0'), + ("var1 * 100 > 200", 'data["var1"] * 100 > 200'), + ("var1 / 100 > .1", 'data["var1"] / 100 > 0.1'), + ("var1 % 2 == 1", 'data["var1"] % 2 == 1'), + ("var1 ** 2 == 0", 'data["var1"]**2 == 0'), + ], +) +def test_arithmeticOperators(data, expr, expected): + expected = eval(expected) + flags = Flags({"var1": pd.Series(UNFLAGGED, index=data["var1"].index)}) + func = _compileGeneric(f"processGeneric(func={expr})") + result = _execGeneric(flags, data["var1"], func) + assert (result == expected).all(axis=None) def test_nonReduncingBuiltins(data): @@ -151,8 +147,8 @@ def test_variableAssignments(data): dummy2 ; flagGeneric(field=["var1", "var2"], func=x + y > 0) """ - fobj = writeIO(config) - saqc = fromConfig(fobj, data) + cr = _ConfigReader(data) + saqc = cr.readString(config).run() expected_columns = set(data.columns) | {"dummy1", "dummy2"} assert set(saqc.data.columns) == expected_columns @@ -166,8 +162,8 @@ def test_processExistingTarget(data): var2 ; processGeneric(func=y - 1) """ - fobj = writeIO(config) - saqc = fromConfig(fobj, data) + cr = _ConfigReader(data) + saqc = cr.readString(config).run() assert (saqc._data["var2"] == data["var2"] - 1).all() assert len(saqc._flags.history["var2"]) == 2 assert saqc._flags.history["var2"].hist[0].isna().all() @@ -181,8 +177,8 @@ def test_flagTargetExisting(data): dummy ; processGeneric(field="var2", func=y >1) """ - fobj = writeIO(config) - saqc = fromConfig(fobj, data) + cr = _ConfigReader(data) + saqc = cr.readString(config).run() assert len(saqc.data["dummy"]) == len(saqc.flags["dummy"]) @@ -193,9 +189,9 @@ def test_processTargetExistingFail(data_diff): dummy ; processGeneric(field="var2", func=y - 1) """ - fobj = writeIO(config) + cr = _ConfigReader(data_diff).readString(config) with pytest.raises(ValueError): - fromConfig(fobj, data_diff) + cr.run() def test_flagTargetExistingFail(data_diff): @@ -205,12 +201,11 @@ def test_flagTargetExistingFail(data_diff): dummy ; flagGeneric(field="var2", func=y > 1) """ - fobj = writeIO(config) + cr = _ConfigReader(data_diff).readString(config) with pytest.raises(ValueError): - fromConfig(fobj, data_diff) + cr.run() -@pytest.mark.slow def test_callableArgumentsUnary(data): window = 5 @@ -222,6 +217,9 @@ def test_callableArgumentsUnary(data): var = data.columns[0] + # we slice the data, because the test is very slow otherwise + data[var] = data[var].iloc[:100] + config = f""" varname ; test {var} ; testFuncUnary(func={{0}}) @@ -233,8 +231,8 @@ def test_callableArgumentsUnary(data): ] for name, func in tests: - fobj = writeIO(config.format(name)) - result_config = fromConfig(fobj, data).data + cr = _ConfigReader(data).readString(config.format(name)) + result_config = cr.run().data result_api = SaQC(data).testFuncUnary(var, func=func).data expected = data[var].rolling(window=window).apply(func) assert (result_config[var].dropna() == expected.dropna()).all(axis=None) @@ -260,8 +258,8 @@ def test_callableArgumentsBinary(data): ] for name, func in tests: - fobj = writeIO(config.format(name)) - result_config = fromConfig(fobj, data).data + cr = _ConfigReader(data).readString(config.format(name)) + result_config = cr.run().data result_api = SaQC(data).testFuncBinary(var1, func=func).data expected = func(data[var1], data[var2]) assert (result_config[var1].dropna() == expected.dropna()).all(axis=None) -- GitLab