From b08789b9ec22e3caf2d6104f91e6248b9e622db1 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Tue, 15 Aug 2023 17:20:46 +0200
Subject: [PATCH] use string config instead of writeIO magic

---
 docs/documentation/GenericFunctions.rst      |  66 +++------
 tests/common.py                              |   7 -
 tests/core/test_reader.py                    |  54 ++++---
 tests/funcs/test_generic_config_functions.py | 140 +++++++++----------
 4 files changed, 112 insertions(+), 155 deletions(-)

diff --git a/docs/documentation/GenericFunctions.rst b/docs/documentation/GenericFunctions.rst
index 74e7b2e8c..67f008dcc 100644
--- a/docs/documentation/GenericFunctions.rst
+++ b/docs/documentation/GenericFunctions.rst
@@ -51,8 +51,7 @@ dummy dataset, to lead us through the following code snippets:
 
 .. testsetup:: python
 
-   from saqc import fromConfig
-   from tests.common import writeIO
+   from saqc.parsing.reader import _ConfigReader as ConfigReader
 
 .. testcode:: python
               
@@ -116,16 +115,13 @@ Simple constraints
      .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test                    
         ...         #-------;------------------------
         ...         x       ; flagGeneric(func=x < 30)
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc1.flags  #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -177,16 +173,13 @@ Cross variable constraints
      .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test                    
         ...         #-------;------------------------------------
         ...         x       ; flagGeneric(field="y", func=y > 30)
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc2.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -241,16 +234,13 @@ need to be put in parentheses.
      .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test                    
         ...         #-------;--------------------------------------------------------
         ...         x       ; flagGeneric(field=["y", "z"], func=(y > 30) & (z < 50))
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc3.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -293,16 +283,13 @@ Arithmetics
      .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test
         ...         #-------;-------------------------------------------------------
         ...         x       ; flagGeneric(field=["x", "y", "z"], func=x > (y + z)/2)
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc4.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -351,16 +338,13 @@ Special functions
       .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test
         ...         #-------;---------------------------------------------------
         ...         x       ; flagGeneric(field=["x", "z"], func=x > std(z) * 2)
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc5.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -402,17 +386,14 @@ Special functions
       .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test
         ...         #-------;------------------------------------------
         ...         y       ; flagRange(min=10, max=60)
         ...         x       ; flagGeneric(field="y", func=isflagged(y))
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc6.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -481,16 +462,13 @@ Let's consider the following dataset:
       .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test
         ...         #-------;---------------------------------------------------------------
         ...         meas    ; flagGeneric(field=["fan", "volt"], func=(x == 0) | (y < 12.0))
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc7.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -533,8 +511,7 @@ But we could also quality check our independent variables first and than leverag
       .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test
         ...         #-------;--------------------------------------------------------------------------
@@ -543,9 +520,7 @@ But we could also quality check our independent variables first and than leverag
         ...         volt    ; flagGeneric(func=volt < 12.0)
         ...         meas    ; flagGeneric(field=["fan", "volt"], func=isflagged(fan) | isflagged(volt))
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.flags == qc8.flags #doctest:+NORMALIZE_WHITESPACE
         True
 
@@ -634,16 +609,13 @@ variables in a given dataset. We start with dummy data again:
      .. doctest:: python
         :hide:
 
-        >>> tmp = fromConfig(
-        ...     writeIO(
+        >>> tmp = ConfigReader(data).readString(
         ...         """
         ...         varname ; test                    
         ...         #-------;------------------------------------------------------
         ...         mean    ; processGeneric(field=["x", "y", "z"], func=(x+y+z)/2)
         ...         """
-        ...     ),
-        ...     data
-        ... )
+        ... ).run()
         >>> tmp.data == qc1.data #doctest:+NORMALIZE_WHITESPACE
         True
 
diff --git a/tests/common.py b/tests/common.py
index cdf418c23..fd78476eb 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -42,13 +42,6 @@ def dummyHistory(hist: pd.DataFrame = None, meta: list = None):
     return createHistoryFromData(hist, meta, copy=True)
 
 
-def writeIO(content):
-    f = io.StringIO()
-    f.write(content)
-    f.seek(0)
-    return f
-
-
 def checkInvariants(data, flags, field, identical=True):
     """
     Check all invariants that must hold at any point for
diff --git a/tests/core/test_reader.py b/tests/core/test_reader.py
index 9234d0e48..599811d90 100644
--- a/tests/core/test_reader.py
+++ b/tests/core/test_reader.py
@@ -12,8 +12,8 @@ import pytest
 from saqc.core import DictOfSeries, Flags, SaQC, flagging
 from saqc.exceptions import ParsingError
 from saqc.parsing.environ import ENVIRONMENT
-from saqc.parsing.reader import fromConfig, readFile
-from tests.common import initData, writeIO
+from saqc.parsing.reader import _ConfigReader
+from tests.common import initData
 
 
 @pytest.fixture
@@ -41,8 +41,9 @@ def test_variableRegex(data):
     ]
 
     for regex, expected in tests:
-        fobj = writeIO(header + "\n" + f"{regex} ; {function}()")
-        saqc = fromConfig(fobj, data=data)
+        cr = _ConfigReader(data)
+        cr.readString(header + "\n" + f"{regex} ; {function}()")
+        saqc = cr.run()
         result = getTestedVariables(saqc._flags, function)
         assert np.all(result == expected)
 
@@ -50,9 +51,10 @@ def test_variableRegex(data):
         ("var[12]", []),  # not quoted -> not a regex
     ]
     for regex, expected in tests:
-        fobj = writeIO(header + "\n" + f"{regex} ; {function}()")
+        cr = _ConfigReader(data=data)
+        cr.readString(header + "\n" + f"{regex} ; {function}()")
         with pytest.warns(RuntimeWarning):
-            saqc = fromConfig(fobj, data=data)
+            saqc = cr.run()
         result = getTestedVariables(saqc._flags, function)
         assert np.all(result == expected)
 
@@ -67,7 +69,7 @@ def test_inlineComments(data):
     var1    ; flagDummy() # test
     """
 
-    saqc = fromConfig(writeIO(config), data)
+    saqc = _ConfigReader(data).readString(config).run()
     func = saqc._flags.history["var1"].meta[0]["func"]
     assert func == "flagDummy"
 
@@ -84,9 +86,9 @@ def test_configReaderLineNumbers():
 
     SM1         ; flagDummy()
     """
-    planned = readFile(writeIO(config))
+    planned = _ConfigReader().readString(config)
     expected = [4, 5, 6, 10]
-    assert (planned.index == expected).all()
+    assert (planned.config.index == expected).all()
 
 
 @pytest.mark.filterwarnings("ignore::RuntimeWarning")
@@ -105,7 +107,8 @@ def test_configFile(data):
 
     SM1;flagDummy()
     """
-    fromConfig(writeIO(config), data)
+    c = _ConfigReader().readString(config).config
+    assert len(c) == 4
 
 
 @pytest.mark.parametrize(
@@ -124,12 +127,15 @@ def test_configChecks(data, test, expected):
         return data, flags
 
     header = f"varname;test"
-    fobj = writeIO(header + "\n" + test)
+    cr = _ConfigReader(data).readString(header + "\n" + test)
     with pytest.raises(expected):
-        fromConfig(fobj, data=data)
+        cr.run()
 
 
-def test_supportedArguments(data):
+@pytest.mark.parametrize(
+    "kwarg", ["NAN", "'a string'", "5", "5.5", "-5", "True", "sum([1, 2, 3])"]
+)
+def test_supportedArguments(data, kwarg):
     # test if the following function arguments
     # are supported (i.e. parsing does not fail)
 
@@ -141,21 +147,8 @@ def test_supportedArguments(data):
         return saqc
 
     var1 = data.columns[0]
-
-    header = f"varname;test"
-    tests = [
-        f"{var1};func(kwarg=NAN)",
-        f"{var1};func(kwarg='str')",
-        f"{var1};func(kwarg=5)",
-        f"{var1};func(kwarg=5.5)",
-        f"{var1};func(kwarg=-5)",
-        f"{var1};func(kwarg=True)",
-        f"{var1};func(kwarg=sum([1, 2, 3]))",
-    ]
-
-    for test in tests:
-        fobj = writeIO(header + "\n" + test)
-        fromConfig(fobj, data)
+    conf = f"varname;test" + "\n" + f"{var1};func(kwarg={kwarg})"
+    _ConfigReader(data).readString(conf).run()
 
 
 @pytest.mark.parametrize(
@@ -172,5 +165,6 @@ def test_funtionArguments(data, func_string):
     {data.columns[0]} ; testFunction(func={func_string})
     {data.columns[0]} ; testFunction(func="{func_string}")
     """
-
-    fromConfig(writeIO(config), data)
+    cr = _ConfigReader(data)
+    cr.readString(config)
+    cr.run()
diff --git a/tests/funcs/test_generic_config_functions.py b/tests/funcs/test_generic_config_functions.py
index 6b29d9d61..f19342a39 100644
--- a/tests/funcs/test_generic_config_functions.py
+++ b/tests/funcs/test_generic_config_functions.py
@@ -15,9 +15,9 @@ import pytest
 from saqc import BAD, UNFLAGGED, SaQC
 from saqc.core import DictOfSeries, Flags, initFlagsLike, register
 from saqc.funcs.generic import _execGeneric
-from saqc.parsing.reader import fromConfig
+from saqc.parsing.reader import _ConfigReader
 from saqc.parsing.visitor import ConfigFunctionParser
-from tests.common import initData, writeIO
+from tests.common import initData
 
 
 @pytest.fixture
@@ -46,69 +46,65 @@ def _compileGeneric(expr):
     return kwargs["func"]
 
 
-def test_syntaxError():
-    tests = [
+@pytest.mark.parametrize(
+    "expr",
+    [
         "range(x=5",
         "rangex=5)",
         "range[x=5]" "range{x=5}" "int->float(x=4)" "int*float(x=4)",
-    ]
+    ],
+)
+def test_syntaxError(expr):
+    with pytest.raises(SyntaxError):
+        _compileGeneric(f"flag(func={expr})")
 
-    for test in tests:
-        with pytest.raises(SyntaxError):
-            _compileGeneric(f"flag(func={test})")
 
-
-def test_typeError():
+# TODO: think about cases that should be forbidden
+@pytest.mark.parametrize("expr", ["lambda x: x * 2"])
+def test_typeError(expr):
     """
     test that forbidden constructs actually throw an error
     """
-
-    # TODO: think about cases that should be forbidden
-    tests = ("lambda x: x * 2",)
-
-    for test in tests:
-        with pytest.raises(TypeError):
-            _compileGeneric(f"flagGeneric(func={test})")
-
-
-def test_comparisonOperators(data):
-    var1, var2, *_ = data.columns
+    with pytest.raises(TypeError):
+        _compileGeneric(f"flagGeneric(func={expr})")
+
+
+@pytest.mark.parametrize(
+    "fields,expr,expected",
+    [
+        (["var1"], "x > 100", 'data["var1"] > 100'),
+        (["var2"], "10 >= y", '10 >= data["var2"]'),
+        (["var2"], f"y < 100", 'data["var2"] < 100'),
+        (["var1", "var2"], "x <= y", 'data["var1"] <= data["var2"]'),
+        (["var1", "var2"], "x == y", 'data["var1"] == data["var2"]'),
+        (["var1", "var2"], "x != y", 'data["var1"] != data["var2"]'),
+    ],
+)
+def test_comparisonOperators(data, fields, expr, expected):
+    expected = eval(expected)
     flags = initFlagsLike(data)
-
-    tests = [
-        (["var1"], "x > 100", data[var1] > 100),
-        (["var2"], "10 >= y", 10 >= data[var2]),
-        (["var2"], f"y < 100", data[var2] < 100),
-        (["var1", "var2"], "x <= y", data[var1] <= data[var2]),
-        (["var1", "var2"], "x == y", data[var1] == data[var2]),
-        (["var1", "var2"], "x != y", data[var1] != data[var2]),
-    ]
-
-    for field, test, expected in tests:
-        func = _compileGeneric(f"flagGeneric(func={test})")
-        result = _execGeneric(Flags({f: flags[f] for f in field}), data[field], func)
-        assert (result == expected).all(axis=None)
-
-
-def test_arithmeticOperators(data):
-    var1, *_ = data.columns
-
-    data = data[var1]
-    flags = Flags({var1: pd.Series(UNFLAGGED, index=data.index)})
-
-    tests = [
-        ("var1 + 100 > 110", data + 100 > 110),
-        ("var1 - 100 > 0", data - 100 > 0),
-        ("var1 * 100 > 200", data * 100 > 200),
-        ("var1 / 100 > .1", data / 100 > 0.1),
-        ("var1 % 2 == 1", data % 2 == 1),
-        ("var1 ** 2 == 0", data**2 == 0),
-    ]
-
-    for test, expected in tests:
-        func = _compileGeneric(f"processGeneric(func={test})")
-        result = _execGeneric(flags, data, func)
-        assert (result == expected).all(axis=None)
+    func = _compileGeneric(f"flagGeneric(func={expr})")
+    result = _execGeneric(Flags({f: flags[f] for f in fields}), data[fields], func)
+    assert (result == expected).all(axis=None)
+
+
+@pytest.mark.parametrize(
+    "expr,expected",
+    [
+        ("var1 + 100 > 110", 'data["var1"] + 100 > 110'),
+        ("var1 - 100 > 0", 'data["var1"] - 100 > 0'),
+        ("var1 * 100 > 200", 'data["var1"] * 100 > 200'),
+        ("var1 / 100 > .1", 'data["var1"] / 100 > 0.1'),
+        ("var1 % 2 == 1", 'data["var1"] % 2 == 1'),
+        ("var1 ** 2 == 0", 'data["var1"]**2 == 0'),
+    ],
+)
+def test_arithmeticOperators(data, expr, expected):
+    expected = eval(expected)
+    flags = Flags({"var1": pd.Series(UNFLAGGED, index=data["var1"].index)})
+    func = _compileGeneric(f"processGeneric(func={expr})")
+    result = _execGeneric(flags, data["var1"], func)
+    assert (result == expected).all(axis=None)
 
 
 def test_nonReduncingBuiltins(data):
@@ -151,8 +147,8 @@ def test_variableAssignments(data):
     dummy2  ; flagGeneric(field=["var1", "var2"], func=x + y > 0)
     """
 
-    fobj = writeIO(config)
-    saqc = fromConfig(fobj, data)
+    cr = _ConfigReader(data)
+    saqc = cr.readString(config).run()
 
     expected_columns = set(data.columns) | {"dummy1", "dummy2"}
     assert set(saqc.data.columns) == expected_columns
@@ -166,8 +162,8 @@ def test_processExistingTarget(data):
     var2   ; processGeneric(func=y - 1)
     """
 
-    fobj = writeIO(config)
-    saqc = fromConfig(fobj, data)
+    cr = _ConfigReader(data)
+    saqc = cr.readString(config).run()
     assert (saqc._data["var2"] == data["var2"] - 1).all()
     assert len(saqc._flags.history["var2"]) == 2
     assert saqc._flags.history["var2"].hist[0].isna().all()
@@ -181,8 +177,8 @@ def test_flagTargetExisting(data):
     dummy   ; processGeneric(field="var2", func=y >1)
     """
 
-    fobj = writeIO(config)
-    saqc = fromConfig(fobj, data)
+    cr = _ConfigReader(data)
+    saqc = cr.readString(config).run()
     assert len(saqc.data["dummy"]) == len(saqc.flags["dummy"])
 
 
@@ -193,9 +189,9 @@ def test_processTargetExistingFail(data_diff):
     dummy   ; processGeneric(field="var2", func=y - 1)
     """
 
-    fobj = writeIO(config)
+    cr = _ConfigReader(data_diff).readString(config)
     with pytest.raises(ValueError):
-        fromConfig(fobj, data_diff)
+        cr.run()
 
 
 def test_flagTargetExistingFail(data_diff):
@@ -205,12 +201,11 @@ def test_flagTargetExistingFail(data_diff):
     dummy   ; flagGeneric(field="var2", func=y > 1)
     """
 
-    fobj = writeIO(config)
+    cr = _ConfigReader(data_diff).readString(config)
     with pytest.raises(ValueError):
-        fromConfig(fobj, data_diff)
+        cr.run()
 
 
-@pytest.mark.slow
 def test_callableArgumentsUnary(data):
     window = 5
 
@@ -222,6 +217,9 @@ def test_callableArgumentsUnary(data):
 
     var = data.columns[0]
 
+    # keep only the first 100 values of `var`, because the test is very slow otherwise
+    data[var] = data[var].iloc[:100]
+
     config = f"""
     varname ; test
     {var}   ; testFuncUnary(func={{0}})
@@ -233,8 +231,8 @@ def test_callableArgumentsUnary(data):
     ]
 
     for name, func in tests:
-        fobj = writeIO(config.format(name))
-        result_config = fromConfig(fobj, data).data
+        cr = _ConfigReader(data).readString(config.format(name))
+        result_config = cr.run().data
         result_api = SaQC(data).testFuncUnary(var, func=func).data
         expected = data[var].rolling(window=window).apply(func)
         assert (result_config[var].dropna() == expected.dropna()).all(axis=None)
@@ -260,8 +258,8 @@ def test_callableArgumentsBinary(data):
     ]
 
     for name, func in tests:
-        fobj = writeIO(config.format(name))
-        result_config = fromConfig(fobj, data).data
+        cr = _ConfigReader(data).readString(config.format(name))
+        result_config = cr.run().data
         result_api = SaQC(data).testFuncBinary(var1, func=func).data
         expected = func(data[var1], data[var2])
         assert (result_config[var1].dropna() == expected.dropna()).all(axis=None)
-- 
GitLab