From c1bf2e1b2ae72e6ae82bbbba780b48fa310a2ff3 Mon Sep 17 00:00:00 2001 From: David Schaefer <david.schaefer@ufz.de> Date: Thu, 12 Dec 2019 10:41:15 +0100 Subject: [PATCH] removed the need for explicit assignments --- saqc/core/config.py | 1 - saqc/core/core.py | 11 ++++++----- saqc/core/reader.py | 9 +++++---- test/core/test_core.py | 28 +++++----------------------- test/core/test_reader.py | 8 +++----- 5 files changed, 19 insertions(+), 38 deletions(-) diff --git a/saqc/core/config.py b/saqc/core/config.py index 4009618cb..9dbed999d 100644 --- a/saqc/core/config.py +++ b/saqc/core/config.py @@ -6,7 +6,6 @@ class Fields: VARNAME = "varname" START = "start_date" END = "end_date" - ASSIGN = "assign" TESTS = "test*" PLOT = "plot" LINENUMBER = "line" diff --git a/saqc/core/core.py b/saqc/core/core.py index 08a3b48fb..605ed7c29 100644 --- a/saqc/core/core.py +++ b/saqc/core/core.py @@ -20,11 +20,11 @@ def _collectVariables(meta, data): variables = [] for idx, configrow in meta.iterrows(): varname = configrow[Fields.VARNAME] - assign = configrow[Fields.ASSIGN] + # assign = configrow[Fields.ASSIGN] if varname in variables: continue - if (varname in data) or (varname not in variables and assign is True): - variables.append(varname) + # if (varname in data): # or (varname not in variables and assign is True): + variables.append(varname) return variables @@ -84,6 +84,7 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy=" # user-test needs fully prepared flags checkConfig(config, data, flagger, nodata) + # NOTE: # the outer loop runs over the flag tests, the inner one over the # variables. Switching the loop order would complicate the @@ -116,7 +117,7 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy=" try: # actually run the tests - dchunk, flagger_chunk_result = evalExpression( + dchunk_result, flagger_chunk_result = evalExpression( flag_test, data=dchunk, field=varname, @@ -131,7 +132,7 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy=" flagger = flagger.setFlagger(flagger_chunk_result) plotHook( - dchunk, + dchunk_result, flagger_chunk, flagger_chunk_result, varname, diff --git a/saqc/core/reader.py b/saqc/core/reader.py index 97f50969b..8f2f81a91 100644 --- a/saqc/core/reader.py +++ b/saqc/core/reader.py @@ -34,10 +34,12 @@ def checkConfig(config_df, data, flagger, nodata): ) var_name = config_row[F.VARNAME] - if var_name not in data.columns and not config_row[F.ASSIGN]: - _raise(config_row, NameError, f"unknown variable '{var_name}'") + if not var_name: + _raise(config_row, SyntaxError, f"field '{F.VARNAME}' may not be empty") for col, expr in test_fields.iteritems(): + if not expr: + _raise(config_row, SyntaxError, f"field '{col}' may not be empty") try: compileExpression(expr, data, flagger, nodata) except (TypeError, NameError, SyntaxError) as exc: @@ -66,7 +68,7 @@ def prepareConfig(config_df, data): raise SyntaxWarning("config file is empty or all lines are #commented") # fill missing header fields - for field in [F.VARNAME, F.START, F.END, F.ASSIGN, F.PLOT]: + for field in [F.VARNAME, F.START, F.END, F.PLOT]: if field not in config_df: config_df = config_df.assign(**{field: np.nan}) @@ -76,7 +78,6 @@ def prepareConfig(config_df, data): F.VARNAME: np.nan, F.START: data.index.min(), F.END: data.index.max(), - F.ASSIGN: False, F.PLOT: False, } ) diff --git a/test/core/test_core.py b/test/core/test_core.py index a7d34c8b6..a792ef867 100644 --- a/test/core/test_core.py +++ b/test/core/test_core.py @@ -113,23 +113,6 @@ def test_missingConfig(data, flagger, flags): assert var1 in pdata and var2 not in pflagger.getFlags() -@pytest.mark.parametrize("flagger", TESTFLAGGER) -def test_missingVariable(data, flagger): - """ - Test if variables available in the config but not dataset - are handled correctly, i.e. are ignored - """ - var, *_ = data.columns - - metadict = [ - {F.VARNAME: var, F.TESTS: "flagAll()"}, - {F.VARNAME: "empty", F.TESTS: "flagAll()"}, - ] - metafobj, meta = initMetaDict(metadict, data) - with pytest.raises(NameError): - runner(metafobj, flagger, data) - - @pytest.mark.parametrize("flagger", TESTFLAGGER) def test_errorHandling(data, flagger): @@ -160,8 +143,8 @@ def test_duplicatedVariable(flagger): var1, *_ = data.columns metadict = [ - {F.VARNAME: var1, F.ASSIGN: False, F.TESTS: "flagAll()"}, - {F.VARNAME: var1, F.ASSIGN: True, F.TESTS: "flagAll()"}, + {F.VARNAME: var1, F.TESTS: "flagAll()"}, + {F.VARNAME: var1, F.TESTS: "flagAll()"}, ] metafobj, meta = initMetaDict(metadict, data) @@ -178,16 +161,15 @@ def test_duplicatedVariable(flagger): @pytest.mark.parametrize("flagger", TESTFLAGGER) def test_assignVariable(flagger): """ - Test the assign keyword, a variable present in the configuration, but not - dataset will be added to output flags + test implicit assignments """ data = initData(1) var1, *_ = data.columns var2 = "empty" metadict = [ - {F.VARNAME: var1, F.ASSIGN: False, F.TESTS: "flagAll()"}, - {F.VARNAME: var2, F.ASSIGN: True, F.TESTS: "flagAll()"}, + {F.VARNAME: var1, F.TESTS: "flagAll()"}, + {F.VARNAME: var2, F.TESTS: "flagAll()"}, ] metafobj, meta = initMetaDict(metadict, data) diff --git a/test/core/test_reader.py b/test/core/test_reader.py index f7121fc6f..76266b381 100644 --- a/test/core/test_reader.py +++ b/test/core/test_reader.py @@ -20,14 +20,13 @@ def test_configPreparation(data): tests = [ {F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True}, {F.VARNAME: var2, F.TESTS: "flagAll()", F.PLOT: False}, - {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()", F.ASSIGN: True}, + {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"}, {F.VARNAME: var3, F.TESTS: "flagAll()",}, ] defaults = { F.START: data.index.min(), F.END: data.index.max(), - F.ASSIGN: False, F.PLOT: False, F.LINENUMBER: 2, } @@ -65,10 +64,9 @@ def test_configChecks(data, flagger, nodata, caplog): tests = [ ({F.VARNAME: var1, F.TESTS: "range(mn=0)"}, TypeError), - ({F.VARNAME: "temp2", F.TESTS: "range(min=3)"}, NameError), ({F.VARNAME: var3, F.TESTS: "flagNothing()"}, NameError), - ({F.VARNAME: "", F.TESTS: "range(min=3)"}, NameError), - ({F.VARNAME: "", F.TESTS: ""}, NameError), + ({F.VARNAME: "", F.TESTS: "range(min=3)"}, SyntaxError), + ({F.VARNAME: var1, F.TESTS: ""}, SyntaxError), ({F.VARNAME: ""}, SyntaxError), ({F.TESTS: "range(min=3)"}, SyntaxError), ] -- GitLab