Removed time slicing support, as the reintegration of harmonized data is not solved yet

removed time slicing support as the reintegration of harmonized
data is not solved yet
9ebedc96 · David Schäfer · 465b4bc8 · 9ebedc96 · 9ebedc96 · 9ebedc96
Commit 9ebedc96 authored 5 years ago by David Schäfer
--- a/saqc/core/core.py
+++ b/saqc/core/core.py
@@ -9,6 +9,7 @@ from saqc.core.reader import readConfig, prepareConfig, checkConfig
 from saqc.core.config import Fields
 from saqc.core.evaluator import evalExpression
 from saqc.lib.plotting import plotHook, plotAllHook
+from saqc.lib.tools import combineDataFrames
 from saqc.flagger import BaseFlagger, CategoricalFlagger, SimpleFlagger, DmpFlagger


@@ -109,17 +110,20 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
            if varname not in data and varname not in flagger.getFlags():
                continue

+            # NOTE:
+            # time slicing support is currently disabled
            # prepare the data for the tests
-            dchunk = data.loc[start_date:end_date]
-            if dchunk.empty:
+            # data_chunk = data.loc[start_date:end_date]
+            data_chunk = data
+            if data_chunk.empty:
                continue
-            flagger_chunk = flagger.getFlagger(loc=dchunk.index)
+            flagger_chunk = flagger.getFlagger(loc=data_chunk.index)

            try:
                # actually run the tests
-                dchunk_result, flagger_chunk_result = evalExpression(
+                data_chunk_result, flagger_chunk_result = evalExpression(
                    func,
-                    data=dchunk,
+                    data=data_chunk,
                    field=varname,
                    flagger=flagger_chunk,
                    nodata=nodata,
@@ -129,10 +133,8 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
                    raise e
                continue

-            flagger = flagger.setFlagger(flagger_chunk_result)
-
            plotHook(
-                dchunk_result,
+                data_chunk_result,
                flagger_chunk,
                flagger_chunk_result,
                varname,
@@ -140,6 +142,14 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
                func,
            )

+            # NOTE:
+            # time slicing support is currently disabled
+            # flagger = flagger.setFlagger(flagger_chunk_result)
+            # data = combineDataFrames(data, data_chunk_result)
+            flagger = flagger_chunk_result
+            data = data_chunk_result
+
+
    plotAllHook(data, flagger)

    return data, flagger

--- a/saqc/core/reader.py
+++ b/saqc/core/reader.py
@@ -19,10 +19,11 @@ def _raise(config_row, exc, msg, field=None):

 def checkConfig(config_df, data, flagger, nodata):
    for _, config_row in config_df.iterrows():
-        if pd.isnull(config_row[F.VARNAME]):
-            # NOTE: better messages needed
+
+        var_name = config_row[F.VARNAME]
+        if pd.isnull(config_row[F.VARNAME]) or not var_name:
            _raise(
-                config_row, SyntaxError, f"non-optional column '{F.VARNAME}' is missing"
+                config_row, SyntaxError, f"non-optional column '{F.VARNAME}' is missing or empty"
            )

        test_fields = config_row.filter(regex=F.TESTS).dropna()
@@ -33,10 +34,6 @@ def checkConfig(config_df, data, flagger, nodata):
                f"at least one test needs to be given for variable",
            )

-        var_name = config_row[F.VARNAME]
-        if not var_name:
-            _raise(config_row, SyntaxError, f"field '{F.VARNAME}' may not be empty")
-
        for col, expr in test_fields.iteritems():
            if not expr:
                _raise(config_row, SyntaxError, f"field '{col}' may not be empty")
@@ -67,11 +64,17 @@ def prepareConfig(config_df, data):
    if config_df.empty:
        raise SyntaxWarning("config file is empty or all lines are #commented")

-    # fill missing header fields
-    for field in [F.VARNAME, F.START, F.END, F.PLOT]:
+    # NOTE:
+    # time slicing support is currently disabled
+    # fill missing columns
+    # for field in [F.VARNAME, F.START, F.END, F.PLOT]:
+    for field in [F.VARNAME, F.PLOT]:
        if field not in config_df:
            config_df = config_df.assign(**{field: np.nan})

+    for field in [F.START, F.END]:
+        config_df = config_df.assign(**{field: np.nan})
+
    # fill nans with default values
    config_df = config_df.fillna(
        {
@@ -84,8 +87,8 @@ def prepareConfig(config_df, data):

    dtype = np.datetime64 if isinstance(data.index, pd.DatetimeIndex) else int

-    config_df[F.START] = config_df[F.START].astype(dtype)
-    config_df[F.END] = config_df[F.END].astype(dtype)
+    # config_df[F.START] = config_df[F.START].astype(dtype)
+    # config_df[F.END] = config_df[F.END].astype(dtype)

    return config_df


--- a/saqc/lib/tools.py
+++ b/saqc/lib/tools.py
@@ -108,8 +108,6 @@ def combineDataFrames(left, right, fill_value=np.nan):
    return combined


-
-
 def retrieveTrustworthyOriginal(data, field, flagger=None, level=None):
    """Columns of data passed to the saqc runner may not be sampled to its original sampling rate - thus
    differentiating between missing-value NaNs and fill-value NaNs is impossible.

--- a/test/core/test_core.py
+++ b/test/core/test_core.py
@@ -43,6 +43,7 @@ def flags(flagger, data, optional):
 #       within the used fixtures, that is why we need the optional
 #       parametrization without actually using it in the
 #       function
+@pytest.mark.skip(reason="test slicing support is currently disabled")
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("optional", OPTIONAL)
 def test_temporalPartitioning(data, flagger, flags):
@@ -68,6 +69,7 @@ def test_temporalPartitioning(data, flagger, flags):
        assert fchunk.index.max() == end_date, "different end dates"


+@pytest.mark.skip(reason="test slicing support is currently disabled")
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("optional", OPTIONAL)
 def test_positionalPartitioning(data, flagger, flags):

--- a/test/core/test_reader.py
+++ b/test/core/test_reader.py
@@ -17,10 +17,12 @@ def test_configPreparation(data):
    var1, var2, var3, *_ = data.columns
    date = data.index[len(data.index) // 2]

+    # NOTE:
+    # time slicing support is currently disabled
    tests = [
-        {F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
+        # {F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
        {F.VARNAME: var2, F.TESTS: "flagAll()", F.PLOT: False},
-        {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"},
+        # {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"},
        {F.VARNAME: var3, F.TESTS: "flagAll()",},
    ]