Skip to content
Snippets Groups Projects
Commit 9ebedc96 authored by David Schäfer's avatar David Schäfer
Browse files

Removed time slicing support, as the reintegration of harmonized data is not solved yet.
parent 465b4bc8
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ from saqc.core.reader import readConfig, prepareConfig, checkConfig
from saqc.core.config import Fields
from saqc.core.evaluator import evalExpression
from saqc.lib.plotting import plotHook, plotAllHook
from saqc.lib.tools import combineDataFrames
from saqc.flagger import BaseFlagger, CategoricalFlagger, SimpleFlagger, DmpFlagger
......@@ -109,17 +110,20 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
if varname not in data and varname not in flagger.getFlags():
continue
# NOTE:
# time slicing support is currently disabled
# prepare the data for the tests
dchunk = data.loc[start_date:end_date]
if dchunk.empty:
# data_chunk = data.loc[start_date:end_date]
data_chunk = data
if data_chunk.empty:
continue
flagger_chunk = flagger.getFlagger(loc=dchunk.index)
flagger_chunk = flagger.getFlagger(loc=data_chunk.index)
try:
# actually run the tests
dchunk_result, flagger_chunk_result = evalExpression(
data_chunk_result, flagger_chunk_result = evalExpression(
func,
data=dchunk,
data=data_chunk,
field=varname,
flagger=flagger_chunk,
nodata=nodata,
......@@ -129,10 +133,8 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
raise e
continue
flagger = flagger.setFlagger(flagger_chunk_result)
plotHook(
dchunk_result,
data_chunk_result,
flagger_chunk,
flagger_chunk_result,
varname,
......@@ -140,6 +142,14 @@ def runner(config_file, flagger, data, flags=None, nodata=np.nan, error_policy="
func,
)
# NOTE:
# time slicing support is currently disabled
# flagger = flagger.setFlagger(flagger_chunk_result)
# data = combineDataFrames(data, data_chunk_result)
flagger = flagger_chunk_result
data = data_chunk_result
plotAllHook(data, flagger)
return data, flagger
......
......@@ -19,10 +19,11 @@ def _raise(config_row, exc, msg, field=None):
def checkConfig(config_df, data, flagger, nodata):
for _, config_row in config_df.iterrows():
if pd.isnull(config_row[F.VARNAME]):
# NOTE: better messages needed
var_name = config_row[F.VARNAME]
if pd.isnull(config_row[F.VARNAME]) or not var_name:
_raise(
config_row, SyntaxError, f"non-optional column '{F.VARNAME}' is missing"
config_row, SyntaxError, f"non-optional column '{F.VARNAME}' is missing or empty"
)
test_fields = config_row.filter(regex=F.TESTS).dropna()
......@@ -33,10 +34,6 @@ def checkConfig(config_df, data, flagger, nodata):
f"at least one test needs to be given for variable",
)
var_name = config_row[F.VARNAME]
if not var_name:
_raise(config_row, SyntaxError, f"field '{F.VARNAME}' may not be empty")
for col, expr in test_fields.iteritems():
if not expr:
_raise(config_row, SyntaxError, f"field '{col}' may not be empty")
......@@ -67,11 +64,17 @@ def prepareConfig(config_df, data):
if config_df.empty:
raise SyntaxWarning("config file is empty or all lines are #commented")
# fill missing header fields
for field in [F.VARNAME, F.START, F.END, F.PLOT]:
# NOTE:
# time slicing support is currently disabled
# fill missing columns
# for field in [F.VARNAME, F.START, F.END, F.PLOT]:
for field in [F.VARNAME, F.PLOT]:
if field not in config_df:
config_df = config_df.assign(**{field: np.nan})
for field in [F.START, F.END]:
config_df = config_df.assign(**{field: np.nan})
# fill nans with default values
config_df = config_df.fillna(
{
......@@ -84,8 +87,8 @@ def prepareConfig(config_df, data):
dtype = np.datetime64 if isinstance(data.index, pd.DatetimeIndex) else int
config_df[F.START] = config_df[F.START].astype(dtype)
config_df[F.END] = config_df[F.END].astype(dtype)
# config_df[F.START] = config_df[F.START].astype(dtype)
# config_df[F.END] = config_df[F.END].astype(dtype)
return config_df
......
......@@ -108,8 +108,6 @@ def combineDataFrames(left, right, fill_value=np.nan):
return combined
def retrieveTrustworthyOriginal(data, field, flagger=None, level=None):
"""Columns of data passed to the saqc runner may not be sampled to its original sampling rate - thus
differentiating between missing-value nans and fill-value nans is impossible.
......
......@@ -43,6 +43,7 @@ def flags(flagger, data, optional):
# within the used fixtures, that is why we need the optional
# parametrization without actually using it in the
# function
@pytest.mark.skip(reason="test slicing support is currently disabled")
@pytest.mark.parametrize("flagger", TESTFLAGGER)
@pytest.mark.parametrize("optional", OPTIONAL)
def test_temporalPartitioning(data, flagger, flags):
......@@ -68,6 +69,7 @@ def test_temporalPartitioning(data, flagger, flags):
assert fchunk.index.max() == end_date, "different end dates"
@pytest.mark.skip(reason="test slicing support is currently disabled")
@pytest.mark.parametrize("flagger", TESTFLAGGER)
@pytest.mark.parametrize("optional", OPTIONAL)
def test_positionalPartitioning(data, flagger, flags):
......
......@@ -17,10 +17,12 @@ def test_configPreparation(data):
var1, var2, var3, *_ = data.columns
date = data.index[len(data.index) // 2]
# NOTE:
# time slicing support is currently disabled
tests = [
{F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
# {F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.PLOT: False},
{F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"},
# {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"},
{F.VARNAME: var3, F.TESTS: "flagAll()",},
]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment