Newer
Older
from saqc.funcs import register, flagRange
from test.common import initData, initMetaDict, initMetaString, TESTFLAGGER
@pytest.fixture
def data():
return initData(3)
@pytest.fixture
def data():
return initData(3)
@register("flagAll")
def flagAll(data, flags, field, flagger, **kwargs):
# NOTE: remember to rename flag -> flag_values
return data, flagger.setFlags(flags, field, flag=flagger.BAD)
def test_temporalPartitioning(data, flagger):
"""
Check if the time span in meta is respected
"""
var1, var2, var3, *_ = data.columns
split_date = data.index[len(data.index)//2]
David Schäfer
committed
{F.VARNAME: var1, F.TESTS: "flagAll()"},
# {F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_date},
# {F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_date},
meta_file, meta_frame = initMetaDict(metadict, data)
pdata, pflags = runner(meta_file, flagger, data)
fchunk = pflags.loc[flagger.isFlagged(pflags[vname]), vname]
assert fchunk.index.min() == start_date, "different start dates"
assert fchunk.index.max() == end_date, "different end dates"
def test_positionalPartitioning(data, flagger):
data = data.reset_index(drop=True)
var1, var2, var3, *_ = data.columns
split_index = int(len(data.index)//2)
metadict = [
David Schäfer
committed
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_index},
{F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_index},
]
meta_file, meta_frame = initMetaDict(metadict, data)
pdata, pflags = runner(meta_file, flagger, data)
fields = [F.VARNAME, F.START, F.END]
for _, row in meta_frame.iterrows():
vname, start_index, end_index = row[fields]
fchunk = pflags.loc[flagger.isFlagged(pflags[vname]), vname]
assert fchunk.index.min() == start_index, "different start indices"
assert fchunk.index.max() == end_index, f"different end indices: {fchunk.index.max()} vs. {end_index}"
"""
Test if variables available in the dataset but not the config
are handled correctly, i.e. are ignored
"""
var1, var2, *_ = data.columns
David Schäfer
committed
metadict = [{F.VARNAME: var1, F.TESTS: "flagAll()"}]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data)
assert var1 in pdata and var2 not in pflags
def test_missingVariable(flagger):
"""
Test if variables available in the config but not dataset
are handled correctly, i.e. are ignored
"""
data = initData(1)
var, *_ = data.columns
David Schäfer
committed
{F.VARNAME: var, F.TESTS: "flagAll()"},
{F.VARNAME: "empty", F.TESTS: "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data)
def test_assignVariable(flagger):
"""
Test the assign keyword, a variable present in the configuration, but not
dataset will be added to output flags
"""
data = initData(1)
var1, *_ = data.columns
var2 = "empty"
David Schäfer
committed
{F.VARNAME: var1, F.ASSIGN: False, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.ASSIGN: True, F.TESTS: "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data)
if isinstance(pflags.columns, pd.MultiIndex):
cols = (pflags
.columns.get_level_values(0)
.drop_duplicates())
assert (cols == [var1, var2]).all()
else:
assert (pflags.columns == [var1, var2]).all()
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_dtypes(data, flagger):
"""
Test if the categorical dtype is preserved through the core functionality
"""
flags = flagger.initFlags(data)
David Schäfer
committed
var1, var2, *_ = data.columns
David Schäfer
committed
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, "test": "flagAll()"},
]
metafobj, meta = initMetaDict(metadict, data)
pdata, pflags = runner(metafobj, flagger, data, flags)
assert dict(flags.dtypes) == dict(pflags.dtypes)
David Schäfer
committed
def test_plotting(data, flagger):
"""
Test if the plotting code runs, does not show any plot.
NOTE:
This test is ignored if matplotlib is not available on the test-system
"""
pytest.importorskip("matplotlib", reason="requires matplotlib")
David Schäfer
committed
field, *_ = data.columns
flags = flagger.initFlags(data)
_, flagged = flagRange(data, flags, field, flagger, min=10, max=90, flag=flagger.BAD)
_, flagged = flagRange(data, flagged, field, flagger, min=40, max=60, flag=flagger.GOOD)
mask = flagger.getFlags(flags[field]) != flagger.getFlags(flagged[field])
plot(data, flagged, mask, field, flagger, interactive_backend=False)
David Schäfer
committed
David Schäfer
committed
var1, var2, var3, *_ = data.columns
date = data.index[len(data.index)//2]
tests = [
{F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.PLOT: False},
{F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()", F.ASSIGN: True},
{F.VARNAME: var3, F.TESTS: "flagAll()", },
]
defaults = {
F.START: data.index.min(), F.END: data.index.max(),
F.ASSIGN: False, F.PLOT: False, F.LINENUMBER: 1
David Schäfer
committed
}
David Schäfer
committed
_, meta_frame = initMetaDict([test], data)
result = dict(zip(meta_frame.columns, meta_frame.iloc[0]))
expected = {**defaults, **test}
assert result == expected
def test_configPreparationExcpetion(data):
David Schäfer
committed
var1, var2, var3, *_ = data.columns
date = data.index[len(data.index)//2]
tests = [
{},
# {F.TESTS: "flagAll()"},
# {F.VARNAME: var2},
# {F.VARNAME: var3, F.END: date, F.ASSIGN: True},
David Schäfer
committed
]
for test in tests:
with pytest.raises(TypeError):
initMetaDict([test], data)
def test_configReaderLineNumbers(data):
config = f"""
{F.VARNAME}|{F.TESTS}
#temp1|flagAll()
temp1|flagAll()
temp2|flagAll()
pre1|flagAll()
pre2|flagAll()
SM|flagAll()
#SM|flagAll()
SM1|flagAll()
"""
meta_fname, meta_frame = initMetaString(config, data)
result = meta_frame[F.LINENUMBER].tolist()
expected = [2, 3, 4, 5, 6, 8]
assert result == expected