Newer
Older
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import numpy as np
from saqc.core.evaluator import evalExpression
from saqc.funcs.functions import flagRange, flagSesonalRange, forceFlags, clearFlags, flagIsolated
from saqc.flagger.dmpflagger import DmpFlagger
from test.common import initData, TESTFLAGGER, initMetaDict
return initData(
cols=1,
start_date="2016-01-01", end_date="2018-12-31",
freq="1D")
@pytest.fixture
def field(data):
return data.columns[0]
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_flagAfter(data, field, flagger):
flags = flagger.initFlags(data)
min = data.iloc[int(len(data)*.3), 0]
max = data.iloc[int(len(data)*.6), 0]
_, range_flags = flagRange(data, flags, field, flagger, min, max)
tests = [
(f"flagWindowAfterFlag(window='3D', func=range(min={min}, max={max}))", "3D"),
(f"flagNextAfterFlag(n=4, func=range(min={min}, max={max}))", 4),
]
for expr, window in tests:
_, repeated_flags = evalExpression(expr, data, flags, field, flagger)
flagged = repeated_flags[flagger.isFlagged(flags)].dropna()
flag_groups = (flagged
.rolling(window=window)
.apply(lambda df: flagger.isFlagged(flags).all(), raw=False))
assert np.all(flag_groups)
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_range(data, field, flagger):
min, max = 10, 90
data, flags = flagRange(data, flags, field, flagger, min=min, max=max)
flagged = flagger.isFlagged(flags, field)
expected = (data[field] < min) | (data[field] >= max)
assert np.all(flagged == expected)
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_flagSesonalRange(data, field, flagger):
data.loc[::2] = 0
data.loc[1::2] = 50
nyears = len(data.index.year.unique())
tests = [
({"min": 1, "max": 100, "startmonth": 7, "startday": 1, "endmonth": 8, "endday": 31},
31*2*nyears//2),
({"min": 1, "max": 100, "startmonth": 12, "startday": 16, "endmonth": 1, "endday": 15},
31*nyears//2 + 1)
]
for test, expected in tests:
flags = flagger.initFlags(data)
data, flags = flagSesonalRange(data, flags, field, flagger, **test)
flagged = flagger.isFlagged(flags, field)
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_clearFlags(data, field, flagger):
orig = flagger.initFlags(data)
flags = flagger.setFlags(orig, field, flag=flagger.BAD)
_, cleared = clearFlags(data, flags, field, flagger)
assert np.all(orig != flags)
assert np.all(orig == cleared)
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_forceFlags(data, flagger):
field, *_ = data.columns
flags = flagger.setFlags(flagger.initFlags(data), field)
orig = flags.copy()
_, forced = forceFlags(data, flags, field, flagger, flag=flagger.GOOD)
assert np.all(flagger.getFlags(orig) != flagger.getFlags(forced))
@pytest.mark.parametrize('flagger', TESTFLAGGER)
def test_flagIsolated(data, flagger):
field = data.columns[0]
data.iloc[1:3, 0] = np.nan
data.iloc[4:5, 0] = np.nan
data.iloc[11:13, 0] = np.nan
data.iloc[15:17, 0] = np.nan
flags = flagger.initFlags(data)
flags = flagger.setFlags(flags, field, iloc=slice(5, 6))
data, flags = flagIsolated(data, flags, field, flagger, '2.1D', drop_flags='BAD')
assert flagger.isFlagged(flags,field)[slice(3, 6, 2)].all()
flags = flagger.setFlags(flags, field, iloc=slice(3,4), flag=flagger.UNFLAGGED, force=True)
data, flags = flagIsolated(data, flags, field, flagger, '2.1D', max_isolated_group_size=2,
continuation_range='1.1D', drop_flags='BAD')
assert flagger.isFlagged(flags, field)[[3, 5, 13, 14]].all()