Skip to content
Snippets Groups Projects
test_flagger.py 4.83 KiB
Newer Older
Bert Palm's avatar
Bert Palm committed
#!/usr/bin/env python

__author__ = "Bert Palm"
__email__ = "bert.palm@ufz.de"
__copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"

import pytest
import numpy as np
import pandas as pd

from saqc.flagger.baseflagger import BaseFlagger
from saqc.flagger.dmpflagger import DmpFlagger
from saqc.flagger.simpleflagger import SimpleFlagger

from pandas.core.indexing import IndexingError

from saqc.funcs.functions import flagRange, flagSesonalRange, forceFlags, clearFlags

TESTFLAGGERS = [
    BaseFlagger(['NIL', 'GOOD', 'BAD']),
    DmpFlagger(),
    SimpleFlagger()]


@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_initFlags(flagger):
    field = 'testdata'
    index = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    data = pd.DataFrame(data={field: np.linspace(0, index.size - 1, index.size)}, index=index)
    flags = flagger.initFlags(data)

    assert len(flags) == 100
    assert isinstance(flags, pd.DataFrame)


@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_getsetFlags(flagger):
    field = 'testdata'
    index = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    data = pd.DataFrame(data={field: np.linspace(0, index.size - 1, index.size)}, index=index)
    flags = flagger.initFlags(data)

    flags = flagger.setFlags(flags, field, flag=flagger.GOOD)
    flagged = flagger.getFlags(flags)[field]
    assert isinstance(flagged.dtype, pd.CategoricalDtype)
    assert (flagged == flagger.GOOD).all()

    flags = flagger.setFlags(flags, field, flag=flagger.BAD)
    flagged = flagger.getFlags(flags)[field]
    assert (flagged == flagger.BAD).all()

    flags = flagger.setFlags(flags, field, flag=flagger.GOOD)
    flagged = flagger.getFlags(flags)[field]
    assert (flagged == flagger.BAD).all()


@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_setFlags_isFlagged(flagger, **kwargs):
    field = 'testdata'
    index = pd.date_range(start='2011-01-01', end='2011-01-02', periods=100)
    data = pd.DataFrame(data={field: np.linspace(0, index.size - 1, index.size)}, index=index)
    flags = flagger.initFlags(data)

    d = data[field]
    mask = d < (d.max() - d.min()) // 2
    assert len(mask) == len(flags.index)

    f = flagger.setFlags(flags, field, loc=mask.values, flag=flagger.BAD)
    # test isFlagged
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # test setFlag with mask
    flagged = flagger.getFlags(f[field])
    isflagged = flagged == flagger.BAD
    assert (isflagged == mask).all()

    # ok we can use isFlagged now :D

    # test with mask and iloc
    f = flagger.setFlags(flags, field, iloc=mask.values, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    try:
        m = mask[mask]
        m.iloc[0:10] = False
        m = m[m]
        f = flagger.setFlags(flags, field, loc=m, flag=flagger.BAD)
    except IndexingError:
        pass
    else:
        raise AssertionError

    # test setFlags with loc and index
    idx = mask[mask].index
    assert len(idx) < len(flags.index)
    f = flagger.setFlags(flags, field, loc=idx, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # test setFlags with iloc and index
    idx = mask[mask].reset_index(drop=True).index
    assert len(idx) < len(flags.index)
    f = flagger.setFlags(flags, field, iloc=idx, flag=flagger.BAD)
    isflagged = flagger.isFlagged(f[field])
    assert (isflagged == mask).all()

    # test passing a series of flags as flag-arg
    every = 5
    flagseries = pd.Series(data=flagger.GOOD, index=flags.index)
    flagseries.iloc[::every] = flagger.BAD
    flagseries = flagseries.astype(flagger.flags)
    idx = mask[mask].index
    assert len(flags) == len(flagseries)
    assert len(flags) != len(idx)
    f = flagger.setFlags(flags, field, loc=idx, flag=flagseries)
    bads = flagger.isFlagged(f[field], flag=flagger.BAD, comparator='==')
    bads = bads[bads]
    valid = mask[mask].iloc[::every]
    assert len(valid) == len(bads) and (valid == bads).all()

    # test passing a series of flags as flag-arg and force
    f = flagger.setFlags(flags, field, flag=flagger.BAD)
    every = 5
    flagseries = pd.Series(data=flagger.GOOD, index=flags.index)
    flagseries.iloc[::every] = flagger.UNFLAGGED
    flagseries = flagseries.astype(flagger.flags)
    idx = mask[mask].index
    assert len(flags) == len(flagseries)
    assert len(flags) != len(idx)
    f = flagger.setFlags(f, field, loc=idx, flag=flagseries, force=True)
    unflagged = flagger.isFlagged(f[field], flag=flagger.UNFLAGGED, comparator='==')
    unflagged = unflagged[unflagged]
    valid = mask[mask].iloc[::every]
    assert len(valid) == len(unflagged) and (valid == unflagged).all()




if __name__ == '__main__':
    flagger = TESTFLAGGERS[0]
    test_setFlags_isFlagged(flagger)
    print('done')