Skip to content
Snippets Groups Projects
test_generic_functions.py 7.59 KiB
Newer Older
David Schäfer's avatar
David Schäfer committed
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import pytest
import numpy as np
import pandas as pd

David Schäfer's avatar
David Schäfer committed
from test.common import initData, TESTFLAGGER, TESTNODATA
David Schäfer's avatar
David Schäfer committed
from saqc.core.core import run
from saqc.core.config import Fields as F

from test.common import initData, TESTFLAGGER, TESTNODATA, initMetaDict, initMetaString
David Schäfer's avatar
David Schäfer committed

David Schäfer's avatar
David Schäfer committed
from saqc.core.evaluator import (
David Schäfer's avatar
David Schäfer committed
    DslTransformer,
    initLocalEnv,
David Schäfer's avatar
David Schäfer committed
    parseExpression,
    evalExpression,
David Schäfer's avatar
David Schäfer committed
    compileTree,
    evalCode,
)
def _evalDslExpression(expr, data, field, flagger, nodata=np.nan):
    """Evaluate a single DSL expression and return whatever it evaluates to.

    The expression is run through the evaluator pipeline step by step:
    build the local environment, parse, transform the tree, compile and
    finally evaluate the compiled code against that same environment.

    Parameters
    ----------
    expr : str
        The DSL expression to evaluate.
    data, field, flagger, nodata
        Passed unchanged to ``initLocalEnv`` to build the environment.

    Returns
    -------
    The value returned by ``evalCode`` for the compiled expression.
    """
    local_env = initLocalEnv(data, field, flagger, nodata)
    parsed = parseExpression(expr)
    rewritten = DslTransformer(local_env).visit(parsed)
    return evalCode(compileTree(rewritten), local_env=local_env)
David Schäfer's avatar
David Schäfer committed
@pytest.fixture
def data():
    """Fixture handing the result of ``initData()`` to the requesting test."""
    dataset = initData()
    return dataset


# @pytest.mark.parametrize("flagger", TESTFLAGGER)
# def test_flagPropagation(data, flagger):
#     var1, var2, *_ = data.columns
#     this = var1
#     flagger = flagger.initFlags(data).setFlags(var2, iloc=slice(None, None, 5))
#     var2_flags = flagger.isFlagged(var2)
#     var2_data = data[var2].mask(var2_flags)
#     data, flagger_result = evalExpression(
#         "flagGeneric(func=var2 < mean(var2))", data, this, flagger, np.nan
#     )
#     expected = var2_flags | (var2_data < var2_data.mean())
#     result = flagger_result.isFlagged(this)
#     assert (result == expected).all()


@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_missingIdentifier(data, flagger):
    """Each listed expression must make ``evalExpression`` raise ``NameError``.

    NOTE(review): the expressions use ``fff`` and ``var3``, which are
    presumably not available in the evaluation environment -- confirm
    against the fixture data if this test starts failing.
    """
    flagger = flagger.initFlags(data)
    tests = [
        "flagGeneric(func=fff(var2) < 5)",
        "flagGeneric(func=var3 != NODATA)",
    ]
    for expr in tests:
        # keep the `raises` context tight around the call under test
        with pytest.raises(NameError):
            evalExpression(expr, data, data.columns[0], flagger, np.nan)
David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_comparisonOperators(data, flagger):
    """Comparison operators inside ``flagGeneric`` flag exactly the masked rows.

    Every DSL comparison is paired with the equivalent comparison computed
    directly on ``data``; the flags produced by evaluating the expression
    must equal the flags obtained by setting that mask on the first column.
    """
    flagger = flagger.initFlags(data)
    var1, var2, *_ = data.columns
    # `this` is the field the expressions are evaluated on (the first column)
    this = var1

    tests = [
        ("this > 100", data[this] > 100),
        (f"10 >= {var2}", 10 >= data[var2]),
        (f"{var2} < 100", data[var2] < 100),
        (f"this <= {var2}", data[this] <= data[var2]),
        (f"{var1} == {var2}", data[this] == data[var2]),
        (f"{var1} != {var2}", data[this] != data[var2]),
    ]

    # check within the usually enclosing scope
    for expr, mask in tests:
        _, result_flagger = evalExpression(
            f"flagGeneric(func={expr})", data, this, flagger, np.nan
        )
        expected_flagger = flagger.setFlags(this, loc=mask, test="generic")
        assert (result_flagger.isFlagged() == expected_flagger.isFlagged()).all(None)
David Schäfer's avatar
David Schäfer committed

David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_arithmeticOperators(data, flagger):
    """Arithmetic operators inside ``flagGeneric`` flag exactly the masked rows.

    Each DSL expression combines an arithmetic operation on ``this`` with a
    comparison; the same computation on the first data column yields the
    mask of rows that are expected to be flagged.
    """
    flagger = flagger.initFlags(data)
    field, *_ = data.columns
    values = data[field]

    cases = [
        ("this + 100 > 110", values + 100 > 110),
        ("this - 100 > 0", values - 100 > 0),
        ("this * 100 > 200", values * 100 > 200),
        ("this / 100 > .1", values / 100 > 0.1),
        ("this % 2 == 1", values % 2 == 1),
        ("this ** 2 == 0", values ** 2 == 0),
    ]

    # check within the usually enclosing scope
    for expr, expected_mask in cases:
        generic_call = f"flagGeneric(func={expr})"
        _, result_flagger = evalExpression(generic_call, data, field, flagger, np.nan)
        expected_flagger = flagger.setFlags(field, loc=expected_mask, test="generic")
        flags_match = result_flagger.isFlagged() == expected_flagger.isFlagged()
        assert flags_match.all(None)
David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_nonReduncingBuiltins(data, flagger):
    """DSL builtins that do not reduce their input match the numpy result.

    The evaluated expression is compared value by value with the same
    function applied directly to the first data column.
    """
    flagger = flagger.initFlags(data)
    field, *_ = data.columns

    cases = [
        ("abs(this)", np.abs(data[field])),
    ]

    for expr, expected in cases:
        evaluated = _evalDslExpression(expr, data, field, flagger)
        assert (evaluated == expected).all()


David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
@pytest.mark.parametrize("nodata", TESTNODATA)
def test_reduncingBuiltins(data, flagger, nodata):
    """Reducing DSL builtins return the same scalar as numpy / ``len``.

    Every fourth row of ``data`` is overwritten with ``nodata`` before the
    flagger is initialised; the expected values are computed on the data
    after that overwrite.
    """
    data.loc[::4] = nodata
    flagger = flagger.initFlags(data)
    first, second, *_ = data.columns
    field = first

    cases = [
        ("min(this)", np.min(data[field])),
        (f"max({first})", np.max(data[first])),
        (f"sum({second})", np.sum(data[second])),
        ("mean(this)", np.mean(data[field])),
        (f"std({first})", np.std(data[first])),
        (f"len({second})", len(data[second])),
    ]

    for expr, expected in cases:
        evaluated = _evalDslExpression(expr, data, field, flagger, nodata)
        assert evaluated == expected
David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
@pytest.mark.parametrize("nodata", TESTNODATA)
def test_ismissing(data, flagger, nodata):
    """``ismissing`` selects only missing values, ``~ismissing`` only present ones.

    The first column is filled with NaN up to its midpoint and with -9999
    after it; the value at the midpoint itself is left untouched. The result
    of each DSL expression is used as an indexer into that column and the
    selected values are checked against NaN / ``nodata``.
    """
    half = data.lengths[0] // 2
    data.iloc[:half, 0] = np.nan
    data.iloc[half + 1 :, 0] = -9999
    var1, *_ = data.columns
    flagger = flagger.initFlags(data)

    def isMissing(values):
        # a value counts as missing if it is NaN or equals the nodata marker
        return values.isna() | (values == nodata)

    tests = [
        (f"ismissing({var1})", lambda values: isMissing(values).all()),
        # Negate the mask *before* reducing: `~mask.all()` would parse as
        # `~(mask.all())`, i.e. "not all missing" instead of "none missing".
        (f"~ismissing({var1})", lambda values: (~isMissing(values)).all()),
    ]

    for expr, checkFunc in tests:
        idx = _evalDslExpression(expr, data, var1, flagger, nodata)
        assert checkFunc(data.loc[idx, var1])


@pytest.mark.parametrize("flagger", TESTFLAGGER)
@pytest.mark.parametrize("nodata", TESTNODATA)
def test_bitOps(data, flagger, nodata):
    """Bitwise ``~``, ``|`` and ``&`` in ``flagGeneric`` match the numpy/pandas result.

    Each DSL expression is evaluated on the first column and the resulting
    flags of that column are compared with the mask computed directly on
    ``data``.
    """
    var1, var2, *_ = data.columns
    this = var1

    flagger = flagger.initFlags(data)

    tests = [
        (
            "flagGeneric(func=~(this > mean(this)))",
            ~(data[this] > np.nanmean(data[this])),
        ),
        (
            f"flagGeneric(func=(this <= 0) | (0 < {var1}))",
            (data[this] <= 0) | (0 < data[var1]),
        ),
        (
            f"flagGeneric(func=({var2} >= 0) & (0 > this))",
            (data[var2] >= 0) & (0 > data[this]),
        ),
    ]

    for expr, expected in tests:
        _, flagger_result = evalExpression(expr, data, this, flagger, nodata)
        assert (flagger_result.isFlagged(this) == expected).all()
David Schäfer's avatar
David Schäfer committed


@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_isflagged(data, flagger):
    """``isflagged(<var>)`` in the DSL equals ``flagger.isFlagged(<var>)``.

    Every second row of the first two columns is flagged. The expression is
    evaluated with the second column as field while querying the flags of
    the first column.
    """
    flagger = flagger.initFlags(data)
    var1, var2, *_ = data.columns

    flagger = flagger.setFlags(var1, iloc=slice(None, None, 2))
    flagger = flagger.setFlags(var2, iloc=slice(None, None, 2))

    idx = _evalDslExpression(f"isflagged({var1})", data, var2, flagger)

    flagged = flagger.isFlagged(var1)
    # `.all` must be called: asserting on the bound method is always truthy
    assert (flagged == idx).all()


David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_invertIsFlagged(data, flagger):
    """Negated ``isflagged`` calls inside ``flagGeneric`` flag the expected rows.

    Every second row of the second column is flagged up front. Each
    expression is then evaluated on the first column, whose resulting flags
    must equal the mask computed directly from the flagger and the data.
    """
    flagger = flagger.initFlags(data)
    target, source, *_ = data.columns

    flagger = flagger.setFlags(source, iloc=slice(None, None, 2))

    not_flagged = ~flagger.isFlagged(source)
    small_and_not_flagged = ~(data[source] > 999) & (~flagger.isFlagged(source))
    cases = [
        (f"~isflagged({source})", not_flagged),
        (f"~({source}>999) & (~isflagged({source}))", small_and_not_flagged),
    ]

    for expr, flags_expected in cases:
        generic_call = f"flagGeneric(func={expr})"
        _, flagger_result = evalExpression(generic_call, data, target, flagger, np.nan)
        flags_result = flagger_result.isFlagged(target)
        assert (flags_result == flags_expected).all(None)
David Schäfer's avatar
David Schäfer committed


David Schäfer's avatar
David Schäfer committed
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_isflaggedArgument(data, flagger):
    """``isflagged`` with explicit flag / comparator arguments matches the flagger.

    Every second row of the first column is flagged as ``BAD``. The DSL
    calls are evaluated with the second column as field and compared with
    the corresponding ``flagger.isFlagged`` queries on the first column.
    """
    flagged_field, eval_field, *_ = data.columns

    every_second = slice(None, None, 2)
    flagger = flagger.initFlags(data).setFlags(
        flagged_field, iloc=every_second, flag=flagger.BAD
    )

    bad_result = _evalDslExpression(
        f"isflagged({flagged_field}, BAD)", data, eval_field, flagger
    )
    bad_expected = flagger.isFlagged(flagged_field, flag=flagger.BAD)

    unflagged_result = _evalDslExpression(
        f"isflagged({flagged_field}, UNFLAGGED, '==')", data, eval_field, flagger
    )
    unflagged_expected = flagger.isFlagged(
        flagged_field, flag=flagger.UNFLAGGED, comparator="=="
    )

    cases = (
        (bad_result, bad_expected),
        (unflagged_result, unflagged_expected),
    )
    for result, expected in cases:
        assert (result == expected).all(None)
David Schäfer's avatar
David Schäfer committed