Skip to content
Snippets Groups Projects
Commit 93f41f4c authored by David Schäfer's avatar David Schäfer
Browse files

heavy repo restructuring

parent 39d932e4
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,7 @@ import numpy as np
import pandas as pd
from .config import Fields, Params
from ..dsl.parser import evalExpression
from .evaluator import evalExpression
from ..lib.plotting import plot
from ..lib.tools import setup
......
File moved
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# from .evaluator import evalExpression
# from .parser import parseFlag
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import ast
import copy
import numbers
import operator as op
from numbers import Number
import numpy as np
import pandas as pd
from ..flagger import BaseFlagger
# Dispatch table of the supported operators: maps the AST node type of an
# operator to the function from the `operator` module that implements it.
OPERATORS = {
    # arithmetic
    ast.Add: op.add,
    ast.Sub: op.sub,
    ast.Mult: op.mul,
    ast.Div: op.truediv,
    ast.Pow: op.pow,
    ast.USub: op.neg,
    # comparison
    ast.NotEq: op.ne,
    ast.Eq: op.eq,
    ast.Gt: op.gt,
    ast.GtE: op.ge,
    ast.Lt: op.lt,
    ast.LtE: op.le,
    # bitwise
    ast.BitAnd: op.and_,
    ast.BitOr: op.or_,
    ast.BitXor: op.xor,
    ast.Invert: op.invert,
}
def initFunctionNamespace(nodata, flagger):
    """Build the table of functions that may be called inside an expression.

    :param nodata: value that ``ismissing`` treats as missing, in addition
        to everything ``pd.isnull`` reports
    :param flagger: object providing ``isFlagged(flags, flag, comparator)``
    :return: dict mapping a function name to a ``(callable, target)`` tuple,
        where ``target`` is either ``"data"`` or ``"flags"``
    """

    def ismissing(values):
        return (values == nodata) | pd.isnull(values)

    def isflagged(flags, flag=None, comparator=">"):
        # an explicitly given flag is always tested for equality, the
        # `comparator` argument only takes effect when no flag is passed
        effective = comparator if flag is None else "=="
        return flagger.isFlagged(flags, flag, effective)

    data_functions = (
        ("abs", abs),
        ("max", max),
        ("min", min),
        ("mean", np.mean),
        ("sum", np.sum),
        ("std", np.std),
        ("len", len),
        ("ismissing", ismissing),
    )
    functions = {name: (func, "data") for name, func in data_functions}
    functions["isflagged"] = (isflagged, "flags")
    return functions
def _raiseNameError(name, expr):
    """Raise a ``NameError`` for an identifier that could not be resolved.

    :param name: the identifier that is unknown
    :param expr: the complete expression being evaluated, added to the
        message for context
    :raises NameError: always
    """
    raise NameError(
        "name '{:}' is not defined (failing expression: '{:}')"
        .format(name, expr))
def evalExpression(expr: str, flagger: BaseFlagger,
                   data: pd.DataFrame, flags: pd.DataFrame,
                   field: str, nodata: Number = np.nan,
                   **namespace: dict) -> np.ndarray:
    """Evaluate the expression string `expr` against `data` and `flags`.

    The expression is parsed with ``ast.parse(expr, mode='eval')`` and the
    resulting tree is walked by hand. Supported are number literals, names,
    the operators listed in ``OPERATORS`` and calls (positional arguments
    only) to the functions provided by ``initFunctionNamespace``.

    Names are resolved through the namespace first: a numeric value is
    returned as is, everything else is treated as a column name. A column
    present in `flags` and `data` yields the data column as a masked array,
    masked where ``flagger.isFlagged`` reports the column as flagged. Inside
    the arguments of a function targeting ``"flags"`` the flags column is
    returned instead.

    :param expr: the expression to evaluate
    :param flagger: flagger used to mask flagged values and by ``isflagged``
    :param data: the data columns
    :param flags: the flags columns
    :param field: column name the identifier ``this`` refers to
    :param nodata: value ``ismissing`` treats as missing
    :param namespace: additional names available within the expression
    :return: the value the expression evaluates to
    :raises NameError: if a function or variable name cannot be resolved
    :raises TypeError: if the expression contains an unsupported node type
    :raises KeyError: if an operator is not listed in ``OPERATORS``
    """
    functions = initFunctionNamespace(nodata, flagger)

    def _eval(node, namespace):
        # type: (ast.AST, dict) -> object
        # the namespace dictionary provides the data and flags frames and
        # any additional variables (e.g. NODATA, this)
        if isinstance(node, ast.Constant):  # <number>
            # Python >= 3.8 parses every literal into ast.Constant (ast.Num
            # is deprecated and removed in 3.14); only plain numbers are
            # accepted, exactly as the former ast.Num check did
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise TypeError(node)
            return value

        if type(node).__name__ == "Num":  # <number>, parsers older than 3.8
            return node.n

        if isinstance(node, ast.UnaryOp):
            return OPERATORS[type(node.op)](
                _eval(node.operand, namespace))

        if isinstance(node, ast.BinOp):
            return OPERATORS[type(node.op)](
                _eval(node.left, namespace),
                _eval(node.right, namespace))

        if isinstance(node, ast.Compare):
            # NOTE: chained comparisons are not supported yet, only the
            #       first operator/comparator pair is evaluated
            compare = OPERATORS[type(node.ops[0])]
            return compare(_eval(node.left, namespace),
                           _eval(node.comparators[0], namespace))

        if isinstance(node, ast.Call):
            # only the functions from the function table are callable,
            # kwargs are not supported yet
            try:
                func, target = functions[node.func.id]
            except KeyError:
                _raiseNameError(node.func.id, expr)
            # the arguments need to know whether the function works on
            # data or on flags
            call_namespace = {**namespace, "target": target}
            args = [_eval(arg, call_namespace) for arg in node.args]
            return func(*args)

        if isinstance(node, ast.Name):  # <variable>
            column = namespace.get(node.id, node.id)
            if isinstance(column, numbers.Number):
                # name is not referring to a DataFrame field
                return column

            flag_index = namespace["flags"].columns
            if isinstance(flag_index, pd.MultiIndex):
                flag_columns = flag_index.get_level_values(0).unique()
            else:
                flag_columns = flag_index.values
            data_columns = namespace["data"].columns.values

            in_flags = column in flag_columns
            want_data = namespace.get("target") != "flags"
            in_data = want_data and column in data_columns
            if not (in_flags or in_data):
                # without this check the lookup below would assign nothing
                # and fail with an uninformative UnboundLocalError
                _raiseNameError(column, expr)

            try:
                if in_flags:
                    flagcol = namespace["flags"][column]
                    out = flagcol
                if in_data:
                    datacol = namespace["data"][column]
                    out = datacol
                    if in_flags:
                        out = np.ma.masked_array(
                            datacol, mask=flagger.isFlagged(flagcol))
            except KeyError:
                _raiseNameError(column, expr)
            return out

        raise TypeError(node)

    namespace = {**namespace, "data": data, "flags": flags, "this": field}
    return _eval(ast.parse(expr, mode='eval').body, namespace)
# field = namespace["this"]
# flags = flag_func(flags=namespace["flags"].loc[to_flag_idx, field])
# namespace["flags"].loc[to_flag_idx, field] = flags
# return namespace
from .common import *
from .test_core import *
from .flagger import *
from .dsl import *
from .funcs import *
File moved
......@@ -7,7 +7,7 @@ import pandas as pd
from saqc.funcs import register
from saqc.core.core import runner, flagNext, flagPeriod
from saqc.core.config import Fields as F
from .common import initData, initMetaDict, TESTFLAGGER
from test.common import initData, initMetaDict, TESTFLAGGER
@pytest.fixture
......
......@@ -4,7 +4,7 @@
import pytest
import numpy as np
from saqc.dsl.parser import (
from saqc.core.evaluator import (
compileTree,
parseExpression,
initDslFuncMap,
......
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import numpy as np
from ..common import initData
from saqc.flagger.simpleflagger import SimpleFlagger
from saqc.dsl.parser import evalExpression
@pytest.fixture
def data():
    """Fixture handing the result of ``initData(3)`` to the requesting test."""
    dataset = initData(3)
    return dataset
# def test_missingIdentifier():
# data = initData()
# flagger = SimpleFlagger()
# flags = flagger.initFlags(data)
# tests = ["func(var2) < 5", "var3 != NODATA"]
# for test in tests:
# with pytest.raises(NameError):
# evalExpression(test, flagger, data, flags, data.columns[0])
# def test_flagPropagation():
# data = initData()
# flagger = SimpleFlagger()
# flags = flagger.initFlags(data)
# flags = flagger.setFlags(flags, 'var2', iloc=slice(None, None, 5))
# var1, var2, *_ = data.columns
# var2_flags = flagger.isFlagged(flags[var2])
# var2_data = data[var2].mask(var2_flags)
# result = evalExpression("var2 < mean(var2)",
# flagger,
# data, flags,
# data.columns[0])
# expected = (var2_flags | (var2_data < var2_data.mean()))
# assert (result.filled(True) == expected).all()
# def test_isflagged():
# data = initData(cols=1)
# flagger = SimpleFlagger()
# flags = flagger.initFlags(data)
# flags = flagger.setFlags(flags, 'var1', iloc=slice(None, None, 5), flag=flagger.BAD)
# flags = flagger.setFlags(flags, 'var1', iloc=slice(1, None, 5), flag=flagger.GOOD)
# tests = {
# "isflagged(this)" : flagger.isFlagged(flags, flagger.GOOD, ">"),
# f"isflagged(this, {flagger.GOOD})" : flagger.isFlagged(flags, flagger.GOOD, "=="),
# # NOTE: _ast.Str is not implemented, not sure if we should do so
# # f"isflagged(this, {flagger.GOOD}, '<')" : flagger.isFlagged(flags, flagger.GOOD, "<"),
# }
# for expr, right in tests.items():
# left = evalExpression(expr, flagger, data, flags, data.columns[0])
# assert np.all(left.to_frame() == right)
......@@ -5,9 +5,9 @@ import pytest
import numpy as np
import pandas as pd
from ..common import initData, TESTFLAGGER
from test.common import initData, TESTFLAGGER
from saqc.dsl.parser import (
from saqc.core.evaluator import (
DslTransformer,
initDslFuncMap,
parseExpression,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment