Skip to content
Snippets Groups Projects
Commit 73f2602a authored by David Schäfer's avatar David Schäfer
Browse files

add experimental method to write ISO 19157 compatible flags

parent 53c7e616
No related branches found
No related tags found
1 merge request: !712 "Draft: add experimental method to write ISO 19157 compatible flags"
Pipeline #178458 passed with stages
in 6 minutes and 53 seconds
......@@ -6,6 +6,7 @@
from __future__ import annotations
import json
import typing
import warnings
from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union, overload
......@@ -13,7 +14,8 @@ from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union, ove
import numpy as np
import pandas as pd
from saqc.core import DictOfSeries, History
from saqc.constants import UNFLAGGED
from saqc.core import DictOfSeries, History, history
_VAL = Union[pd.Series, History]
DictLike = Union[
......@@ -505,6 +507,44 @@ class Flags:
def __repr__(self) -> str:
    """Return the ``DictOfSeries`` rendering of the flags, labelled with this class's name."""
    rendered = str(DictOfSeries(self))
    own_name = type(self).__name__
    return rendered.replace("DictOfSeries", own_name)
def _toISO19157(self):
    """
    ISO 19157 compatible flags.

    NOTE: This feature is experimental and might change without further notice.

    For every column, each entry of the flagging history is turned into one
    quality element (a ``dict``) per row. The elements of a row are collected
    in a list, in history order, and serialized with ``json.dumps``.

    Returns
    -------
    DictOfSeries
        One ``pd.Series`` of JSON strings per column, indexed like the
        column's history. A column without any history entries yields the
        string ``"[]"`` for every row.

    Raises
    ------
    KeyError
        If the ``kwargs`` of a history entry do not contain ``"dfilter"``.
    TypeError
        If the ``kwargs`` of a history entry are not JSON serializable.
    """
    from saqc import SaQC

    MEASURES = {"flagMissing": 4, "flagRange": 14}

    # One timestamp for the whole export, written as a valid ISO 8601
    # date-time (colons, not hyphens, separate the time components).
    timestamp = pd.Timestamp.now().strftime("%Y-%m-%dT%H:%M:%S")

    out = DictOfSeries()
    for col, hist in self._data.items():
        flags = hist._hist.astype(float).fillna(UNFLAGGED)

        # One (static element fields, kwargs, per-row pass values) triple
        # per entry in the column's history.
        checks = []
        for i, meta_element in enumerate(hist._meta):
            func = meta_element["func"]
            kwargs = meta_element["kwargs"]
            # as soon as we have defined categories, this should be done
            # by a dictionary lookup
            measure_description = (
                getattr(SaQC, func).__qualname__.split(".")[0].replace("Mixin", "")
            )
            element = {
                "measureIdentification": MEASURES.get(func, 8),
                "measureDescription": measure_description,
                "dateTime": timestamp,
                "evaluationMethodType": "directInternal",
            }
            # we consider every flag > dfilter to be a failing test;
            # ``tolist`` yields plain Python bools, which ``json`` can encode
            passed = (flags[i] <= kwargs["dfilter"]).tolist()
            checks.append((element, kwargs, passed))

        # Assemble the rows directly instead of going through
        # ``DataFrame.apply``: this keeps the index even when the history
        # is empty and avoids a Python-level callback per cell.
        records = [
            json.dumps(
                [
                    {**element, "result": {"explanation": kwargs, "pass": passed[row]}}
                    for element, kwargs, passed in checks
                ]
            )
            for row in range(len(flags.index))
        ]
        out[col] = pd.Series(records, index=flags.index, dtype=object)

    return out
def initFlagsLike(
reference: Union[pd.Series, DictLike, Flags],
......
......@@ -4,6 +4,7 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
import json
from typing import Dict, Union
import numpy as np
......@@ -13,6 +14,7 @@ import pytest
import tests.core.test_history as test_hist
from saqc import UNFLAGGED
from saqc.core import DictOfSeries, Flags, History
from saqc.core.core import SaQC
_arrays = [
np.array([[]]),
......@@ -382,3 +384,44 @@ def test__getitem__listlike_and_slice(data, key, expected):
expected = Flags({k: pd.Series(v, dtype=float) for k, v in expected.items()})
is_equal(result, expected)
@pytest.mark.parametrize(
    "data, pass_1, pass_2",
    [
        (
            [1, np.nan, 3, 4, 5],
            [True, False, True, True, True],
            [False, True, True, True, False],
        ),
        (
            [5, np.nan, np.nan, 5, 8],
            [True, False, False, True, True],
            [False, True, True, False, False],
        ),
    ],
)
def test_ISO19157(data, pass_1, pass_2):
    """Check the per-row quality elements written for ``flagMissing`` and ``flagRange``."""
    missing_kwargs = {"dfilter": -np.inf, "field": "a"}
    range_kwargs = {"min": 2, "max": 4, "dfilter": -np.inf, "field": "a"}

    frame = pd.DataFrame({"a": data})
    qc = SaQC(frame).flagMissing("a").flagRange("a", min=2, max=4)
    records = qc._flags._toISO19157()["a"].apply(json.loads)

    for (missing_rec, range_rec), ok_missing, ok_range in zip(records, pass_1, pass_2):
        want_missing = {
            "measureIdentification": 4,
            "measureDescription": "Breaks",
            "evaluationMethodType": "directInternal",
            "result": {"explanation": missing_kwargs, "pass": ok_missing},
        }
        want_range = {
            "measureIdentification": 14,
            "measureDescription": "Outliers",
            "evaluationMethodType": "directInternal",
            "result": {"explanation": range_kwargs, "pass": ok_range},
        }
        # subset comparison: the written records may carry additional keys
        assert want_missing.items() <= missing_rec.items()
        assert want_range.items() <= range_rec.items()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment