add experimental method to write ISO 19157 compatible flags

73f2602a · David Schäfer · 53c7e616 · 73f2602a · 73f2602a
Commit 73f2602a authored 1 year ago by David Schäfer
--- a/saqc/core/flags.py
+++ b/saqc/core/flags.py
@@ -6,6 +6,7 @@

 from __future__ import annotations

+import json
 import typing
 import warnings
 from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union, overload
@@ -13,7 +14,8 @@ from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union, ove
 import numpy as np
 import pandas as pd

-from saqc.core import DictOfSeries, History
+from saqc.constants import UNFLAGGED
+from saqc.core import DictOfSeries, History, history

 _VAL = Union[pd.Series, History]
 DictLike = Union[
@@ -505,6 +507,44 @@ class Flags:
    def __repr__(self) -> str:
        return str(DictOfSeries(self)).replace("DictOfSeries", type(self).__name__)

+    def _toISO19157(self):
+        """
+        Export the flags as ISO 19157 compatible quality reports.
+
+        Every column of a variable's history is treated as one quality test.
+        For each row, the outcomes of all tests are collected into a JSON
+        encoded list with one entry per test. An entry holds the keys
+        ``measureIdentification``, ``measureDescription``, ``dateTime``,
+        ``evaluationMethodType`` and ``result``, where ``result`` carries the
+        keyword arguments of the test (``explanation``) and its outcome
+        (``pass``).
+
+        NOTE: This feature is experimental and might change without further notice.
+
+        Returns
+        -------
+        DictOfSeries
+            One series of JSON strings per variable.
+        """
+        from saqc import SaQC
+
+        # known measure identifiers, every other function is mapped to 8
+        MEASURES = {"flagMissing": 4, "flagRange": 14}
+        # One timestamp for the whole export, in ISO 8601 extended format,
+        # i.e. with a colon separated time part.
+        timestamp = pd.Timestamp.now().strftime("%Y-%m-%dT%H:%M:%S")
+        out = DictOfSeries()
+        for col, hist in self._data.items():
+            # missing (NaN) history entries are treated as UNFLAGGED
+            flags = hist._hist.astype(float).fillna(UNFLAGGED)
+            cols = {}
+            for i, meta_element in enumerate(hist._meta):
+                func = meta_element["func"]
+                kwargs = meta_element["kwargs"]
+                dfilter = kwargs["dfilter"]
+                measure_id = MEASURES.get(func, 8)
+                # as soon as we defined categories, this should be done by a
+                # dictionary lookup
+                measure_description = (
+                    getattr(SaQC, func).__qualname__.split(".")[0].replace("Mixin", "")
+                )
+                # we consider every flag > dfilter to be a failing test
+                cols[i] = (flags[i] <= dfilter).apply(
+                    lambda f: {
+                        "measureIdentification": measure_id,
+                        "measureDescription": measure_description,
+                        "dateTime": timestamp,
+                        "evaluationMethodType": "directInternal",
+                        "result": {"explanation": kwargs, "pass": f},
+                    }
+                )
+            # NOTE: non-finite numbers (e.g. a ``dfilter`` of -inf) are written as
+            # ``-Infinity``/``NaN``, which strict JSON parsers may reject.
+            out[col] = pd.DataFrame(cols).apply(
+                lambda row: json.dumps(row.tolist()), axis=1
+            )
+        return out
+

 def initFlagsLike(
    reference: Union[pd.Series, DictLike, Flags],

--- a/tests/core/test_flags.py
+++ b/tests/core/test_flags.py
@@ -4,6 +4,7 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later

+import json
 from typing import Dict, Union

 import numpy as np
@@ -13,6 +14,7 @@ import pytest
 import tests.core.test_history as test_hist
 from saqc import UNFLAGGED
 from saqc.core import DictOfSeries, Flags, History
+from saqc.core.core import SaQC

 _arrays = [
    np.array([[]]),
@@ -382,3 +384,44 @@ def test__getitem__listlike_and_slice(data, key, expected):

    expected = Flags({k: pd.Series(v, dtype=float) for k, v in expected.items()})
    is_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data, pass_1, pass_2",
+    [
+        (
+            [1, np.nan, 3, 4, 5],
+            [True, False, True, True, True],
+            [False, True, True, True, False],
+        ),
+        (
+            [5, np.nan, np.nan, 5, 8],
+            [True, False, False, True, True],
+            [False, True, True, False, False],
+        ),
+    ],
+)
+def test_ISO19157(data, pass_1, pass_2):
+    """
+    Check the ISO 19157 export of a ``flagMissing``/``flagRange`` chain.
+
+    ``pass_1`` and ``pass_2`` hold the expected per-row outcome of the first
+    and the second test respectively.
+    """
+    missing_kwargs = {"dfilter": -np.inf, "field": "a"}
+    range_kwargs = {"min": 2, "max": 4, "dfilter": -np.inf, "field": "a"}
+
+    frame = pd.DataFrame({"a": data})
+    qc = SaQC(frame).flagMissing("a").flagRange("a", min=2, max=4)
+    reports = qc._flags._toISO19157()["a"].apply(json.loads)
+
+    for (missing, in_range), ok_missing, ok_range in zip(reports, pass_1, pass_2):
+        want_missing = {
+            "measureIdentification": 4,
+            "measureDescription": "Breaks",
+            "evaluationMethodType": "directInternal",
+            "result": {"explanation": missing_kwargs, "pass": ok_missing},
+        }
+        want_range = {
+            "measureIdentification": 14,
+            "measureDescription": "Outliers",
+            "evaluationMethodType": "directInternal",
+            "result": {"explanation": range_kwargs, "pass": ok_range},
+        }
+        # subset comparison: the export carries further keys (e.g. "dateTime")
+        # whose values are not fixed
+        assert want_missing.items() <= missing.items()
+        assert want_range.items() <= in_range.items()