From e0f35eb3d3a186fec02ee3fb49e29a41e8364c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Sch=C3=A4fer?= <david.schaefer@ufz.de> Date: Tue, 20 Dec 2022 09:56:04 +0100 Subject: [PATCH] Translation cleanups --- CHANGELOG.md | 3 + saqc/core/core.py | 2 +- saqc/core/flags.py | 6 +- saqc/core/translation/__init__.py | 1 + saqc/core/translation/basescheme.py | 69 ++++++++++++++++------- saqc/core/translation/dmpscheme.py | 10 ++-- saqc/core/translation/positionalscheme.py | 8 +-- tests/core/test_translator.py | 20 +++---- 8 files changed, 74 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b28a550b0..5991caea1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ SPDX-License-Identifier: GPL-3.0-or-later ### Added - add option to not overwrite existing flags to `concatFlags` ### Changed +- Remove all flag value restrictions from the default flagging scheme `FloatScheme` +- Renamed `TranslationScheme.forward` to `TranslationScheme.toInternal` +- Renamed `TranslationScheme.backward` to `TranslationScheme.toExternal` ### Removed ### Fixed diff --git a/saqc/core/core.py b/saqc/core/core.py index abb47c7a5..021f22f08 100644 --- a/saqc/core/core.py +++ b/saqc/core/core.py @@ -110,7 +110,7 @@ class SaQC(FunctionsMixin): @property def flags(self) -> MutableMapping: - flags = self._scheme.backward(self._flags, attrs=self._attrs, raw=True) + flags = self._scheme.toExternal(self._flags, attrs=self._attrs) flags.attrs = self._attrs.copy() return flags diff --git a/saqc/core/flags.py b/saqc/core/flags.py index a303c05f8..a16025075 100644 --- a/saqc/core/flags.py +++ b/saqc/core/flags.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import DefaultDict, Dict, Iterable, Mapping, Optional, Tuple, Type, Union +from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union import numpy as np import pandas as pd @@ -191,9 +191,7 @@ class Flags: 2 -inf 25.0 25.0 0.0 99.0 """ - def __init__( - self, raw_data: 
Optional[Union[DictLike, Flags]] = None, copy: bool = False - ): + def __init__(self, raw_data: DictLike | Flags | None = None, copy: bool = False): self._data: dict[str, History] diff --git a/saqc/core/translation/__init__.py b/saqc/core/translation/__init__.py index 45aa07796..c40914611 100644 --- a/saqc/core/translation/__init__.py +++ b/saqc/core/translation/__init__.py @@ -7,6 +7,7 @@ # -*- coding: utf-8 -*- from saqc.core.translation.basescheme import ( FloatScheme, + MappingScheme, SimpleScheme, TranslationScheme, ) diff --git a/saqc/core/translation/basescheme.py b/saqc/core/translation/basescheme.py index 1b42a8eff..c9b2b3864 100644 --- a/saqc/core/translation/basescheme.py +++ b/saqc/core/translation/basescheme.py @@ -8,6 +8,7 @@ from __future__ import annotations +from abc import abstractmethod, abstractproperty from typing import Any, Dict import numpy as np @@ -22,7 +23,26 @@ ForwardMap = Dict[ExternalFlag, float] BackwardMap = Dict[float, ExternalFlag] -class TranslationScheme: +class TranslationScheme: # pragma: no cover + @property + @abstractmethod + def DFILTER_DEFAULT(self): + pass + + @abstractmethod + def __call__(self, flag: ExternalFlag) -> float: + pass + + @abstractmethod + def toInternal(self, flags: pd.DataFrame | DictOfSeries) -> Flags: + pass + + @abstractmethod + def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries: + pass + + +class MappingScheme(TranslationScheme): """ This class provides the basic translation mechanism and should serve as a base class for every other translation scheme. 
@@ -81,7 +101,7 @@ class TranslationScheme: @staticmethod def _translate( - flags: Flags | pd.DataFrame | pd.Series, + flags: Flags | pd.DataFrame | pd.Series | DictOfSeries, trans_map: ForwardMap | BackwardMap, ) -> DictOfSeries: """ @@ -95,7 +115,7 @@ class TranslationScheme: Returns ------- - pd.DataFrame, Flags + DictOfSeries """ if isinstance(flags, pd.Series): flags = flags.to_frame() @@ -128,9 +148,9 @@ class TranslationScheme: if flag not in self._backward: raise ValueError(f"invalid flag: {flag}") return float(flag) - return self._forward[flag] + return float(self._forward[flag]) - def forward(self, flags: pd.DataFrame) -> Flags: + def toInternal(self, flags: pd.DataFrame | DictOfSeries | pd.Series) -> Flags: """ Translate from 'external flags' to 'internal flags' @@ -145,13 +165,11 @@ class TranslationScheme: """ return Flags(self._translate(flags, self._forward)) - def backward( + def toExternal( self, flags: Flags, - raw: bool = False, attrs: dict | None = None, - **kwargs, - ) -> pd.DataFrame | DictOfSeries: + ) -> DictOfSeries: """ Translate from 'internal flags' to 'external flags' @@ -160,9 +178,6 @@ class TranslationScheme: flags : pd.DataFrame The external flags to translate - raw: bool, default False - if True return data as DictOfSeries, otherwise as pandas DataFrame. 
- attrs : dict or None, default None global meta information of saqc-object @@ -172,8 +187,6 @@ class TranslationScheme: """ out = self._translate(flags, self._backward) out.attrs = attrs or {} - if not raw: - out = out.to_df() return out @@ -184,16 +197,30 @@ class FloatScheme(TranslationScheme): internal float flags """ - _MAP = { - -np.inf: -np.inf, - **{k: k for k in np.arange(0, 256, dtype=float)}, - } + DFILTER_DEFAULT: float = FILTER_ALL - def __init__(self): - super().__init__(self._MAP, self._MAP) + def __call__(self, flag: float | int) -> float: + + try: + return float(flag) + except (TypeError, ValueError, OverflowError): + raise ValueError(f"invalid flag, expected a numerical value, got: {flag}") + + def toInternal(self, flags: pd.DataFrame | DictOfSeries) -> Flags: + try: + return Flags(flags.astype(float)) + except (TypeError, ValueError, OverflowError): + raise ValueError( + f"invalid flag(s), expected a collection of numerical values, got: {flags}" + ) + + def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries: + out = flags.toDios() + out.attrs = attrs or {} + return out -class SimpleScheme(TranslationScheme): +class SimpleScheme(MappingScheme): """ Acts as the default Translator, provides a changeable subset of the diff --git a/saqc/core/translation/dmpscheme.py b/saqc/core/translation/dmpscheme.py index f5354871f..c8598a8fa 100644 --- a/saqc/core/translation/dmpscheme.py +++ b/saqc/core/translation/dmpscheme.py @@ -17,7 +17,7 @@ import pandas as pd from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED from saqc.core.flags import Flags from saqc.core.history import History -from saqc.core.translation.basescheme import BackwardMap, ForwardMap, TranslationScheme +from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme _QUALITY_CAUSES = [ "", @@ -40,7 +40,7 @@ _QUALITY_LABELS = [ ] -class DmpScheme(TranslationScheme): +class DmpScheme(MappingScheme): """ Implements the translation from and to the 
flagging scheme implemented in @@ -91,7 +91,7 @@ class DmpScheme(TranslationScheme): field_history.append(histcol, meta=meta) return field_history - def forward(self, df: pd.DataFrame) -> Flags: + def toInternal(self, df: pd.DataFrame) -> Flags: """ Translate from 'external flags' to 'internal flags' @@ -114,7 +114,7 @@ class DmpScheme(TranslationScheme): return Flags(data) - def backward( + def toExternal( self, flags: Flags, attrs: dict | None = None, **kwargs ) -> pd.DataFrame: """ @@ -131,7 +131,7 @@ class DmpScheme(TranslationScheme): ------- translated flags """ - tflags = super().backward(flags, raw=True, attrs=attrs) + tflags = super().toExternal(flags, attrs=attrs) out = pd.DataFrame( index=reduce(lambda x, y: x.union(y), tflags.indexes).sort_values(), diff --git a/saqc/core/translation/positionalscheme.py b/saqc/core/translation/positionalscheme.py index 1fb581d56..e4dea64b3 100644 --- a/saqc/core/translation/positionalscheme.py +++ b/saqc/core/translation/positionalscheme.py @@ -12,10 +12,10 @@ import pandas as pd from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED from saqc.core.flags import Flags, History -from saqc.core.translation.basescheme import BackwardMap, ForwardMap, TranslationScheme +from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme -class PositionalScheme(TranslationScheme): +class PositionalScheme(MappingScheme): """ Implements the translation from and to the flagging scheme implemented by CHS @@ -43,7 +43,7 @@ class PositionalScheme(TranslationScheme): def __init__(self): super().__init__(forward=self._FORWARD, backward=self._BACKWARD) - def forward(self, flags: pd.DataFrame) -> Flags: + def toInternal(self, flags: pd.DataFrame) -> Flags: """ Translate from 'external flags' to 'internal flags' @@ -75,7 +75,7 @@ class PositionalScheme(TranslationScheme): return Flags(data) - def backward(self, flags: Flags, **kwargs) -> pd.DataFrame: + def toExternal(self, flags: Flags, **kwargs) -> pd.DataFrame: """ 
Translate from 'internal flags' to 'external flags' diff --git a/tests/core/test_translator.py b/tests/core/test_translator.py index 61d789e40..9d4a6c706 100644 --- a/tests/core/test_translator.py +++ b/tests/core/test_translator.py @@ -16,7 +16,7 @@ import pytest from saqc.constants import BAD, DOUBTFUL, FILTER_NONE, UNFLAGGED from saqc.core.core import SaQC from saqc.core.flags import Flags -from saqc.core.translation import DmpScheme, PositionalScheme, TranslationScheme +from saqc.core.translation import DmpScheme, MappingScheme, PositionalScheme from tests.common import initData @@ -27,7 +27,7 @@ def _genTranslators(): dtype(-1): BAD, **{dtype(f * 10): float(f) for f in range(10)}, } - scheme = TranslationScheme(flags, {v: k for k, v in flags.items()}) + scheme = MappingScheme(flags, {v: k for k, v in flags.items()}) yield flags, scheme @@ -60,7 +60,7 @@ def test_backwardTranslation(): for _, scheme in _genTranslators(): keys = tuple(scheme._backward.keys()) flags = _genFlags({field: np.array(keys)}) - translated = scheme.backward(flags) + translated = scheme.toExternal(flags) expected = set(scheme._backward.values()) assert not (set(translated[field]) - expected) @@ -72,7 +72,7 @@ def test_backwardTranslationFail(): # add an scheme invalid value to the flags flags = _genFlags({field: np.array(keys + (max(keys) + 1,))}) with pytest.raises(ValueError): - scheme.backward(flags) + scheme.toExternal(flags) def test_dmpTranslator(): @@ -94,7 +94,7 @@ def test_dmpTranslator(): {"func": "flagFoo", "kwargs": {"cause": "BELOW_OR_ABOVE_MIN_MAX"}} ) - tflags = scheme.backward(flags) + tflags = scheme.toExternal(flags) assert set(tflags.columns.get_level_values(1)) == { "quality_flag", @@ -137,7 +137,7 @@ def test_positionalTranslator(): flags[1::3, "var1"] = DOUBTFUL flags[2::3, "var1"] = BAD - tflags = scheme.backward(flags) + tflags = scheme.toExternal(flags) assert (tflags["var2"].replace(-9999, np.nan).dropna() == 90).all(axis=None) assert (tflags["var1"].iloc[1::3] 
== 90210).all(axis=None) assert (tflags["var1"].iloc[2::3] == 90002).all(axis=None) @@ -156,7 +156,7 @@ def test_positionalTranslatorIntegration(): for field in flags.columns: assert flags[field].astype(str).str.match("^9[012]*$").all() - round_trip = scheme.backward(scheme.forward(flags)) + round_trip = scheme.toExternal(scheme.toInternal(flags)) assert (flags.values == round_trip.values).all() assert (flags.index == round_trip.index).all() @@ -183,7 +183,7 @@ def test_dmpTranslatorIntegration(): assert qfunc.isin({"", "flagMissing", "flagRange"}).all(axis=None) assert (qcause[qflags[col] == "BAD"] == "OTHER").all(axis=None) - round_trip = scheme.backward(scheme.forward(flags)) + round_trip = scheme.toExternal(scheme.toInternal(flags)) assert round_trip.xs("quality_flag", axis="columns", level=1).equals(qflags) @@ -276,8 +276,8 @@ def test_positionalMulitcallsPreserveState(): scheme = PositionalScheme() flags1 = saqc1._flags flags2 = saqc2._flags - tflags1 = scheme.backward(flags1).astype(str) - tflags2 = scheme.backward(flags2).astype(str) + tflags1 = scheme.toExternal(flags1).astype(str) + tflags2 = scheme.toExternal(flags2).astype(str) for k in flags2.columns: expected = tflags1[k].str.slice(start=1) * 2 -- GitLab