Skip to content
Snippets Groups Projects
Commit e0f35eb3 authored by David Schäfer
Browse files

Translation cleanups

parent 59f8b387
No related branches found
No related tags found
3 merge requests: !685 Release 2.4, !684 Release 2.4, !579 Translation cleanups
......@@ -11,6 +11,9 @@ SPDX-License-Identifier: GPL-3.0-or-later
### Added
- add option to not overwrite existing flags to `concatFlags`
### Changed
- Removed all flag value restrictions from the default flagging scheme `FloatScheme`
- Renamed `TranslationScheme.forward` to `TranslationScheme.toInternal`
- Renamed `TranslationScheme.backward` to `TranslationScheme.toExternal`
### Removed
### Fixed
......
......@@ -110,7 +110,7 @@ class SaQC(FunctionsMixin):
@property
def flags(self) -> MutableMapping:
flags = self._scheme.backward(self._flags, attrs=self._attrs, raw=True)
flags = self._scheme.toExternal(self._flags, attrs=self._attrs)
flags.attrs = self._attrs.copy()
return flags
......
......@@ -6,7 +6,7 @@
from __future__ import annotations
from typing import DefaultDict, Dict, Iterable, Mapping, Optional, Tuple, Type, Union
from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union
import numpy as np
import pandas as pd
......@@ -191,9 +191,7 @@ class Flags:
2 -inf 25.0 25.0 0.0 99.0
"""
def __init__(
self, raw_data: Optional[Union[DictLike, Flags]] = None, copy: bool = False
):
def __init__(self, raw_data: DictLike | Flags | None = None, copy: bool = False):
self._data: dict[str, History]
......
......@@ -7,6 +7,7 @@
# -*- coding: utf-8 -*-
from saqc.core.translation.basescheme import (
FloatScheme,
MappingScheme,
SimpleScheme,
TranslationScheme,
)
......
......@@ -8,6 +8,7 @@
from __future__ import annotations
from abc import abstractmethod, abstractproperty
from typing import Any, Dict
import numpy as np
......@@ -22,7 +23,26 @@ ForwardMap = Dict[ExternalFlag, float]
BackwardMap = Dict[float, ExternalFlag]
class TranslationScheme:
class TranslationScheme: # pragma: no cover
@property
@abstractmethod
def DFILTER_DEFAULT(self):
pass
@abstractmethod
def __call__(self, flag: ExternalFlag) -> float:
pass
@abstractmethod
def toInternal(self, flags: pd.DataFrame | DictOfSeries) -> Flags:
pass
@abstractmethod
def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries:
pass
class MappingScheme(TranslationScheme):
"""
This class provides the basic translation mechanism and should serve as
a base class for every other translation scheme.
......@@ -81,7 +101,7 @@ class TranslationScheme:
@staticmethod
def _translate(
flags: Flags | pd.DataFrame | pd.Series,
flags: Flags | pd.DataFrame | pd.Series | DictOfSeries,
trans_map: ForwardMap | BackwardMap,
) -> DictOfSeries:
"""
......@@ -95,7 +115,7 @@ class TranslationScheme:
Returns
-------
pd.DataFrame, Flags
DictOfSeries
"""
if isinstance(flags, pd.Series):
flags = flags.to_frame()
......@@ -128,9 +148,9 @@ class TranslationScheme:
if flag not in self._backward:
raise ValueError(f"invalid flag: {flag}")
return float(flag)
return self._forward[flag]
return float(self._forward[flag])
def forward(self, flags: pd.DataFrame) -> Flags:
def toInternal(self, flags: pd.DataFrame | DictOfSeries | pd.Series) -> Flags:
"""
Translate from 'external flags' to 'internal flags'
......@@ -145,13 +165,11 @@ class TranslationScheme:
"""
return Flags(self._translate(flags, self._forward))
def backward(
def toExternal(
self,
flags: Flags,
raw: bool = False,
attrs: dict | None = None,
**kwargs,
) -> pd.DataFrame | DictOfSeries:
) -> DictOfSeries:
"""
Translate from 'internal flags' to 'external flags'
......@@ -160,9 +178,6 @@ class TranslationScheme:
flags : Flags
The internal flags to translate
raw: bool, default False
if True return data as DictOfSeries, otherwise as pandas DataFrame.
attrs : dict or None, default None
global meta information of saqc-object
......@@ -172,8 +187,6 @@ class TranslationScheme:
"""
out = self._translate(flags, self._backward)
out.attrs = attrs or {}
if not raw:
out = out.to_df()
return out
......@@ -184,16 +197,30 @@ class FloatScheme(TranslationScheme):
internal float flags
"""
_MAP = {
-np.inf: -np.inf,
**{k: k for k in np.arange(0, 256, dtype=float)},
}
DFILTER_DEFAULT: float = FILTER_ALL
def __init__(self):
super().__init__(self._MAP, self._MAP)
def __call__(self, flag: float | int) -> float:
try:
return float(flag)
except (TypeError, ValueError, OverflowError):
raise ValueError(f"invalid flag, expected a numerical value, got: {flag}")
def toInternal(self, flags: pd.DataFrame | DictOfSeries) -> Flags:
try:
return Flags(flags.astype(float))
except (TypeError, ValueError, OverflowError):
raise ValueError(
f"invalid flag(s), expected a collection of numerical values, got: {flags}"
)
def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries:
out = flags.toDios()
out.attrs = attrs or {}
return out
class SimpleScheme(TranslationScheme):
class SimpleScheme(MappingScheme):
"""
Acts as the default Translator, provides a changeable subset of the
......
......@@ -17,7 +17,7 @@ import pandas as pd
from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED
from saqc.core.flags import Flags
from saqc.core.history import History
from saqc.core.translation.basescheme import BackwardMap, ForwardMap, TranslationScheme
from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme
_QUALITY_CAUSES = [
"",
......@@ -40,7 +40,7 @@ _QUALITY_LABELS = [
]
class DmpScheme(TranslationScheme):
class DmpScheme(MappingScheme):
"""
Implements the translation from and to the flagging scheme implemented in
......@@ -91,7 +91,7 @@ class DmpScheme(TranslationScheme):
field_history.append(histcol, meta=meta)
return field_history
def forward(self, df: pd.DataFrame) -> Flags:
def toInternal(self, df: pd.DataFrame) -> Flags:
"""
Translate from 'external flags' to 'internal flags'
......@@ -114,7 +114,7 @@ class DmpScheme(TranslationScheme):
return Flags(data)
def backward(
def toExternal(
self, flags: Flags, attrs: dict | None = None, **kwargs
) -> pd.DataFrame:
"""
......@@ -131,7 +131,7 @@ class DmpScheme(TranslationScheme):
-------
translated flags
"""
tflags = super().backward(flags, raw=True, attrs=attrs)
tflags = super().toExternal(flags, attrs=attrs)
out = pd.DataFrame(
index=reduce(lambda x, y: x.union(y), tflags.indexes).sort_values(),
......
......@@ -12,10 +12,10 @@ import pandas as pd
from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED
from saqc.core.flags import Flags, History
from saqc.core.translation.basescheme import BackwardMap, ForwardMap, TranslationScheme
from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme
class PositionalScheme(TranslationScheme):
class PositionalScheme(MappingScheme):
"""
Implements the translation from and to the flagging scheme implemented by CHS
......@@ -43,7 +43,7 @@ class PositionalScheme(TranslationScheme):
def __init__(self):
super().__init__(forward=self._FORWARD, backward=self._BACKWARD)
def forward(self, flags: pd.DataFrame) -> Flags:
def toInternal(self, flags: pd.DataFrame) -> Flags:
"""
Translate from 'external flags' to 'internal flags'
......@@ -75,7 +75,7 @@ class PositionalScheme(TranslationScheme):
return Flags(data)
def backward(self, flags: Flags, **kwargs) -> pd.DataFrame:
def toExternal(self, flags: Flags, **kwargs) -> pd.DataFrame:
"""
Translate from 'internal flags' to 'external flags'
......
......@@ -16,7 +16,7 @@ import pytest
from saqc.constants import BAD, DOUBTFUL, FILTER_NONE, UNFLAGGED
from saqc.core.core import SaQC
from saqc.core.flags import Flags
from saqc.core.translation import DmpScheme, PositionalScheme, TranslationScheme
from saqc.core.translation import DmpScheme, MappingScheme, PositionalScheme
from tests.common import initData
......@@ -27,7 +27,7 @@ def _genTranslators():
dtype(-1): BAD,
**{dtype(f * 10): float(f) for f in range(10)},
}
scheme = TranslationScheme(flags, {v: k for k, v in flags.items()})
scheme = MappingScheme(flags, {v: k for k, v in flags.items()})
yield flags, scheme
......@@ -60,7 +60,7 @@ def test_backwardTranslation():
for _, scheme in _genTranslators():
keys = tuple(scheme._backward.keys())
flags = _genFlags({field: np.array(keys)})
translated = scheme.backward(flags)
translated = scheme.toExternal(flags)
expected = set(scheme._backward.values())
assert not (set(translated[field]) - expected)
......@@ -72,7 +72,7 @@ def test_backwardTranslationFail():
# add a value that is invalid for the scheme to the flags
flags = _genFlags({field: np.array(keys + (max(keys) + 1,))})
with pytest.raises(ValueError):
scheme.backward(flags)
scheme.toExternal(flags)
def test_dmpTranslator():
......@@ -94,7 +94,7 @@ def test_dmpTranslator():
{"func": "flagFoo", "kwargs": {"cause": "BELOW_OR_ABOVE_MIN_MAX"}}
)
tflags = scheme.backward(flags)
tflags = scheme.toExternal(flags)
assert set(tflags.columns.get_level_values(1)) == {
"quality_flag",
......@@ -137,7 +137,7 @@ def test_positionalTranslator():
flags[1::3, "var1"] = DOUBTFUL
flags[2::3, "var1"] = BAD
tflags = scheme.backward(flags)
tflags = scheme.toExternal(flags)
assert (tflags["var2"].replace(-9999, np.nan).dropna() == 90).all(axis=None)
assert (tflags["var1"].iloc[1::3] == 90210).all(axis=None)
assert (tflags["var1"].iloc[2::3] == 90002).all(axis=None)
......@@ -156,7 +156,7 @@ def test_positionalTranslatorIntegration():
for field in flags.columns:
assert flags[field].astype(str).str.match("^9[012]*$").all()
round_trip = scheme.backward(scheme.forward(flags))
round_trip = scheme.toExternal(scheme.toInternal(flags))
assert (flags.values == round_trip.values).all()
assert (flags.index == round_trip.index).all()
......@@ -183,7 +183,7 @@ def test_dmpTranslatorIntegration():
assert qfunc.isin({"", "flagMissing", "flagRange"}).all(axis=None)
assert (qcause[qflags[col] == "BAD"] == "OTHER").all(axis=None)
round_trip = scheme.backward(scheme.forward(flags))
round_trip = scheme.toExternal(scheme.toInternal(flags))
assert round_trip.xs("quality_flag", axis="columns", level=1).equals(qflags)
......@@ -276,8 +276,8 @@ def test_positionalMulitcallsPreserveState():
scheme = PositionalScheme()
flags1 = saqc1._flags
flags2 = saqc2._flags
tflags1 = scheme.backward(flags1).astype(str)
tflags2 = scheme.backward(flags2).astype(str)
tflags1 = scheme.toExternal(flags1).astype(str)
tflags2 = scheme.toExternal(flags2).astype(str)
for k in flags2.columns:
expected = tflags1[k].str.slice(start=1) * 2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment