diff --git a/.gitignore b/.gitignore index dfacde84c06ace3bf99af6b853b13350a6a5cd06..90558b0d9b52ae581d58614ff3c054f8dc348fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.automodapi docs/_api docs/_build +docs/resources/temp/* coverage.xml venv*/ **/.* diff --git a/dios/dios/dios.py b/dios/dios/dios.py index ea2af4561149220ced6c0ed230df559e51bc0d46..8a5ee9433be9e59d1e81ed4ec9d1c19012a092b1 100644 --- a/dios/dios/dios.py +++ b/dios/dios/dios.py @@ -1278,6 +1278,7 @@ def _to_aligned_df(dios, no_value=" "): def to_dios(obj) -> DictOfSeries: + """try cast obj to DictOfSeries.""" if isinstance(obj, DictOfSeries): return obj return DictOfSeries(data=obj) diff --git a/docs/conf.py b/docs/conf.py index 11ebee06bf571f1995e3c2607094824a4d4d386c..bfe7e814164dd79dd837e219e598c82490623516 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,7 +32,7 @@ version = vdict["__version__"] # -- Customize logging ------------------------------------------------------- -# couldn't get rid of a ignorable warning, so filter it +# couldn't get rid of an ignorable warning, so filter it # also see: https://issuemode.com/issues/sphinx-doc/sphinx/73994507 diff --git a/docs/documentation/Customizations.rst b/docs/documentation/Customizations.rst index 3c76088f20a08079c12087d5f9a263124009e6a0..989e4d4e1770e1b82a3ae4e6326d04ca612e523a 100644 --- a/docs/documentation/Customizations.rst +++ b/docs/documentation/Customizations.rst @@ -38,7 +38,6 @@ implement the following function interface .. code-block:: python import pandas - import dios import saqc def yourTestFunction( diff --git a/docs/modules/SaQCCore.rst b/docs/modules/SaQCCore.rst index 197da183d6809feed687dcf07a360f7fc76c5aaf..93569cdb1b17699415c1b5455ef15685a6bb9bcf 100644 --- a/docs/modules/SaQCCore.rst +++ b/docs/modules/SaQCCore.rst @@ -4,6 +4,17 @@ SaQC ==== +.. currentmodule:: saqc + +.. HACK: add 'our' external imported objects to core, but dont make it show up here + .. 
autosummary:: + :toctree: ../_api + + saqc.core.to_dios + saqc.core.DictOfSeries + .. automodapi:: saqc.core :include-all-objects: + + diff --git a/saqc/__init__.py b/saqc/__init__.py index c6db9151c878416e559f1b78824df83596dea397..082baf5bc8a209dd8163b2d35eab33ae2aaf82a4 100644 --- a/saqc/__init__.py +++ b/saqc/__init__.py @@ -1,15 +1,14 @@ #! /usr/bin/env python - # SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ -# # SPDX-License-Identifier: GPL-3.0-or-later - # -*- coding: utf-8 -*- +# isort: skip_file + """The System for automated Quality Control package.""" from saqc.constants import BAD, DOUBTFUL, FILTER_ALL, FILTER_NONE, GOOD, UNFLAGGED - -# import order: from small to big, to a void cycles -from saqc.core import Flags, SaQC, fromConfig +from saqc.core import Flags, SaQC +from saqc.core.translation import DmpScheme, FloatScheme, PositionalScheme, SimpleScheme +from saqc.parsing.reader import fromConfig from saqc.version import __version__ diff --git a/saqc/__main__.py b/saqc/__main__.py index 24ba5b8b09ac4ce7ee049da2b177728580b72c3d..eadfef4b6d53f149472919833a648bd79facae62 100644 --- a/saqc/__main__.py +++ b/saqc/__main__.py @@ -15,9 +15,9 @@ import numpy as np import pandas as pd import pyarrow as pa -from dios.dios.dios import DictOfSeries +from saqc.core import DictOfSeries from saqc.core.core import TRANSLATION_SCHEMES -from saqc.core.reader import fromConfig +from saqc.parsing.reader import fromConfig logger = logging.getLogger("SaQC") diff --git a/saqc/constants.py b/saqc/constants.py index 985bcbdc12348c96def01dd41def071559dc28d4..5cc9ae0bc378d7ba72fddd91597ce2d3a0c544f1 100644 --- a/saqc/constants.py +++ b/saqc/constants.py @@ -29,16 +29,12 @@ __all__ = [ "DOUBTFUL", "BAD", "GOOD", - "ENVIRONMENT", "FILTER_ALL", "FILTER_NONE", ] import numpy as np -import scipy.stats as st - -import saqc.lib.ts_operators as ts_ops # ---------------------------------------------------------------------- # global flag constants @@ -55,65 
+51,3 @@ BAD = 255.0 FILTER_ALL = -np.inf FILTER_NONE = np.inf - - -# ---------------------------------------------------------------------- -# other -# ---------------------------------------------------------------------- -def clip(series, lower=None, upper=None): - return series.clip(lower=lower, upper=upper) - - -ENVIRONMENT = { - # Infinity constant - "inf": np.inf, - "INF": np.inf, - # Not A number Constant. - "NAN": np.nan, - "nan": np.nan, - # Pointwise absolute Value Function. - "abs": np.abs, - # Maximum Value Function. Ignores NaN. - "max": np.nanmax, - # Minimum Value Function. Ignores NaN. - "min": np.nanmin, - # Mean Value Function. Ignores NaN. - "mean": np.nanmean, - # Summation. Ignores NaN. - "sum": np.nansum, - # Standart Deviation. Ignores NaN. - "len": len, - # Pointwise Exponential. - "exp": np.exp, - # Pointwise Logarithm. - "log": np.log, - # Logarithm, returning NaN for zero input, instead of -inf. - "nanLog": ts_ops.zeroLog, - # Standart Deviation. Ignores NaN. - "std": np.nanstd, - # Variance. Ignores NaN. - "var": np.nanvar, - # Median. Ignores NaN. - "median": np.nanmedian, - # Count Number of values. Ignores NaNs. - "count": ts_ops.count, - # Identity. - "id": ts_ops.identity, - # Returns a Series` diff. - "diff": ts_ops.difference, - # Scales data to [0,1] Interval. - "scale": ts_ops.normScale, - # Standardize with Standart Deviation. - "zScore": lambda x: st.zscore(x, nan_policy="omit"), - # Standardize with Median and MAD. - "madScore": ts_ops.standardizeByMedian, - # Standardize with Median and inter quantile range. 
- "iqsScore": ts_ops.standardizeByIQR, - "clip": clip, - "GOOD": GOOD, - "BAD": BAD, - "UNFLAGGED": UNFLAGGED, - "DOUBTFUL": DOUBTFUL, - "FILTER_ALL": FILTER_ALL, - "FILTER_NONE": FILTER_NONE, -} diff --git a/saqc/core/__init__.py b/saqc/core/__init__.py index 7d30f6416abb5611e17cbfb7fae6025e48243296..775e7f23fda3afc45663371042f418fd924dde21 100644 --- a/saqc/core/__init__.py +++ b/saqc/core/__init__.py @@ -1,14 +1,11 @@ #! /usr/bin/env python - # SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ -# # SPDX-License-Identifier: GPL-3.0-or-later - # -*- coding: utf-8 -*- -from saqc.core.core import SaQC -from saqc.core.flags import Flags, initFlagsLike +# isort: skip_file +from saqc.core.frame import DictOfSeries, to_dios # noqa from saqc.core.history import History -from saqc.core.reader import fromConfig +from saqc.core.flags import Flags, initFlagsLike from saqc.core.register import flagging, processing, register -from saqc.core.translation import DmpScheme, FloatScheme, PositionalScheme, SimpleScheme +from saqc.core.core import SaQC diff --git a/saqc/core/core.py b/saqc/core/core.py index ebce7c049c1ed9ded44bbc04202106e46b6f84f4..c719375bc4e99bcd911e4b8961c63b8b71909663 100644 --- a/saqc/core/core.py +++ b/saqc/core/core.py @@ -15,8 +15,8 @@ from typing import Any, Hashable, MutableMapping import numpy as np import pandas as pd -from dios import DictOfSeries, to_dios from saqc.core.flags import Flags, initFlagsLike +from saqc.core.frame import DictOfSeries, concatDios, to_dios from saqc.core.history import History from saqc.core.register import FUNC_MAP from saqc.core.translation import ( @@ -27,7 +27,6 @@ from saqc.core.translation import ( TranslationScheme, ) from saqc.funcs import FunctionsMixin -from saqc.lib.tools import concatDios # warnings pd.set_option("mode.chained_assignment", "warn") diff --git a/saqc/core/flags.py b/saqc/core/flags.py index 34042c8a9c2b505c79d5cae097b9ac6b824bd18b..ebff8349b01683f05d297c54824d07a8f99c467f 
100644 --- a/saqc/core/flags.py +++ b/saqc/core/flags.py @@ -11,13 +11,12 @@ from typing import DefaultDict, Dict, Iterable, Mapping, Tuple, Type, Union import numpy as np import pandas as pd -import dios -from saqc.core.history import History +from saqc.core import DictOfSeries, History _VAL = Union[pd.Series, History] DictLike = Union[ pd.DataFrame, - dios.DictOfSeries, + DictOfSeries, Dict[str, _VAL], DefaultDict[str, _VAL], ] @@ -77,8 +76,8 @@ class Flags: .. doctest:: exampleFlags - >>> from saqc.constants import UNFLAGGED, BAD, DOUBTFUL - >>> flags = saqc.Flags() + >>> from saqc import UNFLAGGED, BAD, DOUBTFUL, Flags + >>> flags = Flags() >>> flags Empty Flags Columns: [] @@ -394,7 +393,7 @@ class Flags: Access via ``flags.history['var']``. To set a new history use ``flags.history['var'] = value``. - The passed value must be a instance of History or must be convertible to a + The passed value must be an instance of History or must be convertible to a history. Returns @@ -444,15 +443,15 @@ class Flags: # ---------------------------------------------------------------------- # transformation and representation - def toDios(self) -> dios.DictOfSeries: + def toDios(self) -> DictOfSeries: """ - Transform the flags container to a ``dios.DictOfSeries``. + Transform the flags container to a ``DictOfSeries``. Returns ------- - dios.DictOfSeries + DictOfSeries """ - di = dios.DictOfSeries(columns=self.columns) + di = DictOfSeries(columns=self.columns) for k in self._data.keys(): di[k] = self[k] @@ -478,11 +477,11 @@ def initFlagsLike( name: str = None, ) -> Flags: """ - Create empty Flags, from an reference data structure. + Create empty Flags, from a reference data structure. Parameters ---------- - reference : pd.DataFrame, pd.Series, dios.DictOfSeries, dict of pd.Series + reference : pd.DataFrame, pd.Series, DictOfSeries, dict of pd.Series The reference structure to initialize for. 
name : str, default None diff --git a/saqc/core/frame.py b/saqc/core/frame.py new file mode 100644 index 0000000000000000000000000000000000000000..225ec64144c1730f50750ccfdf7ff784468b233c --- /dev/null +++ b/saqc/core/frame.py @@ -0,0 +1,62 @@ +#! /usr/bin/env python +# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ +# SPDX-License-Identifier: GPL-3.0-or-later +# -*- coding: utf-8 -*- + +import warnings +from typing import List + +from dios import DictOfSeries, to_dios # noqa + + +def mergeDios(left: DictOfSeries, right: DictOfSeries, subset=None, join="merge"): + # use dios.merge() as soon as it implemented + # see https://git.ufz.de/rdm/dios/issues/15 + + merged = left.copy() + if subset is not None: + right_subset_cols = right.columns.intersection(subset) + else: + right_subset_cols = right.columns + + shared_cols = left.columns.intersection(right_subset_cols) + + for c in shared_cols: + l, r = left[c], right[c] + if join == "merge": + # NOTE: + # our merge behavior is nothing more than an + # outer join, where the right join argument + # overwrites the left at the shared indices, + # while on a normal outer join common indices + # hold the values from the left join argument + r, l = l.align(r, join="outer") + else: + l, r = l.align(r, join=join) + merged[c] = l.combine_first(r) + + newcols = right_subset_cols.difference(left.columns) + for c in newcols: + merged[c] = right[c].copy() + + return merged + + +def concatDios(data: List[DictOfSeries], warn: bool = True, stacklevel: int = 2): + # fast path for most common case + if len(data) == 1 and data[0].columns.is_unique: + return data[0] + + result = DictOfSeries() + for di in data: + for c in di.columns: + if c in result.columns: + if warn: + warnings.warn( + f"Column {c} already exist. Data is overwritten. 
" + f"Avoid duplicate columns names over all inputs.", + stacklevel=stacklevel, + ) + result[c] = di[c] + + return result diff --git a/saqc/core/history.py b/saqc/core/history.py index 480c9593a56fa0bfcdea15db7375ef8711c2ca6b..16abacdf57feb2a039de6148f59af31762dd8a7b 100644 --- a/saqc/core/history.py +++ b/saqc/core/history.py @@ -6,15 +6,14 @@ from __future__ import annotations -from copy import copy as shallowcopy -from copy import deepcopy +import copy as _copy from typing import Any, Callable, Dict, List, Tuple import numpy as np import pandas as pd from pandas.api.types import is_categorical_dtype, is_float_dtype -from saqc.constants import UNFLAGGED +from saqc import UNFLAGGED class History: @@ -70,7 +69,7 @@ class History: @meta.setter def meta(self, value: list[dict[str, Any]]) -> None: self._validateMetaList(value, self._hist) - self._meta = deepcopy(value) + self._meta = _copy.deepcopy(value) @property def index(self) -> pd.Index: @@ -428,7 +427,7 @@ class History: copy : History the copied FH """ - copyfunc = deepcopy if deep else shallowcopy + copyfunc = _copy.deepcopy if deep else _copy.copy new = History(self.index) new._hist = self._hist.copy(deep) new._meta = copyfunc(self._meta) @@ -564,7 +563,7 @@ class History: if copy: hist = hist.copy() - meta = deepcopy(meta) + meta = _copy.deepcopy(meta) history = cls(index=None) # noqa history._hist = hist.astype("category", copy=False) diff --git a/saqc/core/register.py b/saqc/core/register.py index 7ac2c33ec8bae3386878d1fbd3c6d56b3d59e1d5..7f364ce5e72e36facf5648849131deba37a6732a 100644 --- a/saqc/core/register.py +++ b/saqc/core/register.py @@ -3,7 +3,6 @@ # SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ # # SPDX-License-Identifier: GPL-3.0-or-later - from __future__ import annotations import functools @@ -15,15 +14,20 @@ import numpy as np import pandas as pd from typing_extensions import ParamSpec -import dios -from saqc.constants import FILTER_ALL, FILTER_NONE, UNFLAGGED 
-from saqc.core.flags import Flags, History +from saqc import FILTER_ALL, FILTER_NONE +from saqc.core import DictOfSeries, Flags, History from saqc.core.translation.basescheme import TranslationScheme -from saqc.lib.tools import squeezeSequence, toSequence +from saqc.lib.tools import isflagged, squeezeSequence, toSequence from saqc.lib.types import ExternalFlag, OptionalNone if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC + +__all__ = [ + "register", + "processing", + "flagging", +] # NOTE: # the global SaQC function store, @@ -153,24 +157,24 @@ def _squeezeFlags(old_flags, new_flags: Flags, columns: pd.Index, meta) -> Flags def _maskData( - data: dios.DictOfSeries, flags: Flags, columns: Sequence[str], thresh: float -) -> Tuple[dios.DictOfSeries, dios.DictOfSeries]: + data: DictOfSeries, flags: Flags, columns: Sequence[str], thresh: float +) -> Tuple[DictOfSeries, DictOfSeries]: """ Mask data with Nans, if the flags are worse than a threshold. - mask only passed `columns` (preselected by `datamask`-kw from decorator) Returns ------- - masked : dios.DictOfSeries + masked : DictOfSeries masked data, same dim as original - mask : dios.DictOfSeries + mask : DictOfSeries dios holding iloc-data-pairs for every column in `data` """ - mask = dios.DictOfSeries(columns=columns) + mask = DictOfSeries(columns=columns) # we use numpy here because it is faster for c in columns: - col_mask = _isflagged(flags[c], thresh) + col_mask = isflagged(flags[c], thresh) if col_mask.any(): col_data = data[c].to_numpy(dtype=np.float64) @@ -184,8 +188,8 @@ def _maskData( def _unmaskData( - data: dios.DictOfSeries, mask: dios.DictOfSeries, columns: pd.Index | None = None -) -> dios.DictOfSeries: + data: DictOfSeries, mask: DictOfSeries, columns: pd.Index | None = None +) -> DictOfSeries: """ Restore the masked data. 
@@ -462,19 +466,3 @@ def processing(**kwargs): if kwargs: raise ValueError("use '@register' to pass keywords") return register(mask=[], demask=[], squeeze=[]) - - -A = TypeVar("A", np.ndarray, pd.Series) - - -def _isflagged(flagscol: A, thresh: float) -> A: - """ - Return a mask of flags accordingly to `thresh`. Return type is same as flags. - """ - if not isinstance(thresh, (float, int)): - raise TypeError(f"thresh must be of type float, not {repr(type(thresh))}") - - if thresh == FILTER_ALL: - return flagscol > UNFLAGGED - - return flagscol >= thresh diff --git a/saqc/core/translation/__init__.py b/saqc/core/translation/__init__.py index c40914611bbe1fad26132d3520f2e39e4a803dc6..fe2d85790a1f5d516c832527931c10ece45464a0 100644 --- a/saqc/core/translation/__init__.py +++ b/saqc/core/translation/__init__.py @@ -8,8 +8,8 @@ from saqc.core.translation.basescheme import ( FloatScheme, MappingScheme, - SimpleScheme, TranslationScheme, ) from saqc.core.translation.dmpscheme import DmpScheme from saqc.core.translation.positionalscheme import PositionalScheme +from saqc.core.translation.simplescheme import SimpleScheme diff --git a/saqc/core/translation/basescheme.py b/saqc/core/translation/basescheme.py index b7a3d67f4d8df76d996aa9dcee77144fbc77e957..d469e02b33edbb8467e7471a982d8fc3128611a8 100644 --- a/saqc/core/translation/basescheme.py +++ b/saqc/core/translation/basescheme.py @@ -8,15 +8,14 @@ from __future__ import annotations -from abc import abstractmethod, abstractproperty +from abc import abstractmethod from typing import Any, Dict import numpy as np import pandas as pd -from dios import DictOfSeries -from saqc.constants import BAD, FILTER_ALL, GOOD, UNFLAGGED -from saqc.core.flags import Flags +from saqc import BAD, FILTER_ALL, GOOD, UNFLAGGED +from saqc.core import DictOfSeries, Flags from saqc.lib.types import ExternalFlag ForwardMap = Dict[ExternalFlag, float] @@ -35,25 +34,52 @@ class TranslationScheme: # pragma: no cover @abstractmethod def toInternal(self, 
flags: pd.DataFrame | DictOfSeries) -> Flags: + """ + Translate from 'external flags' to 'internal flags' + + Parameters + ---------- + flags : pd.DataFrame or DictOfSeries + The external flags to translate + + Returns + ------- + Flags object + """ pass @abstractmethod def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries: + """ + Translate from 'internal flags' to 'external flags' + + Parameters + ---------- + flags : Flags + The internal flags to translate + + attrs : dict or None, default None + global meta information of saqc-object + + Returns + ------- + DictOfSeries + """ pass class MappingScheme(TranslationScheme): """ This class provides the basic translation mechanism and should serve as - a base class for every other translation scheme. + a base class for most other translation schemes. - The general translation is realized through dictionary lookups, altough + The general translation is realized through dictionary lookups, although we might need to extend this logic to also allow calls to translation - functions in the future. Currently at least one `dict` defining the + functions in the future. Currently, at least one `dict` defining the 'forward' translation from 'user flags' -> 'internal flags' needs to be provided. Optionally a second `dict` can be passed to map 'internal flags' -> 'user flags', - if the latter is not given, this 'backward' translation will inferred as + if the latter is not given, this 'backward' translation is inferred as the inverse of the 'forward' translation. 
The translation mechanism imposes a few restrictions: @@ -217,27 +243,3 @@ class FloatScheme(TranslationScheme): out = flags.toDios() out.attrs = attrs or {} return out - - -class SimpleScheme(MappingScheme): - - """ - Acts as the default Translator, provides a changeable subset of the - internal float flags - """ - - _FORWARD = { - "UNFLAGGED": UNFLAGGED, - "BAD": BAD, - "OK": GOOD, - } - - _BACKWARD = { - UNFLAGGED: "UNFLAGGED", - np.nan: "UNFLAGGED", - BAD: "BAD", - GOOD: "OK", - } - - def __init__(self): - super().__init__(forward=self._FORWARD, backward=self._BACKWARD) diff --git a/saqc/core/translation/dmpscheme.py b/saqc/core/translation/dmpscheme.py index 17f958484643941cc1641d0cebb1ad69a4e93d67..bf35b7b895c04cda129f698763ebaf9f711ae3ab 100644 --- a/saqc/core/translation/dmpscheme.py +++ b/saqc/core/translation/dmpscheme.py @@ -14,9 +14,8 @@ from functools import reduce import numpy as np import pandas as pd -from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED -from saqc.core.flags import Flags -from saqc.core.history import History +from saqc import BAD, DOUBTFUL, GOOD, UNFLAGGED +from saqc.core import Flags, History from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme _QUALITY_CAUSES = [ diff --git a/saqc/core/translation/positionalscheme.py b/saqc/core/translation/positionalscheme.py index 23b724293b121a15ba0ba3988ea60e250e1b085a..f503b0fe379019d62c40c9ce9d76bbaa21545f09 100644 --- a/saqc/core/translation/positionalscheme.py +++ b/saqc/core/translation/positionalscheme.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd from saqc.constants import BAD, DOUBTFUL, GOOD, UNFLAGGED -from saqc.core.flags import Flags, History +from saqc.core import Flags, History from saqc.core.translation.basescheme import BackwardMap, ForwardMap, MappingScheme diff --git a/saqc/core/translation/simplescheme.py b/saqc/core/translation/simplescheme.py new file mode 100644 index 
0000000000000000000000000000000000000000..aabd0472b55817696156ec156d036484a7f7264b --- /dev/null +++ b/saqc/core/translation/simplescheme.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python +# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ +# SPDX-License-Identifier: GPL-3.0-or-later +# -*- coding: utf-8 -*- + +import numpy as np + +from saqc.constants import BAD, GOOD, UNFLAGGED +from saqc.core.translation import MappingScheme + + +class SimpleScheme(MappingScheme): + + """ + Acts as the default Translator, provides a changeable subset of the + internal float flags + """ + + _FORWARD = { + "UNFLAGGED": UNFLAGGED, + "BAD": BAD, + "OK": GOOD, + } + + _BACKWARD = { + UNFLAGGED: "UNFLAGGED", + np.nan: "UNFLAGGED", + BAD: "BAD", + GOOD: "OK", + } + + def __init__(self): + super().__init__(forward=self._FORWARD, backward=self._BACKWARD) diff --git a/saqc/funcs/breaks.py b/saqc/funcs/breaks.py index 0b700d6ae5a3c80c38e5987468fe8642d91ce195..5d9e1fc58b44b234311475a12e0e459c9064bfc4 100644 --- a/saqc/funcs/breaks.py +++ b/saqc/funcs/breaks.py @@ -22,9 +22,10 @@ from typing import TYPE_CHECKING import numpy as np import pandas as pd -from saqc.constants import BAD, FILTER_ALL -from saqc.core.register import _isflagged, flagging, register +from saqc import BAD, FILTER_ALL +from saqc.core import flagging, register from saqc.funcs.changepoints import _assignChangePointCluster +from saqc.lib.tools import isflagged if TYPE_CHECKING: from saqc.core.core import SaQC @@ -64,7 +65,7 @@ class BreaksMixin: datacol = self._data[field] mask = datacol.isna() - mask = ~_isflagged(self._flags[field], dfilter) & mask + mask = ~isflagged(self._flags[field], dfilter) & mask self._flags[mask, field] = flag return self diff --git a/saqc/funcs/changepoints.py b/saqc/funcs/changepoints.py index 544f07f756d830d4d6dad6ed43d1d21ed66d40fb..9b9267c380826ab92d3af89bd94554b37570c1a9 100644 --- a/saqc/funcs/changepoints.py +++ b/saqc/funcs/changepoints.py @@ -12,16 +12,13 @@ from 
typing import TYPE_CHECKING, Callable, Tuple import numba import numpy as np import pandas as pd -from typing_extensions import Literal -from dios import DictOfSeries -from saqc.constants import BAD, UNFLAGGED -from saqc.core.flags import Flags -from saqc.core.register import flagging, register +from saqc import BAD, UNFLAGGED +from saqc.core import DictOfSeries, Flags, flagging, register from saqc.lib.tools import customRoller, filterKwargs if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class ChangepointsMixin: diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index fc1a77f99b0435496ea607c1922522848038c464..ffd2acb07ba45d64e9efd3a4daa205e1f9a6e622 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -14,13 +14,13 @@ from typing import TYPE_CHECKING import numpy as np import pandas as pd -from saqc.constants import BAD -from saqc.core.register import flagging +from saqc import BAD +from saqc.core import flagging from saqc.lib.tools import customRoller, getFreqDelta, statPass from saqc.lib.ts_operators import varQC if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class ConstantsMixin: diff --git a/saqc/funcs/curvefit.py b/saqc/funcs/curvefit.py index 0444d7983ddf695d1458757f1b506ea5b756c292..c0a86fd5185869c181af20b45b7259b1cc28801a 100644 --- a/saqc/funcs/curvefit.py +++ b/saqc/funcs/curvefit.py @@ -13,9 +13,7 @@ import numpy as np import pandas as pd from typing_extensions import Literal -from dios import DictOfSeries -from saqc.core.flags import Flags -from saqc.core.register import register +from saqc.core import DictOfSeries, Flags, register from saqc.lib.tools import getFreqDelta from saqc.lib.ts_operators import ( butterFilter, @@ -27,7 +25,7 @@ from saqc.lib.ts_operators import ( ) if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC _FILL_METHODS = Literal[ "linear", diff --git a/saqc/funcs/drift.py b/saqc/funcs/drift.py index 
4560c2058237589ea9ab545f519b8ab437b1adc6..d3555ac51ab8d7989aec5b34440a40e18677432b 100644 --- a/saqc/funcs/drift.py +++ b/saqc/funcs/drift.py @@ -19,16 +19,15 @@ from scipy.optimize import curve_fit from scipy.spatial.distance import pdist from typing_extensions import Literal -from dios import DictOfSeries -from saqc.constants import BAD -from saqc.core.register import Flags, flagging, register +from saqc import BAD +from saqc.core import DictOfSeries, Flags, flagging, register from saqc.funcs.changepoints import _assignChangePointCluster from saqc.lib.tools import detectDeviants, filterKwargs, toSequence from saqc.lib.ts_operators import expDriftModel, linearDriftModel from saqc.lib.types import CurveFitter if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC LinkageString = Literal[ diff --git a/saqc/funcs/flagtools.py b/saqc/funcs/flagtools.py index d378c70af9e58009eaedcde790da754b5a3c4a95..208aecd667a1b67ded026150bd0025eb700e0db1 100644 --- a/saqc/funcs/flagtools.py +++ b/saqc/funcs/flagtools.py @@ -15,13 +15,12 @@ import numpy as np import pandas as pd from typing_extensions import Literal -from dios import DictOfSeries -from saqc.constants import BAD, FILTER_ALL, UNFLAGGED -from saqc.core.register import _isflagged, flagging, register -from saqc.lib.tools import toSequence +from saqc import BAD, FILTER_ALL, UNFLAGGED +from saqc.core import DictOfSeries, flagging, register +from saqc.lib.tools import isflagged, toSequence if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class FlagtoolsMixin: @@ -518,7 +517,7 @@ class FlagtoolsMixin: # get dfilter from meta or get of rid of this and # consider everything != np.nan as flag - flagged = _isflagged(hc, dfilter) + flagged = isflagged(hc, dfilter) repeated = ( flagged.rolling(window, min_periods=1, closed="left") @@ -656,13 +655,13 @@ def _groupOperation( qcs_items: list[tuple["SaQC", list[str]]] = list(group.items()) # generate initial mask from the first `qc` 
object on the popped first field - mask = _isflagged(qcs_items[0][0]._flags[qcs_items[0][1].pop(0)], thresh=dfilter) + mask = isflagged(qcs_items[0][0]._flags[qcs_items[0][1].pop(0)], thresh=dfilter) for qc, fields in qcs_items: if field not in qc._flags: raise KeyError(f"variable {field} is missing in given SaQC object") for field in fields: - mask = func(mask, _isflagged(qc._flags[field], thresh=FILTER_ALL)) + mask = func(mask, isflagged(qc._flags[field], thresh=FILTER_ALL)) if target not in base._data: base = base.copyField(field=field, target=target) diff --git a/saqc/funcs/generic.py b/saqc/funcs/generic.py index 556f6bfb29f6d47ff1e75159bb40849fcbb756ef..e70b4c17e4ff9a41f5170acff21f1ed05be14e3a 100644 --- a/saqc/funcs/generic.py +++ b/saqc/funcs/generic.py @@ -12,16 +12,15 @@ from typing import TYPE_CHECKING, Sequence, Tuple, Union import numpy as np import pandas as pd -from dios import DictOfSeries -from saqc.constants import BAD, ENVIRONMENT, FILTER_ALL -from saqc.core.flags import Flags -from saqc.core.history import History -from saqc.core.register import _isflagged, _maskData, register -from saqc.lib.tools import toSequence +from saqc import BAD, FILTER_ALL +from saqc.core import DictOfSeries, Flags, History, register +from saqc.core.register import _maskData +from saqc.lib.tools import isflagged, toSequence from saqc.lib.types import GenericFunction, PandasLike +from saqc.parsing.environ import ENVIRONMENT if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC def _flagSelect(field, flags, label=None): @@ -64,7 +63,7 @@ def _execGeneric( dfilter: float = FILTER_ALL, ) -> DictOfSeries: globs = { - "isflagged": lambda data, label=None: _isflagged( + "isflagged": lambda data, label=None: isflagged( _flagSelect(data.name, flags, label), thresh=dfilter ), **ENVIRONMENT, diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index d4b16bbd1fc0a9e7b723e6b36ee3165a3fe39031..8aae82a04882c6c766f7b576ae93dfedfc41a1d3 100644 --- 
a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -13,12 +13,13 @@ import numpy as np import pandas as pd from typing_extensions import Literal -from saqc.constants import UNFLAGGED -from saqc.core.register import _isflagged, register +from saqc import UNFLAGGED +from saqc.core import register +from saqc.lib.tools import isflagged from saqc.lib.ts_operators import interpolateNANs if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC _SUPPORTED_METHODS = Literal[ @@ -267,7 +268,7 @@ class InterpolationMixin: # TODO: # in future we could use `register(mask=[field], [], [])` # and dont handle masking manually here - flagged = _isflagged(self._flags[field], kwargs["dfilter"]) + flagged = isflagged(self._flags[field], kwargs["dfilter"]) # drop all points that hold no relevant grid information datcol = datcol[~flagged].dropna() diff --git a/saqc/funcs/noise.py b/saqc/funcs/noise.py index aeed5d91a84d09dc8d0239b001ecd5f784cd147d..feca72039d0fb1808daf2f6bb97bd3e398cfc182 100644 --- a/saqc/funcs/noise.py +++ b/saqc/funcs/noise.py @@ -13,12 +13,12 @@ from typing import TYPE_CHECKING, Callable import numpy as np import pandas as pd -from saqc.constants import BAD -from saqc.core.register import flagging +from saqc import BAD +from saqc.core import flagging from saqc.lib.tools import statPass if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class NoiseMixin: diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index 05ee5fe85f59b1bba4b8f0f5b61471d02d0a26e7..182205ec39429b316fb9dba93ef7acfc7f1c25fd 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -15,19 +15,17 @@ import numba import numpy as np import numpy.polynomial.polynomial as poly import pandas as pd -from outliers import smirnov_grubbs +from outliers import smirnov_grubbs # noqa, on pypi as outlier-utils from scipy.stats import median_abs_deviation from typing_extensions import Literal -from dios import DictOfSeries -from 
saqc.constants import BAD, UNFLAGGED -from saqc.core.flags import Flags -from saqc.core.register import flagging, register +from saqc import BAD, UNFLAGGED +from saqc.core import DictOfSeries, Flags, flagging, register from saqc.funcs.scores import _univarScoring from saqc.lib.tools import customRoller, getFreqDelta, toSequence if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class OutliersMixin: diff --git a/saqc/funcs/pattern.py b/saqc/funcs/pattern.py index c3d02be0a55ad27ed4a3551858a8334de9f34155..39b15dc63ca20ea10173ecaf98a76036045d5fda 100644 --- a/saqc/funcs/pattern.py +++ b/saqc/funcs/pattern.py @@ -12,12 +12,12 @@ from typing import TYPE_CHECKING import dtw import pandas as pd -from saqc.constants import BAD -from saqc.core.register import flagging +from saqc import BAD +from saqc.core import flagging from saqc.lib.tools import customRoller if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC def calculateDistanceByDTW( diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py index c27aa1f1318da55b54d4cdca7509b73794a8df5c..4d2a59e96a06ac8b0eba3e9cf8898436c8f47412 100644 --- a/saqc/funcs/resampling.py +++ b/saqc/funcs/resampling.py @@ -14,14 +14,13 @@ import numpy as np import pandas as pd from typing_extensions import Literal -from dios import DtItype -from saqc.core.register import _isflagged, register +from saqc.core import register from saqc.funcs.interpolation import _SUPPORTED_METHODS -from saqc.lib.tools import evalFreqStr, filterKwargs, getFreqDelta +from saqc.lib.tools import evalFreqStr, filterKwargs, getFreqDelta, isflagged from saqc.lib.ts_operators import aggregate2Freq, shift2Freq if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC METHOD2ARGS = { @@ -272,8 +271,12 @@ class ResamplingMixin: datcol = self._data[field] # workaround for #GL-333 - if datcol.empty and self._data.itype in [None, DtItype]: - datcol = pd.Series(index=pd.DatetimeIndex([]), 
dtype=datcol.dtype) + if datcol.empty: + if self._data.itype is None: + index = pd.DatetimeIndex([]) + else: + index = self._data.itype.min_pdindex + datcol = pd.Series(index=index, dtype=datcol.dtype) freq = evalFreqStr(freq, freq_check, datcol.index) @@ -407,7 +410,7 @@ class ResamplingMixin: func_kws = dict(freq=tolerance, method=projection_method, target=dummy) elif method[-5:] == "shift": - drop_mask = target_datcol.isna() | _isflagged( + drop_mask = target_datcol.isna() | isflagged( target_flagscol, kwargs["dfilter"] ) projection_method = METHOD2ARGS[method][0] @@ -431,7 +434,7 @@ class ResamplingMixin: history = self._flags.history[field].apply(dummy.index, func, func_kws) if overwrite is False: - mask = _isflagged(self._flags[target], thresh=kwargs["dfilter"]) + mask = isflagged(self._flags[target], thresh=kwargs["dfilter"]) history._hist[mask] = np.nan if squeeze: diff --git a/saqc/funcs/residuals.py b/saqc/funcs/residuals.py index dec6681e2be5c2f3b16e112bbe2b4984703e99da..635b71da51e27edee8130f42b35bbdef05d62ce1 100644 --- a/saqc/funcs/residuals.py +++ b/saqc/funcs/residuals.py @@ -12,12 +12,12 @@ from typing import TYPE_CHECKING, Callable, Optional, Union import numpy as np import pandas as pd -from saqc.core.register import register +from saqc.core import register from saqc.funcs.curvefit import _fitPolynomial from saqc.funcs.rolling import _roll if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class ResidualsMixin: diff --git a/saqc/funcs/rolling.py b/saqc/funcs/rolling.py index 80699200d866e7bf498426d0c214de3e0e815fae..125991c4f8d5e16bde0adc2703f7646a113500a3 100644 --- a/saqc/funcs/rolling.py +++ b/saqc/funcs/rolling.py @@ -11,13 +11,11 @@ from typing import TYPE_CHECKING, Callable, Union import numpy as np import pandas as pd -from dios import DictOfSeries -from saqc.core.flags import Flags -from saqc.core.register import register +from saqc.core import DictOfSeries, Flags, register from saqc.lib.tools import getFreqDelta 
if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class RollingMixin: diff --git a/saqc/funcs/scores.py b/saqc/funcs/scores.py index 8e0be0b36f6d2bad5810096798eaf23470b606ae..1846cab2b40f8b4837acce6b0ff11c581d2691f3 100644 --- a/saqc/funcs/scores.py +++ b/saqc/funcs/scores.py @@ -13,13 +13,13 @@ import numpy as np import pandas as pd from typing_extensions import Literal -import saqc.lib.ts_operators as ts_ops -from saqc.constants import UNFLAGGED -from saqc.core.register import register +from saqc import UNFLAGGED +from saqc.core import register from saqc.lib.tools import getApply, toSequence +from saqc.lib.ts_operators import kNN if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC def _univarScoring( @@ -210,7 +210,7 @@ class ScoresMixin: sample_size = partition.shape[0] nn_neighbors = min(n, max(sample_size, 2) - 1) - dist, *_ = ts_ops.kNN( + dist, *_ = kNN( partition.values, nn_neighbors, algorithm=method, metric=metric, p=p ) try: diff --git a/saqc/funcs/tools.py b/saqc/funcs/tools.py index 9888bb1ff43b98b84627fd7f51170b676bba2267..1036b7207c674f116ed24a34645b23478ead73ad 100644 --- a/saqc/funcs/tools.py +++ b/saqc/funcs/tools.py @@ -15,13 +15,13 @@ import matplotlib.pyplot as plt import numpy as np from typing_extensions import Literal -from saqc.constants import FILTER_NONE, UNFLAGGED -from saqc.core.register import processing, register +from saqc import FILTER_NONE, UNFLAGGED +from saqc.core import processing, register from saqc.lib.plotting import makeFig from saqc.lib.tools import periodicMask if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC _MPL_DEFAULT_BACKEND = mpl.get_backend() diff --git a/saqc/funcs/transformation.py b/saqc/funcs/transformation.py index ee6d4292442795a24753bcb551ff2900e8ab3b22..0fe642131f21b2279bffa3ffc89b43dc3a060e3a 100644 --- a/saqc/funcs/transformation.py +++ b/saqc/funcs/transformation.py @@ -12,10 +12,10 @@ from typing import TYPE_CHECKING, Callable, 
Optional, Union import numpy as np import pandas as pd -from saqc.core.register import register +from saqc.core import register if TYPE_CHECKING: - from saqc.core.core import SaQC + from saqc import SaQC class TransformationMixin: diff --git a/saqc/lib/docurator.py b/saqc/lib/docurator.py index 59b27d7393d7943c7818e018914cf8f1911aeca4..3fd272b07b7c2bb58a38e1639f1e98487504b020 100644 --- a/saqc/lib/docurator.py +++ b/saqc/lib/docurator.py @@ -145,7 +145,7 @@ def saqcMethodsTemplate(doc_string: str, source="function_string"): out_para = mkParameter( parameter_name="out", parameter_type="saqc.SaQC", - parameter_doc="An :py:meth:`saqc.SaQC` object, holding the (possibly) modified data", + parameter_doc="An :py:meth:`saqc.SaQC` object, holding the data", indent_str=indent_string, ) returns_section["Returns"] += out_para["out"] diff --git a/saqc/lib/plotting.py b/saqc/lib/plotting.py index d3f20acb791cac6a7c78d87854a42066c71af37b..56a27e8f29828f7c6b65c207c800c2e18f8fb6ea 100644 --- a/saqc/lib/plotting.py +++ b/saqc/lib/plotting.py @@ -9,7 +9,6 @@ from __future__ import annotations import itertools -from typing import Optional, Union import matplotlib as mpl import matplotlib.pyplot as plt @@ -17,7 +16,7 @@ import numpy as np import pandas as pd from typing_extensions import Literal -from saqc.core.flags import Flags +from saqc.core import Flags from saqc.lib.tools import toSequence from saqc.lib.types import DiosLikeT diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 41a3cd96172b0c25bd06eb706c5e1b7425a140ec..3a454bd181f6c938ee21c136c40cd25a7cdc7772 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -18,11 +18,9 @@ import pandas as pd from scipy import fft from scipy.cluster.hierarchy import fcluster, linkage -import dios - # keep this for external imports # TODO: fix the external imports -from saqc.lib.rolling import customRoller +from saqc.lib.rolling import customRoller # noqa from saqc.lib.types import CompT T = TypeVar("T", str, float, int) @@ -171,59 
+169,6 @@ def periodicMask(dtindex, season_start, season_end, include_bounds): return out -def concatDios(data: List[dios.DictOfSeries], warn: bool = True, stacklevel: int = 2): - # fast path for most common case - if len(data) == 1 and data[0].columns.is_unique: - return data[0] - - result = dios.DictOfSeries() - for di in data: - for c in di.columns: - if c in result.columns: - if warn: - warnings.warn( - f"Column {c} already exist. Data is overwritten. " - f"Avoid duplicate columns names over all inputs.", - stacklevel=stacklevel, - ) - result[c] = di[c] - - return result - - -def mergeDios(left, right, subset=None, join="merge"): - # use dios.merge() as soon as it implemented - # see https://git.ufz.de/rdm/dios/issues/15 - - merged = left.copy() - if subset is not None: - right_subset_cols = right.columns.intersection(subset) - else: - right_subset_cols = right.columns - - shared_cols = left.columns.intersection(right_subset_cols) - - for c in shared_cols: - l, r = left[c], right[c] - if join == "merge": - # NOTE: - # our merge behavior is nothing more than an - # outer join, where the right join argument - # overwrites the left at the shared indices, - # while on a normal outer join common indices - # hold the values from the left join argument - r, l = l.align(r, join="outer") - else: - l, r = l.align(r, join=join) - merged[c] = l.combine_first(r) - - newcols = right_subset_cols.difference(left.columns) - for c in newcols: - merged[c] = right[c].copy() - - return merged - - def isQuoted(string): return bool(re.search(r"'.*'|\".*\"", string)) @@ -594,3 +539,21 @@ def filterKwargs( ) kwargs.pop(key, None) return kwargs + + +from saqc import FILTER_ALL, UNFLAGGED + +A = TypeVar("A", np.ndarray, pd.Series) + + +def isflagged(flagscol: A, thresh: float) -> A: + """ + Return a mask of flags accordingly to `thresh`. Return type is same as flags. 
+ """ + if not isinstance(thresh, (float, int)): + raise TypeError(f"thresh must be of type float, not {repr(type(thresh))}") + + if thresh == FILTER_ALL: + return flagscol > UNFLAGGED + + return flagscol >= thresh diff --git a/saqc/lib/types.py b/saqc/lib/types.py index 2c11b3c7768342192a431d0b7ce94027892b6ab2..4bdb7e4e2841dd3dbd6e8d01151de86c6e2961bc 100644 --- a/saqc/lib/types.py +++ b/saqc/lib/types.py @@ -7,7 +7,14 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from abc import abstractmethod +import abc +from typing import Any, Dict, TypeVar, Union + +import numpy as np +import pandas as pd +from typing_extensions import Protocol + +from saqc.core import DictOfSeries __all__ = [ "T", @@ -19,15 +26,6 @@ __all__ = [ "OptionalNone", ] - -from typing import Any, Dict, TypeVar, Union - -import numpy as np -import pandas as pd -from typing_extensions import Protocol - -from dios import DictOfSeries - T = TypeVar("T") ArrayLike = TypeVar("ArrayLike", np.ndarray, pd.Series, pd.DataFrame) PandasLike = Union[pd.Series, pd.DataFrame, DictOfSeries] @@ -51,7 +49,7 @@ class GenericFunction(Protocol): class Comparable(Protocol): - @abstractmethod + @abc.abstractmethod def __gt__(self: CompT, other: CompT) -> bool: pass diff --git a/saqc/parsing/__init__.py b/saqc/parsing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1d74ada495ad00a20887cdee50a426347f37efae --- /dev/null +++ b/saqc/parsing/__init__.py @@ -0,0 +1,7 @@ +#! /usr/bin/env python + +# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# -*- coding: utf-8 -*- diff --git a/saqc/parsing/environ.py b/saqc/parsing/environ.py new file mode 100644 index 0000000000000000000000000000000000000000..65bc04fa4795cff0f04c6423e3b4280ffc02a124 --- /dev/null +++ b/saqc/parsing/environ.py @@ -0,0 +1,76 @@ +#! 
/usr/bin/env python + +# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import numpy as np +import scipy.stats as st + +import saqc.lib.ts_operators as ts_ops +from saqc import BAD, DOUBTFUL, FILTER_ALL, FILTER_NONE, GOOD, UNFLAGGED + + +def clip(series, lower=None, upper=None): + return series.clip(lower=lower, upper=upper) + + +def zscore(obj): + return st.zscore(obj, nan_policy="omit") + + +ENVIRONMENT = { + # Infinity constant + "inf": np.inf, + "INF": np.inf, + # Not a number constant. + "NAN": np.nan, + "nan": np.nan, + # Absolute value function. + "abs": np.abs, + # Maximum value function. Ignores NaN. + "max": np.nanmax, + # Minimum Value function. Ignores NaN. + "min": np.nanmin, + # Mean value function. Ignores NaN. + "mean": np.nanmean, + # Summation. Ignores NaN. + "sum": np.nansum, + # Standard deviation. Ignores NaN. + "len": len, + # exponential function. + "exp": np.exp, + # Logarithm. + "log": np.log, + # Logarithm, returning NaN for zero input, instead of -inf. + "nanLog": ts_ops.zeroLog, + # Standard deviation. Ignores NaN. + "std": np.nanstd, + # Variance. Ignores NaN. + "var": np.nanvar, + # Median. Ignores NaN. + "median": np.nanmedian, + # Count Number of values. Ignores NaNs. + "count": ts_ops.count, + # Identity. + "id": ts_ops.identity, + # Returns a series` diff. + "diff": ts_ops.difference, + # Scales data to [0,1] interval. + "scale": ts_ops.normScale, + # Standardize with standard deviation. + "zScore": zscore, + # Standardize with median and MAD. + "madScore": ts_ops.standardizeByMedian, + # Standardize with median and inter quantile range. 
+ "iqsScore": ts_ops.standardizeByIQR, + "clip": clip, + "GOOD": GOOD, + "BAD": BAD, + "UNFLAGGED": UNFLAGGED, + "DOUBTFUL": DOUBTFUL, + "FILTER_ALL": FILTER_ALL, + "FILTER_NONE": FILTER_NONE, +} diff --git a/saqc/core/reader.py b/saqc/parsing/reader.py similarity index 97% rename from saqc/core/reader.py rename to saqc/parsing/reader.py index b5ff437875f2bb225a05f5a2ae082b7a8ee1db3e..8c8673e31817361b16b64daaf20889ca2335d975 100644 --- a/saqc/core/reader.py +++ b/saqc/parsing/reader.py @@ -15,9 +15,9 @@ from urllib.request import urlopen import pandas as pd -from saqc.core.core import SaQC -from saqc.core.visitor import ConfigFunctionParser +from saqc import SaQC from saqc.lib.tools import isQuoted +from saqc.parsing.visitor import ConfigFunctionParser COMMENT = "#" SEPARATOR = ";" diff --git a/saqc/core/visitor.py b/saqc/parsing/visitor.py similarity index 99% rename from saqc/core/visitor.py rename to saqc/parsing/visitor.py index 294a5a812807d91326ce8ea635e1f4b353c84f4c..91f086e2f4aaa81065d2c0430d65aa9ad7ce8c7a 100644 --- a/saqc/core/visitor.py +++ b/saqc/parsing/visitor.py @@ -8,8 +8,8 @@ import ast -from saqc.constants import ENVIRONMENT from saqc.core.register import FUNC_MAP +from saqc.parsing.environ import ENVIRONMENT class ConfigExpressionParser(ast.NodeVisitor): diff --git a/tests/api/test_creation.py b/tests/api/test_creation.py index 85fbebeb871c5d1d379fae84859b0162a463a8ac..60ae95a4ab7f66305a1f7cf420a05a4cba55e3e9 100644 --- a/tests/api/test_creation.py +++ b/tests/api/test_creation.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd +# directly import container class to avoid importing +# saqc here. 
import dios diff --git a/tests/common.py b/tests/common.py index 3a973e4c9f5d60d3ced0a5f1068588ef96ea8d0a..c82b5d2bf791a499dd9f5345ae9b3f69d0f18580 100644 --- a/tests/common.py +++ b/tests/common.py @@ -11,8 +11,8 @@ import io import numpy as np import pandas as pd -import dios from saqc.core import Flags +from saqc.core.frame import DictOfSeries from saqc.core.history import History, createHistoryFromData @@ -22,7 +22,7 @@ def initData( if rows is None: freq = freq or "1h" - di = dios.DictOfSeries(itype=dios.DtItype) + di = DictOfSeries(itype="datetime") dates = pd.date_range(start=start_date, end=end_date, freq=freq, periods=rows) dummy = np.arange(len(dates)) @@ -74,7 +74,7 @@ def checkDataFlagsInvariants(data, flags, field, identical=True): whether to check indexes of data and flags to be identical (True, default) of just for equality. """ - assert isinstance(data, dios.DictOfSeries) + assert isinstance(data, DictOfSeries) assert isinstance(flags, Flags) # all columns in data are in flags diff --git a/tests/core/test_core.py b/tests/core/test_core.py index 82b30186491a2ec8c1540902e8f530806087e57a..e0dedae95b498008aa52b4e345c68ff2d7c214c2 100644 --- a/tests/core/test_core.py +++ b/tests/core/test_core.py @@ -12,11 +12,8 @@ import numpy as np import pandas as pd import pytest -import saqc -from saqc.constants import BAD, FILTER_ALL, FILTER_NONE, UNFLAGGED -from saqc.core import SaQC, initFlagsLike -from saqc.core.flags import Flags -from saqc.core.register import flagging, processing, register +from saqc import BAD, FILTER_ALL, FILTER_NONE, UNFLAGGED, SaQC +from saqc.core import Flags, flagging, initFlagsLike, processing, register from saqc.lib.types import OptionalNone from tests.common import initData @@ -68,12 +65,12 @@ def test_dtypes(data, flags): def test_new_call(data): - qc = saqc.SaQC(data) + qc = SaQC(data) qc = qc.flagRange("var1", max=5) def test_copy(data): - qc = saqc.SaQC(data) + qc = SaQC(data) qc = qc.flagRange("var1").flagRange("var1", min=0, 
max=0) diff --git a/tests/core/test_flags.py b/tests/core/test_flags.py index 6be9479869c464c7de609ca1618479fdf1668a30..fbb09e0d3c335401050535201ebea0f15568eabe 100644 --- a/tests/core/test_flags.py +++ b/tests/core/test_flags.py @@ -10,11 +10,9 @@ import numpy as np import pandas as pd import pytest -import dios -from saqc.constants import UNFLAGGED -from saqc.core.flags import Flags -from tests.core.test_history import History -from tests.core.test_history import is_equal as hist_equal +import tests.core.test_history as test_hist +from saqc import UNFLAGGED +from saqc.core import DictOfSeries, Flags, History _arrays = [ np.array([[]]), @@ -42,7 +40,7 @@ testdata = [] for d in _arrays: columns = list("abcdefgh")[: d.shape[1]] df = pd.DataFrame(d, dtype=float, columns=columns) - dis = dios.DictOfSeries(df) + dis = DictOfSeries(df) di = {} di.update(df.items()) testdata.append(df) @@ -53,7 +51,7 @@ for d in _arrays: def is_equal(f1, f2): assert f1.columns.equals(f2.columns) for c in f1.columns: - assert hist_equal(f1.history[c], f2.history[c]) + assert test_hist.is_equal(f1.history[c], f2.history[c]) @pytest.mark.parametrize("data", testdata) @@ -103,7 +101,7 @@ def test_init_raise_TypeError(data, msg): @pytest.mark.parametrize("data", testdata) -def test_copy(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]]): +def test_copy(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) shallow = flags.copy(deep=False) deep = flags.copy(deep=True) @@ -131,9 +129,7 @@ def test_copy(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]] @pytest.mark.parametrize("data", testdata) -def test_flags_history( - data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]] -): +def test_flags_history(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) # get @@ -153,7 +149,7 @@ def test_flags_history( @pytest.mark.parametrize("data", testdata) -def test_get_flags(data: Union[pd.DataFrame, 
dios.DictOfSeries, Dict[str, pd.Series]]): +def test_get_flags(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) for c in flags.columns: @@ -172,7 +168,7 @@ def test_get_flags(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Ser @pytest.mark.parametrize("data", testdata) -def test_set_flags(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]]): +def test_set_flags(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) for c in flags.columns: @@ -202,7 +198,7 @@ def test_set_flags(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Ser @pytest.mark.parametrize("data", testdata) def test_set_flags_with_mask( - data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]] + data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]] ): flags = Flags(data) @@ -249,7 +245,7 @@ def test_set_flags_with_mask( @pytest.mark.parametrize("data", testdata) def test_set_flags_with_index( - data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]] + data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]] ): flags = Flags(data) @@ -292,16 +288,16 @@ def _validate_flags_equals_frame(flags, df): @pytest.mark.parametrize("data", testdata) -def test_to_dios(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]]): +def test_to_dios(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) df = flags.toDios() - assert isinstance(df, dios.DictOfSeries) + assert isinstance(df, DictOfSeries) _validate_flags_equals_frame(flags, df) @pytest.mark.parametrize("data", testdata) -def test_to_frame(data: Union[pd.DataFrame, dios.DictOfSeries, Dict[str, pd.Series]]): +def test_to_frame(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) df = flags.toFrame() diff --git a/tests/core/test_frame.py b/tests/core/test_frame.py new file mode 100644 index 
0000000000000000000000000000000000000000..267e522e0b5cb4f509572a5b7572a2c41a1d8549 --- /dev/null +++ b/tests/core/test_frame.py @@ -0,0 +1,40 @@ +#! /usr/bin/env python +# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ +# SPDX-License-Identifier: GPL-3.0-or-later +# -*- coding: utf-8 -*- + + +import pytest + +from saqc.core.frame import DictOfSeries as DoS +from saqc.core.frame import concatDios + + +@pytest.mark.parametrize( + "data, expected", + [ + # 2c + 1c -> 3c + ([DoS(dict(a=[1], b=[2])), DoS(dict(c=[3]))], DoS(dict(a=[1], b=[2], c=[3]))), + # 1c + 1c + 1c -> 3c + ( + [DoS(dict(a=[1])), DoS(dict(b=[1])), DoS(dict(c=[1]))], + DoS(dict(a=[1], b=[1], c=[1])), + ), + # 2c + 1c (overwrite) = 2c + ([DoS(dict(a=[1], b=[2])), DoS(dict(b=[22]))], DoS(dict(a=[1], b=[22]))), + # 1c + 1c + 1c (all overwrite) -> 1c + ( + [DoS(dict(a=[1])), DoS(dict(a=[11])), DoS(dict(a=[111]))], + DoS(dict(a=[111])), + ), + ], +) +def test_concatDios(data, expected): + result = concatDios(data, warn=False) + assert result == expected + + +@pytest.mark.parametrize("data", [[DoS(dict(a=[1], b=[2])), DoS(dict(b=[22]))]]) +def test_concatDios_warning(data): + with pytest.warns(UserWarning): + concatDios(data, warn=True, stacklevel=0) diff --git a/tests/core/test_reader.py b/tests/core/test_reader.py index f6b55f31b240a396e662663acc7533c8e2724bfe..c3de90c160f046d9dd860cbeda11574a6335651f 100644 --- a/tests/core/test_reader.py +++ b/tests/core/test_reader.py @@ -9,15 +9,13 @@ import numpy as np import pytest -import dios -from saqc.core.flags import Flags -from saqc.core.reader import fromConfig, readFile -from saqc.core.register import flagging +from saqc.core import DictOfSeries, Flags, flagging +from saqc.parsing.reader import fromConfig, readFile from tests.common import initData, writeIO @pytest.fixture -def data() -> dios.DictOfSeries: +def data() -> DictOfSeries: return initData(3) diff --git a/tests/core/test_translator.py b/tests/core/test_translator.py 
index cfacdbd12135c363c6145e458b0694524878da00..1da0075e90d378ba77465faa2f826fc8ee5f2158 100644 --- a/tests/core/test_translator.py +++ b/tests/core/test_translator.py @@ -13,9 +13,8 @@ import numpy as np import pandas as pd import pytest -from saqc.constants import BAD, DOUBTFUL, FILTER_NONE, UNFLAGGED -from saqc.core.core import SaQC -from saqc.core.flags import Flags +from saqc import BAD, DOUBTFUL, FILTER_NONE, UNFLAGGED, SaQC +from saqc.core import Flags from saqc.core.translation import DmpScheme, MappingScheme, PositionalScheme from tests.common import initData diff --git a/tests/fixtures.py b/tests/fixtures.py index ea11559296a8b59b4cfbd8d3ae18ab5cd8ccc010..b8d05ceb5d12d07a41ab27a8e8cd7a67b00e222e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pytest -from dios import DictOfSeries +from saqc.core import DictOfSeries # TODO: this is odd # Why not simple fixtures with talking-names, diff --git a/tests/funcs/test_constants_detection.py b/tests/funcs/test_constants_detection.py index 381d5260fafb2d3ac3382d9c0110e6342cce5922..29a5a251458cc225fa012652b2c509cfd39b866f 100644 --- a/tests/funcs/test_constants_detection.py +++ b/tests/funcs/test_constants_detection.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from saqc.constants import BAD, UNFLAGGED +from saqc import BAD, UNFLAGGED from saqc.core import SaQC, initFlagsLike from tests.common import initData diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py index 53c13a6deac9909b23ae0c85b591fa6a7aa301e8..9ae18b532101c1822140aaf502913719644ddd33 100644 --- a/tests/funcs/test_functions.py +++ b/tests/funcs/test_functions.py @@ -10,13 +10,11 @@ import numpy as np import pandas as pd import pytest -import dios import saqc -from saqc.constants import BAD, DOUBTFUL, UNFLAGGED -from saqc.core import initFlagsLike -from saqc.core.core import SaQC +from saqc import BAD, DOUBTFUL, UNFLAGGED, SaQC +from saqc.core import 
DictOfSeries, initFlagsLike from tests.common import initData -from tests.fixtures import char_dict, course_1 +from tests.fixtures import char_dict, course_1 # noqa, todo: fix fixtures @pytest.fixture @@ -34,7 +32,7 @@ def test_statPass(): noise = [-1, 1] * 10 data[100:120] = noise data[200:210] = noise[:10] - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) qc = SaQC(data, flags).flagByStatLowPass( "data", np.std, "20D", 0.999, "5D", 0.999, 0, flag=BAD @@ -287,7 +285,7 @@ def test_transferFlags(): qc = saqc.SaQC(data) qc = qc.flagRange("a", max=1.5) with pytest.deprecated_call(): - qc = qc.transferFlags(["a", "a"], ["b", "c"]) + qc = qc.transferFlags(["a", "a"], ["b", "c"]) # noqa assert np.all(qc.flags["b"].values == np.array([UNFLAGGED, BAD])) assert np.all(qc.flags["c"].values == np.array([UNFLAGGED, BAD])) diff --git a/tests/funcs/test_generic_api_functions.py b/tests/funcs/test_generic_api_functions.py index dd1c7f4aea68e8cea28cd8fdd6172707bea1ab21..e2181e9bd30d5c3c75472b8a06cd3b4cc062a321 100644 --- a/tests/funcs/test_generic_api_functions.py +++ b/tests/funcs/test_generic_api_functions.py @@ -9,10 +9,8 @@ import pandas as pd import pytest -from dios.dios.dios import DictOfSeries -from saqc import SaQC -from saqc.constants import BAD, FILTER_ALL, UNFLAGGED -from saqc.core.flags import Flags +from saqc import BAD, FILTER_ALL, UNFLAGGED, SaQC +from saqc.core import DictOfSeries, Flags from saqc.lib.tools import toSequence from tests.common import initData diff --git a/tests/funcs/test_generic_config_functions.py b/tests/funcs/test_generic_config_functions.py index 3f61e775647c705512cb21ed572bfcf2c9237a9c..0dc9d20ec672887a4977ad7c2cead67f1511473b 100644 --- a/tests/funcs/test_generic_config_functions.py +++ b/tests/funcs/test_generic_config_functions.py @@ -12,15 +12,11 @@ import numpy as np import pandas as pd import pytest -import dios -from saqc import SaQC -from saqc.constants import BAD, UNFLAGGED -from saqc.core 
import initFlagsLike -from saqc.core.flags import Flags -from saqc.core.reader import fromConfig -from saqc.core.register import register -from saqc.core.visitor import ConfigFunctionParser +from saqc import BAD, UNFLAGGED, SaQC +from saqc.core import DictOfSeries, Flags, initFlagsLike, register from saqc.funcs.generic import _execGeneric +from saqc.parsing.reader import fromConfig +from saqc.parsing.visitor import ConfigFunctionParser from tests.common import initData, writeIO @@ -36,7 +32,7 @@ def data_diff(): col1 = data[data.columns[1]] mid = len(col0) // 2 offset = len(col0) // 8 - return dios.DictOfSeries( + return DictOfSeries( data={ col0.name: col0.iloc[: mid + offset], col1.name: col1.iloc[mid - offset :], diff --git a/tests/funcs/test_outlier_detection.py b/tests/funcs/test_outlier_detection.py index 921a82e5300c426217669c800b5521c90ec57839..4fdc17ec0e310526281357d8249bb39836a48a10 100644 --- a/tests/funcs/test_outlier_detection.py +++ b/tests/funcs/test_outlier_detection.py @@ -12,10 +12,9 @@ import pandas as pd # see test/functs/fixtures.py for global fixtures "course_..." 
import pytest -import dios import saqc -from saqc.constants import BAD, UNFLAGGED -from saqc.core import SaQC, initFlagsLike +from saqc import BAD, UNFLAGGED +from saqc.core import DictOfSeries, SaQC, initFlagsLike from tests.fixtures import char_dict, course_1, course_2, course_3, course_4 @@ -26,7 +25,7 @@ def spiky_data(): s.iloc[100] = 100 s.iloc[1000] = -100 flag_assertion = [100, 1000] - return dios.DictOfSeries(s), flag_assertion + return DictOfSeries(s), flag_assertion def test_flagMad(spiky_data): @@ -99,7 +98,7 @@ def test_flagMVScores(dat): s1, s2 = data1.squeeze(), data2.squeeze() s1 = pd.Series(data=s1.values, index=s1.index) s2 = pd.Series(data=s2.values, index=s1.index) - data = dios.DictOfSeries([s1, s2], columns=["field1", "field2"]) + data = DictOfSeries([s1, s2], columns=["field1", "field2"]) flags = initFlagsLike(data) qc = SaQC(data, flags).flagMVScores( field=fields, @@ -135,7 +134,7 @@ def test_flagCrossStatistics(dat): s1, s2 = data1.squeeze(), data2.squeeze() s1 = pd.Series(data=s1.values, index=s1.index) s2 = pd.Series(data=s2.values, index=s1.index) - data = dios.DictOfSeries([s1, s2], columns=["field1", "field2"]) + data = DictOfSeries([s1, s2], columns=["field1", "field2"]) flags = initFlagsLike(data) qc = SaQC(data, flags).flagCrossStatistics( diff --git a/tests/funcs/test_pattern_rec.py b/tests/funcs/test_pattern_rec.py index 7b9e087164e7227f0a7ac58ec741150b02d6215e..f885522c42e040e99837dab58c4c24569fa66789 100644 --- a/tests/funcs/test_pattern_rec.py +++ b/tests/funcs/test_pattern_rec.py @@ -9,9 +9,8 @@ import pandas as pd import pytest -import dios -from saqc.constants import BAD, UNFLAGGED -from saqc.core import SaQC, initFlagsLike +from saqc import BAD, UNFLAGGED, SaQC +from saqc.core import DictOfSeries, initFlagsLike from tests.common import initData @@ -32,7 +31,7 @@ def test_flagPattern_dtw(plot, normalize): data.iloc[10:18] = [0, 5, 6, 7, 6, 8, 5, 0] pattern = data.iloc[10:18] - data = dios.DictOfSeries(dict(data=data, 
pattern_data=pattern)) + data = DictOfSeries(dict(data=data, pattern_data=pattern)) flags = initFlagsLike(data, name="data") qc = SaQC(data, flags).flagPatternByDTW( "data", diff --git a/tests/funcs/test_proc_functions.py b/tests/funcs/test_proc_functions.py index b1dd896078b14ca9dafb9aec2bfc03619c6ecf8b..4843842f37c4e31c5991e0a14231630aba7cb319 100644 --- a/tests/funcs/test_proc_functions.py +++ b/tests/funcs/test_proc_functions.py @@ -13,18 +13,17 @@ import pandas as pd # see test/functs/fixtures.py for global fixtures "course_..." import pytest -import dios import saqc -from saqc.constants import UNFLAGGED -from saqc.core import SaQC, initFlagsLike +from saqc import UNFLAGGED, SaQC +from saqc.core import DictOfSeries, initFlagsLike from saqc.lib.ts_operators import linearInterpolation, polynomialInterpolation -from tests.fixtures import char_dict, course_3, course_5 +from tests.fixtures import char_dict, course_3, course_5 # noqa, todo: fix fixtures def test_rollingInterpolateMissing(course_5): data, characteristics = course_5(periods=10, nan_slice=[5, 6]) field = data.columns[0] - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) qc = SaQC(data, flags).interpolateByRolling( field, @@ -49,7 +48,7 @@ def test_rollingInterpolateMissing(course_5): def test_interpolateMissing(course_5): data, characteristics = course_5(periods=10, nan_slice=[5]) field = data.columns[0] - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) qc = SaQC(data, flags) @@ -73,7 +72,7 @@ def test_interpolateMissing(course_5): def test_transform(course_5): data, characteristics = course_5(periods=10, nan_slice=[5, 6]) field = data.columns[0] - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) qc = SaQC(data, flags) @@ -93,7 +92,7 @@ def test_transform(course_5): def test_resample(course_5): data, _ = course_5(freq="1min", periods=30, nan_slice=[1, 11, 12, 22, 24, 26]) field = 
data.columns[0] - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) qc = SaQC(data, flags).resample( field, diff --git a/tests/funcs/test_resampling.py b/tests/funcs/test_resampling.py index c5dbd1570b9eb3b34990c37844312a20284126cc..2eb7316afc7a5faca5dfd7796c12c486aef17add 100644 --- a/tests/funcs/test_resampling.py +++ b/tests/funcs/test_resampling.py @@ -10,9 +10,8 @@ import numpy as np import pandas as pd import pytest -import dios -from saqc.constants import BAD, UNFLAGGED -from saqc.core import SaQC, initFlagsLike +from saqc import BAD, UNFLAGGED, SaQC +from saqc.core import DictOfSeries, initFlagsLike from tests.common import checkDataFlagsInvariants @@ -30,7 +29,7 @@ def data(): dat = pd.Series(np.linspace(-50, 50, index.size), index=index, name="data") # good to have some nan dat[-3] = np.nan - data = dios.DictOfSeries(dat) + data = DictOfSeries(dat) return data @@ -88,7 +87,7 @@ def test_gridInterpolation(data, method, fill_history): field = "data" data = data[field] data = pd.concat([data * np.sin(data), data.shift(1, "2h")]).shift(1, "3s") - data = dios.DictOfSeries(data) + data = DictOfSeries(data) flags = initFlagsLike(data) if fill_history == "none": diff --git a/tests/funcs/test_tools.py b/tests/funcs/test_tools.py index 6c63f5ec4358f44d3ddc81d44e8341f820b54a2d..75e51273734a2dcfb0ca8b5253fff0616219bdec 100644 --- a/tests/funcs/test_tools.py +++ b/tests/funcs/test_tools.py @@ -8,14 +8,14 @@ import numpy as np import pandas as pd import pytest -import dios import saqc +from saqc.core import DictOfSeries @pytest.mark.slow def test_makeFig(tmp_path): # just testing for no errors to occure... 
- data = dios.DictOfSeries( + data = DictOfSeries( pd.Series( np.linspace(0, 1000, 1000), pd.date_range("2000", "2001", periods=1000), diff --git a/tests/fuzzy/lib.py b/tests/fuzzy/lib.py index 6210389fb18e13333da604ea6f82c207af2ac672..a1bf9ed274e8b8e069ab4945e49ff92f331f0bdd 100644 --- a/tests/fuzzy/lib.py +++ b/tests/fuzzy/lib.py @@ -23,9 +23,8 @@ from hypothesis.strategies import ( ) from hypothesis.strategies._internal.types import _global_type_lookup -import dios -from saqc.constants import BAD -from saqc.core import initFlagsLike +from saqc import BAD +from saqc.core import DictOfSeries, initFlagsLike from saqc.core.register import FUNC_MAP MAX_EXAMPLES = 50 @@ -46,7 +45,7 @@ def dioses(draw, min_cols=1): cols = draw(lists(columnNames(), unique=True, min_size=min_cols)) columns = {c: draw(dataSeries(min_size=3)) for c in cols} - return dios.DictOfSeries(columns) + return DictOfSeries(columns) @composite diff --git a/tests/fuzzy/test_masking.py b/tests/fuzzy/test_masking.py index 0d0a49e92e88e932227e28bb6a4f0487fc8149b7..472601037279556a5f069ff703ea4cb95a789c6e 100644 --- a/tests/fuzzy/test_masking.py +++ b/tests/fuzzy/test_masking.py @@ -10,7 +10,7 @@ import pandas as pd import pytest from hypothesis import given, settings -from saqc.constants import BAD, UNFLAGGED +from saqc import BAD, UNFLAGGED from saqc.core.register import _maskData, _unmaskData from tests.fuzzy.lib import MAX_EXAMPLES, dataFieldFlags diff --git a/tests/lib/test_tools.py b/tests/lib/test_tools.py index b282f24758ce4735f42dce82c69c43dddaf5af1c..2e1534ce39616add875fb4ab6a40ae05cfe8c7fe 100644 --- a/tests/lib/test_tools.py +++ b/tests/lib/test_tools.py @@ -6,7 +6,6 @@ import pandas as pd import pytest import saqc.lib.tools as tools -from dios import DictOfSeries as DoS @pytest.mark.parametrize("optional", [False, True]) @@ -69,33 +68,3 @@ def test_toSequence(value, expected): def test_squeezeSequence(value, expected): result = tools.squeezeSequence(value) assert result == expected - - 
-@pytest.mark.parametrize( - "data, expected", - [ - # 2c + 1c -> 3c - ([DoS(dict(a=[1], b=[2])), DoS(dict(c=[3]))], DoS(dict(a=[1], b=[2], c=[3]))), - # 1c + 1c + 1c -> 3c - ( - [DoS(dict(a=[1])), DoS(dict(b=[1])), DoS(dict(c=[1]))], - DoS(dict(a=[1], b=[1], c=[1])), - ), - # 2c + 1c (overwrite) = 2c - ([DoS(dict(a=[1], b=[2])), DoS(dict(b=[22]))], DoS(dict(a=[1], b=[22]))), - # 1c + 1c + 1c (all overwrite) -> 1c - ( - [DoS(dict(a=[1])), DoS(dict(a=[11])), DoS(dict(a=[111]))], - DoS(dict(a=[111])), - ), - ], -) -def test_concatDios(data, expected): - result = tools.concatDios(data, warn=False) - assert result == expected - - -@pytest.mark.parametrize("data", [[DoS(dict(a=[1], b=[2])), DoS(dict(b=[22]))]]) -def test_concatDios_warning(data): - with pytest.warns(UserWarning): - tools.concatDios(data, warn=True, stacklevel=0) diff --git a/tests/lib/test_ts_operators.py b/tests/lib/test_ts_operators.py index c7288ce2e3f0b9ca62128c4bb312d3808eea7af2..96fed103586c828ca5ac4003d0d6dc2fd349aa76 100644 --- a/tests/lib/test_ts_operators.py +++ b/tests/lib/test_ts_operators.py @@ -9,7 +9,6 @@ import pytest from pandas.testing import assert_series_equal import saqc.lib.ts_operators as tsops -from saqc.lib.ts_operators import interpolateNANs def test_butterFilter(): @@ -251,7 +250,7 @@ def test_rateOfChange(data, expected): ], ) def test_interpolatNANs(limit, extrapolate, data, expected): - got = interpolateNANs( + got = tsops.interpolateNANs( pd.Series(data), gap_limit=limit, method="linear", extrapolate=extrapolate ) try: