Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • berntm/saqc
  • rdm-software/saqc
  • schueler/saqc
3 results
Show changes
Commits on Source (8)
......@@ -10,30 +10,34 @@ SPDX-License-Identifier: GPL-3.0-or-later
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
### Added
- add multivariate plotting options to `plot`
- added `plot_kwargs` keyword to `plot` function
- added `plot_kwargs` keyword to `plot` function
- added checks and unified error message for common inputs.
- added command line `--version` option
- `-ll` CLI option as a shorthand for `--log-level`
- basic json support for CLI config files, which are detected by `.json`-extension.
- `--json-field` CLI option to use a non-root element of a json file.
- String Selection Options for function selection in `flagScatterLowPass`
### Changed
- pin pandas to versions >= 2.0
- parameter `fill_na` of `SaQC.flagUniLOF` and `SaQC.assignUniLOF` is now of type
- parameter `fill_na` of `SaQC.flagUniLOF` and `SaQC.assignUniLOF` is now of type
`bool` instead of one of `[None, "linear"]`
- in `plot` function: changed default color for single variables to `black` with `80% transparency`
- in `plot` function: added separate legend for flags
- deprecated `flagStatLowPass` in favor of `flagScatterLowPass`
### Removed
- removed deprecated `DictOfSeries.to_df`
- removed plotting option with complete history (`history="complete"`)
### Fixed
- Bug in `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed
- Bug in `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed
to `min_period` was only recognised if also `window` was a tuple.
- `SaQC.propagateFlags` was overwriting existing flags
### Deprecated
- Deprecate `plot` parameter `phaseplot` in favor of usage with `mode="biplot"`
- Deprecate `plot` parameter `cyclestart` in favor of usage with `marker_kwargs`
- Deprecate option `"complete"` for `plot` funcs parameter `history`
- `SaQC.andGroup`: option to pass dictionaries to the parameter `group`.
- `SaQC.orGroup`: option to pass dictionaries to the parameter `group`.
- `plot`: parameter `phaseplot` in favor of usage with `mode="biplot"`
- `plot`: parameter `cyclestart` in favor of usage with `marker_kwargs`
- `plot`: option `"complete"` for parameter `history`
## [2.4.1](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
......@@ -63,7 +67,7 @@ SPDX-License-Identifier: GPL-3.0-or-later
- `func` arguments in text configurations were not parsed correctly
- fail on duplicated arguments to test methods
- `resample` was not writing meta entries
- `flagByStatLowPass` was overwriting existing flags
- `flagByScatterLowpass` was overwriting existing flags
- `flagUniLOF` and `flagLOF` were overwriting existing flags
### Deprecated
- Deprecate `flagMVScore` parameters: `partition` in favor of `window`, `partition_min` in favor of `min_periods`, `min_periods` in favor of `min_periods_r`
......
......@@ -13,4 +13,4 @@ Change Points and Noise
~SaQC.flagChangePoints
~SaQC.assignChangePointCluster
~SaQC.flagByStatLowPass
~SaQC.flagByScatterLowpass
......@@ -9,7 +9,7 @@ from __future__ import annotations
import operator
import warnings
from typing import TYPE_CHECKING, Any, Callable, Sequence, Union
from typing import TYPE_CHECKING, Any, Callable, Sequence
import numpy as np
import pandas as pd
......@@ -18,7 +18,7 @@ from typing_extensions import Literal
from saqc import BAD, FILTER_ALL, UNFLAGGED
from saqc.core import DictOfSeries, flagging, register
from saqc.lib.checking import validateChoice, validateWindow
from saqc.lib.tools import isflagged, isunflagged, toSequence
from saqc.lib.tools import initializeTargets, isflagged, isunflagged, toSequence
if TYPE_CHECKING:
from saqc import SaQC
......@@ -93,7 +93,7 @@ class FlagtoolsMixin:
def flagManual(
self: "SaQC",
field: str,
mdata: pd.Series | pd.DataFrame | DictOfSeries | list | np.ndarray,
mdata: str | pd.Series | np.ndarray | list | pd.DataFrame | DictOfSeries,
method: Literal[
"left-open", "right-open", "closed", "plain", "ontime"
] = "left-open",
......@@ -103,61 +103,64 @@ class FlagtoolsMixin:
**kwargs,
) -> "SaQC":
"""
Flag data by given, "manually generated" data.
Include flags listed in external data.
The data is flagged at locations where `mdata` is equal to a provided
flag (`mflag`). The format of mdata can be an indexed object,
like pd.Series, pd.Dataframe or dios.DictOfSeries, but also can
be a plain list- or array-like. How indexed mdata is aligned to
data is specified via the `method` parameter.
The method allows to integrate pre-existing flagging information.
Parameters
----------
mdata :
The Data determining, wich intervals are to be flagged, or a
string, denoting under which field the data is
accessable.
Determines which values or intervals will be flagged. Supported input types:
method :
Defines how mdata is projected on data. Except for the 'plain'
method, the methods assume mdata to have an index.
* 'plain': mdata must have the same length as data and is
projected one-to-one on data.
* 'ontime': works only with indexed mdata. mdata entries are
matched with data entries that have the same index.
* 'right-open': mdata defines intervals, values are to be
projected on. The intervals are defined,
(1) Either, by any two consecutive timestamps t_1 and 1_2
where t_1 is valued with mflag, or by a series,
(2) Or, a Series, where the index contains in the t1 timestamps
and the values the respective t2 stamps.
* ``pd.Series``: Needs a datetime index and values of type:
The value at t_1 gets projected onto all data timestamps t,
with t_1 <= t < t_2.
* 'left-open': like 'right-open', but the projected interval
now covers all t with t_1 < t <= t_2.
* 'closed': like 'right-open', but the projected interval
now covers all t with t_1 <= t <= t_2.
- datetime, for :py:attr:`method` values ``"right-open"``, ``"left-open"``, ``"closed"``
- or any scalar, for :py:attr:`method` values ``"plain"``, ``"ontime"``
* ``str``: Variable holding the manual flag information.
* ``pd.DataFrame``, ``DictOfSeries``: Need to provide a ``pd.Series`` with column name
:py:attr:`field`.
* ``list``, ``np.ndarray``: Only supported with :py:attr:`method` value ``"plain"`` and
:py:attr:`mformat` value ``"mflag"``
method :
Defines how :py:attr:`mdata` is projected to data:
* ``"plain"``: :py:attr:`mdata` must have the same length as :py:attr:`field`, flags
are set, where the values in :py:attr:`mdata` equal :py:attr:`mflag`.
* ``"ontime"``: Expects datetime indexed :py:attr:`mdata` (types ``pd.Series``,
``pd.DataFrame``, ``DictOfSeries``). Flags are set, where the values in
:py:attr:`mdata` equal :py:attr:`mflag` and the indices of :py:attr:`field` and
:py:attr:`mdata` match.
* ``"right-open"``: Expects datetime indexed :py:attr:`mdata`, which will be interpreted
as a number of time intervals ``t_1, t_2``. Flags are set to all timestamps ``t`` of
:py:attr:`field` with ``t_1 <= t < t_2``.
* ``"left-open"``: like ``"right-open"``, but the interval covers all ``t`` with
``t_1 < t <= t_2``.
* ``"closed"``: like ``"right-open"``, but the interval now covers all ``t`` with
``t_1 <= t <= t_2``.
mformat :
Controls the interval definition in :py:attr:`mdata` (see examples):
* "start-end": mdata is a Series, where every entry indicates
an interval to-flag. The index defines the left bound,
the value defines the right bound.
* "mflag": mdata is an array like, with entries containing
'mflag',where flags shall be set. See documentation for
examples.
* ``"start-end"``: expects datetime indexed :py:attr:`mdata` (types ``pd.Series``,
``pd.DataFrame``, ``DictOfSeries``) with values of type datetime. Each
index-value pair is interpreted as an interval to flag, the index defines the
left bound, the respective value the right bound.
* ``"mflag"``:
- :py:attr:`mdata` of type ``pd.Series``, ``pd.DataFrame``, ``DictOfSeries``:
Two successive index values ``i_1, i_2`` will be interpreted as an interval
``t_1, t_2`` to flag, if the value of ``t_1`` equals :py:attr:`mflag`
- :py:attr:`mdata` of type ``list``, ``np.ndarray``: Flags all :py:attr:`field`
where :py:attr:`mdata` equals :py:attr:`mflag`.
mflag :
The flag that indicates data points in `mdata`, of wich the
projection in data should be flagged.
Value in :py:attr:`mdata` indicating that a flag should be set at the respective
position, timestamp or interval. Ignored if :py:attr:`mformat` is set to ``"start-end"``.
Examples
--------
An example for mdata
Usage of :py:attr:`mdata`
.. doctest:: ExampleFlagManual
......@@ -169,9 +172,8 @@ class FlagtoolsMixin:
2000-05-01 1
dtype: int64
On *dayly* data, with the 'ontime' method, only the provided timestamps
are used. Bear in mind that only exact timestamps apply, any offset
will result in ignoring the timestamp.
On *daily* data, with :py:attr:`method` ``"ontime"``, only the provided timestamps
are used. Only exact matches apply, offsets will be ignored.
.. doctest:: ExampleFlagManual
......@@ -186,7 +188,7 @@ class FlagtoolsMixin:
2000-05-01 True
dtype: bool
With the 'right-open' method, the mdata is forward fill:
With :py:attr:`method` ``"right-open"`` , :py:attr:`mdata` is forward filled:
.. doctest:: ExampleFlagManual
......@@ -199,7 +201,7 @@ class FlagtoolsMixin:
2000-05-01 True
dtype: bool
With the 'left-open' method, backward filling is used:
With :py:attr:`method` ``"left-open"`` , :py:attr:`mdata` is backward filled:
.. doctest:: ExampleFlagManual
......@@ -482,9 +484,9 @@ class FlagtoolsMixin:
)
def andGroup(
self: "SaQC",
field: str | list[str],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | None = None,
field: str | list[str | list[str]],
group: Sequence["SaQC"] | None = None,
target: str | list[str | list[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
......@@ -494,15 +496,12 @@ class FlagtoolsMixin:
Parameters
----------
group:
A collection of ``SaQC`` objects to check for flags, defaults to the current object.
1. If given as a sequence of ``SaQC`` objects, all objects are checked for flags of a
variable named :py:attr:`field`.
2. If given as dictionary the keys are interpreted as ``SaQC`` objects and the corresponding
values as variables of the respective ``SaQC`` object to check for flags.
A collection of ``SaQC`` objects. Flag checks are performed on all ``SaQC`` objects
based on the variables specified in :py:attr:`field`. Whenever all monitored variables
are flagged, the associated timestamps will receive a flag.
"""
return _groupOperation(
base=self,
saqc=self,
field=field,
target=target,
func=operator.and_,
......@@ -520,9 +519,9 @@ class FlagtoolsMixin:
)
def orGroup(
self: "SaQC",
field: str | list[str],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | None = None,
field: str | list[str | list[str]],
group: Sequence["SaQC"] | None = None,
target: str | list[str | list[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
......@@ -532,15 +531,12 @@ class FlagtoolsMixin:
Parameters
----------
group:
A collection of ``SaQC`` objects to check for flags, defaults to the current object.
1. If given as a sequence of ``SaQC`` objects, all objects are checked for flags of a
variable named :py:attr:`field`.
2. If given as dictionary the keys are interpreted as ``SaQC`` objects and the corresponding
values as variables of the respective ``SaQC`` object to check for flags.
A collection of ``SaQC`` objects. Flag checks are performed on all ``SaQC`` objects
based on the variables specified in :py:attr:`field`. Whenever any of monitored variables
is flagged, the associated timestamps will receive a flag.
"""
return _groupOperation(
base=self,
saqc=self,
field=field,
target=target,
func=operator.or_,
......@@ -551,57 +547,101 @@ class FlagtoolsMixin:
def _groupOperation(
base: "SaQC",
field: str | list[str],
saqc: "SaQC",
field: str | Sequence[str | Sequence[str]],
func: Callable[[pd.Series, pd.Series], pd.Series],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | list[str] | None = None,
group: Sequence["SaQC"] | None = None,
target: str | Sequence[str | Sequence[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
"""
Perform a group operation on a collection of ``SaQC`` objects.
This function applies a specified function to perform a group operation on a collection
of `SaQC` objects. The operation involves checking specified :py:attr:`field` for flags,
and if satisfied, assigning a flag value to corresponding timestamps.
Parameters
----------
saqc :
The main `SaQC` object on which the output flags will be set.
field :
The field(s) to be checked for flags for all members of :py:attr:`group`.
func :
The function used to combine flags across the specified :py:attr:`field`
and :py:attr:`group`.
group :
A sequence of ``SaQC`` objects forming the group for the group operation.
If not provided, the operation is performed on the main ``SaQC`` object.
Raises
------
ValueError
If input lengths or conditions are invalid.
Notes
-----
- The `func` parameter should be a function that takes two boolean ``pd.Series`` objects,
representing information on existing flags, and returns a boolean ``pd.Series``
representing the result of the elementwise logical combination of both.
"""
def _flatten(seq: Sequence[str | Sequence[str]]) -> list[str]:
out = []
for e in seq:
if isinstance(e, str):
out.append(e)
else: # Sequence[str]
out.extend(e)
return out
if target is None:
target = field
field, target = toSequence(field), toSequence(target)
if len(target) != 1 and len(target) != len(field):
raise ValueError(
"'target' needs to be a string or a sequence of the same length as 'field'"
if isinstance(group, dict):
warnings.warn(
"The option to pass dictionaries to 'group' is deprecated and will be removed in version 2.7",
DeprecationWarning,
)
group = list(group.keys())
fields = list(group.values())
fields = toSequence(field)
targets = toSequence(target)
# harmonise `group` to type dict[SaQC, list[str]]
if group is None:
group = {base: field}
if not isinstance(group, dict):
group = {base if isinstance(qc, str) else qc: field for qc in group}
for k, v in group.items():
group[k] = toSequence(v)
if group is None or not group:
group = [saqc]
fields_ = fields[:]
if len(fields_) == 1:
# to simplify the retrieval from all groups...
fields_ = fields * len(group)
if len(fields_) != len(group):
raise ValueError(
"'field' needs to be a string or a sequence of the same length as 'group'"
)
# generate mask
mask = pd.Series(dtype=bool)
dfilter = kwargs.get("dfilter", FILTER_ALL)
for qc, fields in group.items():
if set(field) - qc._flags.keys():
for qc, flds in zip(group, fields_):
if set(flds := toSequence(flds)) - qc._flags.keys():
raise KeyError(
f"one or more variable(s) in {field} are missing in given SaQC object"
f"Failed to find one or more of the given variable(s), got {field}"
)
for f in fields:
for f in flds:
flagged = isflagged(qc._flags[f], thresh=dfilter)
if mask.empty:
mask = flagged
mask = func(mask, flagged)
# initialize target(s)
if len(target) == 1:
if target[0] not in base._data:
base._data[target[0]] = pd.Series(np.nan, index=mask.index, name=target[0])
base._flags[target[0]] = pd.Series(np.nan, index=mask.index, name=target[0])
else:
for f, t in zip(field, target):
if t not in base._data:
base = base.copyField(field=f, target=t)
targets = _flatten(targets)
saqc = initializeTargets(saqc, _flatten(fields), targets, mask.index)
# write flags
for t in target:
base._flags[mask, t] = flag
for t in targets:
saqc._flags[mask, t] = flag
return base
return saqc
......@@ -8,28 +8,37 @@
from __future__ import annotations
import operator
from typing import TYPE_CHECKING, Callable
import warnings
from typing import TYPE_CHECKING, Callable, Literal
import numpy as np
import pandas as pd
from scipy.stats import median_abs_deviation
from saqc.constants import BAD
from saqc.core.register import flagging
from saqc.lib.checking import validateCallable, validateMinPeriods, validateWindow
from saqc.lib.checking import (
isCallable,
validateChoice,
validateMinPeriods,
validateWindow,
)
from saqc.lib.tools import isunflagged, statPass
STATS_DICT = {"std": np.std, "var": np.var, "mad": median_abs_deviation}
if TYPE_CHECKING:
from saqc import SaQC
class NoiseMixin:
@flagging()
def flagByStatLowPass(
self: "SaQC",
field: str,
func: Callable[[np.ndarray, pd.Series], float],
window: str | pd.Timedelta,
thresh: float,
func: Literal["std", "var", "mad"]
| Callable[[np.ndarray, pd.Series], float] = "std",
sub_window: str | pd.Timedelta | None = None,
sub_thresh: float | None = None,
min_periods: int | None = None,
......@@ -37,7 +46,9 @@ class NoiseMixin:
**kwargs,
) -> "SaQC":
"""
Flag data chunks of length ``window``, if:
Flag data chunks of length ``window`` dependent on the data deviation.
Flag data chunks of length ``window`` if
1. they exceed ``thresh`` with regard to ``func`` and
2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``,
......@@ -46,7 +57,11 @@ class NoiseMixin:
Parameters
----------
func :
Aggregation function applied on every chunk.
Either a String value, determining the aggregation function applied on every chunk.
* 'std': standard deviation
* 'var': variance
* 'mad': median absolute deviation
Or a Callable function mapping 1 dimensional arraylikes onto scalars.
window :
Window (i.e. chunk) size.
......@@ -65,13 +80,86 @@ class NoiseMixin:
Minimum number of values needed in a chunk to perform the test.
Ignored if ``window`` is an integer.
"""
validateCallable(func, "func")
warnings.warn(
"function 'flagByStatLowPass' is deprecated and will be removed in a future release, "
"use 'flagByScatterLowpass' instead.",
DeprecationWarning,
)
return self.flagByScatterLowpass(
field=field,
window=window,
thresh=thresh,
func=func,
sub_window=sub_window,
sub_thresh=sub_thresh,
min_periods=min_periods,
flag=flag,
)
@flagging()
def flagByScatterLowpass(
self: "SaQC",
field: str,
window: str | pd.Timedelta,
thresh: float,
func: Literal["std", "var", "mad"]
| Callable[[np.ndarray, pd.Series], float] = "std",
sub_window: str | pd.Timedelta | None = None,
sub_thresh: float | None = None,
min_periods: int | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
"""
Flag data chunks of length ``window`` dependent on the data deviation.
Flag data chunks of length ``window`` if
1. they exceed ``thresh`` with regard to ``func`` and
2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``,
exceed ``sub_thresh`` with regard to ``func``
Parameters
----------
func :
Either a string, determining the aggregation function applied on every chunk
* 'std': standard deviation
* 'var': variance
* 'mad': median absolute deviation
Or a Callable, mapping 1 dimensional array likes onto scalars.
window :
Window (i.e. chunk) size.
thresh :
Threshold. A given chunk is flagged, if the return value of ``func`` exceeds ``thresh``.
sub_window :
Window size of sub chunks, that are additionally tested for exceeding ``sub_thresh``
with respect to ``func``.
sub_thresh :
Threshold. A given sub chunk is flagged, if the return value of ``func`` exceeds ``sub_thresh``.
min_periods :
Minimum number of values needed in a chunk to perform the test.
Ignored if ``window`` is an integer.
"""
if (not isCallable(func)) and (func not in ["std", "var", "mad"]):
raise TypeError(
f"Parameter 'func' must either be of type 'Callable' or one out of ['std', 'var', 'mad']. Got {func}."
)
validateWindow(window, allow_int=False)
validateMinPeriods(min_periods)
if sub_window is not None:
validateWindow(sub_window, "sub_window", allow_int=False)
sub_window = pd.Timedelta(sub_window)
if not isCallable(func):
func = STATS_DICT[func]
to_set = statPass(
datcol=self._data[field],
stat=func,
......
......@@ -179,7 +179,7 @@ class OutliersMixin:
thresh: Literal["auto"] | float = 1.5,
algorithm: Literal["ball_tree", "kd_tree", "brute", "auto"] = "ball_tree",
p: int = 1,
density: Literal["auto"] | float | Callable = "auto",
density: Literal["auto"] | float = "auto",
fill_na: bool = True,
flag: float = BAD,
**kwargs,
......@@ -245,8 +245,6 @@ class OutliersMixin:
equal to the median of the absolute diff of the variable to flag.
* ``float`` - introduces linear density with an increment
equal to :py:attr:`density`
* Callable - calculates the density by applying the function
passed onto the variable to flag (passed as Series).
fill_na :
If True, NaNs in the data are filled with a linear interpolation.
......
......@@ -425,7 +425,7 @@ class ScoresMixin:
n: int = 20,
algorithm: Literal["ball_tree", "kd_tree", "brute", "auto"] = "ball_tree",
p: int = 1,
density: Literal["auto"] | float | Callable = "auto",
density: Literal["auto"] | float = "auto",
fill_na: bool = True,
**kwargs,
) -> "SaQC":
......@@ -463,8 +463,6 @@ class ScoresMixin:
density :
How to calculate the temporal distance/density for the variable-to-be-flagged.
* `auto` - introduces linear density with an increment equal to the median of the absolute diff of the
variable to be flagged
* float - introduces linear density with an increment equal to `density`
* Callable - calculates the density by applying the function passed onto the variable to be flagged
(passed as Series).
......
......@@ -176,6 +176,8 @@ def makeFig(
ax_kwargs.pop("fontsize", None) or plt.rcParams["font.size"]
)
plt.rcParams["figure.figsize"] = FIG_KWARGS["figsize"]
# set default axis sharing behavior (share x axis over rows if not explicitly opted sharex=False):
sharex = False
if len(d) > 1:
......
......@@ -10,7 +10,6 @@ from __future__ import annotations
import collections
import functools
import itertools
import operator as op
import re
import warnings
from typing import (
......@@ -21,11 +20,9 @@ from typing import (
List,
Literal,
Sequence,
Tuple,
TypeVar,
Union,
get_args,
get_origin,
overload,
)
......@@ -34,11 +31,10 @@ import pandas as pd
from scipy import fft
from scipy.cluster.hierarchy import fcluster, linkage
from saqc import FILTER_ALL, UNFLAGGED
from saqc.lib.checking import _isLiteral
from saqc.lib.types import CompT
T = TypeVar("T")
def extractLiteral(lit: type(Literal)) -> List:
"""Return a list of values from a typing.Literal[...] at runtime."""
......@@ -47,12 +43,13 @@ def extractLiteral(lit: type(Literal)) -> List:
return list(get_args(lit))
T = TypeVar("T")
# fmt: off
@overload
def toSequence(value: T) -> List[T]:
def toSequence(value: Sequence[T]) -> List[T]:
...
@overload
def toSequence(value: Sequence[T]) -> List[T]:
def toSequence(value: T) -> List[T]:
...
def toSequence(value) -> List:
if value is None or isinstance(value, (str, float, int)):
......@@ -526,14 +523,13 @@ def filterKwargs(
return kwargs
from saqc import FILTER_ALL, UNFLAGGED
A = TypeVar("A", np.ndarray, pd.Series)
def isflagged(flagscol: A, thresh: float) -> A:
"""
Return a mask of flags accordingly to `thresh`. Return type is same as flags.
Check :py:attr:`flagscol` for flags according to :py:attr:`thresh`
Returns a boolean sequence of the same type as :py:attr:`flagscol`
"""
if not isinstance(thresh, (float, int)):
raise TypeError(f"thresh must be of type float, not {repr(type(thresh))}")
......@@ -548,6 +544,34 @@ def isunflagged(flagscol: A, thresh: float) -> A:
return ~isflagged(flagscol, thresh)
def initializeTargets(
    saqc,
    fields: Sequence[str],
    targets: Sequence[str],
    index: pd.Index,
):
    """
    Ensure that every name in ``targets`` exists in the given ``SaQC`` object.

    Note
    ----
    Two cases are handled:
    1. n 'field', n 'target', n > 0 -> targets become direct copies of the fields
    2. n 'field', m 'target' with n != m -> missing targets are created empty
    """
    if len(fields) == len(targets):
        # pairwise copy, but only when the source variable actually exists
        # (it might not be present in 'saqc') and the target is still free
        for src, dst in zip(fields, targets):
            if src in saqc._data and dst not in saqc._data:
                saqc = saqc.copyField(field=src, target=dst)
    # every target still missing at this point is initialised as an all-NaN series
    for name in targets:
        if name not in saqc._data:
            saqc._data[name] = pd.Series(np.nan, index=index, name=name)
            saqc._flags[name] = pd.Series(np.nan, index=index, name=name)
    return saqc
def getUnionIndex(obj, default: pd.DatetimeIndex | None = None):
assert hasattr(obj, "columns")
if default is None:
......
......@@ -4,6 +4,7 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
import itertools
import operator
import numpy as np
......@@ -14,6 +15,7 @@ from saqc import BAD as B
from saqc import UNFLAGGED as U
from saqc import SaQC
from saqc.funcs.flagtools import _groupOperation
from saqc.lib.tools import toSequence
N = np.nan
......@@ -140,64 +142,35 @@ def test_orGroup(left, right, expected):
@pytest.mark.parametrize(
"left,right,expected",
"field, target, expected, copy",
[
([B, U, U, B], [B, B, U, U], [B, B, U, B]),
([B, B, B, B], [B, B, B, B], [B, B, B, B]),
([U, U, U, U], [U, U, U, U], [U, U, U, U]),
("x", "a", [B, B, U, B], True),
(["y", "x"], "a", [B, B, U, B], False),
(["y", "x"], ["a", "b"], [B, B, U, B], True),
(["y", ["x", "y"]], "a", [B, B, B, B], False),
(["y", ["x", "y"]], ["c", ["a", "b"]], [B, B, B, B], True),
],
)
def test__groupOperationUnivariate(left, right, expected):
data = pd.DataFrame(
{"x": [0, 1, 2, 3], "y": [0, 11, 22, 33], "z": [0, 111, 222, 333]}
)
base = SaQC(data=data)
this = SaQC(
data=data, flags=pd.DataFrame({k: pd.Series(left) for k in data.columns})
def test__groupOperation(field, target, expected, copy):
base = SaQC(
data=pd.DataFrame(
{"x": [0, 1, 2, 3], "y": [0, 11, 22, 33], "z": [0, 111, 222, 333]}
),
flags=pd.DataFrame({"x": [B, U, U, B], "y": [B, B, U, U], "z": [B, B, U, B]}),
)
that = SaQC(
data=data, flags=pd.DataFrame({k: pd.Series(right) for k in data.columns})
)
result = _groupOperation(
base=base, field="x", func=operator.or_, group={this: "y", that: ["y", "z"]}
data=pd.DataFrame({"x": [0, 1, 2, 3], "y": [0, 11, 22, 33]}),
flags=pd.DataFrame({"x": [U, B, U, B], "y": [U, U, B, U]}),
)
assert pd.Series(expected).equals(result.flags["x"])
@pytest.mark.parametrize(
"left,right,expected",
[
(pd.Series([B, U, U, B]), pd.Series([B, B, U, U]), pd.Series([B, B, U, B])),
(pd.Series([B, B, B, B]), pd.Series([B, B, B, B]), pd.Series([B, B, B, B])),
(pd.Series([U, U, U, U]), pd.Series([U, U, U, U]), pd.Series([U, U, U, U])),
],
)
def test__groupOperationMultivariate(left, right, expected):
data = pd.DataFrame({"x": [0, 1, 2, 3], "y": [0, 11, 22, 33]})
flags = pd.DataFrame({"x": pd.Series(left), "y": pd.Series(right)})
qc = SaQC(data=data, flags=flags)
# multi fields, no target
result = _groupOperation(base=qc.copy(), field=["x", "y"], func=operator.or_)
for v in ["x", "y"]:
assert expected.equals(result.flags[v])
# multi fields, multi target
result = _groupOperation(
base=qc.copy(), target=["a", "b"], field=["x", "y"], func=operator.or_
)
for v in ["a", "b"]:
assert expected.equals(result.flags[v])
for v, e in zip(["x", "y"], [left, right]):
assert e.equals(result.flags[v])
# multi fields, single target
result = _groupOperation(
base=qc.copy(), target="a", field=["x", "y"], func=operator.or_
saqc=base, field=field, target=target, func=operator.or_, group=[base, that]
)
assert expected.equals(result.flags["a"])
assert result.data["a"].isna().all()
for v, e in zip(["x", "y"], [left, right]):
assert e.equals(result.flags[v])
targets = toSequence(itertools.chain.from_iterable(target))
for t in targets:
assert pd.Series(expected).equals(result.flags[t])
# check source-target behavior
if copy:
fields = toSequence(itertools.chain.from_iterable(field))
for f, t in zip(fields, targets):
assert (result._data[f] == result._data[t]).all(axis=None)
......@@ -34,8 +34,8 @@ def test_statPass():
data[200:210] = noise[:10]
data = DictOfSeries(data=data)
flags = initFlagsLike(data)
qc = SaQC(data, flags).flagByStatLowPass(
"data", np.std, "20D", 0.999, "5D", 0.999, 0, flag=BAD
qc = SaQC(data, flags).flagByScatterLowpass(
"data", "20D", 0.999, "std", "5D", 0.999, 0, flag=BAD
)
assert (qc.flags["data"].iloc[:100] == UNFLAGGED).all()
assert (qc.flags["data"].iloc[100:120] == BAD).all()
......