Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • berntm/saqc
  • rdm-software/saqc
  • schueler/saqc
3 results
Show changes
Commits on Source (8)
......@@ -10,30 +10,34 @@ SPDX-License-Identifier: GPL-3.0-or-later
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
### Added
- add multivariate plotting options to `plot`
- added `plot_kwargs` keyword to `plot` function
- added `plot_kwargs` keyword to `plot` function
- added checks and unified error message for common inputs.
- added command line `--version` option
- `-ll` CLI option as a shorthand for `--log-level`
- basic json support for CLI config files, which are detected by `.json`-extension.
- `--json-field` CLI option to use a non-root element of a json file.
- String Selection Options for function selection in `flagScatterLowPass`
### Changed
- pin pandas to versions >= 2.0
- parameter `fill_na` of `SaQC.flagUniLOF` and `SaQC.assignUniLOF` is now of type
- parameter `fill_na` of `SaQC.flagUniLOF` and `SaQC.assignUniLOF` is now of type
`bool` instead of one of `[None, "linear"]`
- in `plot` function: changed default color for single variables to `black` with `80% transparency`
- in `plot` function: added separate legend for flags
- deprecated `flagStatLowPass` in favor of `flagScatterLowPass`
### Removed
- removed deprecated `DictOfSeries.to_df`
- removed plotting option with complete history (`history="complete"`)
### Fixed
- Bug in `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed
- Bug in `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed
to `min_period` was only recognised if also `window` was a tuple.
- `SaQC.propagateFlags` was overwriting existing flags
### Deprecated
- Deprecate `plot` parameter `phaseplot` in favor of usage with `mode="biplot"`
- Deprecate `plot` parameter `cyclestart` in favor of usage with `marker_kwargs`
- Deprecate option `"complete"` for `plot` funcs parameter `history`
- `SaQC.andGroup`: option to pass dictionaries to the parameter `group`.
- `SaQC.orGroup`: option to pass dictionaries to the parameter `group`.
- `plot`: parameter `phaseplot` in favor of usage with `mode="biplot"`
- `plot`: parameter `cyclestart` in favor of usage with `marker_kwargs`
- `plot`: option `"complete"` for parameter `history`
## [2.4.1](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
......@@ -63,7 +67,7 @@ SPDX-License-Identifier: GPL-3.0-or-later
- `func` arguments in text configurations were not parsed correctly
- fail on duplicated arguments to test methods
- `resample` was not writing meta entries
- `flagByStatLowPass` was overwriting existing flags
- `flagByScatterLowpass` was overwriting existing flags
- `flagUniLOF` and `flagLOF` were overwriting existing flags
### Deprecated
- Deprecate `flagMVScore` parameters: `partition` in favor of `window`, `partition_min` in favor of `min_periods`, `min_periods` in favor of `min_periods_r`
......
......@@ -13,4 +13,4 @@ Change Points and Noise
~SaQC.flagChangePoints
~SaQC.assignChangePointCluster
~SaQC.flagByStatLowPass
~SaQC.flagByScatterLowpass
......@@ -9,7 +9,7 @@ from __future__ import annotations
import operator
import warnings
from typing import TYPE_CHECKING, Any, Callable, Sequence, Union
from typing import TYPE_CHECKING, Any, Callable, Sequence
import numpy as np
import pandas as pd
......@@ -18,7 +18,7 @@ from typing_extensions import Literal
from saqc import BAD, FILTER_ALL, UNFLAGGED
from saqc.core import DictOfSeries, flagging, register
from saqc.lib.checking import validateChoice, validateWindow
from saqc.lib.tools import isflagged, isunflagged, toSequence
from saqc.lib.tools import initializeTargets, isflagged, isunflagged, toSequence
if TYPE_CHECKING:
from saqc import SaQC
......@@ -93,7 +93,7 @@ class FlagtoolsMixin:
def flagManual(
self: "SaQC",
field: str,
mdata: pd.Series | pd.DataFrame | DictOfSeries | list | np.ndarray,
mdata: str | pd.Series | np.ndarray | list | pd.DataFrame | DictOfSeries,
method: Literal[
"left-open", "right-open", "closed", "plain", "ontime"
] = "left-open",
......@@ -103,61 +103,64 @@ class FlagtoolsMixin:
**kwargs,
) -> "SaQC":
"""
Flag data by given, "manually generated" data.
Include flags listed in external data.
The data is flagged at locations where `mdata` is equal to a provided
flag (`mflag`). The format of mdata can be an indexed object,
like pd.Series, pd.Dataframe or dios.DictOfSeries, but also can
be a plain list- or array-like. How indexed mdata is aligned to
data is specified via the `method` parameter.
The method allows to integrate pre-existing flagging information.
Parameters
----------
mdata :
The Data determining, wich intervals are to be flagged, or a
string, denoting under which field the data is
accessable.
Determines which values or intervals will be flagged. Supported input types:
method :
Defines how mdata is projected on data. Except for the 'plain'
method, the methods assume mdata to have an index.
* 'plain': mdata must have the same length as data and is
projected one-to-one on data.
* 'ontime': works only with indexed mdata. mdata entries are
matched with data entries that have the same index.
* 'right-open': mdata defines intervals, values are to be
projected on. The intervals are defined,
(1) Either, by any two consecutive timestamps t_1 and 1_2
where t_1 is valued with mflag, or by a series,
(2) Or, a Series, where the index contains in the t1 timestamps
and the values the respective t2 stamps.
* ``pd.Series``: Needs a datetime index and values of type:
The value at t_1 gets projected onto all data timestamps t,
with t_1 <= t < t_2.
* 'left-open': like 'right-open', but the projected interval
now covers all t with t_1 < t <= t_2.
* 'closed': like 'right-open', but the projected interval
now covers all t with t_1 <= t <= t_2.
- datetime, for :py:attr:`method` values ``"right-open"``, ``"left-open"``, ``"closed"``
- or any scalar, for :py:attr:`method` values ``"plain"``, ``"ontime"``
* ``str``: Variable holding the manual flag information.
* ``pd.DataFrame``, ``DictOfSeries``: Need to provide a ``pd.Series`` with column name
:py:attr:`field`.
* ``list``, ``np.ndarray``: Only supported with :py:attr:`method` value ``"plain"`` and
:py:attr:`mformat` value ``"mflag"``
method :
Defines how :py:attr:`mdata` is projected to data:
* ``"plain"``: :py:attr:`mdata` must have the same length as :py:attr:`field`, flags
are set, where the values in :py:attr:`mdata` equal :py:attr:`mflag`.
* ``"ontime"``: Expects datetime indexed :py:attr:`mdata` (types ``pd.Series``,
``pd.DataFrame``, ``DictOfSeries``). Flags are set, where the values in
:py:attr:`mdata` equal :py:attr:`mflag` and the indices of :py:attr:`field` and
:py:attr:`mdata` match.
* ``"right-open"``: Expects datetime indexed :py:attr:`mdata`, which will be interpreted
as a number of time intervals ``t_1, t_2``. Flags are set to all timestamps ``t`` of
:py:attr:`field` with ``t_1 <= t < t_2``.
* ``"left-open"``: like ``"right-open"``, but the interval covers all ``t`` with
``t_1 < t <= t_2``.
* ``"closed"``: like ``"right-open"``, but the interval now covers all ``t`` with
``t_1 <= t <= t_2``.
mformat :
Controls the interval definition in :py:attr:`mdata` (see examples):
* "start-end": mdata is a Series, where every entry indicates
an interval to-flag. The index defines the left bound,
the value defines the right bound.
* "mflag": mdata is an array like, with entries containing
'mflag',where flags shall be set. See documentation for
examples.
* ``"start-end"``: expects datetime indexed :py:attr:`mdata` (types ``pd.Series``,
``pd.DataFrame``, ``DictOfSeries``) with values of type datetime. Each
index-value pair is interpreted as an interval to flag, the index defines the
left bound, the respective value the right bound.
* ``"mflag"``:
- :py:attr:`mdata` of type ``pd.Series``, ``pd.DataFrame``, ``DictOfSeries``:
Two successive index values ``i_1, i_2`` will be interpreted as an interval
``t_1, t_2`` to flag, if the value of ``t_1`` equals :py:attr:`mflag`
- :py:attr:`mdata` of type ``list``, ``np.ndarray``: Flags all :py:attr:`field`
where :py:attr:`mdata` equals :py:attr:`mflag`.
mflag :
The flag that indicates data points in `mdata`, of wich the
projection in data should be flagged.
Value in :py:attr:`mdata` indicating that a flag should be set at the respective
position, timestamp or interval. Ignored if :py:attr:`mformat` is set to ``"start-end"``.
Examples
--------
An example for mdata
Usage of :py:attr:`mdata`
.. doctest:: ExampleFlagManual
......@@ -169,9 +172,8 @@ class FlagtoolsMixin:
2000-05-01 1
dtype: int64
On *dayly* data, with the 'ontime' method, only the provided timestamps
are used. Bear in mind that only exact timestamps apply, any offset
will result in ignoring the timestamp.
On *daily* data, with :py:attr:`method` ``"ontime"``, only the provided timestamps
are used. Only exact matches apply, offsets will be ignored.
.. doctest:: ExampleFlagManual
......@@ -186,7 +188,7 @@ class FlagtoolsMixin:
2000-05-01 True
dtype: bool
With the 'right-open' method, the mdata is forward fill:
With :py:attr:`method` ``"right-open"`` , :py:attr:`mdata` is forward filled:
.. doctest:: ExampleFlagManual
......@@ -199,7 +201,7 @@ class FlagtoolsMixin:
2000-05-01 True
dtype: bool
With the 'left-open' method, backward filling is used:
With :py:attr:`method` ``"left-open"`` , :py:attr:`mdata` is backward filled:
.. doctest:: ExampleFlagManual
......@@ -482,9 +484,9 @@ class FlagtoolsMixin:
)
def andGroup(
self: "SaQC",
field: str | list[str],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | None = None,
field: str | list[str | list[str]],
group: Sequence["SaQC"] | None = None,
target: str | list[str | list[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
......@@ -494,15 +496,12 @@ class FlagtoolsMixin:
Parameters
----------
group:
A collection of ``SaQC`` objects to check for flags, defaults to the current object.
1. If given as a sequence of ``SaQC`` objects, all objects are checked for flags of a
variable named :py:attr:`field`.
2. If given as dictionary the keys are interpreted as ``SaQC`` objects and the corresponding
values as variables of the respective ``SaQC`` object to check for flags.
A collection of ``SaQC`` objects. Flag checks are performed on all ``SaQC`` objects
based on the variables specified in :py:attr:`field`. Whenever all monitored variables
are flagged, the associated timestamps will receive a flag.
"""
return _groupOperation(
base=self,
saqc=self,
field=field,
target=target,
func=operator.and_,
......@@ -520,9 +519,9 @@ class FlagtoolsMixin:
)
def orGroup(
self: "SaQC",
field: str | list[str],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | None = None,
field: str | list[str | list[str]],
group: Sequence["SaQC"] | None = None,
target: str | list[str | list[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
......@@ -532,15 +531,12 @@ class FlagtoolsMixin:
Parameters
----------
group:
A collection of ``SaQC`` objects to check for flags, defaults to the current object.
1. If given as a sequence of ``SaQC`` objects, all objects are checked for flags of a
variable named :py:attr:`field`.
2. If given as dictionary the keys are interpreted as ``SaQC`` objects and the corresponding
values as variables of the respective ``SaQC`` object to check for flags.
A collection of ``SaQC`` objects. Flag checks are performed on all ``SaQC`` objects
based on the variables specified in :py:attr:`field`. Whenever any of monitored variables
is flagged, the associated timestamps will receive a flag.
"""
return _groupOperation(
base=self,
saqc=self,
field=field,
target=target,
func=operator.or_,
......@@ -551,57 +547,101 @@ class FlagtoolsMixin:
def _groupOperation(
base: "SaQC",
field: str | list[str],
saqc: "SaQC",
field: str | Sequence[str | Sequence[str]],
func: Callable[[pd.Series, pd.Series], pd.Series],
group: Sequence["SaQC"] | dict["SaQC", str | Sequence[str]] | None = None,
target: str | list[str] | None = None,
group: Sequence["SaQC"] | None = None,
target: str | Sequence[str | Sequence[str]] | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
"""
Perform a group operation on a collection of ``SaQC`` objects.
This function applies a specified function to perform a group operation on a collection
of `SaQC` objects. The operation involves checking specified :py:attr:`field` for flags,
and if satisfied, assigning a flag value to corresponding timestamps.
Parameters
----------
saqc :
The main `SaQC` object on which the output flags will be set.
field :
The field(s) to be checked for flags for all members of :py:attr:`group`.
func :
The function used to combine flags across the specified :py:attr:`field`
and :py:attr:`group`.
group :
A sequence of ``SaQC`` objects forming the group for the group operation.
If not provided, the operation is performed on the main ``SaQC`` object.
Raises
------
ValueError
If input lengths or conditions are invalid.
Notes
-----
- The `func` parameter should be a function that takes two boolean ``pd.Series`` objects,
representing information on existing flags, and returns a boolean ``pd.Series``
representing the result of the elementwise logical combination of both.
"""
def _flatten(seq: Sequence[str | Sequence[str]]) -> list[str]:
out = []
for e in seq:
if isinstance(e, str):
out.append(e)
else: # Sequence[str]
out.extend(e)
return out
if target is None:
target = field
field, target = toSequence(field), toSequence(target)
if len(target) != 1 and len(target) != len(field):
raise ValueError(
"'target' needs to be a string or a sequence of the same length as 'field'"
if isinstance(group, dict):
warnings.warn(
"The option to pass dictionaries to 'group' is deprecated and will be removed in version 2.7",
DeprecationWarning,
)
group = list(group.keys())
fields = list(group.values())
fields = toSequence(field)
targets = toSequence(target)
# harmonise `group` to type dict[SaQC, list[str]]
if group is None:
group = {base: field}
if not isinstance(group, dict):
group = {base if isinstance(qc, str) else qc: field for qc in group}
for k, v in group.items():
group[k] = toSequence(v)
if group is None or not group:
group = [saqc]
fields_ = fields[:]
if len(fields_) == 1:
# to simplify the retrieval from all groups...
fields_ = fields * len(group)
if len(fields_) != len(group):
raise ValueError(
"'field' needs to be a string or a sequence of the same length as 'group'"
)
# generate mask
mask = pd.Series(dtype=bool)
dfilter = kwargs.get("dfilter", FILTER_ALL)
for qc, fields in group.items():
if set(field) - qc._flags.keys():
for qc, flds in zip(group, fields_):
if set(flds := toSequence(flds)) - qc._flags.keys():
raise KeyError(
f"one or more variable(s) in {field} are missing in given SaQC object"
f"Failed to find one or more of the given variable(s), got {field}"
)
for f in fields:
for f in flds:
flagged = isflagged(qc._flags[f], thresh=dfilter)
if mask.empty:
mask = flagged
mask = func(mask, flagged)
# initialize target(s)
if len(target) == 1:
if target[0] not in base._data:
base._data[target[0]] = pd.Series(np.nan, index=mask.index, name=target[0])
base._flags[target[0]] = pd.Series(np.nan, index=mask.index, name=target[0])
else:
for f, t in zip(field, target):
if t not in base._data:
base = base.copyField(field=f, target=t)
targets = _flatten(targets)
saqc = initializeTargets(saqc, _flatten(fields), targets, mask.index)
# write flags
for t in target:
base._flags[mask, t] = flag
for t in targets:
saqc._flags[mask, t] = flag
return base
return saqc
......@@ -8,28 +8,37 @@
from __future__ import annotations
import operator
from typing import TYPE_CHECKING, Callable
import warnings
from typing import TYPE_CHECKING, Callable, Literal
import numpy as np
import pandas as pd
from scipy.stats import median_abs_deviation
from saqc.constants import BAD
from saqc.core.register import flagging
from saqc.lib.checking import validateCallable, validateMinPeriods, validateWindow
from saqc.lib.checking import (
isCallable,
validateChoice,
validateMinPeriods,
validateWindow,
)
from saqc.lib.tools import isunflagged, statPass
STATS_DICT = {"std": np.std, "var": np.var, "mad": median_abs_deviation}
if TYPE_CHECKING:
from saqc import SaQC
class NoiseMixin:
@flagging()
def flagByStatLowPass(
self: "SaQC",
field: str,
func: Callable[[np.ndarray, pd.Series], float],
window: str | pd.Timedelta,
thresh: float,
func: Literal["std", "var", "mad"]
| Callable[[np.ndarray, pd.Series], float] = "std",
sub_window: str | pd.Timedelta | None = None,
sub_thresh: float | None = None,
min_periods: int | None = None,
......@@ -37,7 +46,9 @@ class NoiseMixin:
**kwargs,
) -> "SaQC":
"""
Flag data chunks of length ``window``, if:
Flag data chunks of length ``window`` dependent on the data deviation.
Flag data chunks of length ``window`` if
1. they exceed ``thresh`` with regard to ``func`` and
2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``,
......@@ -46,7 +57,11 @@ class NoiseMixin:
Parameters
----------
func :
Aggregation function applied on every chunk.
Either a String value, determining the aggregation function applied on every chunk.
* 'std': standard deviation
* 'var': variance
* 'mad': median absolute deviation
Or a Callable function mapping 1 dimensional arraylikes onto scalars.
window :
Window (i.e. chunk) size.
......@@ -65,13 +80,86 @@ class NoiseMixin:
Minimum number of values needed in a chunk to perform the test.
Ignored if ``window`` is an integer.
"""
validateCallable(func, "func")
warnings.warn(
"function 'flagByStatLowPass' is deprecated and will be removed in a future release, "
"use 'flagByScatterLowpass' instead.",
DeprecationWarning,
)
return self.flagByScatterLowpass(
field=field,
window=window,
thresh=thresh,
func=func,
sub_window=sub_window,
sub_thresh=sub_thresh,
min_periods=min_periods,
flag=flag,
)
@flagging()
def flagByScatterLowpass(
self: "SaQC",
field: str,
window: str | pd.Timedelta,
thresh: float,
func: Literal["std", "var", "mad"]
| Callable[[np.ndarray, pd.Series], float] = "std",
sub_window: str | pd.Timedelta | None = None,
sub_thresh: float | None = None,
min_periods: int | None = None,
flag: float = BAD,
**kwargs,
) -> "SaQC":
"""
Flag data chunks of length ``window`` dependent on the data deviation.
Flag data chunks of length ``window`` if
1. they exceed ``thresh`` with regard to ``func`` and
2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``,
exceed ``sub_thresh`` with regard to ``func``
Parameters
----------
func :
Either a string, determining the aggregation function applied on every chunk
* 'std': standard deviation
* 'var': variance
* 'mad': median absolute deviation
Or a Callable, mapping 1 dimensional array likes onto scalars.
window :
Window (i.e. chunk) size.
thresh :
Threshold. A given chunk is flagged, if the return value of ``func`` exceeds ``thresh``.
sub_window :
Window size of sub chunks, that are additionally tested for exceeding ``sub_thresh``
with respect to ``func``.
sub_thresh :
Threshold. A given sub chunk is flagged, if the return value of ``func`` exceeds ``sub_thresh``.
min_periods :
Minimum number of values needed in a chunk to perform the test.
Ignored if ``window`` is an integer.
"""
if (not isCallable(func)) and (func not in ["std", "var", "mad"]):
raise TypeError(
f"Parameter 'func' must either be of type 'Callable' or one out of ['std', 'var', 'mad']. Got {func}."
)
validateWindow(window, allow_int=False)
validateMinPeriods(min_periods)
if sub_window is not None:
validateWindow(sub_window, "sub_window", allow_int=False)
sub_window = pd.Timedelta(sub_window)
if not isCallable(func):
func = STATS_DICT[func]
to_set = statPass(
datcol=self._data[field],
stat=func,
......
......@@ -179,7 +179,7 @@ class OutliersMixin:
thresh: Literal["auto"] | float = 1.5,
algorithm: Literal["ball_tree", "kd_tree", "brute", "auto"] = "ball_tree",
p: int = 1,
density: Literal["auto"] | float | Callable = "auto",
density: Literal["auto"] | float = "auto",
fill_na: bool = True,
flag: float = BAD,
**kwargs,
......@@ -245,8 +245,6 @@ class OutliersMixin:
equal to the median of the absolute diff of the variable to flag.
* ``float`` - introduces linear density with an increment
equal to :py:attr:`density`
* Callable - calculates the density by applying the function
passed onto the variable to flag (passed as Series).
fill_na :
If True, NaNs in the data are filled with a linear interpolation.
......
......@@ -425,7 +425,7 @@ class ScoresMixin:
n: int = 20,
algorithm: Literal["ball_tree", "kd_tree", "brute", "auto"] = "ball_tree",
p: int = 1,
density: Literal["auto"] | float | Callable = "auto",
density: Literal["auto"] | float = "auto",
fill_na: bool = True,
**kwargs,
) -> "SaQC":
......@@ -463,8 +463,6 @@ class ScoresMixin:
density :
How to calculate the temporal distance/density for the variable-to-be-flagged.
* `auto` - introduces linear density with an increment equal to the median of the absolute diff of the
variable to be flagged
* float - introduces linear density with an increment equal to `density`
* Callable - calculates the density by applying the function passed onto the variable to be flagged
(passed as Series).
......
......@@ -176,6 +176,8 @@ def makeFig(
ax_kwargs.pop("fontsize", None) or plt.rcParams["font.size"]
)
plt.rcParams["figure.figsize"] = FIG_KWARGS["figsize"]
# set default axis sharing behavior (share x axis over rows if not explicitly opted sharex=False):
sharex = False
if len(d) > 1:
......
......@@ -10,7 +10,6 @@ from __future__ import annotations
import collections
import functools
import itertools
import operator as op
import re
import warnings
from typing import (
......@@ -21,11 +20,9 @@ from typing import (
List,
Literal,
Sequence,
Tuple,
TypeVar,
Union,
get_args,
get_origin,
overload,
)
......@@ -34,11 +31,10 @@ import pandas as pd
from scipy import fft
from scipy.cluster.hierarchy import fcluster, linkage
from saqc import FILTER_ALL, UNFLAGGED
from saqc.lib.checking import _isLiteral
from saqc.lib.types import CompT
T = TypeVar("T")
def extractLiteral(lit: type(Literal)) -> List:
"""Return a list of values from a typing.Literal[...] at runtime."""
......@@ -47,12 +43,13 @@ def extractLiteral(lit: type(Literal)) -> List:
return list(get_args(lit))
T = TypeVar("T")
# fmt: off
@overload
def toSequence(value: T) -> List[T]:
def toSequence(value: Sequence[T]) -> List[T]:
...
@overload
def toSequence(value: Sequence[T]) -> List[T]:
def toSequence(value: T) -> List[T]:
...
def toSequence(value) -> List:
if value is None or isinstance(value, (str, float, int)):
......@@ -526,14 +523,13 @@ def filterKwargs(
return kwargs
from saqc import FILTER_ALL, UNFLAGGED
A = TypeVar("A", np.ndarray, pd.Series)
def isflagged(flagscol: A, thresh: float) -> A:
"""
Return a mask of flags accordingly to `thresh`. Return type is same as flags.
Check :py:attr:`flagscol` for flags according to :py:attr:`thresh`
Returns a boolean sequence of the same type as :py:attr:`flagscol`
"""
if not isinstance(thresh, (float, int)):
raise TypeError(f"thresh must be of type float, not {repr(type(thresh))}")
......@@ -548,6 +544,34 @@ def isunflagged(flagscol: A, thresh: float) -> A:
return ~isflagged(flagscol, thresh)
def initializeTargets(
    saqc,
    fields: Sequence[str],
    targets: Sequence[str],
    index: pd.Index,
):
    """
    Ensure that every name in ``targets`` exists in the given ``SaQC`` object.

    Note
    ----
    Two cases are handled:
    1. n 'field', n 'target', n > 0 -> targets become direct copies of the fields
    2. n 'field', m 'target' with n != m -> missing targets are created empty
    """
    if len(fields) == len(targets):
        # pairwise copy, but only when the source variable actually exists
        # (it might not be present in 'saqc') and the target is still free
        for src, dst in zip(fields, targets):
            if src in saqc._data and dst not in saqc._data:
                saqc = saqc.copyField(field=src, target=dst)
    # every target still missing at this point is initialised as an all-NaN series
    for name in targets:
        if name not in saqc._data:
            saqc._data[name] = pd.Series(np.nan, index=index, name=name)
            saqc._flags[name] = pd.Series(np.nan, index=index, name=name)
    return saqc
def getUnionIndex(obj, default: pd.DatetimeIndex | None = None):
assert hasattr(obj, "columns")
if default is None:
......
......@@ -4,6 +4,7 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
import itertools
import operator
import numpy as np
......@@ -14,6 +15,7 @@ from saqc import BAD as B
from saqc import UNFLAGGED as U
from saqc import SaQC
from saqc.funcs.flagtools import _groupOperation
from saqc.lib.tools import toSequence
N = np.nan
......@@ -140,64 +142,35 @@ def test_orGroup(left, right, expected):
@pytest.mark.parametrize(
"left,right,expected",
"field, target, expected, copy",
[
([B, U, U, B], [B, B, U, U], [B, B, U, B]),
([B, B, B, B], [B, B, B, B], [B, B, B, B]),
([U, U, U, U], [U, U, U, U], [U, U, U, U]),
("x", "a", [B, B, U, B], True),
(["y", "x"], "a", [B, B, U, B], False),
(["y", "x"], ["a", "b"], [B, B, U, B], True),
(["y", ["x", "y"]], "a", [B, B, B, B], False),
(["y", ["x", "y"]], ["c", ["a", "b"]], [B, B, B, B], True),
],
)
def test__groupOperationUnivariate(left, right, expected):
data = pd.DataFrame(
{"x": [0, 1, 2, 3], "y": [0, 11, 22, 33], "z": [0, 111, 222, 333]}
)
base = SaQC(data=data)
this = SaQC(
data=data, flags=pd.DataFrame({k: pd.Series(left) for k in data.columns})
def test__groupOperation(field, target, expected, copy):
base = SaQC(
data=pd.DataFrame(
{"x": [0, 1, 2, 3], "y": [0, 11, 22, 33], "z": [0, 111, 222, 333]}
),
flags=pd.DataFrame({"x": [B, U, U, B], "y": [B, B, U, U], "z": [B, B, U, B]}),
)
that = SaQC(
data=data, flags=pd.DataFrame({k: pd.Series(right) for k in data.columns})
)
result = _groupOperation(
base=base, field="x", func=operator.or_, group={this: "y", that: ["y", "z"]}
data=pd.DataFrame({"x": [0, 1, 2, 3], "y": [0, 11, 22, 33]}),
flags=pd.DataFrame({"x": [U, B, U, B], "y": [U, U, B, U]}),
)
assert pd.Series(expected).equals(result.flags["x"])
@pytest.mark.parametrize(
"left,right,expected",
[
(pd.Series([B, U, U, B]), pd.Series([B, B, U, U]), pd.Series([B, B, U, B])),
(pd.Series([B, B, B, B]), pd.Series([B, B, B, B]), pd.Series([B, B, B, B])),
(pd.Series([U, U, U, U]), pd.Series([U, U, U, U]), pd.Series([U, U, U, U])),
],
)
def test__groupOperationMultivariate(left, right, expected):
data = pd.DataFrame({"x": [0, 1, 2, 3], "y": [0, 11, 22, 33]})
flags = pd.DataFrame({"x": pd.Series(left), "y": pd.Series(right)})
qc = SaQC(data=data, flags=flags)
# multi fields, no target
result = _groupOperation(base=qc.copy(), field=["x", "y"], func=operator.or_)
for v in ["x", "y"]:
assert expected.equals(result.flags[v])
# multi fields, multi target
result = _groupOperation(
base=qc.copy(), target=["a", "b"], field=["x", "y"], func=operator.or_
)
for v in ["a", "b"]:
assert expected.equals(result.flags[v])
for v, e in zip(["x", "y"], [left, right]):
assert e.equals(result.flags[v])
# multi fields, single target
result = _groupOperation(
base=qc.copy(), target="a", field=["x", "y"], func=operator.or_
saqc=base, field=field, target=target, func=operator.or_, group=[base, that]
)
assert expected.equals(result.flags["a"])
assert result.data["a"].isna().all()
for v, e in zip(["x", "y"], [left, right]):
assert e.equals(result.flags[v])
targets = toSequence(itertools.chain.from_iterable(target))
for t in targets:
assert pd.Series(expected).equals(result.flags[t])
# check source-target behavior
if copy:
fields = toSequence(itertools.chain.from_iterable(field))
for f, t in zip(fields, targets):
assert (result._data[f] == result._data[t]).all(axis=None)
......@@ -34,8 +34,8 @@ def test_statPass():
data[200:210] = noise[:10]
data = DictOfSeries(data=data)
flags = initFlagsLike(data)
qc = SaQC(data, flags).flagByStatLowPass(
"data", np.std, "20D", 0.999, "5D", 0.999, 0, flag=BAD
qc = SaQC(data, flags).flagByScatterLowpass(
"data", "20D", 0.999, "std", "5D", 0.999, 0, flag=BAD
)
assert (qc.flags["data"].iloc[:100] == UNFLAGGED).all()
assert (qc.flags["data"].iloc[100:120] == BAD).all()
......