From d07f1e390ebdc6bdedc1bb1e23a5a331eddfba47 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Sun, 3 Mar 2024 16:57:21 +0100 Subject: [PATCH] updated doc --- saqc/funcs/outliers.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index ff6f629a7..2a5652ce6 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -179,8 +179,8 @@ class OutliersMixin: p: int = 1, density: Literal["auto"] | float = "auto", fill_na: bool = True, - slope_correct=True, - min_offset=None, + slope_correct: bool = True, + min_offset: float = None, flag: float = BAD, **kwargs, ) -> "SaQC": @@ -249,6 +249,15 @@ class OutliersMixin: fill_na : If True, NaNs in the data are filled with a linear interpolation. + slope_correct : + If True, a correction is applied that removes outlier clusters that actually + just seem to be steep slopes. + + min_offset : + If set, only those outlier clusters will be flagged that are preceded and succeeded + by sufficiently large value "jumps". Defaults to estimating the sufficient value jumps from + the median over the absolute step sizes between data points. + See Also -------- :ref:`introduction to outlier detection with @@ -374,7 +383,7 @@ class OutliersMixin: g_mask = s_mask.diff() g_mask = g_mask.cumsum() dat = self._data[field] - od_groups = dat.groupby(by=g_mask) + od_groups = dat.interpolate('linear').groupby(by=g_mask) first_vals = od_groups.first() last_vals = od_groups.last() max_vals = od_groups.max() @@ -385,7 +394,9 @@ class OutliersMixin: eps = d_diff.abs().median() if eps == 0: eps = d_diff[d_diff != 0].abs().median() - eps = 3 * eps + else: + eps = density + eps = 3 * eps else: eps = min_offset up_slopes = (min_vals + eps >= last_vals.shift(1)) & ( -- GitLab