diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index ff6f629a7f0685e30db7c1643cba8e4c7c8c9d87..2a5652ce6920dd5773cde2337d231e586518ccfd 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -179,8 +179,8 @@ class OutliersMixin: p: int = 1, density: Literal["auto"] | float = "auto", fill_na: bool = True, - slope_correct=True, - min_offset=None, + slope_correct: bool = True, + min_offset: float = None, flag: float = BAD, **kwargs, ) -> "SaQC": @@ -249,6 +249,15 @@ class OutliersMixin: fill_na : If True, NaNs in the data are filled with a linear interpolation. + slope_correct : + If True, a correction is applied that removes outlier clusters that actually + just seem to be steep slopes. + + min_offset : + If set, only those outlier clusters will be flagged that are preceded and succeeded + by sufficiently large value "jumps". Defaults to estimating the sufficient value jumps from + the median over the absolute step sizes between data points. + See Also -------- :ref:`introduction to outlier detection with @@ -374,7 +383,7 @@ class OutliersMixin: g_mask = s_mask.diff() g_mask = g_mask.cumsum() dat = self._data[field] - od_groups = dat.groupby(by=g_mask) + od_groups = dat.interpolate('linear').groupby(by=g_mask) first_vals = od_groups.first() last_vals = od_groups.last() max_vals = od_groups.max() @@ -385,7 +394,9 @@ class OutliersMixin: eps = d_diff.abs().median() if eps == 0: eps = d_diff[d_diff != 0].abs().median() - eps = 3 * eps + else: + eps = density + eps = 3 * eps else: eps = min_offset up_slopes = (min_vals + eps >= last_vals.shift(1)) & (