From 1cc97f49c637cf7079cc4923e4b2759017271484 Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Thu, 22 Apr 2021 14:04:15 +0200 Subject: [PATCH] bfx-es --- saqc/core/modules/noise.py | 2 +- saqc/funcs/noise.py | 36 +++++++++++++++++++++++++++++++++--- saqc/lib/tools.py | 2 -- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/saqc/core/modules/noise.py b/saqc/core/modules/noise.py index 009b19a9a..d6f45bf1f 100644 --- a/saqc/core/modules/noise.py +++ b/saqc/core/modules/noise.py @@ -21,4 +21,4 @@ class Noise(ModuleBase): min_periods: PositiveInt = None, flag: float = BAD ) -> SaQC: - return self.defer("flagByVarianceLowPass", locals()) + return self.defer("flagByStatLowPass", locals()) diff --git a/saqc/funcs/noise.py b/saqc/funcs/noise.py index 8047329d4..1bddf788b 100644 --- a/saqc/funcs/noise.py +++ b/saqc/funcs/noise.py @@ -11,7 +11,7 @@ from saqc.lib.tools import statPass @register(masking='field', module="noise") -def flagByLowPass(data: DictOfSeries, +def flagByStatLowPass(data: DictOfSeries, field: ColumnName, flags: Flags, stat: Callable[[np.array, pd.Series], float], @@ -22,14 +22,44 @@ def flagByLowPass(data: DictOfSeries, min_periods: PositiveInt = None, flag: float = BAD, **kwargs): - """""" + """ + Flag *chunks* of length, `wnsz`: + + 1. If they exceed `thresh` with regard to `stat`: + 2. If all (maybe overlapping) *sub-chunks* of *chunk*, with length `sub_wnsz`, + exceed `sub_thresh` with regard to `stat`: + + Parameters + ---------- + data : dios.DictOfSeries + A dictionary of pandas.Series, holding all the data. + field : str + The fieldname of the column, holding the data-to-be-flagged. + flags : saqc.Flags + Container to store quality flags to data. + stat: Callable[[np.array, pd.Series], float] + Function to aggregate chunk content with. + wnsz: FreqString + Temporal extension of the chunks to test + thresh: PositiveFloat + Threshold, that triggers flagging, if exceeded by stat value. 
+ sub_wnsz: FreqString, default None, + Window size of the sub chunks, that are additionally tested for exceeding + `sub_thresh` with respect to `stat`. + sub_thresh: PositiveFloat, default None + min_periods: PositiveInt, default None + + Returns + ------- + """ datcol = data[field] if not min_periods: min_periods = 0 wnsz = pd.Timedelta(wnsz) - sub_wnsz = pd.Timedelta(sub_wnsz) + if sub_wnsz: + sub_wnsz = pd.Timedelta(sub_wnsz) to_set = statPass(datcol, stat, wnsz, thresh, sub_wnsz, sub_thresh, min_periods, comparator='>') flags[to_set[to_set].index, field] = flag return data, flags \ No newline at end of file diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 8d37ec9d7..3b78b46c9 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -617,8 +617,6 @@ def statPass(datcol: pd.Series, exceeding_sub = oper(min_stat, sub_thresh) exceeds = exceeding_sub & exceeds - - exceeds = exceeding_sub & exceeds to_set = pd.Series(False, index=exceeds.index) for g in exceeds.groupby(by=exceeds.values): -- GitLab