From eb1a3afbb8add26f7cdcecc5613c570c537bdbfe Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Mon, 12 Apr 2021 11:38:43 +0200 Subject: [PATCH] filterFuncs Added --- saqc/core/modules/__init__.py | 2 ++ saqc/core/modules/noise.py | 23 +++++++++++++++++ saqc/funcs/__init__.py | 1 + saqc/funcs/constants.py | 2 +- saqc/funcs/noise.py | 48 +++++++++++++++++++++++++++++++++++ saqc/lib/types.py | 4 +++ 6 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 saqc/core/modules/noise.py create mode 100644 saqc/funcs/noise.py diff --git a/saqc/core/modules/__init__.py b/saqc/core/modules/__init__.py index 00fd0bf43..178f57d7c 100644 --- a/saqc/core/modules/__init__.py +++ b/saqc/core/modules/__init__.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from saqc.core.modules.breaks import Breaks +from saqc.core.modules.noise import Noise from saqc.core.modules.changepoints import ChangePoints from saqc.core.modules.constants import Constants from saqc.core.modules.curvefit import Curvefit @@ -30,6 +31,7 @@ class FuncModules: self.flagtools = FlagTools(obj) self.generic = Generic(obj) self.interpolation = Interpolation(obj) + self.noise = Noise(obj) self.outliers = Outliers(obj) self.pattern = Pattern(obj) self.resampling = Resampling(obj) diff --git a/saqc/core/modules/noise.py b/saqc/core/modules/noise.py new file mode 100644 index 000000000..527f2562c --- /dev/null +++ b/saqc/core/modules/noise.py @@ -0,0 +1,23 @@ +#! 
class Noise(ModuleBase):
    """Method-style access to the noise-flagging functions."""

    def flagByVarianceLowPass(
        self,
        field: ColumnName,
        wnsz: FreqString,
        thresh: PositiveFloat,
        sub_wnsz: FreqString = None,
        sub_thresh: PositiveFloat = None,
        min_periods: PositiveInt = None,
        flag: float = BAD,
    ) -> SaQC:
        """
        Forward a ``flagByVarianceLowPass`` call to ``self.defer``.

        All arguments are handed on unchanged under the function name
        ``"flagByVarianceLowPass"``; see the function of that name in
        ``saqc/funcs/noise.py`` for what the parameters mean.

        Parameters
        ----------
        field : ColumnName
            Name of the column to test.
        wnsz : FreqString
            Size of the main rolling window.
        thresh : PositiveFloat
            Threshold applied to the statistic of the main window.
        sub_wnsz : FreqString, optional
            Size of the sub-window.
        sub_thresh : PositiveFloat, optional
            Threshold applied to the statistic of the sub-window.
        min_periods : PositiveInt, optional
            Minimum number of observations per window.
        flag : float, default BAD
            Flag value to set.

        Returns
        -------
        SaQC
            Whatever ``self.defer`` returns (presumably the updated SaQC
            object -- confirm against ``ModuleBase.defer``).
        """
        # NOTE: do not bind any local name before this line -- ``locals()``
        # has to contain exactly ``self`` and the call arguments.
        return self.defer("flagByVarianceLowPass", locals())
@register(masking='field', module="noise")
def flagByVarianceLowPass(data: DictOfSeries,
                          field: ColumnName,
                          flags: Flags,
                          wnsz: FreqString,
                          thresh: PositiveFloat,
                          sub_wnsz: FreqString = None,
                          sub_thresh: PositiveFloat = None,
                          min_periods: PositiveInt = None,
                          flag: float = BAD,
                          **kwargs):
    """
    Flag stretches of ``data[field]`` whose rolling standard deviation is too high.

    A timestamp ``t`` counts as "exceeding" when both conditions hold:

    * the standard deviation of the window of size `wnsz` ending at ``t``
      is greater than `thresh`, and
    * the smallest standard deviation of all `sub_wnsz`-sized windows ending
      within the last ``wnsz - sub_wnsz`` before ``t`` is greater than
      `sub_thresh`.

    Every run of consecutive exceeding timestamps is extended backwards by
    `wnsz`, and all values in the extended stretch are flagged.

    Parameters
    ----------
    data : DictOfSeries
        Container holding the series to test.
    field : ColumnName
        Name of the column in `data` to test.
    flags : Flags
        Flags object that receives the new flags.
    wnsz : FreqString
        Size of the main rolling window; must be parseable by ``pd.Timedelta``.
    thresh : PositiveFloat
        Threshold for the standard deviation of the main window.
    sub_wnsz : FreqString, optional
        Size of the sub-window. Defaults to `wnsz`.
    sub_thresh : PositiveFloat, optional
        Threshold for the standard deviation of the sub-windows.
        Defaults to `thresh`.
    min_periods : PositiveInt, optional
        Minimum number of observations required in the main window.
        Defaults to 0.
    flag : float, default BAD
        Flag value to set.

    Returns
    -------
    data : DictOfSeries
        The input data; this function does not modify it.
    flags : Flags
        The flags object with `flag` set at the detected stretches.
    """
    datcol = data[field]

    # Only replace arguments that were really omitted; an explicit falsy
    # value (e.g. ``sub_thresh=0.0``) must not be silently overridden.
    if min_periods is None:
        min_periods = 0
    if sub_thresh is None:
        sub_thresh = thresh
    if sub_wnsz is None:
        sub_wnsz = wnsz

    wnsz = pd.Timedelta(wnsz)
    sub_wnsz = pd.Timedelta(sub_wnsz)

    stat_parent = datcol.rolling(wnsz, min_periods=min_periods).std()
    exceeding_parent = stat_parent > thresh

    stat_sub = datcol.rolling(sub_wnsz).std()
    lookback = wnsz - sub_wnsz
    if lookback == pd.Timedelta(0):
        # Sub-window equals the main window: there is exactly one sub-window
        # per timestamp, so no zero-length rolling window is needed.
        min_stat = stat_sub
    else:
        min_stat = stat_sub.rolling(lookback, closed='both').min()
    exceeding_sub = min_stat > sub_thresh

    exceeds = exceeding_sub & exceeding_parent

    # Label runs of consecutive equal values, so that every exceeding stretch
    # is widened on its own. Grouping by the value itself would merge all
    # exceeding timestamps into a single group spanning the quiet parts too.
    run_ids = exceeds.ne(exceeds.shift()).cumsum()
    to_set = pd.Series(False, index=exceeds.index)
    for _, run in exceeds[exceeds].groupby(run_ids[exceeds].values):
        # A window ending at the first timestamp of the run reaches back
        # ``wnsz``; those values contributed to the exceedance as well.
        to_set[run.index[0] - wnsz:run.index[-1]] = True

    # Flag the widened stretches (previously ``to_set`` was computed but the
    # un-widened ``exceeds`` mask was used here).
    flags[to_set[to_set].index, field] = flag
    return data, flags
typy hinting magic -- GitLab