diff --git a/saqc/core/modules/__init__.py b/saqc/core/modules/__init__.py index 00fd0bf43d942c51a104a4a1c1c63e9f619bbb51..178f57d7c3e5e1427df401024f15dd758898f18a 100644 --- a/saqc/core/modules/__init__.py +++ b/saqc/core/modules/__init__.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from saqc.core.modules.breaks import Breaks +from saqc.core.modules.noise import Noise from saqc.core.modules.changepoints import ChangePoints from saqc.core.modules.constants import Constants from saqc.core.modules.curvefit import Curvefit @@ -30,6 +31,7 @@ class FuncModules: self.flagtools = FlagTools(obj) self.generic = Generic(obj) self.interpolation = Interpolation(obj) + self.noise = Noise(obj) self.outliers = Outliers(obj) self.pattern = Pattern(obj) self.resampling = Resampling(obj) diff --git a/saqc/core/modules/noise.py b/saqc/core/modules/noise.py new file mode 100644 index 0000000000000000000000000000000000000000..527f2562c5bb04517d2ac3fb7e3262ad7f44cd8d --- /dev/null +++ b/saqc/core/modules/noise.py @@ -0,0 +1,23 @@ +#! 
class Noise(ModuleBase):
    """Method container exposing the noise-detection functions (module ``noise``)."""

    def flagByVarianceLowPass(self,
                              field: ColumnName,
                              wnsz: FreqString,
                              thresh: PositiveFloat,
                              sub_wnsz: FreqString = None,
                              sub_thresh: PositiveFloat = None,
                              min_periods: PositiveInt = None,
                              flag: float = BAD
                              ) -> SaQC:
        """
        Hand a call of ``flagByVarianceLowPass`` over to ``self.defer``.

        The method performs no computation itself: it passes the function name
        and a snapshot of its own local namespace to ``self.defer`` and returns
        whatever ``self.defer`` returns.

        Parameters
        ----------
        field : ColumnName
            Forwarded unchanged.
        wnsz : FreqString
            Forwarded unchanged.
        thresh : PositiveFloat
            Forwarded unchanged.
        sub_wnsz : FreqString, default None
            Forwarded unchanged.
        sub_thresh : PositiveFloat, default None
            Forwarded unchanged.
        min_periods : PositiveInt, default None
            Forwarded unchanged.
        flag : float, default BAD
            Forwarded unchanged.

        Returns
        -------
        The return value of ``self.defer`` (annotated as ``SaQC``).
        """
        # NOTE: `locals()` captures every local name, including `self`. Do not
        # introduce additional local variables above this line - they would be
        # forwarded as well.
        # NOTE(review): presumably `defer` registers the call for the function of
        # the same name in `saqc.funcs.noise` - confirm against `ModuleBase.defer`.
        return self.defer("flagByVarianceLowPass", locals())
@register(masking='field', module="noise")
def flagByVarianceLowPass(data: DictOfSeries,
                          field: ColumnName,
                          flags: Flags,
                          wnsz: FreqString,
                          thresh: PositiveFloat,
                          sub_wnsz: FreqString = None,
                          sub_thresh: PositiveFloat = None,
                          min_periods: PositiveInt = None,
                          flag: float = BAD,
                          **kwargs):
    """
    Flag windows of `field` whose rolling standard deviation is too high.

    A timestamp ``t`` marks a noisy window ``(t - wnsz, t]`` if

    1. the standard deviation of the values in that window exceeds `thresh`, and
    2. the smallest standard deviation over all sub-windows of size `sub_wnsz`
       ending inside ``[t - (wnsz - sub_wnsz), t]`` exceeds `sub_thresh`.

    All values inside a noisy window get flagged.

    Parameters
    ----------
    data : DictOfSeries
        The data container; only ``data[field]`` is read.
    field : ColumnName
        Name of the column to check and flag.
    flags : Flags
        The flags container; modified in place for `field`.
    wnsz : FreqString
        Temporal extension of the (parent) rolling window.
    thresh : PositiveFloat
        Threshold the standard deviation of a parent window has to exceed.
    sub_wnsz : FreqString, default None
        Temporal extension of the sub-windows. Falls back to `wnsz`.
        Must not be larger than `wnsz`.
    sub_thresh : PositiveFloat, default None
        Threshold for the sub-window statistic. Falls back to `thresh`.
    min_periods : PositiveInt, default None
        Minimum number of observations a parent window needs to yield a
        statistic. Falls back to ``0``.
    flag : float, default BAD
        The flag value to set.

    Returns
    -------
    data : DictOfSeries
        The unmodified input data.
    flags : Flags
        The flags, with `flag` set at all values inside noisy windows.

    Raises
    ------
    ValueError
        If `sub_wnsz` is larger than `wnsz`.
    """
    datcol = data[field]
    if datcol.empty:
        # nothing to evaluate - and rolling over an empty series may fail
        return data, flags

    if not min_periods:
        min_periods = 0
    if sub_thresh is None:
        sub_thresh = thresh

    wnsz = pd.Timedelta(wnsz)
    sub_wnsz = pd.Timedelta(sub_wnsz) if sub_wnsz else wnsz
    if sub_wnsz > wnsz:
        raise ValueError(
            f"'sub_wnsz' ({sub_wnsz}) must not be larger than 'wnsz' ({wnsz})."
        )

    stat_parent = datcol.rolling(wnsz, min_periods=min_periods).std()
    exceeds = stat_parent > thresh

    stat_sub = datcol.rolling(sub_wnsz).std()
    lag = wnsz - sub_wnsz
    if lag > pd.Timedelta(0):
        min_stat = stat_sub.rolling(lag, closed='both').min()
    else:
        # identical window sizes: the only sub-window is the parent window
        # itself, so avoid rolling with a zero-width window
        min_stat = stat_sub
    exceeds &= min_stat > sub_thresh

    to_set = pd.Series(False, index=exceeds.index)
    # label every run of consecutive equal values, so that separate noisy
    # stretches are not merged into one big chunk
    run_ids = exceeds.astype(int).diff().ne(0).cumsum()
    for _, run in exceeds[exceeds].groupby(run_ids[exceeds].values):
        # the rolling windows are right-closed, i.e. `(t - wnsz, t]`, whereas
        # datetime label slices include both bounds - so shift the start by 1ns
        start = run.index[0] - wnsz + pd.Timedelta('1ns')
        to_set.loc[start:run.index[-1]] = True

    # flag the complete noisy windows, not only their end points
    flags[to_set[to_set].index, field] = flag
    return data, flags
+PositiveInt = NewType("PositiveInt", int) # needed for deeper type hinting magic