From eb1a3afbb8add26f7cdcecc5613c570c537bdbfe Mon Sep 17 00:00:00 2001
From: Peter Luenenschloss <peter.luenenschloss@ufz.de>
Date: Mon, 12 Apr 2021 11:38:43 +0200
Subject: [PATCH] Add noise module with flagByVarianceLowPass filter function

---
 saqc/core/modules/__init__.py |  2 ++
 saqc/core/modules/noise.py    | 23 +++++++++++++++++
 saqc/funcs/__init__.py        |  1 +
 saqc/funcs/constants.py       |  2 +-
 saqc/funcs/noise.py           | 48 +++++++++++++++++++++++++++++++++++
 saqc/lib/types.py             |  4 +++
 6 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 saqc/core/modules/noise.py
 create mode 100644 saqc/funcs/noise.py

diff --git a/saqc/core/modules/__init__.py b/saqc/core/modules/__init__.py
index 00fd0bf43..178f57d7c 100644
--- a/saqc/core/modules/__init__.py
+++ b/saqc/core/modules/__init__.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 from saqc.core.modules.breaks import Breaks
+from saqc.core.modules.noise import Noise
 from saqc.core.modules.changepoints import ChangePoints
 from saqc.core.modules.constants import Constants
 from saqc.core.modules.curvefit import Curvefit
@@ -30,6 +31,7 @@ class FuncModules:
         self.flagtools = FlagTools(obj)
         self.generic = Generic(obj)
         self.interpolation = Interpolation(obj)
+        self.noise = Noise(obj)
         self.outliers = Outliers(obj)
         self.pattern = Pattern(obj)
         self.resampling = Resampling(obj)
diff --git a/saqc/core/modules/noise.py b/saqc/core/modules/noise.py
new file mode 100644
index 000000000..527f2562c
--- /dev/null
+++ b/saqc/core/modules/noise.py
@@ -0,0 +1,23 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import annotations
+
+import numpy as np
+
+from saqc.constants import BAD
+from saqc.core.modules.base import ModuleBase
+from saqc.lib.types import FreqString, IntegerWindow, ColumnName
+from saqc.lib.types import ColumnName, FreqString, PositiveInt, PositiveFloat
+
+
+class Noise(ModuleBase):
+    """Expose the noise functions as methods that forward to `self.defer`."""
+
+    def flagByVarianceLowPass(
+            self, field: ColumnName, wnsz: FreqString, thresh: PositiveFloat,
+            sub_wnsz: FreqString = None, sub_thresh: PositiveFloat = None,
+            min_periods: PositiveInt = None, flag: float = BAD,
+    ) -> SaQC:
+        """Pass the function name and the received arguments on to `defer`."""
+        # `locals()` is taken first, so it holds nothing but `self` and the arguments
+        return self.defer("flagByVarianceLowPass", locals())
diff --git a/saqc/funcs/__init__.py b/saqc/funcs/__init__.py
index 407cf5adc..d4fabccfc 100644
--- a/saqc/funcs/__init__.py
+++ b/saqc/funcs/__init__.py
@@ -19,3 +19,4 @@ from saqc.funcs.scores import *
 from saqc.funcs.tools import *
 from saqc.funcs.transformation import *
 from saqc.funcs.flagtools import *
+from saqc.funcs.noise import *
diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py
index 6f8d29828..1d2615dbf 100644
--- a/saqc/funcs/constants.py
+++ b/saqc/funcs/constants.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
 from operator import mod
diff --git a/saqc/funcs/noise.py b/saqc/funcs/noise.py
new file mode 100644
index 000000000..57f4745a4
--- /dev/null
+++ b/saqc/funcs/noise.py
@@ -0,0 +1,48 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+import pandas as pd
+from dios import DictOfSeries
+
+from saqc.constants import *
+from saqc.core import register, Flags
+from saqc.lib.types import ColumnName, FreqString, PositiveInt, PositiveFloat
+
+
+@register(masking='field', module="noise")
+def flagByVarianceLowPass(data: DictOfSeries, field: ColumnName, flags: Flags,
+                          wnsz: FreqString, thresh: PositiveFloat,
+                          sub_wnsz: FreqString = None,
+                          sub_thresh: PositiveFloat = None,
+                          min_periods: PositiveInt = None,
+                          flag: float = BAD, **kwargs):
+    """
+    Flag stretches of `field` whose rolling standard deviation is too high.
+
+    A `wnsz`-sized window ending at t counts as noisy if its standard deviation
+    exceeds `thresh` and the smallest `sub_wnsz`-sized standard deviation found
+    inside it exceeds `sub_thresh`. All values in [t - wnsz, t] are then flagged.
+
+    `sub_wnsz` and `sub_thresh` fall back to `wnsz` and `thresh`; `min_periods`
+    (default 0) only applies to the `wnsz`-sized windows. Returns (data, flags).
+    """
+    datcol = data[field]
+    wnsz, sub_wnsz = pd.Timedelta(wnsz), pd.Timedelta(sub_wnsz or wnsz)
+    if sub_wnsz > wnsz:
+        raise ValueError("sub_wnsz must not be larger than wnsz")
+
+    stat_parent = datcol.rolling(wnsz, min_periods=min_periods or 0).std()
+    stat_sub = datcol.rolling(sub_wnsz).std()
+    # closed='both' keeps every sub-window that lies completely inside the window
+    min_stat = stat_sub.rolling(wnsz - sub_wnsz, closed='both').min()
+    exceeds = (stat_parent > thresh) & (min_stat > (sub_thresh or thresh))
+
+    # Handle each run of consecutive exceedances on its own: grouping on the bare
+    # True/False values would merge all runs and flag the calm data in between.
+    to_set = pd.Series(False, index=exceeds.index)
+    run_ids = (exceeds != exceeds.shift()).cumsum()
+    for _, run in exceeds[exceeds].groupby(run_ids[exceeds].values):
+        to_set.loc[run.index[0] - wnsz:run.index[-1]] = True
+
+    # flag the expanded ranges (`to_set`), not only the window end points
+    flags[to_set[to_set].index, field] = flag
+    return data, flags
\ No newline at end of file
diff --git a/saqc/lib/types.py b/saqc/lib/types.py
index f169a5e87..0b946fdd2 100644
--- a/saqc/lib/types.py
+++ b/saqc/lib/types.py
@@ -11,6 +11,8 @@ __all__ = [
     'IntegerWindow',
     'TimestampColumnName',
     'CurveFitter',
+    'PositiveFloat',
+    'PositiveInt'
 ]
 
 from typing import TypeVar, Union, NewType
@@ -37,6 +39,8 @@ FreqString = NewType("FreqString", Literal["D", "H", "T", "min", "S", "L", "ms",
 ColumnName = NewType("ColumnName", str)
 IntegerWindow = NewType("IntegerWindow", int)
 TimestampColumnName = TypeVar("TimestampColumnName", bound=str)
+PositiveFloat = NewType("PositiveFloat", float)
+PositiveInt = NewType("PositiveInt", int)
 
 
 # needed for deeper typy hinting magic
-- 
GitLab