From 627875c49320669cf2c8ebb394421df8bf0e77e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20L=C3=BCnenschlo=C3=9F?= <peter.luenenschloss@ufz.de>
Date: Tue, 16 Feb 2021 08:20:20 +0100
Subject: [PATCH] Remove deprecated retrieveTrustworthyOriginal helper

---
 saqc/funcs/constants.py |  7 ++++--
 saqc/lib/tools.py       | 51 -----------------------------------------
 2 files changed, 5 insertions(+), 53 deletions(-)

diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py
index 03f4a6e2e..75b1eaaa1 100644
--- a/saqc/funcs/constants.py
+++ b/saqc/funcs/constants.py
@@ -11,7 +11,7 @@ from dios import DictOfSeries
 from saqc.core.register import register
 from saqc.flagger.baseflagger import BaseFlagger
 from saqc.lib.ts_operators import varQC
-from saqc.lib.tools import retrieveTrustworthyOriginal, customRoller
+from saqc.lib.tools import customRoller, getFreqDelta
 
 
 @register(masking='field')
@@ -112,8 +112,11 @@ def flagByVariance(
         Flags values may have changed, relatively to the flagger input.
     """
 
-    dataseries, data_rate = retrieveTrustworthyOriginal(data, field, flagger)
+    dataseries = data[field]
+    data_rate = getFreqDelta(dataseries.index)
 
+    if not data_rate:
+        raise IndexError('Timeseries irregularly sampled!')
     if max_missing is None:
         max_missing = np.inf
     if max_consec_missing is None:
diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py
index 769a322db..f72877940 100644
--- a/saqc/lib/tools.py
+++ b/saqc/lib/tools.py
@@ -116,57 +116,6 @@ def inferFrequency(data: pd.Series) -> pd.DateOffset:
     return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index))
 
 
-def retrieveTrustworthyOriginal(
-    data: dios.DictOfSeries, field: str, flagger=None, level: Any = None
-) -> dios.DictOfSeries:
-    """Columns of data passed to the saqc runner may not be sampled to its original sampling rate - thus
-    differenciating between missng value - nans und fillvalue nans is impossible.
-
-    This function:
-    (1) if flagger is None:
-        (a) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the
-            estimated samplng rate.
-
-    (2) if "flagger" is not None but "level" is None:
-        (a) all values are dropped, that are flagged worse then flagger.GOOD. (so unflagged values wont be dropped)
-        (b) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the
-            estimated samplng rate.
-    (3) if "flagger" is not None and "level" is not None:
-        (a) all values are dropped, that are flagged worse then level. (so unflagged values wont be dropped)
-        (b) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the
-            estimated samplng rate.
-
-    Note, that the passed dataseries should be harmonized to an equidistant
-        frequencie grid (maybe including blow up entries).
-
-    :param data:        DataFrame. The Data frame holding the data containing 'field'.
-    :param field:       String. Fieldname of the column in data, that you want to sample to original sampling rate.
-                        It has to have a harmonic
-    :param flagger:     None or a flagger object.
-    :param level:       Lower bound of flags that are excepted for data. Must be a flag the flagger can handle.
-
-    """
-    dataseries = data[field]
-
-    if flagger is not None:
-        mask = flagger.isFlagged(field, flag=level or flagger.GOOD, comparator="<=")
-        # drop all flags that are suspicious or worse
-        dataseries = dataseries[mask]
-
-    # drop the nan values that may result from any preceeding upsampling of the measurements:
-    dataseries = dataseries.dropna()
-
-    if dataseries.empty:
-        return dataseries, np.nan
-
-    # estimate original data sampling frequencie
-    # (the original series sampling rate may not match data-input sample rate):
-    seconds_rate = dataseries.index.to_series().diff().min().seconds
-    data_rate = pd.tseries.frequencies.to_offset(str(seconds_rate) + "s")
-
-    return dataseries.asfreq(data_rate), data_rate
-
-
 def offset2seconds(offset):
     """Function returns total seconds upon "offset like input
 
-- 
GitLab