From d92e09526a41a88b53837e18fc235d2c0378e44e Mon Sep 17 00:00:00 2001
From: Peter Luenenschloss <peter.luenenschloss@ufz.de>
Date: Wed, 20 May 2020 13:39:50 +0200
Subject: [PATCH] Move the core of the shifting methods from several places in
 harm_functions into a single function (shift2Freq) in ts_operators

---
 saqc/funcs/harm_functions.py  | 36 +++--------------------------------
 saqc/lib/ts_operators.py      | 20 +++++++++++++++++++
 test/funcs/test_harm_funcs.py |  5 -----
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/saqc/funcs/harm_functions.py b/saqc/funcs/harm_functions.py
index bc7f2b945..32f2d628a 100644
--- a/saqc/funcs/harm_functions.py
+++ b/saqc/funcs/harm_functions.py
@@ -9,7 +9,7 @@ import dios
 
 from saqc.funcs.register import register
 from saqc.lib.tools import toSequence, getFuncFromInput
-from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq
+from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq, shift2Freq
 
 
 logger = logging.getLogger("SaQC")
@@ -298,23 +298,9 @@ def _interpolateGrid(
     if method in aggregations:
         data = aggregate2Freq(data, method, agg_method, freq)
 
-
     # Shifts
     elif method in shifts:
-        if method == "fshift":
-            direction = "ffill"
-            tolerance = pd.Timedelta(freq)
-
-        elif method == "bshift":
-            direction = "bfill"
-            tolerance = pd.Timedelta(freq)
-        # if method = nshift
-        else:
-            direction = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        ref_ind = _makeGrid(data.index[0], data.index[-1], freq, name=data.index.name)
-        data = data.reindex(ref_ind, method=direction, tolerance=tolerance)
+        data = shift2Freq(data, method, freq)
 
     # Interpolations:
     elif method in interpolations:
@@ -418,23 +404,7 @@ def _reshapeFlags(
 
     if method in shifts:
         # forward/backward projection of every intervals last/first flag - rest will be dropped
-        if method == "fshift":
-            direction = "ffill"
-            tolerance = pd.Timedelta(freq)
-
-        elif method == "bshift":
-            direction = "bfill"
-            tolerance = pd.Timedelta(freq)
-        # varset for nshift
-        else:
-            direction = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        # if you want to keep previous comments
-        # only newly generated missing flags get commented:
-
-        ref_ind = _makeGrid(fdata.index[0], fdata.index[-1], freq, name=fdata.index.name)
-        fdata = fdata.reindex(ref_ind, tolerance=tolerance, method=direction, fill_value=np.nan)
+        fdata = shift2Freq(fdata, method, freq)
 
         flags[field] = fdata
         flagger_new = flagger.initFlags(flags=flags)
diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py
index a4c21d6a4..b2ea1db9e 100644
--- a/saqc/lib/ts_operators.py
+++ b/saqc/lib/ts_operators.py
@@ -293,6 +293,26 @@ def aggregate2Freq(data, method, agg_func, freq, fill_value=np.nan, max_invalid_
     return data
 
 
+def shift2Freq(data, method, freq, fill_value=np.nan):
+    # Reindex `data` onto a regular grid of spacing `freq`; `method` picks the matching rule.
+    if method == "fshift":  # forward shift
+        direction = "ffill"
+        tolerance = pd.Timedelta(freq)  # accept matches up to one full period away
+
+    elif method == "bshift":  # backward shift
+        direction = "bfill"
+        tolerance = pd.Timedelta(freq)  # accept matches up to one full period away
+
+    else:  # every other `method` value is handled as a nearest-neighbour shift
+        direction = "nearest"
+        tolerance = pd.Timedelta(freq) / 2  # accept matches up to half a period away
+    # Build the floor/ceil-aligned target grid; points with no match in tolerance get `fill_value`.
+    target_ind = pd.date_range(start=data.index[0].floor(freq), end=data.index[-1].ceil(freq),
+                               freq=freq,
+                               name=data.index.name)  # keep the name of the original index
+    return data.reindex(target_ind, method=direction, tolerance=tolerance, fill_value=fill_value)
+
+
 def linearInterpolation(data, inter_limit=2):
     return interpolateNANs(data, 'time', inter_limit=inter_limit)
 
diff --git a/test/funcs/test_harm_funcs.py b/test/funcs/test_harm_funcs.py
index 3d30cd507..c91559c86 100644
--- a/test/funcs/test_harm_funcs.py
+++ b/test/funcs/test_harm_funcs.py
@@ -202,8 +202,6 @@ def test_harmSingleVarInterpolations(data, flagger, interpolation, freq):
     data, flagger = harm_harmonize(
         data, "data", flagger, freq, interpolation, "fshift", reshape_shift_comment=False, inter_agg="sum",
     )
-    #import pdb
-    #pdb.set_trace()
     if interpolation == "fshift":
         if freq == "15min":
             exp = pd.Series([np.nan, -37.5, -25.0, 0.0, 37.5, 50.0], index=test_index)
@@ -284,9 +282,6 @@ def test_multivariatHarmonization(multi_data, flagger, shift_comment):
     for c in multi_data.columns:
         harm_start = multi_data[c].index[0].floor(freq=freq)
         harm_end = multi_data[c].index[-1].ceil(freq=freq)
-        test_index = pd.date_range(start=harm_start, end=harm_end, freq=freq)
-
-        assert multi_data[c].index.equals(test_index)
         assert pd.Timedelta(pd.infer_freq(multi_data[c].index)) == pd.Timedelta(freq)
 
     multi_data, flagger = harm_deharmonize(multi_data, "data3", flagger, co_flagging=False)
-- 
GitLab