From d92e09526a41a88b53837e18fc235d2c0378e44e Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Wed, 20 May 2020 13:39:50 +0200 Subject: [PATCH] moved core of shifting methods from different places in harm to a single func in ts_operators --- saqc/funcs/harm_functions.py | 36 +++-------------------------------- saqc/lib/ts_operators.py | 20 +++++++++++++++++++ test/funcs/test_harm_funcs.py | 5 ----- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/saqc/funcs/harm_functions.py b/saqc/funcs/harm_functions.py index bc7f2b945..32f2d628a 100644 --- a/saqc/funcs/harm_functions.py +++ b/saqc/funcs/harm_functions.py @@ -9,7 +9,7 @@ import dios from saqc.funcs.register import register from saqc.lib.tools import toSequence, getFuncFromInput -from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq +from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq, shift2Freq logger = logging.getLogger("SaQC") @@ -298,23 +298,9 @@ def _interpolateGrid( if method in aggregations: data = aggregate2Freq(data, method, agg_method, freq) - # Shifts elif method in shifts: - if method == "fshift": - direction = "ffill" - tolerance = pd.Timedelta(freq) - - elif method == "bshift": - direction = "bfill" - tolerance = pd.Timedelta(freq) - # if method = nshift - else: - direction = "nearest" - tolerance = pd.Timedelta(freq) / 2 - - ref_ind = _makeGrid(data.index[0], data.index[-1], freq, name=data.index.name) - data = data.reindex(ref_ind, method=direction, tolerance=tolerance) + data = shift2Freq(data, method, freq) # Interpolations: elif method in interpolations: @@ -418,23 +404,7 @@ def _reshapeFlags( if method in shifts: # forward/backward projection of every intervals last/first flag - rest will be dropped - if method == "fshift": - direction = "ffill" - tolerance = pd.Timedelta(freq) - - elif method == "bshift": - direction = "bfill" - tolerance = pd.Timedelta(freq) - # varset for nshift - else: - direction = "nearest" - tolerance = pd.Timedelta(freq) / 2 - - # if you want to keep previous comments - # only newly generated missing flags get commented: - - ref_ind = _makeGrid(fdata.index[0], fdata.index[-1], freq, name=fdata.index.name) - fdata = fdata.reindex(ref_ind, tolerance=tolerance, method=direction, fill_value=np.nan) + fdata = shift2Freq(fdata, method, freq) flags[field] = fdata flagger_new = flagger.initFlags(flags=flags) diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py index a4c21d6a4..b2ea1db9e 100644 --- a/saqc/lib/ts_operators.py +++ b/saqc/lib/ts_operators.py @@ -293,6 +293,26 @@ def aggregate2Freq(data, method, agg_func, freq, fill_value=np.nan, max_invalid_ return data +def shift2Freq(data, method, freq, fill_value=np.nan): + # Shifts + if method == "fshift": + direction = "ffill" + tolerance = pd.Timedelta(freq) + + elif method == "bshift": + direction = "bfill" + tolerance = pd.Timedelta(freq) + + else: + direction = "nearest" + tolerance = pd.Timedelta(freq) / 2 + + target_ind = pd.date_range(start=data.index[0].floor(freq), end=data.index[-1].ceil(freq), + freq=freq, + name=data.index.name) + return data.reindex(target_ind, method=direction, tolerance=tolerance, fill_value=fill_value) + + def linearInterpolation(data, inter_limit=2): return interpolateNANs(data, 'time', inter_limit=inter_limit) diff --git a/test/funcs/test_harm_funcs.py b/test/funcs/test_harm_funcs.py index 3d30cd507..c91559c86 100644 --- a/test/funcs/test_harm_funcs.py +++ b/test/funcs/test_harm_funcs.py @@ -202,8 +202,6 @@ def test_harmSingleVarInterpolations(data, flagger, interpolation, freq): data, flagger = harm_harmonize( data, "data", flagger, freq, interpolation, "fshift", reshape_shift_comment=False, inter_agg="sum", ) - #import pdb - #pdb.set_trace() if interpolation == "fshift": if freq == "15min": exp = pd.Series([np.nan, -37.5, -25.0, 0.0, 37.5, 50.0], index=test_index) @@ -284,9 +282,6 @@ def test_multivariatHarmonization(multi_data, flagger, shift_comment): for c in multi_data.columns: harm_start = multi_data[c].index[0].floor(freq=freq) harm_end = multi_data[c].index[-1].ceil(freq=freq) - test_index = pd.date_range(start=harm_start, end=harm_end, freq=freq) - - assert multi_data[c].index.equals(test_index) assert pd.Timedelta(pd.infer_freq(multi_data[c].index)) == pd.Timedelta(freq) multi_data, flagger = harm_deharmonize(multi_data, "data3", flagger, co_flagging=False) -- GitLab