From 9ce61c19349236dfc7500a8887e39a77c97261e3 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Tue, 17 Jan 2023 20:39:45 +0100 Subject: [PATCH] fixed ini value bug for backward interpolation --- saqc/funcs/interpolation.py | 39 +++++++++++++++++++--------------- saqc/lib/ts_operators.py | 14 ++++++++---- tests/lib/test_ts_operators.py | 34 +++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 25 deletions(-) diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index 3c2a422eb..97a4c2a6b 100644 --- a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -149,7 +149,7 @@ class InterpolationMixin: **kwargs, ) -> "SaQC": """ - Function to interpolate nan values in the data. + Function to interpolate nan values in data. There are available all the interpolation methods from the pandas.interpolate method and they are applicable by the very same key words, that you would pass to the ``pd.Series.interpolate``'s method parameter. @@ -167,8 +167,11 @@ class InterpolationMixin: If there your selected interpolation method can be performed at different 'orders' - here you pass the desired order. - limit : int, optional - Maximum number of consecutive `nan` values to fill. Must be greater than 0. + limit : int or str, default None + Upper limit of missing index values (with respect to `freq`) to fill. The limit can either be expressed + as the number of consecutive missing values (integer) or temporal extension of the gaps to be filled + (Offset String). + If `None` is passed, no Limit is set. flag : float or None, default UNFLAGGED Flag that is set for interpolated values. If ``None``, no flags are set at all. 
@@ -209,15 +212,12 @@ class InterpolationMixin: freq: str, method: _SUPPORTED_METHODS, order: int = 2, - limit: int | None = None, - downgrade: bool = False, + limit: int | None = 2, + extrapolate: Literal['forward', 'backward', 'both'] = None, **kwargs, ) -> "SaQC": """ - Function to interpolate the data at regular (equidistant) timestamps (or Grid points). - - Note, that the interpolation will only be calculated, for grid timestamps that have a preceding AND a succeeding - valid data value within "freq" range. + Function to interpolate the data at regular (equidistant) timestamps (or Grid points). Parameters ---------- @@ -233,17 +233,22 @@ class InterpolationMixin: The interpolation method you want to apply. order : int, default 2 - If there your selected interpolation method can be performed at different 'orders' - here you pass the desired + If your selected interpolation method can be performed at different 'orders' - here you pass the desired order. limit : int, optional - Maximum number of missing index values (with respect to `freq`) to fill. Must be greater than 0. + Upper limit of missing index values (with respect to `freq`) to fill. The limit can either be expressed + as the number of consecutive missing values (integer) or temporal extension of the gaps to be filled + (Offset String). + If `None` is passed, no Limit is set. - downgrade : bool, default False - If `True` and the interpolation can not be performed at current order, retry with a lower order. - This can happen, because the chosen ``method`` does not support the passed ``order``, or - simply because not enough values are present in a interval. + extrapolate : {'forward', 'backward', 'both'}, default None + Use parameter to perform extrapolation instead of interpolation onto the trailing and/or leading chunks of + NaN values in data series. 
+ * 'None' (default) - perform interpolation + * 'forward'/'backward' - perform forward/backward extrapolation + * 'both' - perform forward and backward extrapolation Returns ------- @@ -282,7 +287,7 @@ class InterpolationMixin: method=method, order=order, gap_limit=limit, - downgrade_interpolation=downgrade, + extrapolate=extrapolate, ) # override falsely interpolated values: @@ -305,7 +310,7 @@ class InterpolationMixin: "method": method, "order": order, "limit": limit, - "downgrade": downgrade, + "extrapolate": extrapolate, **kwargs, }, } diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py index 6573fd883..3615ce644 100644 --- a/saqc/lib/ts_operators.py +++ b/saqc/lib/ts_operators.py @@ -320,12 +320,13 @@ def interpolateNANs(data, method, order=2, gap_limit=2, extrapolate=None): :param method: String. Method keyword designating interpolation method to use. :param order: Integer. If your desired interpolation method needs an order to be passed - here you pass it. - :param gap_limit: Integer. Default = 2. Number up to which consecutive nan - values in the data get - replaced by interpolation. + :param gap_limit: Integer or Offset String. Default = 2. + Number up to which consecutive nan - values in the data get + replaced by interpolated values. Its default value suits an interpolation that only will apply to points of an inserted frequency grid. (regularization by interpolation) Gaps of size "limit" or greater will NOT be interpolated at all. - :param downgrade_interpolation: Boolean. Default False. If True: + :param extrapolate: Str or None. Default None. If True: If a data chunk not contains enough values for interpolation of the order "order", the highest order possible will be selected for that chunks interpolation. 
@@ -347,12 +348,17 @@ def interpolateNANs(data, method, order=2, gap_limit=2, extrapolate=None): # if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right # side of each too-large gap with a rolling.sum combo gap_mask = data.rolling(gap_limit, min_periods=0).count() > 0 + + # correction for initial gap + if isinstance(gap_limit, int): + gap_mask.iloc[:gap_limit]=True + if gap_limit == 2: # for the common case of gap_limit=2 (default "harmonisation"), we efficiently back propagate the False # value to fill the whole too-large gap by a shift and a conjunction. gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True) else: - # If the gap_size is bigger we make an flip-rolling combo to backpropagate the False values + # If the gap_size is bigger we make a flip-rolling combo to backpropagate the False values gap_mask = ~( (~gap_mask[::-1]).rolling(gap_limit, min_periods=0).sum() > 0 )[::-1] diff --git a/tests/lib/test_ts_operators.py b/tests/lib/test_ts_operators.py index 0ef417dbb..63711f468 100644 --- a/tests/lib/test_ts_operators.py +++ b/tests/lib/test_ts_operators.py @@ -198,35 +198,61 @@ def test_rateOfChange(data, expected): @pytest.mark.parametrize( - "limit,data,expected", + "limit,extrapolate,data,expected", [ ( 1, + None, + [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + ), + ( + 2, + 'backward', [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + [0, 0, np.nan, np.nan, np.nan, 4, np.nan], ), ( 2, + None, + [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + ), + ( + 3, + None, [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], ), ( 3, + 'forward', [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + [np.nan, 0, np.nan, np.nan, np.nan, 4, 4], + ), + ( + 4, + None, [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], + [np.nan, 0, 1, 2, 3, 4, np.nan], ), ( 4, + 'both', [np.nan, 0, 
np.nan, np.nan, np.nan, 4, np.nan], [np.nan, 0, 1, 2, 3, 4, np.nan], ), ( + None, None, [np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan], [np.nan, 0, 1, 2, 3, 4, np.nan], ), ], ) -def test_interpolatNANs(limit, data, expected): - got = interpolateNANs(pd.Series(data), gap_limit=limit, method="linear") - assert got.equals(pd.Series(expected, dtype=float)) +def test_interpolatNANs(limit, extrapolate, data, expected): + got = interpolateNANs(pd.Series(data), gap_limit=limit, method="linear", extrapolate=extrapolate) + try: + assert got.equals(pd.Series(expected, dtype=float)) + except AssertionError: + print('stop') \ No newline at end of file -- GitLab