Commit 39102bb3 authored by Peter Lünenschloß

further streamlining / found faster back-propagation trick / added support for offset-defined gap limits

parent dd787ea0
1 merge request: !600 Inter limit fix
@@ -144,7 +144,7 @@ class InterpolationMixin:
         method: _SUPPORTED_METHODS,
         order: int = 2,
         limit: int | None = None,
-        downgrade: bool = False,
+        extrapolate: Literal['forward', 'backward', 'both'] = None,
         flag: float = UNFLAGGED,
         **kwargs,
     ) -> "SaQC":

@@ -187,6 +187,7 @@ class InterpolationMixin:
             method,
             order=order,
             gap_limit=limit,
+            extrapolate=extrapolate
         )
         interpolated = self._data[field].isna() & inter_data.notna()
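
The two hunks above replace the old `downgrade` flag with an `extrapolate` argument ('forward', 'backward' or 'both') and forward it to `interpolateNANs`. Judging from the unchanged line `limit_area = "inside" if not extrapolate else "outside"` and the `"limit_direction": extrapolate` keyword further down, the option is translated into pandas' `limit_area`/`limit_direction` interpolation switches. A minimal pandas-only sketch of what those switches do (plain `Series.interpolate`, not saqc's internal wrapper):

    import numpy as np
    import pandas as pd

    s = pd.Series([np.nan, np.nan, 1.0, 2.0, np.nan, 4.0, np.nan, np.nan])

    # limit_area="inside": only NaNs enclosed by valid values are filled (no extrapolation)
    inside = s.interpolate(method="linear", limit_area="inside")

    # limit_area="outside" plus a direction: only the leading/trailing NaNs are filled;
    # with method="linear" pandas extends the edge values rather than extrapolating the slope
    lead = s.interpolate(method="linear", limit_area="outside", limit_direction="backward")
    trail = s.interpolate(method="linear", limit_area="outside", limit_direction="forward")
    both = s.interpolate(method="linear", limit_area="outside", limit_direction="both")
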
@@ -21,7 +21,6 @@ import pandas as pd
 from scipy.signal import butter, filtfilt
 from scipy.stats import iqr, median_abs_deviation
 from sklearn.neighbors import NearestNeighbors
 from saqc.lib.tools import getFreqDelta
@@ -317,30 +316,29 @@ def interpolateNANs(
     :return:
     """
+    # helper variable for checking the numerical value of the gap limit, if it is numeric (to avoid comparison to str)
+    gap_check = np.nan if isinstance(gap_limit, str) else gap_limit
     data = pd.Series(data, copy=True)
     limit_area = "inside" if not extrapolate else "outside"
-    if gap_limit is None:
+    if gap_check is None:
         # if there is actually no limit set to the gaps to-be interpolated, generate a dummy mask for the gaps
         gap_mask = pd.Series(True, index=data.index, name=data.name)
-    elif gap_limit < 2:
-        return data
     else:
-        # if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right side
-        # of each too-large gap with a rolling.sum combo
-        gap_mask = data.isna().rolling(gap_limit, min_periods=0).sum() != gap_limit
-        if gap_limit == 2:
-            # for the common case of gap_limit=2 (default "harmonisation"), we efficiently back propagate the False
-            # value to fill the whole too-large gap by a shift and a conjunction.
-            gap_mask &= gap_mask & gap_mask.shift(-1, fill_value=True)
-        else:
-            # If the gap size is bigger, we use pandas backfill-interpolation to propagate the False values back.
-            # Therefore we replace the True values with np.nan so that they are interpreted as missing periods.
-            gap_mask = (
-                gap_mask.replace(True, np.nan)
-                .fillna(method="bfill", limit=gap_limit - 1)
-                .replace(np.nan, True)
-                .astype(bool)
-            )
+        if gap_check < 2:
+            # breaks execution down the line and is thus caught here, since it basically means "do nothing"
+            return data
+        else:
+            # if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right
+            # side of each too-large gap with a rolling.count combo
+            gap_mask = data.rolling(gap_limit, min_periods=0).count() > 0
+            if gap_limit == 2:
+                # for the common case of gap_limit=2 (default "harmonisation"), we efficiently back propagate the False
+                # value to fill the whole too-large gap by a shift and a conjunction.
+                gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
+            else:
+                # if the gap size is bigger, we use a flip-rolling combo to back propagate the False values
+                gap_mask = ~((~gap_mask[::-1]).rolling(gap_limit, min_periods=0).sum() > 0)[::-1]
     # memorizing the index for later reindexing
     pre_index = data.index
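
In the rewritten masking, `data.rolling(gap_limit, min_periods=0).count() > 0` turns False exactly at the right end of every gap that spans at least `gap_limit` values; since `rolling` also accepts offset strings ("3h", "2D", ...) on a datetime index, this is presumably what the commit message means by offset-defined gap limits. The False then has to be propagated backwards over the whole gap, either via the shift-and-AND fast path for `gap_limit == 2` or via the reverse-roll-reverse ("flip-rolling") trick. A self-contained sketch of that logic with toy data and an integer limit (not taken from the repository):

    import numpy as np
    import pandas as pd

    gap_limit = 3  # gaps of 3 or more consecutive NaNs are left untouched

    data = pd.Series(
        [1.0, np.nan, 3.0, np.nan, np.nan, 6.0, np.nan, np.nan, np.nan, 10.0]
    )

    # False wherever the trailing window of `gap_limit` values holds no valid data,
    # i.e. at the right end of every gap that is at least `gap_limit` long
    gap_mask = data.rolling(gap_limit, min_periods=0).count() > 0

    # back propagate the False over the whole too-large gap:
    # reverse, roll again, reverse back (the "flip-rolling" trick from the diff)
    gap_mask = ~((~gap_mask[::-1]).rolling(gap_limit, min_periods=0).sum() > 0)[::-1]

    print(gap_mask.tolist())
    # [True, True, True, True, True, True, False, False, False, True]
    # -> only the three-NaN gap is excluded from interpolation
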
@@ -361,21 +359,15 @@
     # with the .transform method of the grouper.
     gap_mask = (~gap_mask).cumsum()[data.index]
     chunk_groups = data.groupby(by=gap_mask)
-    if extrapolate:
-        if extrapolate in ['both', 'backward']:
-            lead_idx = gap_mask[gap_mask==gap_mask.min()].index
-            data[lead_idx] = _interpolWrapper(data[lead_idx], order=order, method=method, limit_area=limit_area, limit_direction='backward')
-        if extrapolate in ['both', 'forward']:
-            trail_idx = gap_mask[gap_mask==gap_mask.max()].index
-            data[trail_idx] = _interpolWrapper(data[lead_idx], order=order, method=method, limit_area=limit_area, limit_direction='forward')
-    else:
-        data = chunk_groups.groupby(by=gap_mask).transform(
-            _interpolWrapper,
-            **{
-                "order": order,
-                "method": method,
-            },
-        )
+    data = chunk_groups.transform(
+        _interpolWrapper,
+        **{
+            "order": order,
+            "method": method,
+            "limit_area": limit_area,
+            "limit_direction": extrapolate
+        },
+    )
     # finally reinsert the dropped data gaps
     data = data.reindex(pre_index)
     return data
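
The streamlining here: instead of interpolating the leading and trailing chunks in separate `_interpolWrapper` calls, the whole job is done in one grouped `transform`, with `limit_area` and `limit_direction` handed through to the wrapper. The sketch below mimics that pattern with plain `Series.interpolate` standing in for saqc's internal `_interpolWrapper` (an assumption, since the wrapper is not shown here): too-large gaps are dropped, `(~gap_mask).cumsum()` labels the remaining chunks, each chunk is interpolated on its own, and the dropped gaps come back as NaN via the final reindex.

    import numpy as np
    import pandas as pd

    data = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan])
    # pretend the masking step flagged the 3-NaN stretch (positions 4-6) as too large
    gap_mask = pd.Series([True, True, True, True, False, False, False, True, True])

    pre_index = data.index            # remember the full index
    data = data[gap_mask]             # drop the too-large gap entirely

    # every dropped stretch bumps the cumulative sum, so the values on either
    # side of it land in different groups and are interpolated independently
    chunk_id = (~gap_mask).cumsum()[data.index]

    filled = data.groupby(chunk_id).transform(
        lambda chunk: chunk.interpolate(method="linear", limit_direction="both")
    )
    filled = filled.reindex(pre_index)  # reinsert the dropped gap as NaN

With `limit_direction="both"` the first and last chunk also get their outer NaNs filled, which is what the removed lead_idx/trail_idx special-casing used to do by hand.
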
@@ -229,7 +229,4 @@ def test_rateOfChange(data, expected):
 )
 def test_interpolatNANs(limit, data, expected):
     got = interpolateNANs(pd.Series(data), gap_limit=limit, method="linear")
-    try:
-        assert got.equals(pd.Series(expected, dtype=float))
-    except:
-        print("stop")
+    assert got.equals(pd.Series(expected, dtype=float))