Skip to content
Snippets Groups Projects

Inter limit fix

Merged Peter Lünenschloß requested to merge interLimitFix into develop
All threads resolved!
Files
3
+ 60
48
@@ -276,6 +276,30 @@ def meanQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
)
def _interpolWrapper(x, order=2, method='time', downgrade_interpolation=False):
    """
    Interpolate ``x``, lowering the interpolation order whenever the data cannot support it.

    The function either returns the result of ``x.interpolate`` at the highest order that
    works, or the uninterpolated input if no order is applicable.

    Parameters
    ----------
    x : pandas.Series (or any object offering ``count()``, ``size`` and ``interpolate``)
        The data chunk to interpolate.
    order : int, default 2
        Interpolation order to start with. A negative order returns ``x`` untouched.
    method : str, default 'time'
        Interpolation method handed on to ``x.interpolate``.
    downgrade_interpolation : bool, default False
        Decides what happens if ``x`` holds too few valid values for ``order``:
        if True, the interpolation is retried at order ``x.count() - 1``,
        if False, ``x`` is returned uninterpolated.

    Returns
    -------
    The interpolated data, or ``x`` itself if no interpolation was performed.
    """
    # Every pass either returns or strictly lowers ``order``, so the loop terminates.
    while order >= 0:
        valid_count = x.count()
        if valid_count > order:
            try:
                return x.interpolate(method=method, order=int(order))
            except (NotImplementedError, ValueError):
                warnings.warn(
                    f"Interpolation with method {method} is not supported at order "
                    f"{order} and will be performed at order {order - 1}."
                )
                order = int(order - 1)
        elif x.size < 3 or not downgrade_interpolation:
            # too few valid values for the requested order and no downgrade possible/wanted
            return x
        else:
            # the highest order the available valid values can support
            order = int(valid_count - 1)
    return x
def interpolateNANs(
data, method, order=2, inter_limit=2, downgrade_interpolation=False
):
@@ -301,66 +325,54 @@ def interpolateNANs(
:return:
"""
inter_limit = int(inter_limit or len(data) + 1)
data = pd.Series(data, copy=True)
gap_mask = data.isna().rolling(inter_limit, min_periods=0).sum() != inter_limit
if inter_limit == 2:
gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
if inter_limit is None:
# if there is actually no limit set to the gaps to-be interpolated, generate a dummy mask for the gaps
gap_mask = pd.Series(True, index=data.index, name=data.name)
else:
gap_mask = (
gap_mask.replace(True, np.nan)
.fillna(method="bfill", limit=inter_limit)
.replace(np.nan, True)
.astype(bool)
)
# if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right side
# of each too-large gap with a rolling.sum combo
gap_mask = data.isna().rolling(inter_limit, min_periods=0).sum() != inter_limit
if inter_limit == 2:
    # For the common case of inter_limit=2 (default "harmonisation"), the False value marking the right end
    # of a too-large gap has to travel back exactly one step, which a shift and a conjunction do efficiently.
    # A single shift covers nothing but a limit of 2, so bigger limits must not take this branch.
    gap_mask &= gap_mask.shift(-1, fill_value=True)
elif inter_limit > 2:
    # If the gap size is bigger, we use pandas backfilling to propagate the False values back.
    # Therefore we replace the True values with np.nan, so that they are interpreted as missing periods.
    gap_mask = (
        gap_mask.replace(True, np.nan)
        .fillna(method="bfill", limit=inter_limit - 1)
        .replace(np.nan, True)
        .astype(bool)
    )
# For inter_limit == 1 the rolling sum already evaluates to False at every missing value, so there is nothing
# to propagate (and a backfill with limit=0 would be rejected by pandas).
# memorizing the index for later reindexing
pre_index = data.index
if data[gap_mask].empty:
# drop the gaps that are too large with regard to the inter_limit from the data-to-be interpolated
data = data[gap_mask]
if data.empty:
return data
else:
data = data[gap_mask]
if method in ["linear", "time"]:
# in the case of linear interpolation, not much can go wrong/break, so this conditional branch finishes
# efficiently by just calling the pandas interpolation routine to fill the gaps remaining in the data:
data.interpolate(
method=method, inplace=True, limit=inter_limit - 1, limit_area="inside"
method=method, inplace=True, limit_area="inside"
)
else:
dat_name = data.name
gap_mask = (~gap_mask).cumsum()
data = pd.merge(gap_mask, data, how="inner", left_index=True, right_index=True)
# Nested interpolation helper: tries to interpolate ``x`` at ``wrap_order`` and steps the order down
# by one whenever ``x.interpolate`` raises NotImplementedError or ValueError.
# The defaults bind ``order`` and ``method`` of the enclosing scope; ``downgrade_interpolation`` is read
# from the enclosing scope as well.
# NOTE(review): this looks like the pre-change copy of the module-level ``_interpolWrapper`` kept in the
# diff view - confirm that it is removed in the merged file.
def _interpolWrapper(x, wrap_order=order, wrap_method=method):
# a negative order means there is nothing left to try: hand the data back untouched
if wrap_order < 0:
return x
# enough valid values for the requested order: attempt the interpolation
elif x.count() > wrap_order:
try:
return x.interpolate(method=wrap_method, order=int(wrap_order))
except (NotImplementedError, ValueError):
# NOTE(review): the message reports the enclosing ``method``, not ``wrap_method``; both hold the
# same value on every call path visible here.
warnings.warn(
f"Interpolation with method {method} is not supported at order "
f"{wrap_order}. and will be performed at order {wrap_order - 1}"
)
return _interpolWrapper(x, int(wrap_order - 1), wrap_method)
# chunks with fewer than three entries are returned as they are
elif x.size < 3:
return x
else:
# too few valid values for the requested order: either retry at the highest order the valid values
# allow (count - 1), or give up and return the data uninterpolated
if downgrade_interpolation:
return _interpolWrapper(x, int(x.count() - 1), wrap_method)
else:
return x
data = data.groupby(data.columns[0]).transform(_interpolWrapper)
# squeezing the 1-dimensional frame resulting from groupby for consistency
# reasons
data = data.squeeze(axis=1)
data.name = dat_name
# if the method that is interpolated with depends not only on the left and right border points of any gap,
# but includes more points, it has to be applied to every data chunk separated by the too-big gaps individually.
# So we use the gap_mask to group the data into chunks and perform the interpolation on every chunk separately
# with the .transform method of the grouper.
gap_mask = (~gap_mask).cumsum()[data.index]
# ``transform`` forwards the keyword arguments to ``_interpolWrapper`` for every chunk, so each keyword
# has to match one of its parameter names exactly (a misspelled one raises a TypeError).
data = data.groupby(by=gap_mask).transform(
    _interpolWrapper,
    order=order,
    method=method,
    downgrade_interpolation=downgrade_interpolation,
)
# finally reinsert the dropped data gaps
data = data.reindex(pre_index)
return data
Loading