From 01aa3f282cbb379dbcbd4773efd1c835533b7642 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Wed, 17 Mar 2021 17:08:15 +0100 Subject: [PATCH] fixed to_mask usage --- saqc/funcs/interpolation.py | 28 +++++++++++++--------------- saqc/funcs/resampling.py | 22 ++++------------------ 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index da8a10d4d..dd5036d9c 100644 --- a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -101,7 +101,6 @@ def interpolateInvalid( flag: float = UNFLAGGED, **kwargs ) -> Tuple[DictOfSeries, Flagger]: - """ Function to interpolate nan values in the data. @@ -185,15 +184,15 @@ def interpolateIndex( field: str, flagger: Flagger, freq: str, - method: Literal["linear", "time", "nearest", "zero", "slinear", "quadratic", "cubic", "spline", "barycentric", - "polynomial", "krogh", "piecewise_polynomial", "spline", "pchip", "akima"], - inter_order: int=2, - downgrade_interpolation: bool=False, - inter_limit: int=2, - to_mask: Optional[Union[Any, Sequence[Any]]]=BAD, + method: Literal[ + "linear", "time", "nearest", "zero", "slinear", "quadratic", "cubic", "spline", "barycentric", + "polynomial", "krogh", "piecewise_polynomial", "spline", "pchip", "akima" + ], + inter_order: int = 2, + downgrade_interpolation: bool = False, + inter_limit: int = 2, **kwargs ) -> Tuple[DictOfSeries, Flagger]: - """ Function to interpolate the data at regular (equidistant) timestamps (or Grid points). @@ -244,19 +243,18 @@ def interpolateIndex( The flagger object, holding flags and additional Informations related to `data`. Flags values and shape may have changed relatively to the flagger input. """ + if data[field].empty: + return data, flagger - if to_mask is None: - to_mask = BAD - - datcol = data[field] + datcol = data[field].copy() flagscol = flagger[field] - if datcol.empty: - return data, flagger start, end = datcol.index[0].floor(freq), datcol.index[-1].ceil(freq) grid_index = pd.date_range(start=start, end=end, freq=freq, name=datcol.index.name) - datcol = datcol.copy() + # always injected by register + to_mask = kwargs['to_mask'] + datcol.drop(flagscol[flagscol >= to_mask].index, inplace=True) datcol.dropna(inplace=True) dat_index = datcol.index diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py index 9c45aa576..3e24cd505 100644 --- a/saqc/funcs/resampling.py +++ b/saqc/funcs/resampling.py @@ -23,7 +23,6 @@ from saqc.lib.rolling import customRoller logger = logging.getLogger("SaQC") - METHOD2ARGS = { "inverse_fshift": ("backward", pd.Timedelta), "inverse_bshift": ("forward", pd.Timedelta), @@ -565,9 +564,7 @@ def resample( # create a dummys if all_na_2_empty and datcol.dropna().empty: - # Todo: This needs discussion. It makes possible, that different harmonized variables, - # resulting from the harmonization of the same logger, have differing timestamps! - # (Same holds for starting/ending nan-chunk truncation) + # Todo: This needs discussion. See issue #GL170 datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field) flagscol = pd.Series([], index=pd.DatetimeIndex([]), name=field) @@ -668,7 +665,6 @@ def _inverseShift(target_flagscol, source_col=None, freq=None, method=None, drop return target_flagscol - @register(masking='none', module="resampling") def reindexFlags( data: DictOfSeries, @@ -676,11 +672,9 @@ def reindexFlags( flagger: Flagger, method: Literal["inverse_fagg", "inverse_bagg", "inverse_nagg", "inverse_fshift", "inverse_bshift", "inverse_nshift"], source: str, - freq: Optional[str]=None, - to_mask: Optional[Union[Any, Sequence[Any]]]=BAD, + freq: Optional[str] = None, **kwargs ) -> Tuple[DictOfSeries, Flagger]: - """ The Function projects flags of "source" onto flags of "field". Wherever the "field" flags are "better" then the source flags projected on them, they get overridden with this associated source flag value. @@ -728,9 +722,6 @@ def reindexFlags( freq : {None, str},default None The freq determines the projection range for the projection method. See above description for more details. Defaultly (None), the sampling frequency of source is used. - to_mask : {None, str, List[str]}, default None - Flags referring to values that are to drop before flags projection. Relevant only when projecting with an - inverted shift method. Defaultly BAD is listed. Returns ------- @@ -740,17 +731,13 @@ def reindexFlags( The flagger object, holding flags and additional Informations related to `data`. Flags values and shape may have changed relatively to the flagger input. """ - - if to_mask is None: - to_mask = BAD - flagscol = flagger[source] if flagscol.empty: return data, flagger if freq is None: freq = getFreqDelta(flagscol.index) - if freq is None and not method=='match': + if freq is None and not method == 'match': raise ValueError('To project irregularly sampled data, either use method="match", or pass custom ' 'projection range to freq parameter') @@ -762,15 +749,14 @@ def reindexFlags( merge_func = _inverseInterpolation merge_dict = dict(freq=freq, chunk_bounds=ignore) - if method[-3:] == "agg" or method == "match": projection_method = METHOD2ARGS[method][0] tolerance = METHOD2ARGS[method][1](freq) merge_func = _inverseAggregation merge_dict = dict(freq=tolerance, method=projection_method) - if method[-5:] == "shift": + to_mask = kwargs['to_mask'] drop_mask = (target_datcol.isna() | target_flagscol >= to_mask) projection_method = METHOD2ARGS[method][0] tolerance = METHOD2ARGS[method][1](freq) -- GitLab