From c9b6fe73c5bebf54e5a7281aa539612c884b55f2 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Mon, 22 Mar 2021 16:38:06 +0100
Subject: [PATCH] fixed shift and resample

---
 saqc/funcs/resampling.py | 157 ++++++++++++++++-----------------------
 1 file changed, 64 insertions(+), 93 deletions(-)

diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py
index 4e9dad616..e304cc54f 100644
--- a/saqc/funcs/resampling.py
+++ b/saqc/funcs/resampling.py
@@ -337,40 +337,16 @@ def mapToOriginal(
 
 @register(masking='none', module="resampling")
 def shift(
-        data: DictOfSeries,
-        field: str,
-        flagger: Flagger,
-        freq: str,
-        method: Literal["fshift", "bshift", "nshift"]="nshift",
-        to_drop: Optional[Union[Any, Sequence[Any]]]=None,
-        empty_intervals_flag: Optional[str]=None,
-        freq_check: Optional[Literal["check", "auto"]]=None,  # TODO: not a user decision
-        **kwargs
-) -> Tuple[DictOfSeries, Flagger]:
-
-    data, flagger = copy(data, field, flagger, field + '_original')
-    data, flagger = _shift(
-        data, field, flagger, freq, method=method, to_drop=to_drop,
-        empty_intervals_flag=empty_intervals_flag, freq_check=freq_check, **kwargs
-    )
-    return data, flagger
-
-
-def _shift(
         data: DictOfSeries,
         field: str,
         flagger: Flagger,
         freq: str,
         method: Literal["fshift", "bshift", "nshift"] = "nshift",
-        freq_check: Optional[Literal["check", "auto"]] = None,
+        freq_check: Optional[Literal["check", "auto"]] = None,  # TODO: not a user decision
         **kwargs
 ) -> Tuple[DictOfSeries, Flagger]:
     """
-    Function to shift data points to regular (equidistant) timestamps.
-    Values and Flags get shifted according to the keyword passed to the `method` parameter.
-
-    Note: all data nans get excluded defaultly from shifting. If `to_drop` is ``None``, - all *BAD* flagged values get
-    excluded as well.
+    Function to shift data and flags to a regular (equidistant) timestamp grid, according to ``method``.
 
     Parameters
     ----------
@@ -412,6 +388,26 @@ def _shift(
         The flagger object, holding flags and additional Informations related to `data`.
         Flags values and shape may have changed relatively to the flagger input.
     """
+    data, flagger = copy(data, field, flagger, field + '_original')
+    return _shift(data, field, flagger, freq, method=method, freq_check=freq_check, **kwargs)
+
+
+def _shift(
+        data: DictOfSeries,
+        field: str,
+        flagger: Flagger,
+        freq: str,
+        method: Literal["fshift", "bshift", "nshift"] = "nshift",
+        freq_check: Optional[Literal["check", "auto"]] = None,
+        **kwargs
+) -> Tuple[DictOfSeries, Flagger]:
+    """
+    Function to shift data points to regular (equidistant) timestamps.
+
+    See Also
+    --------
+    shift : Main caller; see its docstring for the full parameter description.
+    """
     flagged = isflagged(flagger[field], kwargs['to_mask'])
     datcol = data[field]
     datcol[flagged] = np.nan
@@ -436,7 +432,7 @@ def _shift(
     return data, flagger
 
 
-@register(masking='field', module="resampling")
+@register(masking='none', module="resampling")
 def resample(
         data: DictOfSeries,
         field: str,
@@ -449,9 +445,6 @@ def resample(
         max_invalid_consec_f: Optional[int]=None,
         max_invalid_total_f: Optional[int]=None,
         flag_agg_func: Callable[[pd.Series], float]=max,
-        empty_intervals_flag: float = BAD,
-        to_drop: Optional[Union[Any, Sequence[Any]]]=None,
-        all_na_2_empty: bool=False,
         freq_check: Optional[Literal["check", "auto"]]=None,
         **kwargs
 ) -> Tuple[DictOfSeries, Flagger]:
@@ -480,45 +473,48 @@ def resample(
     ----------
     data : dios.DictOfSeries
         A dictionary of pandas.Series, holding all the data.
+
     field : str
         The fieldname of the column, holding the data-to-be-resampled.
+
     flagger : saqc.flagger.Flagger
         A flagger object, holding flags and additional Informations related to `data`.
+
     freq : str
         An Offset String, that will be interpreted as the frequency you want to resample your data with.
+
     agg_func : Callable
         The function you want to use for aggregation.
+
     method: {'fagg', 'bagg', 'nagg'}, default 'bagg'
         Specifies which intervals to be aggregated for a certain timestamp. (preceding, succeeding or
         "surrounding" interval). See description above for more details.
+
     max_invalid_total_d : {None, int}, default None
         Maximum number of invalid (nan) datapoints, allowed per resampling interval. If max_invalid_total_d is
         exceeded, the interval gets resampled to nan. By default (``np.inf``), there is no bound to the number of nan
         values in an interval and only intervals containing ONLY nan values or those, containing no values at all,
         get projected onto nan
+
     max_invalid_consec_d : {None, int}, default None
         Maximum number of consecutive invalid (nan) data points, allowed per resampling interval.
         If max_invalid_consec_d is exceeded, the interval gets resampled to nan. By default (np.inf),
         there is no bound to the number of consecutive nan values in an interval and only intervals
         containing ONLY nan values, or those containing no values at all, get projected onto nan.
+
     max_invalid_total_f : {None, int}, default None
         Same as `max_invalid_total_d`, only applying for the flags. The flag regarded as "invalid" value,
         is the one passed to empty_intervals_flag (default=``BAD``).
         Also this is the flag assigned to invalid/empty intervals.
+
     max_invalid_consec_f : {None, int}, default None
         Same as `max_invalid_total_f`, only applying onto flags. The flag regarded as "invalid" value, is the one passed
         to empty_intervals_flag. Also this is the flag assigned to invalid/empty intervals.
+
     flag_agg_func : Callable, default: max
         The function you want to aggregate the flags with. It should be capable of operating on the flags dtype
         (usually ordered categorical).
-    empty_intervals_flag : float, default BAD
-        A Flag, that you want to assign to invalid intervals. Invalid are those intervals, that contain nan values only,
-        or no values at all. Furthermore the empty_intervals_flag is the flag, serving as "invalid" identifyer when
-        checking for `max_total_invalid_f` and `max_consec_invalid_f patterns`.
-    to_drop : {None, str, List[str]}, default None
-        Flags that refer to values you want to drop before resampling - effectively excluding values that are flagged
-        with a flag in to_drop from the resampling process - this means that they also will not be counted in the
-        the `max_consec`/`max_total evaluation`. `to_drop` = ``None`` results in NO flags being dropped initially.
+
     freq_check : {None, 'check', 'auto'}, default None
 
         * ``None``: do not validate frequency-string passed to `freq`
@@ -535,63 +531,38 @@ def resample(
         The flagger object, holding flags and additional Informations related to `data`.
         Flags values and shape may have changed relatively to the flagger input.
     """
-
-    data = data.copy()
+    flagged = isflagged(flagger[field], kwargs['to_mask'])
     datcol = data[field]
-    flagscol = flagger[field]
-
-    drop_mask = getDropMask(field, to_drop, flagger, [])
-    datcol.drop(datcol[drop_mask].index, inplace=True)
+    datcol[flagged] = np.nan
     freq = evalFreqStr(freq, freq_check, datcol.index)
-    flagscol.drop(flagscol[drop_mask].index, inplace=True)
-
-    # create a dummys
-    if all_na_2_empty and datcol.dropna().empty:
-        # Todo: This needs discussion. See issue #GL170
-        datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
-        flagscol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
-
-        # clear the past
-        flagger.history[field] = flagger.history[field].reindex(datcol.index)
-        flagger[field] = flagscol
-
-    # do the resampling
-    else:
-        datcol = aggregate2Freq(
-            datcol,
-            method,
-            freq,
-            agg_func,
-            fill_value=np.nan,
-            max_invalid_total=max_invalid_total_d,
-            max_invalid_consec=max_invalid_consec_d,
-        )
-
-        flagscol = aggregate2Freq(
-            flagscol,
-            method,
-            freq,
-            flag_agg_func,
-            fill_value=empty_intervals_flag,
-            max_invalid_total=max_invalid_total_f,
-            max_invalid_consec=max_invalid_consec_f,
-        )
-
-        kws = dict(
-            method=method,
-            freq=freq,
-            agg_func=flag_agg_func,
-            fill_value=UNTOUCHED,
-            max_invalid_total=max_invalid_total_f,
-            max_invalid_consec=max_invalid_consec_f,
-        )
-
-        flagger = applyFunctionOnHistory(
-            flagger, field,
-            hist_func=aggregate2Freq, hist_kws=kws,
-            mask_func=aggregate2Freq, mask_kws=kws,
-            last_column=flagscol
-        )
+
+    datcol = aggregate2Freq(
+        datcol,
+        method,
+        freq,
+        agg_func,
+        fill_value=np.nan,
+        max_invalid_total=max_invalid_total_d,
+        max_invalid_consec=max_invalid_consec_d,
+    )
+
+    dummy = pd.Series(UNTOUCHED, index=datcol.index, dtype=float)
+
+    kws = dict(
+        method=method,
+        freq=freq,
+        agg_func=flag_agg_func,
+        fill_value=UNTOUCHED,
+        max_invalid_total=max_invalid_total_f,
+        max_invalid_consec=max_invalid_consec_f,
+    )
+
+    flagger = applyFunctionOnHistory(
+        flagger, field,
+        hist_func=aggregate2Freq, hist_kws=kws,
+        mask_func=aggregate2Freq, mask_kws=kws,
+        last_column=dummy
+    )
 
     data[field] = datcol
     return data, flagger
-- 
GitLab