From b668b0cf49abedba3bc44491577e642b20c5382d Mon Sep 17 00:00:00 2001
From: Peter Luenenschloss <peter.luenenschloss@ufz.de>
Date: Thu, 20 Aug 2020 08:51:30 +0200
Subject: [PATCH] added inclusive_selection parameter to modellin_mask to
 provide backwards compatibillity

---
 saqc/funcs/data_modelling.py | 44 +++++++++++++++++++++++++-----------
 saqc/funcs/functions.py      |  3 ++-
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/saqc/funcs/data_modelling.py b/saqc/funcs/data_modelling.py
index abc59dae2..329f3c7aa 100644
--- a/saqc/funcs/data_modelling.py
+++ b/saqc/funcs/data_modelling.py
@@ -250,7 +250,8 @@ def modelling_rollingMean(data, field, flagger, winsz, eval_flags=True, min_peri
     return data, flagger
 
 
-def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None, season_end=None):
+def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None, season_end=None,
+                   inclusive_selection="masked"):
     """
 
     Parameters
@@ -261,7 +262,7 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
         The fieldname of the column, holding the data-to-be-masked.
     flagger : saqc.flagger
         A flagger object, holding flags and additional Informations related to `data`.
-    mode : {"seasonal", "mask_var"}
+    mode : {"seasonal", "mask_var"}select
         The masking mode.
         - "seasonal": parameters "season_start", "season_end" are evaluated to generate a seasonal (periodical) mask
         - "mask_var": data[mask_var] is expected to be a boolean valued timeseries and is used as mask.
@@ -271,15 +272,22 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
         Neither the series` length nor its labels have to match data[field]`s index and length. An inner join of the
         indices will be calculated and values get masked where the values of the inner join are "True".
     season_start : {None, str}, default None
-        Only effective if mode == "mask_var"
+        Only effective if mode == "seasonal"
         String denoting starting point of every period. Formally, it has to be a truncated instance of "mm-ddTHH:MM:SS".
         Has to be of same length as `season_end` parameter.
         See examples section below for some examples.
     season_end : {None, str}, default None
-        Only effective if mode == "mask_var"
+        Only effective if mode == "seasonal"
         String denoting starting point of every period. Formally, it has to be a truncated instance of "mm-ddTHH:MM:SS".
         Has to be of same length as `season_end` parameter.
         See examples section below for some examples.
+    inclusive_selection : {"mask","season"}, default "mask"
+        Only effective if mode == "seasonal"
+        - "mask": the `season_start` and `season_end` keywords inclusivly frame the mask (INCLUDING INTERVAL BOUNDS)
+        - "season": the `season_start` and `season_end` keywords inclusivly frame the season
+        (INCLUDING INTERVAL BOUNDS)
+        (Parameter mainly introduced to provide backwards compatibility. But, as a side effect, provides more control
+        over what to do with samples at the exact turning points of date-defined masks and season.)
 
     Returns
     -------
@@ -297,7 +305,7 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
     They have to be strings of the forms: "mm-ddTHH:MM:SS", "ddTHH:MM:SS" , "HH:MM:SS", "MM:SS" or "SS"
     (mm=month, dd=day, HH=hour, MM=minute, SS=second)
     Single digit specifications have to be given with leading zeros.
-    `season_start` and `season_end` strings have to be of same length (refer to the same periodicity)
+    `season_start` and `seas   on_end` strings have to be of same length (refer to the same periodicity)
     The highest date unit gives the period.
     For example:
 
@@ -314,7 +322,7 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
     >>> season_start = "01-01T00:00:00"
     >>> season_end = "01-03T00:00:00"
 
-    Mask january and february of every year. masking is inclusive always, so in this case the mask will
+    Mask january and february of evcomprosed in theery year. masking is inclusive always, so in this case the mask will
     include 00:00:00 at the first of march. To exclude this one, pass:
 
     >>> season_start = "01-01T00:00:00"
@@ -325,11 +333,21 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
 
     >>> season_start = "22:00:00"
     >>> season_end = "06:00:00"
+comprosed in the
+    When inclusive_selection="season", all above examples work the same way, only that you now
+    determine wich values NOT TO mask (=wich values are to constitute the "seasons").
 
     """
     data = data.copy()
     datcol = data[field]
-    mask = pd.Series(False, index=datcol.index)
+    if inclusive_selection == "mask":
+        base_bool = False
+    elif inclusive_selection == "season":
+        base_bool = True
+    else:
+        raise ValueError("invalid value {} was passed. Please select from 'mask' and 'season'."
+                         .format(inclusive_selection))
+    mask = pd.Series(base_bool, index=datcol.index)
     if mode == 'seasonal':
         if len(season_start) == 2:
             def _composeStamp(index, stamp):
@@ -353,17 +371,17 @@ def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None,
             def _composeStamp(index, stamp):
                 return '{}-'.format(index.year[0]) + stamp
         else:
-            raise ValueError("Whats this?: {}".format(season_start))
+            raise ValueError("What´s this?: {}".format(season_start))
 
         if pd.Timestamp(_composeStamp(datcol.index, season_start)) <= pd.Timestamp(_composeStamp(datcol.index,
                                                                                                  season_end)):
-            def _selector(x, start=season_start, end=season_end):
-                x[_composeStamp(x.index, start):_composeStamp(x.index, end)] = True
+            def _selector(x, start=season_start, end=season_end, base_bool=base_bool):
+                x[_composeStamp(x.index, start):_composeStamp(x.index, end)] = not base_bool
                 return x
         else:
-            def _selector(x, start=season_start, end=season_end):
-                x[:_composeStamp(x.index, end)] = True
-                x[_composeStamp(x.index, start):] = True
+            def _selector(x, start=season_start, end=season_end, base_bool=base_bool):
+                x[:_composeStamp(x.index, end)] = not base_bool
+                x[_composeStamp(x.index, start):] = not base_bool
                 return x
 
         freq = '1' + 'mmmhhhdddMMMYYY'[len(season_start)]
diff --git a/saqc/funcs/functions.py b/saqc/funcs/functions.py
index f3807100d..5ea2a51ab 100644
--- a/saqc/funcs/functions.py
+++ b/saqc/funcs/functions.py
@@ -234,7 +234,8 @@ def flagSesonalRange(
     data, flagger = proc_fork(data, field, flagger, suffix="_masked")
     data, flagger = modelling_mask(data, field + "_masked", flagger, mode='seasonal',
                                    season_end=str(startmonth).zfill(2) + '-' + str(startday).zfill(2) + 'T00:00:00',
-                                   season_start=str(endmonth).zfill(2) + '-' + str(endday).zfill(2) + 'T00:00:00')
+                                   season_start=str(endmonth).zfill(2) + '-' + str(endday).zfill(2) + 'T00:00:00',
+                                   inclusive_selection='season')
     data, flagger = flagRange(data, field + "_masked", flagger, min=min, max=max, **kwargs)
     data, flagger = proc_projectFlags(data, field, flagger, method='match', source=field + "_masked")
     data, flagger = proc_drop(data, field + "_masked", flagger)
-- 
GitLab