From 732519082db701d750c412028f177b832b851033 Mon Sep 17 00:00:00 2001
From: Peter Luenenschloss <peter.luenenschloss@ufz.de>
Date: Fri, 26 Apr 2019 16:55:00 +0200
Subject: [PATCH] soil frost flagger added to funcs

---
 funcs/functions.py | 115 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/funcs/functions.py b/funcs/functions.py
index a2c872cc0..280397048 100644
--- a/funcs/functions.py
+++ b/funcs/functions.py
@@ -127,3 +127,118 @@ def flagMad(data, flags, field, flagger, length, z, deriv, **kwargs):
     flags[field] = flagcol
 
     return data, flags
+
+
+def flagSoilMoistureBySoilFrost(data, flags, field, flagger, time_stamp, tolerated_deviation, soil_temp_reference,
+                                reference_field=None, reference_flags=None, reference_flagger=None,
+                                reference_time_stamp=None, frost_level=0, **kwargs):
+    """Function flags Soil moisture measurements by evaluating the soil-frost-level in the moment of measurement.
+    Soil temperatures below "frost_level" are regarded as denoting frozen soil state.
+
+    :param data:                        The pandas dataframe holding the data-to-be flagged.
+    :param flags:                       A dataframe holding the flags/flag-entries of "data"
+    :param field:                       Fieldname of the Soil moisture measurements in data.
+                                        (Soil moisture measurement column should be accessible by "data[field]")
+    :param flagger:                     A flagger - object.
+    :param time_stamp:                  (1)A STRING, denoting the data fields name, that holds the timestamp
+                                        series associated with the data,
+                                        (2) Pass None or 'index', if the input data dataframe is indexed with a
+                                        timestamp.
+                                        (3) Pass an array-like thingy, holding timestamp/datetime
+                                        like thingies that refer to the data(including datestrings).
+    :param tolerated_deviation:         An offset alias, denoting the maximal temporal deviation,
+                                        the Soil frost states timestamp is allowed to have, relative to the
+                                        data point to-be-flagged.
+    :param soil_temp_reference:         (1) A STRING, denoting the fields name in data,
+                                        that holds the data series of soil temperature values,
+                                        the to-be-flagged values shall be checked against.
+                                        (2) A date indexed pandas.Series, holding the data series of soil
+                                        temperature values, the to-be-flagged values shall be checked against.
+                                        (3) A data frame (most likely refering to a loggers measurements), containing the
+                                        temperature values, the to-be-flagged values shall be checked against,
+                                        in one of its fields. (In this case, you have to pass
+                                        reference_field and reference_time_stamp as well)
+    :param reference_field:             If a Dataframe is passed to soil_temp_reference, that parameter holds the
+                                        Fieldname refereing to the Soil temperature measurements.
+    :param reference_flag:              If there are flags available for the reference series, pass them here
+    :param reference_flagger:           If the flagger of the reference series is not the same as the one used
+                                        for the data-to-be-flagged, pass it here.
+    :param reference_time_stamp:
+    :param frost_level:                 Value level, the flagger shall check against, when evaluating soil frost level.
+    """
+
+    # TODO: (To ASK):HOW TO FLAG nan values in input frame? general question: what should a test test?
+    # TODO: -> nan values with reference values that show frost, are flagged bad, nan values with reference value nan
+    # TODO: as well, are not flagged (test not applicable-> no flag)
+    # TODO: (To comment):PERFORMANCE COST OF NOT HARMONIZED
+    # TODO: Index = None input option
+
+    # check and retrieve data series input:
+    if isinstance(time_stamp, str):
+        dataseries = pd.Series(data[field].values, index=pd.to_datetime(data[time_stamp].values))
+    else:
+        dataseries = pd.Series(data[field].values, index=pd.to_datetime(list(time_stamp)))
+
+    # check and retrieve reference input:
+    #if reference is a string, it refers to data field
+    if isinstance(soil_temp_reference, str):
+        # if reference series is part of input data frame, evaluate input data flags:
+        flag_mask = flagger.isFlagged(flags)[soil_temp_reference]
+        # retrieve reference series
+        refseries = pd.Series(data[soil_temp_reference].values,
+                                               index=dataseries.index)
+        # drop flagged values:
+        refseries = refseries.loc[~np.array(flag_mask)]
+
+    # if reference is a series, it represents the soil temperature series-to-refer-to:
+    elif isinstance(soil_temp_reference, pd.Series):
+        refseries = soil_temp_reference
+        if reference_flags is not None:
+            if reference_flagger is None:
+                reference_flagger = flagger
+            reference_flag_mask = reference_flagger.isFlagged(reference_flags)
+            refseries = refseries.loc[~np.array(reference_flag_mask)]
+
+    # if reference is a dataframe, it contains the soil temperature series to-refer-to:
+    elif isinstance(soil_temp_reference, pd.DataFrame):
+        if isinstance(reference_time_stamp, str):
+            refseries = pd.Series(soil_temp_reference[reference_field].values,
+                                  index=pd.to_datetime(soil_temp_reference[reference_time_stamp].values))
+        else:
+            refseries = pd.Series(soil_temp_reference[reference_field].values,
+                                  index=pd.to_datetime(list(reference_time_stamp)))
+
+        if reference_flags is not None:
+            if reference_flagger is None:
+                reference_flagger = flagger
+            reference_flag_mask = reference_flagger.isFlagged(reference_flags)[reference_field]
+            refseries = refseries.loc[~np.array(reference_flag_mask)]
+
+
+    # make refseries index a datetime thingy
+    refseries.index = pd.to_datetime(refseries.index)
+    # drop nan values from reference series, since those are values you dont want to refer to.
+    refseries = refseries.dropna()
+
+    # wrap around df.index.get_loc method to catch key error in case of empty tolerance window:
+    def check_nearest_for_frost(ref_date, ref_series, tolerance, check_level):
+        try:
+            # if there is no reference value within tolerance margin, following line will rise key error and
+            # trigger the exception
+            ref_pos = ref_series.index.get_loc(ref_date, method='nearest', tolerance=tolerance)
+        except KeyError:
+            # since test is not applicable: make no change to flag state
+            return False
+
+        # if reference value index is available, return comparison result (to determine flag)
+        return ref_series[ref_pos] <= check_level
+
+    # make temporal frame holding dateindex, since df.apply cant access index
+    temp_frame = pd.Series(dataseries.index)
+    # get flagging mask
+    mask = temp_frame.apply(check_nearest_for_frost, args=(refseries,
+                                                           tolerated_deviation, frost_level))
+    # apply calculated flags
+    flags.loc[mask.values, field] = flagger.setFlag(flags.loc[mask, field], **kwargs)
+
+    return data, flags
-- 
GitLab