From c38034ffa368ae0199004fbc022eee875badf94e Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Mon, 29 Apr 2019 19:41:35 +0200 Subject: [PATCH] made flagSoilMoistureByFrost parameter passing compatible to call by metadata.csv. --- funcs/functions.py | 81 +++++++++------------------------------------- 1 file changed, 16 insertions(+), 65 deletions(-) diff --git a/funcs/functions.py b/funcs/functions.py index 280397048..6f0cf24ea 100644 --- a/funcs/functions.py +++ b/funcs/functions.py @@ -129,92 +129,43 @@ def flagMad(data, flags, field, flagger, length, z, deriv, **kwargs): return data, flags -def flagSoilMoistureBySoilFrost(data, flags, field, flagger, time_stamp, tolerated_deviation, soil_temp_reference, - reference_field=None, reference_flags=None, reference_flagger=None, - reference_time_stamp=None, frost_level=0, **kwargs): +def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference, tolerated_deviation, + frost_level=0, **kwargs): """Function flags Soil moisture measurements by evaluating the soil-frost-level in the moment of measurement. Soil temperatures below "frost_level" are regarded as denoting frozen soil state. :param data: The pandas dataframe holding the data-to-be flagged. :param flags: A dataframe holding the flags/flag-entries of "data" - :param field: Fieldname of the Soil moisture measurements in data. - (Soil moisture measurement column should be accessible by "data[field]") + :param field: Fieldname of the Soil moisture measurements field in data. :param flagger: A flagger - object. - :param time_stamp: (1)A STRING, denoting the data fields name, that holds the timestamp - series associated with the data, - (2) Pass None or 'index', if the input data dataframe is indexed with a - timestamp. - (3) Pass an array-like thingy, holding timestamp/datetime like thingies that refer to the data(including datestrings). 
:param tolerated_deviation: An offset alias, denoting the maximal temporal deviation, - the Soil frost states timestamp is allowed to have, relative to the + the soil frost states timestamp is allowed to have, relative to the data point to-be-flagged. - :param soil_temp_reference: (1) A STRING, denoting the fields name in data, + :param soil_temp_reference: A STRING, denoting the fields name in data, that holds the data series of soil temperature values, the to-be-flagged values shall be checked against. - (2) A date indexed pandas.Series, holding the data series of soil - temperature values, the to-be-flagged values shall be checked against. - (3) A data frame (most likely refering to a loggers measurements), containing the - temperature values, the to-be-flagged values shall be checked against, - in one of its fields. (In this case, you have to pass - reference_field and reference_time_stamp as well) - :param reference_field: If a Dataframe is passed to soil_temp_reference, that parameter holds the - Fieldname refereing to the Soil temperature measurements. - :param reference_flag: If there are flags available for the reference series, pass them here - :param reference_flagger: If the flagger of the reference series is not the same as the one used - for the data-to-be-flagged, pass it here. - :param reference_time_stamp: :param frost_level: Value level, the flagger shall check against, when evaluating soil frost level. """ # TODO: (To ASK):HOW TO FLAG nan values in input frame? general question: what should a test test? 
# TODO: -> nan values with reference values that show frost, are flagged bad, nan values with reference value nan # TODO: as well, are not flagged (test not applicable-> no flag) - # TODO: (To comment):PERFORMANCE COST OF NOT HARMONIZED - # TODO: Index = None input option + # TODO: buffer zone for intermediate/fluctuating frost state - # check and retrieve data series input: - if isinstance(time_stamp, str): - dataseries = pd.Series(data[field].values, index=pd.to_datetime(data[time_stamp].values)) - else: - dataseries = pd.Series(data[field].values, index=pd.to_datetime(list(time_stamp))) + # retrieve data series input: + dataseries = pd.Series(data[field].values, index=pd.to_datetime(data.index)) - # check and retrieve reference input: - #if reference is a string, it refers to data field - if isinstance(soil_temp_reference, str): - # if reference series is part of input data frame, evaluate input data flags: - flag_mask = flagger.isFlagged(flags)[soil_temp_reference] - # retrieve reference series - refseries = pd.Series(data[soil_temp_reference].values, - index=dataseries.index) - # drop flagged values: - refseries = refseries.loc[~np.array(flag_mask)] - - # if reference is a series, it represents the soil temperature series-to-refer-to: - elif isinstance(soil_temp_reference, pd.Series): - refseries = soil_temp_reference - if reference_flags is not None: - if reference_flagger is None: - reference_flagger = flagger - reference_flag_mask = reference_flagger.isFlagged(reference_flags) - refseries = refseries.loc[~np.array(reference_flag_mask)] - - # if reference is a dataframe, it contains the soil temperature series to-refer-to: - elif isinstance(soil_temp_reference, pd.DataFrame): - if isinstance(reference_time_stamp, str): - refseries = pd.Series(soil_temp_reference[reference_field].values, - index=pd.to_datetime(soil_temp_reference[reference_time_stamp].values)) - else: - refseries = pd.Series(soil_temp_reference[reference_field].values, 
index=pd.to_datetime(list(reference_time_stamp))) - - if reference_flags is not None: - if reference_flagger is None: - reference_flagger = flagger - reference_flag_mask = reference_flagger.isFlagged(reference_flags)[reference_field] - refseries = refseries.loc[~np.array(reference_flag_mask)] + # retrieve reference input: + #if reference is a string, it refers to data field + # if reference series is part of input data frame, evaluate input data flags: + flag_mask = flagger.isFlagged(flags)[soil_temp_reference] + # retrieve reference series + refseries = pd.Series(data[soil_temp_reference].values, index=pd.to_datetime(data.index)) + # drop flagged values: + refseries = refseries.loc[~np.array(flag_mask)] # make refseries index a datetime thingy refseries.index = pd.to_datetime(refseries.index) # drop nan values from reference series, since those are values you dont want to refer to. -- GitLab