Skip to content
Snippets Groups Projects
Commit aa1fdab7 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

BaseFlagger now returns categorical flags

parent 2b7996b2
No related branches found
No related tags found
No related merge requests found
...@@ -50,9 +50,8 @@ class BaseFlagger: ...@@ -50,9 +50,8 @@ class BaseFlagger:
return flags.values return flags.values
def initFlags(self, data: pd.DataFrame) -> pd.DataFrame: def initFlags(self, data: pd.DataFrame) -> pd.DataFrame:
out = data.copy().astype(self.flags) out = pd.DataFrame(data=self.flags[0], index=data.index, columns=data.columns)
out.loc[:] = self.flags[0] return out.astype(self.flags)
return out
def isFlagged(self, flags: ArrayLike, flag: T = None) -> ArrayLike: def isFlagged(self, flags: ArrayLike, flag: T = None) -> ArrayLike:
if flag is None: if flag is None:
......
...@@ -111,7 +111,8 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference ...@@ -111,7 +111,8 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference
"""Function flags Soil moisture measurements by evaluating the soil-frost-level in the moment of measurement. """Function flags Soil moisture measurements by evaluating the soil-frost-level in the moment of measurement.
Soil temperatures below "frost_level" are regarded as denoting frozen soil state. Soil temperatures below "frost_level" are regarded as denoting frozen soil state.
:param data: The pandas dataframe holding the data-to-be flagged. :param data: The pandas dataframe holding the data-to-be flagged, as well as the reference
series. Dataframe should be indexed by a datetime series.
:param flags: A dataframe holding the flags/flag-entries of "data" :param flags: A dataframe holding the flags/flag-entries of "data"
:param field: Fieldname of the Soil moisture measurements field in data. :param field: Fieldname of the Soil moisture measurements field in data.
:param flagger: A flagger - object. :param flagger: A flagger - object.
...@@ -125,26 +126,25 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference ...@@ -125,26 +126,25 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference
:param frost_level: Value level, the flagger shall check against, when evaluating soil frost level. :param frost_level: Value level, the flagger shall check against, when evaluating soil frost level.
""" """
# retrieve data series input: # retrieve data series input:
dataseries = pd.Series(data[field].values, index=pd.to_datetime(data.index)) dataseries = data[field]
# if reference series is part of input data frame, evaluate input data flags: # if reference series is part of input data frame, evaluate input data flags:
flag_mask = flagger.isFlagged(flags)[soil_temp_reference] # flag_mask = flagger.isFlagged(flags)[soil_temp_reference]
# retrieve reference series # retrieve reference series
refseries = pd.Series(data[soil_temp_reference].values, index=pd.to_datetime(data.index)) refseries = data[soil_temp_reference]
# drop flagged values: # drop flagged values:
refseries = refseries.loc[~np.array(flag_mask)] # refseries = refseries.loc[~np.array(flag_mask)]
# make refseries index a datetime thingy # make refseries index a datetime thingy
refseries.index = pd.to_datetime(refseries.index) refseries.index = pd.to_datetime(refseries.index)
# drop nan values from reference series, since those are values you don't want to refer to. # drop nan values from reference series, since those are values you don't want to refer to.
refseries = refseries.dropna() refseries = refseries.dropna()
# wrap around df.index.get_loc method to catch key error in case of empty tolerance window: # wrap around df.index.get_loc method, to catch key error in case of empty tolerance window:
def check_nearest_for_frost(ref_date, ref_series, tolerance, check_level): def check_nearest_for_frost(ref_date, ref_series, tolerance, check_level):
try: try:
# if there is no reference value within tolerance margin, following line will rise key error and # if there is no reference value within tolerance margin, following line will raise key error and
# trigger the exception # trigger the exception
ref_pos = ref_series.index.get_loc(ref_date, method='nearest', tolerance=tolerance) ref_pos = ref_series.index.get_loc(ref_date, method='nearest', tolerance=tolerance)
except KeyError: except KeyError:
...@@ -156,11 +156,11 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference ...@@ -156,11 +156,11 @@ def flagSoilMoistureBySoilFrost(data, flags, field, flagger, soil_temp_reference
# make temporary frame holding dateindex, since df.apply can't access index # make temporary frame holding dateindex, since df.apply can't access index
temp_frame = pd.Series(dataseries.index) temp_frame = pd.Series(dataseries.index)
# get flagging mask # get flagging mask ("False" denotes "bad"="test successful")
mask = temp_frame.apply(check_nearest_for_frost, args=(refseries, mask = temp_frame.apply(check_nearest_for_frost, args=(refseries,
tolerated_deviation, frost_level)) tolerated_deviation, frost_level))
# apply calculated flags # apply calculated flags
flags.loc[mask.values, field] = flagger.setFlag(flags.loc[mask, field], **kwargs) flags.loc[mask.values, field] = flagger.setFlag(flags.loc[mask.values, field], **kwargs)
return data, flags return data, flags
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment