From 8be35da320559ca94448f3f2bd11676fba8fa4d5 Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Tue, 3 Dec 2019 17:30:27 +0100 Subject: [PATCH] increased soil moisture tests test coverage --- saqc/funcs/soil_moisture_tests.py | 24 +++++++++++++++++++----- test/funcs/test_soil_moisture_tests.py | 21 +++++++++++++++++++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/saqc/funcs/soil_moisture_tests.py b/saqc/funcs/soil_moisture_tests.py index 37f9ad1de..5722c0a36 100644 --- a/saqc/funcs/soil_moisture_tests.py +++ b/saqc/funcs/soil_moisture_tests.py @@ -265,17 +265,17 @@ def flagSoilMoistureByConstantsDetection( plateau_window_min="12h", plateau_var_limit=0.0005, rainfall_window_range="12h", - filter_window_size="3h", + filter_window_size=None, var_total_nans=np.inf, var_consec_nans=np.inf, derivative_maximum_lb=0.0025, derivative_minimum_ub=0, data_max_tolerance=0.95, + smooth_poly_order=2, **kwargs ): - """Function is not ready to use yet: we are waiting for response from the author of [Paper] in order of getting - able to exclude some sources of confusion. + """ Note, function has to be harmonized to equidistant freq_grid @@ -322,13 +322,27 @@ def flagSoilMoistureByConstantsDetection( # 2. 
extend forwards: if period_diff > 0: cond1_sets = cond1_sets.replace(1, method='ffill', limit=period_diff) + # get first derivative + if filter_window_size is None: + filter_window_size = 3 * pd.Timedelta(moist_rate) + else: + filter_window_size = pd.Timedelta(filter_window_size) first_derivative = dataseries.diff() - # cumsum trick to seperate continous plateau groups from each other + filter_window_seconds = filter_window_size.seconds + smoothing_periods = int(np.ceil((filter_window_seconds / moist_rate.n))) + first_derivate = savgol_filter( + dataseries, + window_length=smoothing_periods, + polyorder=smooth_poly_order, + deriv=1, + ) + first_derivate = pd.Series(data=first_derivate, index=dataseries.index, name=dataseries.name) + # cumsumming to separate continuous plateau groups from each other: group_counter = cond1_sets.cumsum() group_counter = group_counter[group_counter.diff() == 0] group_counter.name = 'group_counter' - group_frame = pd.merge(group_counter, first_derivative, left_index=True, right_index=True, how='inner') + group_frame = pd.merge(group_counter, first_derivate, left_index=True, right_index=True, how='inner') group_frame = group_frame.groupby('group_counter') condition_passed = group_frame.filter( lambda x: (x[field].max() >= derivative_maximum_lb) & (x[field].min() <= derivative_minimum_ub)) diff --git a/test/funcs/test_soil_moisture_tests.py b/test/funcs/test_soil_moisture_tests.py index e9834f2bf..3c6f0d8fb 100644 --- a/test/funcs/test_soil_moisture_tests.py +++ b/test/funcs/test_soil_moisture_tests.py @@ -8,9 +8,10 @@ import pandas as pd from saqc.funcs.soil_moisture_tests import ( flagSoilMoistureBySoilFrost, flagSoilMoistureByPrecipitationEvents, + flagSoilMoistureByConstantsDetection ) -from test.common import TESTFLAGGER +from test.common import TESTFLAGGER, initData @pytest.mark.parametrize("flagger", TESTFLAGGER) @@ -58,6 +59,22 @@ def test_flagSoilMoisturePrecipitationEvents(flagger): test_sum = (flag_result[flag_assertion] == 
flagger.BAD).sum() assert test_sum == len(flag_assertion) +@pytest.mark.parametrize("flagger", TESTFLAGGER) +def test_flagSoilMoistureByConstantsDetection(flagger): + + data = initData( + 1, start_date="2011-01-01 00:00:00", end_date="2011-01-02 00:00:00", freq="5min" + ) + data.iloc[5:25] = 0 + data.iloc[100:120] = data.max()[0] + field = data.columns[0] + flagger = flagger.initFlags(data) + data, flagger = flagSoilMoistureByConstantsDetection(data, field, flagger, plateau_window_min='1h', + rainfall_window_range='1h') + + assert ~(flagger.isFlagged()[5:25]).all()[0] + assert (flagger.isFlagged()[100:120]).all()[0] + if __name__ == "__main__": flagger = TESTFLAGGER[2] - test_flagSoilMoistureBySoilFrost(flagger) + test_flagSoilMoistureByConstantsDetection(flagger) -- GitLab