From dfb5a90a94e15f4122225199edead71530e1e63a Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Tue, 2 Jul 2019 12:41:02 +0200 Subject: [PATCH] added sample rate estimation function to lib.tools --- saqc/funcs/functions.py | 4 ++-- saqc/lib/tools.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/saqc/funcs/functions.py b/saqc/funcs/functions.py index 210fa743c..f3025aa19 100644 --- a/saqc/funcs/functions.py +++ b/saqc/funcs/functions.py @@ -227,8 +227,8 @@ def flagSoilMoistureByPrecipitationEvents(data, flags, field, flagger, prec_refe ef = eval_frame[0] ef.index = eval_frame['level_0'] - # make raise and std. dev tester function (returns True for values that - # should be flagged bad and False respectively. (must be this way, since np.nan gets casted to True)) + # make raise and std. dev tester function (returns False for values that + # should be flagged bad and True otherwise. (must be this way, since np.nan gets cast to True)) def prec_test(x): x_moist = x[0::2] x_rain = x[1::2] diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index f47cf65d8..3a2d28fcc 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -100,3 +100,18 @@ def inferFrequency(data): return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index)) +def estimateSamplingRate(index): + """The function estimates the sampling rate of a datetime index. + The estimation evaluates a histogram of bins with seconds accuracy. This means that the + result is very likely to be counterintuitive if the input series is not rastered (harmonized with skips) + to an interval that is a whole number of seconds. + + :param index: A DatetimeIndex or array-like listing of datetimes whose sampling rate is to be + estimated.
+ """ + scnds_series = (pd.Series(index).diff().dt.total_seconds()).dropna() + max_scnds = scnds_series.max() + min_scnds = scnds_series.min() + hist = np.histogram(scnds_series, range=(min_scnds, max_scnds + 1), bins=int(max_scnds - min_scnds + 1)) + # return the smallest sample difference whose histogram bin is non-empty (this works because the input is expected to be harmonized) + return pd.tseries.frequencies.to_offset(str(int(hist[1][:-1][hist[0] > 0].min())) + 's') -- GitLab