def estimateSamplingRate(index):
    """Estimate the sampling rate of a datetime index.

    The estimate is the smallest difference between consecutive timestamps,
    truncated to whole seconds. Because of the seconds accuracy, the result
    is very likely to be counter-intuitive if the input is not rastered
    (harmonized with skips) to an interval divisible by seconds.

    :param index: A DatetimeIndex or array-like listing of datetimes, of which
        the sampling rate is to be estimated.
    :return: The estimated sampling rate as a pandas offset object, built from
        the frequency string ``'<seconds>s'``.
    :raises ValueError: If ``index`` holds fewer than two valid timestamps, so
        that no difference between consecutive timestamps can be computed.
    """
    # Differences between consecutive timestamps, in seconds. The first entry
    # of diff() is always NaT and is removed by dropna().
    gaps = pd.Series(index).diff().dt.total_seconds().dropna()
    if gaps.empty:
        raise ValueError(
            "Cannot estimate a sampling rate from fewer than two timestamps."
        )
    # The lowest occupied bin of a histogram with range starting at the
    # minimum gap always begins at that minimum, so the minimum is taken
    # directly. This avoids allocating one bin per second between the smallest
    # and the largest gap, which explodes when the index has one long outage.
    smallest_gap = int(gaps.min())
    return pd.tseries.frequencies.to_offset(str(smallest_gap) + 's')