Skip to content
Snippets Groups Projects
Commit 531dde49 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

helper function added to lib.tool: retrieveTrustworthyOriginal

parent 1e6f47b4
No related branches found
No related tags found
No related merge requests found
......@@ -189,8 +189,9 @@ def flagSoilMoistureByPrecipitationEvents(data, flags, field, flagger, prec_refe
:param soil_porosity: Porosity of moisture sensors surrounding soil, [-].
"""
# retrieve input sampling rate:
# retrieve input sampling rate (needed to translate ref and data rates into each other):
input_rate = estimateSamplingRate(data.index)
# retrieve data series input:
dataseries = data[field]
# "nan" suspicious values (neither "unflagged" nor "min-flagged")
......@@ -199,26 +200,30 @@ def flagSoilMoistureByPrecipitationEvents(data, flags, field, flagger, prec_refe
flagger.isFlagged(data_flags, flag=flagger.flags.unflagged())
# drop suspicious values
dataseries = dataseries[data_use.values]
# additionally drop the nan values that result from any preceeding upsampling of the
# additionally, drop the nan values that result from any preceding upsampling of the
# measurements:
dataseries = dataseries.dropna()
# eventually, after dropping all nans, there is nothing left:
if dataseries.empty:
return (data, flags)
# estimate moisture sampling frequencie (the original series sampling rate may not match data-input sample rate):
# estimate original data sampling frequency (the original series sampling rate may not match data-input sample
# rate):
moist_rate = estimateSamplingRate(dataseries.index)
# resample dataseries to its original sampling rate
# resample dataseries to its original sampling rate (now certain, to only get nans, indeed denoting "missing" data)
dataseries = dataseries.resample(moist_rate).asfreq()
# retrieve reference series input
refseries = data[prec_reference]
# "nan" suspicious values (neither "unflagged" nor "min-flagged")
# NOTE: suspicious values won't be dropped from the reference series, because they render suspicious the entire
# 24h aggregation interval that is computed later on.
ref_flags = flags[prec_reference]
ref_use = flagger.isFlagged(ref_flags, flag=flagger.flags.min()) | \
flagger.isFlagged(ref_flags, flag=flagger.flags.unflagged())
# drop suspicious values
refseries = refseries[ref_use.values]
# additionally, drop the nan values that result from any preceeding upsampling of the
# measurements:
refseries = refseries.dropna()
# eventually after dropping all nans, there is nothing left:
if refseries.empty:
return (data,flags)
prec_rate = estimateSamplingRate(refseries.index)
......
......@@ -103,7 +103,7 @@ def inferFrequency(data):
def estimateSamplingRate(index):
"""The function estimates the sampling rate of a datetime index.
The estimation basically evaluates a histogram of bins with seconds-accuracy. This means, that the
result may be contra intuitive very likely, if the input series is not rastered (harmonized with skips)
result will very likely be counterintuitive or meaningless if the input series is not rastered (harmonized with skips)
to an interval divisible by seconds.
:param index: A DatetimeIndex or array like Datetime listing, of which you want the sampling rate to be
......@@ -119,3 +119,32 @@ def estimateSamplingRate(index):
# return smallest non zero sample difference (this works, because input is expected to be at least
# harmonized with skips)
return pd.tseries.frequencies.to_offset(str(int(hist[1][:-1][hist[0] > 0].min())) + 's')
def retrieveTrustworthyOriginal(dataseries, dataflags=None, flagger=None):
    """Reduce a series to its trustworthy values and resample it to its estimated original rate.

    Columns of data passed to the saqc runner may not be sampled at their original sampling rate - thus
    differentiating between "missing value" nans and "fill value" nans is impossible. If both flags and a
    flagger object are passed, this function first keeps only the values whose flag is either the flagger's
    minimum flag or its "unflagged" flag. It then drops all nans, estimates the sampling rate of what is left
    and resamples the series to that rate, so that any nan in the result denotes genuinely missing data.

    :param dataseries:  The pd.Series object that you want to sample to its original rate. It has to have a
                        harmonic timestamp.
    :param dataflags:   The flags series, referring to the passed dataseries. Only evaluated if ``flagger``
                        is passed as well.
    :param flagger:     A flagger object, used to apply the passed flags onto the dataseries. Only evaluated
                        if ``dataflags`` is passed as well.
    :return:            The filtered series, resampled to its estimated original sampling rate. If nothing is
                        left after filtering and nan-dropping, the (empty) series is returned as is.
    """
    if (dataflags is not None) and (flagger is not None):
        # keep only values that are either "min-flagged" or "unflagged"
        data_use = flagger.isFlagged(dataflags, flag=flagger.flags.min()) | \
            flagger.isFlagged(dataflags, flag=flagger.flags.unflagged())
        # drop suspicious values
        dataseries = dataseries[data_use.values]
    # additionally, drop the nan values that result from any preceding upsampling of the
    # measurements:
    dataseries = dataseries.dropna()
    # eventually, after dropping all nans, there is nothing left (and no rate could be estimated):
    if dataseries.empty:
        return dataseries
    # estimate original data sampling frequency (the original series sampling rate may not match data-input
    # sample rate):
    original_rate = estimateSamplingRate(dataseries.index)
    # resample dataseries to its original sampling rate (now certain, to only get nans, indeed denoting
    # "missing" data)
    return dataseries.resample(original_rate).asfreq()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment