Skip to content
Snippets Groups Projects
Commit a67038f9 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

Merge branch 'retrieveTrustworthyDeprecation' into 'develop'

Retrieve trustworthy deprecation

See merge request !221
parents 67fe2f60 627875c4
No related branches found
No related tags found
1 merge request!221Retrieve trustworthy deprecation
Pipeline #16772 passed with stages
in 9 minutes and 36 seconds
......@@ -11,7 +11,7 @@ from dios import DictOfSeries
from saqc.core.register import register
from saqc.flagger.baseflagger import BaseFlagger
from saqc.lib.ts_operators import varQC
from saqc.lib.tools import retrieveTrustworthyOriginal, customRoller
from saqc.lib.tools import customRoller, getFreqDelta
@register(masking='field')
......@@ -112,8 +112,11 @@ def flagByVariance(
Flags values may have changed, relatively to the flagger input.
"""
dataseries, data_rate = retrieveTrustworthyOriginal(data, field, flagger)
dataseries = data[field]
data_rate = getFreqDelta(dataseries.index)
if not data_rate:
raise IndexError('Timeseries irregularly sampled!')
if max_missing is None:
max_missing = np.inf
if max_consec_missing is None:
......
......@@ -116,57 +116,6 @@ def inferFrequency(data: pd.Series) -> pd.DateOffset:
return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index))
def retrieveTrustworthyOriginal(
    data: "dios.DictOfSeries", field: str, flagger=None, level: Any = None
) -> "tuple[pd.Series, pd.DateOffset | float]":
    """Return ``data[field]`` resampled to its estimated original sampling rate.

    Columns of data passed to the saqc runner may not be sampled at their
    original sampling rate, which makes it impossible to tell NaNs that mark
    missing values apart from NaNs that were introduced as fill values.

    This function:

    (1) if ``flagger`` is None:
        (a) drops all NaNs, estimates the sampling rate from the remaining
            timestamps and returns the series at that rate.
    (2) if ``flagger`` is not None but ``level`` is None:
        (a) drops all values that are flagged worse than ``flagger.GOOD``
            (unflagged values are kept),
        (b) then proceeds as in (1a).
    (3) if ``flagger`` is not None and ``level`` is not None:
        (a) drops all values that are flagged worse than ``level``
            (unflagged values are kept),
        (b) then proceeds as in (1a).

    Note that the passed series should be harmonized to an equidistant
    frequency grid (possibly including blow-up entries).

    :param data: Container holding the data; ``data[field]`` must yield a
        datetime-indexed series.
    :param field: Name of the column in ``data`` that should be brought back
        to its original sampling rate.
    :param flagger: None or a flagger object.
    :param level: Lower bound of the flags that are accepted for the data.
        Must be a flag the flagger can handle. Only ``None`` selects the
        default ``flagger.GOOD``.
    :return: Tuple ``(series, rate)``. ``rate`` is a pandas offset expressed
        in whole seconds, or ``np.nan`` if fewer than two valid samples
        remain and therefore no rate can be estimated (the series is then
        returned as is, without resampling).
    """
    dataseries = data[field]

    if flagger is not None:
        # Compare against None explicitly: `level or flagger.GOOD` would
        # silently discard a legitimate flag value that happens to be falsy.
        threshold = flagger.GOOD if level is None else level
        mask = flagger.isFlagged(field, flag=threshold, comparator="<=")
        # keep only the values accepted by the flagger mask
        # (i.e. drop everything flagged suspicious or worse)
        dataseries = dataseries[mask]

    # drop the NaN values that may result from any preceding upsampling of
    # the measurements
    dataseries = dataseries.dropna()

    if dataseries.empty:
        return dataseries, np.nan

    # Estimate the original sampling rate as the smallest gap between
    # consecutive valid timestamps (the original rate may not match the
    # rate of the data as passed in).
    min_delta = dataseries.index.to_series().diff().min()
    if pd.isna(min_delta):
        # a single valid sample has no neighbour, so there is no gap to
        # derive a rate from
        return dataseries, np.nan

    # `Timedelta.seconds` is only the seconds *component* (0..86399) and
    # ignores whole days; `total_seconds()` gives the full duration.
    seconds_rate = int(min_delta.total_seconds())
    data_rate = pd.tseries.frequencies.to_offset(str(seconds_rate) + "s")

    return dataseries.asfreq(data_rate), data_rate
def offset2seconds(offset):
"""Function returns total seconds upon "offset like input
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment