diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index 03f4a6e2ef898b2ee21b3f21af8bf60aa8295376..75b1eaaa1ee559d34845a75cc50b504e97b8bed0 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -11,7 +11,7 @@ from dios import DictOfSeries from saqc.core.register import register from saqc.flagger.baseflagger import BaseFlagger from saqc.lib.ts_operators import varQC -from saqc.lib.tools import retrieveTrustworthyOriginal, customRoller +from saqc.lib.tools import customRoller, getFreqDelta @register(masking='field') @@ -112,8 +112,11 @@ def flagByVariance( Flags values may have changed, relatively to the flagger input. """ - dataseries, data_rate = retrieveTrustworthyOriginal(data, field, flagger) + dataseries = data[field] + data_rate = getFreqDelta(dataseries.index) + if not data_rate: + raise IndexError('Timeseries irregularly sampled!') if max_missing is None: max_missing = np.inf if max_consec_missing is None: diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 769a322dbf52d0e7c65cdd8e3b62c24262c9c5f4..f728779405775d17aa31e2cd0abb3c60a8c9ea1c 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -116,57 +116,6 @@ def inferFrequency(data: pd.Series) -> pd.DateOffset: return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index)) -def retrieveTrustworthyOriginal( - data: dios.DictOfSeries, field: str, flagger=None, level: Any = None -) -> dios.DictOfSeries: - """Columns of data passed to the saqc runner may not be sampled to its original sampling rate - thus - differenciating between missng value - nans und fillvalue nans is impossible. - - This function: - (1) if flagger is None: - (a) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the - estimated samplng rate. - - (2) if "flagger" is not None but "level" is None: - (a) all values are dropped, that are flagged worse then flagger.GOOD. (so unflagged values wont be dropped) - (b) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the - estimated samplng rate. - (3) if "flagger" is not None and "level" is not None: - (a) all values are dropped, that are flagged worse then level. (so unflagged values wont be dropped) - (b) estimates the sampling rate of the input dataseries by dropping all nans and then returns the series at the - estimated samplng rate. - - Note, that the passed dataseries should be harmonized to an equidistant - frequencie grid (maybe including blow up entries). - - :param data: DataFrame. The Data frame holding the data containing 'field'. - :param field: String. Fieldname of the column in data, that you want to sample to original sampling rate. - It has to have a harmonic - :param flagger: None or a flagger object. - :param level: Lower bound of flags that are excepted for data. Must be a flag the flagger can handle. - - """ - dataseries = data[field] - - if flagger is not None: - mask = flagger.isFlagged(field, flag=level or flagger.GOOD, comparator="<=") - # drop all flags that are suspicious or worse - dataseries = dataseries[mask] - - # drop the nan values that may result from any preceeding upsampling of the measurements: - dataseries = dataseries.dropna() - - if dataseries.empty: - return dataseries, np.nan - - # estimate original data sampling frequencie - # (the original series sampling rate may not match data-input sample rate): - seconds_rate = dataseries.index.to_series().diff().min().seconds - data_rate = pd.tseries.frequencies.to_offset(str(seconds_rate) + "s") - - return dataseries.asfreq(data_rate), data_rate - - def offset2seconds(offset): """Function returns total seconds upon "offset like input