From 8eae4b96368d41d34bad9e55e4c55c218f4a3fe8 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Tue, 2 Mar 2021 16:03:21 +0100 Subject: [PATCH] fixed curvefit.py and rolling.py --- saqc/funcs/curvefit.py | 17 +++++++---------- saqc/funcs/rolling.py | 14 +++++--------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/saqc/funcs/curvefit.py b/saqc/funcs/curvefit.py index c4862fe87..3a98dfdde 100644 --- a/saqc/funcs/curvefit.py +++ b/saqc/funcs/curvefit.py @@ -18,7 +18,6 @@ from saqc.flagger import Flagger from saqc.lib.ts_operators import polyRollerIrregular, polyRollerNumba, polyRoller, polyRollerNoMissingNumba, polyRollerNoMissing - @register(masking='field', module="curvefit") def fitPolynomial(data: DictOfSeries, field: str, flagger: Flagger, winsz: Union[int, str], @@ -103,11 +102,11 @@ def fitPolynomial(data: DictOfSeries, field: str, flagger: Flagger, Flags values may have changed relatively to the flagger input. """ + # todo: some (rather large) parts are functionally similar to saqc.funcs.rolling.roll if data[field].empty: return data, flagger data = data.copy() to_fit = data[field] - flags = flagger.getFlags(field) regular = getFreqDelta(to_fit.index) if not regular: if isinstance(winsz, int): @@ -194,13 +193,11 @@ def fitPolynomial(data: DictOfSeries, field: str, flagger: Flagger, data[field] = residues if eval_flags: - num_cats, codes = flags.factorize() - num_cats = pd.Series(num_cats, index=flags.index).rolling(winsz, center=True, min_periods=min_periods).max() - nan_samples = num_cats[num_cats.isna()] - num_cats.drop(nan_samples.index, inplace=True) - to_flag = pd.Series(codes[num_cats.astype(int)], index=num_cats.index) - to_flag = to_flag.align(nan_samples)[0] - to_flag[nan_samples.index] = flags[nan_samples.index] - flagger = flagger.setFlags(field, to_flag.values, **kwargs) + # with the new flagger we don't have to care + # about setting NaNs to the original flags anymore + # todo: we do not get any flags here, because 
of masking=field + worst = flagger[field].rolling(winsz, center=True, min_periods=min_periods).max() + flagger[field] = worst return data, flagger + diff --git a/saqc/funcs/rolling.py b/saqc/funcs/rolling.py index ab415bfe0..99f6be681 100644 --- a/saqc/funcs/rolling.py +++ b/saqc/funcs/rolling.py @@ -72,7 +72,6 @@ def roll( data = data.copy() to_fit = data[field] - flags = flagger.getFlags(field) if to_fit.empty: return data, flagger @@ -123,13 +122,10 @@ def roll( data[field] = means if eval_flags: - num_cats, codes = flags.factorize() - num_cats = pd.Series(num_cats, index=flags.index).rolling(winsz, center=True, min_periods=min_periods).max() - nan_samples = num_cats[num_cats.isna()] - num_cats.drop(nan_samples.index, inplace=True) - to_flag = pd.Series(codes[num_cats.astype(int)], index=num_cats.index) - to_flag = to_flag.align(nan_samples)[0] - to_flag[nan_samples.index] = flags[nan_samples.index] - flagger = flagger.setFlags(field, to_flag.values, **kwargs) + # with the new flagger we don't have to care + # about setting NaNs to the original flags anymore + # todo: we do not get any flags here, because of masking=field + worst = flagger[field].rolling(winsz, center=True, min_periods=min_periods).max() + flagger[field] = worst return data, flagger -- GitLab