diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2edebe1d85a325e96f3ead84a7c23343e6eeac7d..2f4d6604a73532a9f3df497e53424e34a08defcc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,7 +82,7 @@ coverage: pages: stage: deploy only: - - develop + - cookBux except: - schedules script: diff --git a/requirements.txt b/requirements.txt index 925b79c3f98f1a00ae13894b21d9c6fbbe97983c..43a97ff4ca9663642590929de765530cc8f1c44a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,8 @@ mlxtend==0.18.0 numba==0.53.1 numpy==1.20.2 outlier-utils==0.0.3 +pyarrow==4.0.0 pandas==1.2.4 -pyarrow==3.0.0 pytest==6.2.3 pytest-lazy-fixture==0.6.3 PyWavelets==1.1.1 diff --git a/saqc/core/modules/drift.py b/saqc/core/modules/drift.py index 951219c1dc5dfc39bd3430c2e0f164a6ef34755f..29ec3b25ddf1af135b8c7e4e02552389a8352010 100644 --- a/saqc/core/modules/drift.py +++ b/saqc/core/modules/drift.py @@ -68,6 +68,7 @@ class Drift(ModuleBase): self, field: ColumnName, maint_data_field: ColumnName, + driftModel: Callable[..., float], cal_mean: int = 5, flag_maint_period: bool = False, flag: float = BAD, diff --git a/saqc/core/register.py b/saqc/core/register.py index ac89eb6fbdbbe56315a7145f2e7d5ff9885b46f1..e0e6a443378eea4f0744d59b2429be55999079f8 100644 --- a/saqc/core/register.py +++ b/saqc/core/register.py @@ -272,6 +272,8 @@ def _prepareFlags(flags: Flags, masking) -> Flags: Currently this only clears the flags, but in future, this should be sliced the flags to the columns, that the saqc-function needs. + + Always return a copy of flags or a new flags-frame. """ # Either the index or the columns itself changed if masking == "none": @@ -281,6 +283,21 @@ def _prepareFlags(flags: Flags, masking) -> Flags: def _restoreFlags(flags: Flags, old_state: CallState): + """ + Generate flags from the temporary result-flags and the original flags. 
+ + Parameters + ---------- + flags : Flags + The flags-frame, which is the result from a saqc-function + + old_state : CallState + The state before the saqc-function was called + + Returns + ------- + Flags + """ if old_state.masking == "none": return flags @@ -291,19 +308,28 @@ def _restoreFlags(flags: Flags, old_state: CallState): columns = columns.append(pd.Index([old_state.field])) out = old_state.flags.copy() + + # this implicitly squashes the new flags history (RHS) + # to a single column, which is then appended to the + # old history (LHS). The new flags history possibly + # consists of multiple columns, one for each time a + # series or scalar was passed to the flags. for c in columns: - # this implicitly squash the new flags history (RHS) - # to a single column, which than is appended to the - # old history (LHS). The new flags history possibly - # consists of multiple columns, one for each time a - # series or scalar was passed to the flags. - if len(flags.history[c].columns) > 1 or c not in out: - # We only want to assign a new column to our history - # if something changed on the RHS, or if a new variable - # appeared. Otherwise blow up our history with dummy - # columns + + if c not in out: out[c] = flags[c] + # Guard to avoid adding the dummy column only (`UNFLAGGED`-column). + if len(flags.history[c].columns) <= 1: + continue + + # We reset the dummy column, which makes the initial + # UNFLAGGED column completely transparent, so we can 'see' + # which positions the current test really touched. 
+ h = flags.history[c] + h.hist[0] = UNTOUCHED + out[c] = h.max() + return out diff --git a/saqc/funcs/drift.py b/saqc/funcs/drift.py index 6a9d16d7991e200ce824468bda188ae352403bd7..d078c80717a67992e7f1fc1f71e3a554fdae2816 100644 --- a/saqc/funcs/drift.py +++ b/saqc/funcs/drift.py @@ -464,8 +464,9 @@ def correctDrift( to_correct = data[field] maint_data = data[maint_data_field] - d = {"drift_group": np.nan, to_correct.name: to_correct.values} - drift_frame = pd.DataFrame(d, index=to_correct.index) + to_correct_clean = to_correct.dropna() + d = {"drift_group": np.nan, to_correct.name: to_correct_clean.values} + drift_frame = pd.DataFrame(d, index=to_correct_clean.index) # group the drift frame for k in range(0, maint_data.shape[0] - 1): diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index 14a4bb755e4aad1f447b517e9d5393afc08aca0d..3c1cf609268fbad0ac48a9af68af7834685dbbbc 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -898,7 +898,9 @@ def flagOffset( if rel_thresh: s = np.sign(rel_thresh) - rel_jumps = s * (dataseries.shift(1).div(dataseries) - 1) > abs(rel_thresh) + rel_jumps = s * (dataseries.shift(1) - dataseries).div(dataseries.abs()) > abs( + rel_thresh + ) if thresh: post_jumps = rel_jumps & post_jumps else: