From 954ebf2d93e09f8e7471f8da779193f6afae142c Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 11:51:39 +0100 Subject: [PATCH 1/7] added missing return value assignment --- saqc/funcs/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index a3af13d61..b1c68ee10 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -73,7 +73,7 @@ class ConstantsMixin: rolling = d.rolling(window=window, min_periods=min_periods) starting_points_mask = rolling.max() - rolling.min() <= thresh - removeRollingRamps(starting_points_mask, window=window, inplace=True) + starting_points_mask = removeRollingRamps(starting_points_mask, window=window, inplace=True) # mimic forward rolling by roll over inverse [::-1] rolling = starting_points_mask[::-1].rolling( -- GitLab From edf9622b02842e63d17ea8e3ffcfa5222aba424d Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 11:59:14 +0100 Subject: [PATCH 2/7] fixed/clarified docstring --- saqc/funcs/constants.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index b1c68ee10..974039edb 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -51,18 +51,18 @@ class ConstantsMixin: thresh : Maximum total change allowed per window. + window : + Size of the moving window. This determines the number of observations used + for calculating the absolute change per window. + Each window will be of either of a fixed number of periods (integer defined window), + or will have a fixed temporal extension (offset defined window). + min_periods : Minimum number of observations in window required to generate - a flag. Must be an integer greater or equal `2`, because a + a flag. This is to exclude underpopulated offset defined windows from flagging. + Must be an integer greater or equal `2`, because a single value would always be considered constant. Defaults to `2`. - - window : - Size of the moving window. This is the number of observations used - for calculating the statistic. Each window will be a fixed size. - If it is an offset then this will be the time period of each window. - Each window will be a variable sized based on the observations included - in the time-period. """ d: pd.Series = self._data[field] validateWindow(window, index=d.index) -- GitLab From b2b87270d6d2f28d93c0484024bb8d779536d661 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 12:12:33 +0100 Subject: [PATCH 3/7] updated changelog --- CHANGELOG.md | 1 + saqc/funcs/constants.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d092ab3ae..d32e8e065 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ SPDX-License-Identifier: GPL-3.0-or-later ### Changed ### Removed ### Fixed +- `flagConstants`: fixed flagging of rolling ramps - `Flags`: add meta entry to imported flags - group operations were overwriting existing flags - `SaQC._construct` : was not working for inherit classes (used hardcoded `SaQC` to construct a new instance). diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index 974039edb..e0c6d5de0 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -59,7 +59,8 @@ class ConstantsMixin: min_periods : Minimum number of observations in window required to generate - a flag. This is to exclude underpopulated offset defined windows from flagging. + a flag. This is only to exclude underpopulated *offset* defined windows from + flagging. Must be an integer greater or equal `2`, because a single value would always be considered constant. Defaults to `2`. -- GitLab From abc0c887714dfa47a03f2a2004739d2055d89749 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 12:21:28 +0100 Subject: [PATCH 4/7] typos and stuff --- saqc/funcs/constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index e0c6d5de0..b8dc7badd 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -54,13 +54,13 @@ class ConstantsMixin: window : Size of the moving window. This determines the number of observations used for calculating the absolute change per window. - Each window will be of either of a fixed number of periods (integer defined window), + Each window will either contain a fixed number of periods (integer defined window), or will have a fixed temporal extension (offset defined window). min_periods : Minimum number of observations in window required to generate - a flag. This is only to exclude underpopulated *offset* defined windows from - flagging. + a flag. This can be used to exclude underpopulated *offset* defined windows from + flagging. (Integer defined windows will always contain exactly *window* samples). Must be an integer greater or equal `2`, because a single value would always be considered constant. Defaults to `2`. -- GitLab From eb19f9a97a5bc8efb43c2435bc5ca93650484112 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 12:21:56 +0100 Subject: [PATCH 5/7] black / isort --- saqc/funcs/constants.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index b8dc7badd..d53fee1c0 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -74,7 +74,9 @@ class ConstantsMixin: rolling = d.rolling(window=window, min_periods=min_periods) starting_points_mask = rolling.max() - rolling.min() <= thresh - starting_points_mask = removeRollingRamps(starting_points_mask, window=window, inplace=True) + starting_points_mask = removeRollingRamps( + starting_points_mask, window=window, inplace=True + ) # mimic forward rolling by roll over inverse [::-1] rolling = starting_points_mask[::-1].rolling( -- GitLab From 62e9a08c016357087d2bdb0cfd62ce8255c4fa60 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Wed, 21 Feb 2024 15:46:49 +0100 Subject: [PATCH 6/7] removed usage of inplace keyword --- saqc/funcs/constants.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index d53fee1c0..3e4afba3a 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -74,9 +74,7 @@ class ConstantsMixin: rolling = d.rolling(window=window, min_periods=min_periods) starting_points_mask = rolling.max() - rolling.min() <= thresh - starting_points_mask = removeRollingRamps( - starting_points_mask, window=window, inplace=True - ) + starting_points_mask = removeRollingRamps(starting_points_mask, window=window) # mimic forward rolling by roll over inverse [::-1] rolling = starting_points_mask[::-1].rolling( -- GitLab From cc4418cc377b7c9c980c82fce6878fe21b029b44 Mon Sep 17 00:00:00 2001 From: luenensc <peter.luenenschloss@ufz.de> Date: Tue, 18 Mar 2025 23:57:21 +0100 Subject: [PATCH 7/7] flagConstantsFix --- CHANGELOG.md | 1 + saqc/funcs/constants.py | 5 ++--- tests/core/test_flags.py | 4 ++-- tests/funcs/test_constants_detection.py | 20 ++++++++++++++++++++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2625096d8..0e90c4c52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ SPDX-License-Identifier: GPL-3.0-or-later ### Changed ### Removed ### Fixed +- `flagConstants`: fixed bug where last `min_periods` will never get flagged ### Deprecated ## [2.6.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.6.0) - 2024-04-15 diff --git a/saqc/funcs/constants.py b/saqc/funcs/constants.py index 3e4afba3a..2d57fba5f 100644 --- a/saqc/funcs/constants.py +++ b/saqc/funcs/constants.py @@ -77,9 +77,8 @@ class ConstantsMixin: starting_points_mask = removeRollingRamps(starting_points_mask, window=window) # mimic forward rolling by roll over inverse [::-1] - rolling = starting_points_mask[::-1].rolling( - window=window, min_periods=min_periods - ) + + rolling = starting_points_mask[::-1].rolling(window=window, min_periods=0) # mimic any() mask = (rolling.sum()[::-1] > 0) & d.notna() diff --git a/tests/core/test_flags.py b/tests/core/test_flags.py index 9e6765188..eb11470fb 100644 --- a/tests/core/test_flags.py +++ b/tests/core/test_flags.py @@ -206,7 +206,7 @@ def test_set_flags(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]] @pytest.mark.parametrize("data", testdata) def test_set_flags_with_mask( - data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]] + data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]], ): flags = Flags(data) @@ -253,7 +253,7 @@ def test_set_flags_with_mask( @pytest.mark.parametrize("data", testdata) def test_set_flags_with_index( - data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]] + data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]], ): flags = Flags(data) diff --git a/tests/funcs/test_constants_detection.py b/tests/funcs/test_constants_detection.py index a6ec79792..74cef249d 100644 --- a/tests/funcs/test_constants_detection.py +++ b/tests/funcs/test_constants_detection.py @@ -7,6 +7,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pandas as pd import pytest from saqc import BAD, UNFLAGGED, SaQC @@ -24,6 +25,15 @@ def data(): return constants_data +@pytest.fixture +def data_const_tail(): + constants_data = pd.DataFrame( + {"a": [1, 2, 3, 4, 5, 9, 9, 9, 9, 9]}, + index=pd.date_range("2000", freq="1h", periods=10), + ) + return constants_data + + def test_constants_flagBasic(data): field, *_ = data.columns flags = initFlagsLike(data) @@ -35,6 +45,16 @@ def test_constants_flagBasic(data): assert np.all(flagscol[25 + 1 :] == UNFLAGGED) +@pytest.mark.parametrize("window", [3, "3h", 5, "5h"]) +def test_constants_tail(data_const_tail, window): + field, *_ = data_const_tail.columns + qc = SaQC(data_const_tail) + qc = qc.flagConstants(field, thresh=1, window=window, flag=BAD) + flagscol = qc._flags[field] + assert np.all(flagscol[-5:] == BAD) + assert np.all(flagscol[:-5] == UNFLAGGED) + + def test_constants_flagVarianceBased(data): field, *_ = data.columns flags = initFlagsLike(data) -- GitLab