From 413c77e603e7e1d23d277efc9ee7cbeb5ca57945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Sch=C3=A4fer?= <david.schaefer@ufz.de> Date: Wed, 20 Mar 2024 17:31:13 +0100 Subject: [PATCH] Bump pandas from 2.1.4 to 2.2.1 --- CHANGELOG.md | 4 ++-- requirements.txt | 2 +- saqc/core/translation/basescheme.py | 3 ++- saqc/lib/tools.py | 19 ++++++++++++++++++- tests/funcs/test_functions.py | 2 +- tests/funcs/test_outlier_detection.py | 2 +- tests/fuzzy/lib.py | 4 ++-- 7 files changed, 27 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9296a9dd5..f24a24883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,12 @@ SPDX-License-Identifier: GPL-3.0-or-later - Option to change the flagging scheme after initialization - `flagByClick`: manually assign flags using a graphical user interface - `SaQC`: support for selection, slicing and setting of items by use of subscription on SaQC objects (e.g. `qc[key]` and `qc[key] = value`). - Selection works with single keys, collections of keys and string slices (e.g. `qc["a":"f"]`). Values can be SaQC objects, pd.Series, + Selection works with single keys, collections of keys and string slices (e.g. `qc["a":"f"]`). Values can be SaQC objects, pd.Series, Iterable of Series and dict-like with series values. - `transferFlags` is a multivariate function - `plot`: added `yscope` keyword - `setFlags`: function to replace `flagManual` -- `flagUniLOF`: added defaultly applied correction to mitigate phenomenon of overflagging at relatively steep data value slopes. (parameter `slope_correct`). +- `flagUniLOF`: added defaultly applied correction to mitigate phenomenon of overflagging at relatively steep data value slopes. (parameter `slope_correct`). ### Changed ### Removed ### Fixed diff --git a/requirements.txt b/requirements.txt index c9553ab49..d79dee2b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ matplotlib==3.8.3 numpy==1.26.4 outlier-utils==0.0.5 pyarrow==15.0.0 -pandas==2.1.4 +pandas==2.2.1 scikit-learn==1.4.1.post1 scipy==1.12.0 typing_extensions==4.5.0 diff --git a/saqc/core/translation/basescheme.py b/saqc/core/translation/basescheme.py index c072907c8..66f9cb8db 100644 --- a/saqc/core/translation/basescheme.py +++ b/saqc/core/translation/basescheme.py @@ -148,7 +148,8 @@ class MappingScheme(TranslationScheme): out = DictOfSeries() expected = pd.Index(trans_map.values()) for field in flags.columns: - out[field] = flags[field].replace(trans_map) + with pd.option_context("future.no_silent_downcasting", True): + out[field] = flags[field].replace(trans_map).infer_objects() diff = pd.Index(out[field]).difference(expected) if not diff.empty: raise ValueError( diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 4dbcf7108..feb33af8e 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -177,7 +177,24 @@ def periodicMask( x[e:e] = True return x - freq = "1" + "mmmhhhdddMMMYYY"[len(season_start)] + freq = ( + "1", + "m", + "m", + "m", + "h", + "h", + "h", + "d", + "d", + "d", + "M", + "M", + "M", + "YE", + "YE", + "YE", + )[len(season_start)] out = mask.groupby(pd.Grouper(freq=freq)).transform(_selector) if invert: out = ~out diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py index e56783568..d956e39b1 100644 --- a/tests/funcs/test_functions.py +++ b/tests/funcs/test_functions.py @@ -52,7 +52,7 @@ def test_flagRange(data, field): assert all(flagged == expected) -def test_flagSeasonalRange(data, field): +def test_selectTime(data, field): data[field].iloc[::2] = 0 data[field].iloc[1::2] = 50 nyears = len(data[field].index.year.unique()) diff --git a/tests/funcs/test_outlier_detection.py b/tests/funcs/test_outlier_detection.py index d23ef30b5..f6941947b 100644 --- a/tests/funcs/test_outlier_detection.py +++ b/tests/funcs/test_outlier_detection.py @@ -33,7 +33,7 @@ def test_flagMad(spiky_data): field, *_ = data.columns flags = initFlagsLike(data) qc = SaQC(data, flags).flagZScore( - field, window="1H", method="modified", thresh=3.5, flag=BAD + field, window="1h", method="modified", thresh=3.5, flag=BAD ) flag_result = qc.flags[field] test_sum = (flag_result.iloc[spiky_data[1]] == BAD).sum() diff --git a/tests/fuzzy/lib.py b/tests/fuzzy/lib.py index 3604bd472..a70a159f3 100644 --- a/tests/fuzzy/lib.py +++ b/tests/fuzzy/lib.py @@ -104,13 +104,13 @@ def daterangeIndexes(draw, min_size=0, max_size=100): max_date = pd.Timestamp("2099-12-31").to_pydatetime() start = draw(datetimes(min_value=min_date, max_value=max_date)) periods = draw(integers(min_value=min_size, max_value=max_size)) - freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"])) + freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"])) return pd.date_range(start, periods=periods, freq=freq) @composite def frequencyStrings(draw, _): - freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"])) + freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"])) mult = draw(integers(min_value=1, max_value=10)) value = f"{mult}{freq}" return value -- GitLab