diff --git a/CHANGELOG.md b/CHANGELOG.md index 9296a9dd5c6de68d61665315ce9b758b8bbaf068..f24a24883edc2216cada528de09b22940d77b918 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,12 @@ SPDX-License-Identifier: GPL-3.0-or-later - Option to change the flagging scheme after initialization - `flagByClick`: manually assign flags using a graphical user interface - `SaQC`: support for selection, slicing and setting of items by use of subscription on SaQC objects (e.g. `qc[key]` and `qc[key] = value`). - Selection works with single keys, collections of keys and string slices (e.g. `qc["a":"f"]`). Values can be SaQC objects, pd.Series, + Selection works with single keys, collections of keys and string slices (e.g. `qc["a":"f"]`). Values can be SaQC objects, pd.Series, Iterable of Series and dict-like with series values. - `transferFlags` is a multivariate function - `plot`: added `yscope` keyword - `setFlags`: function to replace `flagManual` -- `flagUniLOF`: added defaultly applied correction to mitigate phenomenon of overflagging at relatively steep data value slopes. (parameter `slope_correct`). +- `flagUniLOF`: added defaultly applied correction to mitigate phenomenon of overflagging at relatively steep data value slopes. (parameter `slope_correct`). ### Changed ### Removed ### Fixed diff --git a/requirements.txt b/requirements.txt index c9553ab49916a6ba411ae6533e51b90a48285995..d79dee2b7ca59bfa5cef580592402cdb54a68ca6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ matplotlib==3.8.3 numpy==1.26.4 outlier-utils==0.0.5 pyarrow==15.0.0 -pandas==2.1.4 +pandas==2.2.1 scikit-learn==1.4.1.post1 scipy==1.12.0 typing_extensions==4.5.0 diff --git a/saqc/core/translation/basescheme.py b/saqc/core/translation/basescheme.py index c072907c8ddb477ccd8e112d23822d4414212192..66f9cb8db159faa7f4eccd46e78510d9e7a6a1ea 100644 --- a/saqc/core/translation/basescheme.py +++ b/saqc/core/translation/basescheme.py @@ -148,7 +148,8 @@ class MappingScheme(TranslationScheme): out = DictOfSeries() expected = pd.Index(trans_map.values()) for field in flags.columns: - out[field] = flags[field].replace(trans_map) + with pd.option_context("future.no_silent_downcasting", True): + out[field] = flags[field].replace(trans_map).infer_objects() diff = pd.Index(out[field]).difference(expected) if not diff.empty: raise ValueError( diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 4dbcf710813b7668b60a716609fb0183ea443c83..feb33af8e8c5e637cea386e5ec8a58b33e472a52 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -177,7 +177,24 @@ def periodicMask( x[e:e] = True return x - freq = "1" + "mmmhhhdddMMMYYY"[len(season_start)] + freq = ( + "1", + "m", + "m", + "m", + "h", + "h", + "h", + "d", + "d", + "d", + "M", + "M", + "M", + "YE", + "YE", + "YE", + )[len(season_start)] out = mask.groupby(pd.Grouper(freq=freq)).transform(_selector) if invert: out = ~out diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py index e567835680c85db4f5b1cedc6abbc227fa32ede6..d956e39b1a1fb6532b328bd93672966c8cc41c55 100644 --- a/tests/funcs/test_functions.py +++ b/tests/funcs/test_functions.py @@ -52,7 +52,7 @@ def test_flagRange(data, field): assert all(flagged == expected) -def test_flagSeasonalRange(data, field): +def test_selectTime(data, field): data[field].iloc[::2] = 0 data[field].iloc[1::2] = 50 nyears = len(data[field].index.year.unique()) diff --git a/tests/funcs/test_outlier_detection.py b/tests/funcs/test_outlier_detection.py index d23ef30b5d5ef0983cc49a043c4d06cfec9e26f6..f6941947beb31cbdf7a82f288b7debafb23a3d39 100644 --- a/tests/funcs/test_outlier_detection.py +++ b/tests/funcs/test_outlier_detection.py @@ -33,7 +33,7 @@ def test_flagMad(spiky_data): field, *_ = data.columns flags = initFlagsLike(data) qc = SaQC(data, flags).flagZScore( - field, window="1H", method="modified", thresh=3.5, flag=BAD + field, window="1h", method="modified", thresh=3.5, flag=BAD ) flag_result = qc.flags[field] test_sum = (flag_result.iloc[spiky_data[1]] == BAD).sum() diff --git a/tests/fuzzy/lib.py b/tests/fuzzy/lib.py index 3604bd472b9b4435ff3fb830e583c1d4aeedf4aa..a70a159f3ad1fede24e2c295ce74d6a672a8ffb4 100644 --- a/tests/fuzzy/lib.py +++ b/tests/fuzzy/lib.py @@ -104,13 +104,13 @@ def daterangeIndexes(draw, min_size=0, max_size=100): max_date = pd.Timestamp("2099-12-31").to_pydatetime() start = draw(datetimes(min_value=min_date, max_value=max_date)) periods = draw(integers(min_value=min_size, max_value=max_size)) - freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"])) + freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"])) return pd.date_range(start, periods=periods, freq=freq) @composite def frequencyStrings(draw, _): - freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"])) + freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"])) mult = draw(integers(min_value=1, max_value=10)) value = f"{mult}{freq}" return value