Skip to content
Snippets Groups Projects
Commit a5ab4192 authored by David Schäfer
Browse files

Merge branch 'develop' into 'flag-aggregation'

# Conflicts:
#   CHANGELOG.md
parents c91652f4 0671a6a7
No related branches found
No related tags found
1 merge request: !826 added option to change History.squeeze behavior
Pipeline #205152 passed with stages
in 5 minutes and 41 seconds
......@@ -9,7 +9,7 @@ matplotlib==3.8.3
numpy==1.26.4
outlier-utils==0.0.5
pyarrow==15.0.0
-pandas==2.1.4
+pandas==2.2.1
scikit-learn==1.4.1.post1
scipy==1.12.0
typing_extensions==4.5.0
......
......@@ -148,7 +148,8 @@ class MappingScheme(TranslationScheme):
out = DictOfSeries()
expected = pd.Index(trans_map.values())
for field in flags.columns:
-out[field] = flags[field].replace(trans_map)
+with pd.option_context("future.no_silent_downcasting", True):
+out[field] = flags[field].replace(trans_map).infer_objects()
diff = pd.Index(out[field]).difference(expected)
if not diff.empty:
raise ValueError(
......
......@@ -177,7 +177,24 @@ def periodicMask(
x[e:e] = True
return x
-freq = "1" + "mmmhhhdddMMMYYY"[len(season_start)]
+freq = (
+"1",
+"m",
+"m",
+"m",
+"h",
+"h",
+"h",
+"d",
+"d",
+"d",
+"M",
+"M",
+"M",
+"YE",
+"YE",
+"YE",
+)[len(season_start)]
out = mask.groupby(pd.Grouper(freq=freq)).transform(_selector)
if invert:
out = ~out
......
......@@ -52,7 +52,7 @@ def test_flagRange(data, field):
assert all(flagged == expected)
-def test_flagSeasonalRange(data, field):
+def test_selectTime(data, field):
data[field].iloc[::2] = 0
data[field].iloc[1::2] = 50
nyears = len(data[field].index.year.unique())
......
......@@ -33,7 +33,7 @@ def test_flagMad(spiky_data):
field, *_ = data.columns
flags = initFlagsLike(data)
qc = SaQC(data, flags).flagZScore(
-field, window="1H", method="modified", thresh=3.5, flag=BAD
+field, window="1h", method="modified", thresh=3.5, flag=BAD
)
flag_result = qc.flags[field]
test_sum = (flag_result.iloc[spiky_data[1]] == BAD).sum()
......
......@@ -104,13 +104,13 @@ def daterangeIndexes(draw, min_size=0, max_size=100):
max_date = pd.Timestamp("2099-12-31").to_pydatetime()
start = draw(datetimes(min_value=min_date, max_value=max_date))
periods = draw(integers(min_value=min_size, max_value=max_size))
-freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]))
+freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"]))
return pd.date_range(start, periods=periods, freq=freq)
@composite
def frequencyStrings(draw, _):
-freq = draw(sampled_from(["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]))
+freq = draw(sampled_from(["D", "h", "min", "s", "ms", "us", "ns"]))
mult = draw(integers(min_value=1, max_value=10))
value = f"{mult}{freq}"
return value
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment