Skip to content
Snippets Groups Projects
Commit a76297d2 authored by David Schäfer's avatar David Schäfer
Browse files

fixed the constants test

parent ea65d16f
No related branches found
No related tags found
No related merge requests found
......@@ -314,14 +314,13 @@ doi:10.2136/vzj2012.0097.
### constant
```
constant(eps, length, thmin=None)
constant(window, eps=0)
```
| parameter | data type | default value | description |
| ------ | ------ | ------ | ---- |
| eps | | | |
| length | | | |
| thmin | | `None` | |
| parameter | data type | default value | description |
| ------ | ------ | ------ | ---- |
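| window | [offset string](docs/ParameterDescriptions.md#offset-strings)/integer | | Minimum time span a run of constant values has to cover in order to be flagged |
| eps | float | 0 | Maximum absolute difference between consecutive values for them to be considered constant |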
### constants_varianceBased
......
......@@ -16,33 +16,20 @@ from saqc.lib.tools import (
# todo: maybe generalize flag_constant to work on non harmonized data as well.
# NOTE(review): this span is a rendered diff hunk, so lines of the previous and
# of the new implementation appear interleaved and without indentation.
# Presumably the `eps, window` signature and the diff/groupby-based body are the
# new version, and the `length`/`thmin` sliding-window code is the removed one.
@register("constant")
def flagConstant(data, field, flagger, eps, length, thmin=None, **kwargs):
datacol = data[field]
# NOTE(review): the function documentation changed in the same commit lists
# `constant(window, eps=0)`, but `eps` has no default value here and precedes
# `window` - confirm which of the two is intended.
def flagConstant(data, field, flagger, eps, window, **kwargs):
length = (
(pd.to_timedelta(length) - data.index.freq).to_timedelta64().astype(np.int64)
)
values = datacol.mask((datacol < thmin) | datacol.isnull()).values.astype(np.int64)
dates = datacol.index.values.astype(np.int64)
mask = np.isfinite(values)
for start_idx, end_idx in slidingWindowIndices(datacol.index, length):
mask_chunk = mask[start_idx:end_idx]
values_chunk = values[start_idx:end_idx][mask_chunk]
dates_chunk = dates[start_idx:end_idx][mask_chunk]
# Convert the window specification into a pandas offset object.
window = pd.tseries.frequencies.to_offset(window)
# Keep a group unchanged only if all of its values are True and its index
# spans at least `window`; otherwise return the group with every value False.
def _func(ts):
if np.all(ts) and (ts.index[-1] - ts.index[0]) >= window:
return ts
return ts & False
# we might have removed dates from the start/end of the
# chunk resulting in a period shorter than 'length'
# print (start_idx, end_idx)
if valueRange(dates_chunk) < length:
continue
if valueRange(values_chunk) < eps:
flagger = flagger.setFlags(field, loc=slice(start_idx, end_idx), **kwargs)
# True where the absolute change relative to the previous value is at most eps.
diffs = data[field].diff().abs() <= eps
# NOTE(review): `groupby(diffs)` groups by the boolean value itself, not by
# consecutive runs, so all True entries of the series end up in one group and
# the span check in `_func` covers the first to the last True entry - confirm
# that separate short constant stretches are not merged and flagged together.
mask = diffs.groupby(diffs).apply(_func)
# NOTE: the first value of a constant period is missed by the groupby
mask[np.where(mask)[0] - 1] = True
data[field] = datacol
# Presumably sets the flags of `field` wherever `mask` is True - verify against
# the flagger implementation.
flagger = flagger.setFlags(field, mask, **kwargs)
return data, flagger
......
......@@ -19,6 +19,19 @@ def data():
return constants_data
# Check that flagConstant marks an artificially constant stretch of the fixture
# data as BAD, once for every flagger in TESTFLAGGER.
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_flagConstants(data, flagger):
# Overwrite rows 5..24 with the single value 200 to create a constant run.
data.iloc[5:25] = 200
# Positions of the rows that were made constant above.
expected = np.arange(5, 25)
# Run the check on the first column of the fixture frame.
field, *_ = data.columns
flagger = flagger.initFlags(data)
data, flagger_result = flagConstant(
data, field, flagger, window="15Min", eps=.1,
)
flags = flagger_result.getFlags(field)
# Every row of the constant run is expected to carry the flagger's BAD flag.
assert np.all(flags[expected] == flagger.BAD)
@pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_flagConstants_varianceBased(data, flagger):
data.iloc[5:25] = 200
......@@ -28,16 +41,7 @@ def test_flagConstants_varianceBased(data, flagger):
data, flagger_result1 = flagConstant_varianceBased(
data, field, flagger, plateau_window_min="1h"
)
#data, flagger_result2 = flagConstant(
# data, field, flagger, eps=0.1, length='1h'
#)
flag_result1 = flagger_result1.getFlags(field)
test_sum = (flag_result1[expected] == flagger.BAD).sum()
assert test_sum == len(expected)
#flag_result2 = flagger_result2.getFlags(field)
#test_sum = (flag_result2[expected] == flagger.BAD).sum()
#assert test_sum == len(expected)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment