Skip to content
Snippets Groups Projects
Commit fa8eed68 authored by Peter Lünenschloß
Browse files

boosted and simplified constants detection

parent 20509db2
No related branches found
No related tags found
No related merge requests found
......@@ -64,7 +64,7 @@ def flagConstant(data, flags, field, flagger, eps,
@register("constants_varianceBased")
def flagConstants_VarianceBased(data, flags, field, flagger, plateau_window_min='12h', plateau_var_limit=0.0005,
var_total_nans=None, var_consec_nans=None, **kwargs):
var_total_nans=np.inf, var_consec_nans=np.inf, **kwargs):
"""Function flags plateaus/series of constant values. Any interval of values y(t),..y(t+n) is flagged, if:
......@@ -113,10 +113,10 @@ def flagConstants_VarianceBased(data, flags, field, flagger, plateau_window_min=
'data_rate': {'value': data_rate,
'tests': {'not nan': lambda x: x is not np.nan}},
'var_total_nans': {'value': var_total_nans,
'type': [int, type(None)],
'type': [int, float],
'range': [0, np.inf]},
'var_consec_nans': {'value': var_consec_nans,
'type': [int, type(None)],
'type': [int, float],
'range': [0, np.inf]}
},
kwargs['func_name'])
......@@ -128,23 +128,17 @@ def flagConstants_VarianceBased(data, flags, field, flagger, plateau_window_min=
min_periods = int(offset2periods(plateau_window_min, data_rate))
# identify minimal plateaus:
plateaus = dataseries.rolling(window=plateau_window_min).apply(lambda x: (var_qc(x, var_total_nans, var_consec_nans)
> plateau_var_limit) |
(x.size < min_periods), raw=False)
plateaus = (~plateaus.astype(bool))
plateaus = dataseries.rolling(window=plateau_window_min, min_periods=min_periods).apply(
lambda x: True if var_qc(x, var_total_nans, var_consec_nans) < plateau_var_limit else np.nan, raw=False)
# are there any candidates for being flagged as plateau-ish?
if plateaus.sum() == 0:
return data, flags
# nice reverse trick to cover total interval size
plateaus_reverse = pd.Series(np.flip(plateaus.values), index=plateaus.index)
reverse_check = plateaus_reverse.rolling(window=plateau_window_min).apply(
lambda x: True if True in x.values else False, raw=False).astype(bool)
plateaus.fillna(method='bfill', limit=min_periods, inplace=True)
# result:
plateaus = pd.Series(np.flip(reverse_check.values), index=plateaus.index)
plateaus = (plateaus[plateaus == 1.0]).index
if isinstance(flags, pd.Series):
flags.loc[plateaus] = flagger.setFlag(flags.loc[plateaus], **kwargs)
......
......@@ -19,7 +19,7 @@ TESTFLAGGERS = [
SimpleFlagger()]
@pytest.fixture(scope='module')
@pytest.fixture(scope='module')
def constants_data():
index = pd.date_range(start='1.1.2011 00:00:00', end='1.1.2011 03:00:00', freq='5min')
constants_series = pd.Series(np.linspace(-50, 50, index.size), index=index, name='constants_data')
......@@ -29,7 +29,7 @@ def constants_data():
return constants_series, flag_assertion
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
def test_flagConstants_VarianceBased(constants_data, flagger):
data = constants_data[0]
flags = flagger.initFlags(data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment