Skip to content
Snippets Groups Projects
Commit 40cfc53f authored by Bert Palm's avatar Bert Palm 🎇
Browse files

rm dead code (patternByWavelet) that sneaked in again. Added more tests for flagPattern_dtw

parent c0a61154
No related branches found
No related tags found
1 merge request!462More tests
Pipeline #94434 failed with stage
in 2 minutes and 14 seconds
...@@ -14,108 +14,6 @@ from saqc.core.register import flagging ...@@ -14,108 +14,6 @@ from saqc.core.register import flagging
from saqc.lib.tools import customRoller from saqc.lib.tools import customRoller
# todo should we mask `reference` even if the func fail if reference has NaNs
@flagging()
def flagPatternByWavelet(
    data,
    field,
    flags,
    reference,
    widths=(1, 2, 4, 8),
    waveform="mexh",
    flag=BAD,
    **kwargs,
):
    """
    Pattern recognition via wavelets.

    The steps are:

    1. work on chunks returned by a moving window
    2. each chunk is compared to the given pattern, using the wavelet algorithm as
       presented in [1]
    3. if the compared chunk is equal to the given pattern it gets flagged

    Parameters
    ----------
    data : dios.DictOfSeries
        A dictionary of pandas.Series, holding all the data.
    field : str
        The fieldname of the data column, you want to correct.
    flags : saqc.Flags
        The flags belonging to ``data``.
    reference : str
        The fieldname in ``data`` which holds the pattern.
    widths : tuple of int
        Widths for wavelet decomposition. [1] recommends a dyadic scale.
        Default: (1,2,4,8)
    waveform : str
        Wavelet to be used for decomposition. Default: 'mexh'. See [2] for a list.
    flag : optional
        The value written to ``flags`` at every position covered by a matching
        window. Default: ``BAD``.
    **kwargs
        Accepted for interface compatibility; not used in the function body.

    Returns
    -------
    data : dios.DictOfSeries
        A dictionary of pandas.Series, holding all the data. The function body
        returns it as passed in, without modifying any values.
    flags : saqc.Flags
        The flags belonging to ``data``.

    Raises
    ------
    ValueError
        If ``widths`` is empty or the reference pattern holds no values.

    References
    ----------
    The underlying pattern recognition algorithm using wavelets is documented here:
    [1] Maharaj, E.A. (2002): Pattern Recognition of Time Series using Wavelets. In: Härdle W., Rönz B. (eds) Compstat. Physica, Heidelberg, 978-3-7908-1517-7.

    The documentation of the python package used for the wavelet decomposition can be found here:
    [2] https://pywavelets.readthedocs.io/en/latest/ref/cwt.html#continuous-wavelet-families
    """
    dat = data[field]
    ref = data[reference].to_numpy()

    len_width = len(widths)
    sz = len(ref)

    # Validate explicitly instead of with `assert`: asserts are stripped under
    # `python -O`, which would leave `pval` unbound in `pvalue` for empty
    # `widths`. Checking before the transform also avoids running the wavelet
    # decomposition on input that is going to be rejected anyway.
    if not len_width:
        raise ValueError("'widths' must contain at least one width")
    if not sz:
        raise ValueError(f"the reference pattern {reference!r} must not be empty")

    cwtmat_ref, _ = pywt.cwt(ref, widths, waveform)
    wavepower_ref = np.power(cwtmat_ref, 2)

    def func(x, y):
        # test statistic: ratio of the summed wavelet powers
        return x.sum() / y.sum()

    def pvalue(chunk):
        cwtmat_chunk, _ = pywt.cwt(chunk, widths, waveform)
        wavepower_chunk = np.power(cwtmat_chunk, 2)

        # Permutation test on Powersum of matrix
        # NOTE(review): `pval` is overwritten on every iteration, so only the
        # result for the last width is returned -- confirm this is intended.
        for x, y in zip(wavepower_ref, wavepower_chunk):
            pval = permutation_test(
                x, y, method="approximate", num_rounds=200, func=func, seed=0
            )
            pval = min(pval, 1 - pval)
        # `widths` is non-empty (validated above), so the loop ran at least once.
        return pval

    rolling = customRoller(dat, window=sz, min_periods=sz, forward=True)
    pvals = rolling.apply(pvalue, raw=True)
    markers = pvals > 0.01  # nans -> False

    # the markers are set on the left edge of the window. thus we must propagate
    # `sz`-many True's to the right of every marker.
    rolling = customRoller(markers, window=sz, min_periods=sz)
    mask = rolling.sum().fillna(0).astype(bool)

    flags[mask, field] = flag
    return data, flags
def calculateDistanceByDTW( def calculateDistanceByDTW(
data: pd.Series, reference: pd.Series, forward=True, normalize=True data: pd.Series, reference: pd.Series, forward=True, normalize=True
): ):
......
...@@ -14,6 +14,7 @@ from saqc.constants import BAD, UNFLAGGED ...@@ -14,6 +14,7 @@ from saqc.constants import BAD, UNFLAGGED
from saqc.core import initFlagsLike from saqc.core import initFlagsLike
from saqc.funcs.pattern import flagPatternByDTW from saqc.funcs.pattern import flagPatternByDTW
from tests.common import initData from tests.common import initData
from pandas.testing import assert_series_equal
@pytest.fixture @pytest.fixture
...@@ -26,7 +27,9 @@ def field(data): ...@@ -26,7 +27,9 @@ def field(data):
return data.columns[0] return data.columns[0]
def test_flagPattern_dtw(): @pytest.mark.parametrize('plot', [True,False])
@pytest.mark.parametrize('normalize', [True,False])
def test_flagPattern_dtw(plot, normalize):
data = pd.Series(0, index=pd.date_range(start="2000", end="2001", freq="1d")) data = pd.Series(0, index=pd.date_range(start="2000", end="2001", freq="1d"))
data.iloc[10:18] = [0, 5, 6, 7, 6, 8, 5, 0] data.iloc[10:18] = [0, 5, 6, 7, 6, 8, 5, 0]
pattern = data.iloc[10:18] pattern = data.iloc[10:18]
...@@ -34,9 +37,17 @@ def test_flagPattern_dtw(): ...@@ -34,9 +37,17 @@ def test_flagPattern_dtw():
data = dios.DictOfSeries(dict(data=data, pattern_data=pattern)) data = dios.DictOfSeries(dict(data=data, pattern_data=pattern))
flags = initFlagsLike(data, name="data") flags = initFlagsLike(data, name="data")
data, flags = flagPatternByDTW( data, flags = flagPatternByDTW(
data, "data", flags, reference="pattern_data", flag=BAD data,
"data",
flags,
reference="pattern_data",
plot=plot,
normalize=normalize,
flag=BAD,
) )
assert all(flags["data"].iloc[10:18] == BAD) assert all(flags["data"].iloc[10:18] == BAD)
assert all(flags["data"].iloc[:9] == UNFLAGGED) assert all(flags["data"].iloc[:9] == UNFLAGGED)
assert all(flags["data"].iloc[18:] == UNFLAGGED) assert all(flags["data"].iloc[18:] == UNFLAGGED)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment