flagJumps behaving weirdly
## Summary
Using the example data created in the Getting Started documentation, running `flagJumps` raises an `IndexError`. However, if `flagRange` is run first and `flagJumps` afterwards, it works as expected.
Side note: the documentation only requires `window` to be a string. From the wording, it is difficult to tell whether this is supposed to be a frequency string (e.g. `"2D"`) or an integer passed as a string.
## Reproducible Example
import numpy as np
import pandas as pd
from saqc import SaQC
### from docu
# we need some dummy data
values = np.array([12, 24, 36, 33, 89, 87, 45, 31, 18, 99])
dates = pd.date_range(start="2020-01-01", periods=len(values), freq="D")
data = pd.DataFrame({"a": values}, index=dates)
# let's insert some constant values ...
data.iloc[3:6] = values.mean()
# ... and an outlier
data.iloc[8] = 175
# initialize saqc
qc = SaQC(data=data, scheme="simple")
### new, not from docu
#fails
qc = qc.flagJumps(field="a",thresh=1, window="2D")
# Running Range first, then flagJumps - works
qc1 = SaQC(data=data, scheme="simple")
qc1 = qc1.flagRange("a", min=20, max=80)
qc1 = qc1.flagJumps(field="a",thresh=1, window="2D")
## What is the current bug behavior?
The call fails with `IndexError: index 7 is out of bounds for axis 0 with size 7`.
## What is the expected correct behavior?
`flagJumps` should execute normally without requiring a prior `flagRange` call.
## Stacktrace
<details>
<summary>Click to expand</summary>

```
In [15]: qc = qc.flagJumps(field="a",thresh=1, window="2D")
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In [15], line 1
----> 1 qc = qc.flagJumps(field="a",thresh=1, window="2D")
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/core/modules/breaks.py:44, in Breaks.flagJumps(self, field, thresh, window, min_periods, flag, **kwargs)
34 @doc(saqc.funcs.breaks.flagJumps.__doc__)
35 def flagJumps(
36 self,
(...)
42 **kwargs,
43 ) -> saqc.SaQC:
---> 44 return self._defer("flagJumps", locals())
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/core/modules/__init__.py:50, in FunctionsMixin._defer(self, fname, flocals)
48 flocals.pop("self", None)
49 fkwargs = flocals.pop("kwargs", {})
---> 50 return self._wrap(FUNC_MAP[fname])(**flocals, **fkwargs)
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/core/core.py:214, in SaQC._wrap.<locals>.inner(field, target, regex, flag, *args, **kwargs)
212 if not func.handles_target:
213 fkwargs["field"] = fkwargs.pop("target")
--> 214 out = out._callFunction(func, *args, **fkwargs)
216 return out
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/core/core.py:228, in SaQC._callFunction(self, function, field, *args, **kwargs)
220 def _callFunction(
221 self,
222 function: Callable,
(...)
225 **kwargs: Any,
226 ) -> SaQC:
--> 228 res = function(data=self._data, flags=self._flags, field=field, *args, **kwargs)
230 # keep consistence: if we modify data and flags inplace in a function,
231 # but data is the original and flags is a copy (as currently implemented),
232 # data and flags of the original saqc obj may change inconsistently.
233 self._data, self._flags = res
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/core/register.py:161, in FunctionWrapper.__call__(self, data, field, flags, *args, **kwargs)
158 self.stored_data = stored
160 args, kwargs = self._prepareArgs()
--> 161 data, flags = self.func(*args, **kwargs)
163 # find columns that need squeezing
164 columns = self._argnamesToColumns(self.decorator_squeeze, all_args)
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/funcs/breaks.py:208, in flagJumps(data, field, flags, thresh, window, min_periods, flag, **kwargs)
167 @flagging()
168 def flagJumps(
169 data: DictOfSeries,
(...)
176 **kwargs,
177 ) -> Tuple[DictOfSeries, Flags]:
178 """
179 Flag jumps and drops in data.
180
(...)
206 Flag to set.
207 """
--> 208 return _assignChangePointCluster(
209 data,
210 field,
211 flags,
212 stat_func=lambda x, y: np.abs(np.mean(x) - np.mean(y)),
213 thresh_func=lambda x, y: thresh,
214 window=window,
215 min_periods=min_periods,
216 set_flags=True,
217 model_by_resids=False,
218 assign_cluster=False,
219 flag=flag,
220 **kwargs,
221 )
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/funcs/changepoints.py:336, in _assignChangePointCluster(data, field, flags, stat_func, thresh_func, window, min_periods, closed, reduce_window, reduce_func, model_by_resids, set_flags, assign_cluster, flag, **kwargs)
331 roller = customRoller(detected, window=reduce_window, min_periods=1)
332 start, end = roller.window_indexer.get_window_bounds(
333 num_values=l, min_periods=1, closed="both", center=True
334 )
--> 336 detected = _reduceCPCluster(
337 stat_arr[result_arr], thresh_arr[result_arr], start, end, reduce_func, l
338 )
339 det_index = det_index[detected]
341 if assign_cluster:
File ~/.pyenv/versions/3.9.4/envs/test_saqc/lib/python3.9/site-packages/saqc/funcs/changepoints.py:390, in _reduceCPCluster(stat_arr, thresh_arr, start, end, obj_func, num_val)
388 pos = s + obj_func(x, y) + 1
389 out_arr[s:e] = False
--> 390 out_arr[pos] = True
392 return out_arr
IndexError: index 7 is out of bounds for axis 0 with size 7
```
</details>
## Possible fixes
(If you can, link to the line of code that might be responsible for the problem)