diff --git a/saqc/funcs/spikes_detection.py b/saqc/funcs/spikes_detection.py index e20a4960b31e262f9c8e36157f3c4e6efb6339a6..93a23c6d0d55da53fe48221ec2057c408c8512a6 100644 --- a/saqc/funcs/spikes_detection.py +++ b/saqc/funcs/spikes_detection.py @@ -940,7 +940,7 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick to_roll = post_jumps.reindex(dataseries.index, method="bfill", tolerance=window, fill_value=False).dropna() # define spike testing function to roll with: - def spike_tester(chunk, thresh=thresh, tol=tolerance): + def spikeTester(chunk, thresh=thresh, tol=tolerance): # signum change!!! chunk_stair = (np.abs(chunk - chunk[-1]) < thresh)[::-1].cumsum() initial = np.searchsorted(chunk_stair, 2) @@ -957,21 +957,21 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick engine=None if roll_mask.sum() > numba_kickin: engine = 'numba' - result = customRolling(to_roll, window, spike_tester, roll_mask, closed='both', engine=engine) + result = customRolling(to_roll, window, spikeTester, roll_mask, closed='both', engine=engine) + group_col = np.nancumsum(result) group_frame = pd.DataFrame({'group_col': group_col[:-1], 'diff_col': np.diff(group_col).astype(int)}, index=result.index[:-1]) - groups = group_frame.groupby('group_col') - def g_func(x): - r = np.array([False] * x.shape[0]) + def gFunc(x): + r = np.zeros(shape=x.shape[0], dtype=np.bool) r[-x[-1]:] = True return r - to_flag = groups['diff_col'].transform(g_func) - flagger = flagger.setFlags(field, to_flag, **kwargs) + to_flag = groups['diff_col'].transform(gFunc) + flagger = flagger.setFlags(field, to_flag[to_flag == True].index, **kwargs) return data, flagger diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 646ddec08abf2b0a899cf293b2f5628a66d4de1a..395e88e8b03e00af4c8b7c9f2dc5cbc102faadd5 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -460,26 +460,25 @@ def customRolling(to_roll, winsz, func, roll_mask, min_periods=1, center=False, """ i_roll = to_roll.copy() - i_roll.index = np.arange(to_roll.shape[0]) + i_roll.index = np.arange(to_roll.shape[0], dtype=np.int64) if isinstance(winsz, str): - winsz = int(pd.Timedelta(winsz).total_seconds()*10**9) + winsz = np.int64(pd.Timedelta(winsz).total_seconds()*10**9) indexer = FreqIndexer(window_size=winsz, win_points=roll_mask, - index_array=to_roll.index.to_numpy(int), + index_array=to_roll.index.to_numpy(np.int64), center=center, closed=closed) elif isinstance(winsz, int): indexer = PeriodsIndexer(window_size=winsz, - win_points=roll_mask, - center=center, - closed=closed) + win_points=roll_mask, + center=center, + closed=closed) i_roll = i_roll.rolling(indexer, min_periods=min_periods, center=center, closed=closed).apply(func, raw=raw, engine=engine) - return pd.Series(i_roll.values, index=to_roll.index)