Skip to content
Snippets Groups Projects
Commit 42759c50 authored by David Schäfer
Browse files

Merge branch 'spikeBasicFixes' into 'develop'

Fixes to the speed improved spikes.flagBasic function

See merge request !115
parents e92c86f4 3d55bdc5
No related branches found
No related tags found
3 merge requests: !193 Release 1.4, !188 Release 1.4, !115 Fixes to the speed improved spikes.flagBasic function
Pipeline #8389 passed with stages in 9 minutes and 54 seconds
......@@ -940,7 +940,7 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick
to_roll = post_jumps.reindex(dataseries.index, method="bfill", tolerance=window, fill_value=False).dropna()
# define spike testing function to roll with:
def spike_tester(chunk, thresh=thresh, tol=tolerance):
def spikeTester(chunk, thresh=thresh, tol=tolerance):
# signum change!!!
chunk_stair = (np.abs(chunk - chunk[-1]) < thresh)[::-1].cumsum()
initial = np.searchsorted(chunk_stair, 2)
......@@ -957,21 +957,21 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick
engine=None
if roll_mask.sum() > numba_kickin:
engine = 'numba'
result = customRolling(to_roll, window, spike_tester, roll_mask, closed='both', engine=engine)
result = customRolling(to_roll, window, spikeTester, roll_mask, closed='both', engine=engine)
group_col = np.nancumsum(result)
group_frame = pd.DataFrame({'group_col': group_col[:-1],
'diff_col': np.diff(group_col).astype(int)},
index=result.index[:-1])
groups = group_frame.groupby('group_col')
def g_func(x):
r = np.array([False] * x.shape[0])
def gFunc(x):
r = np.zeros(shape=x.shape[0], dtype=np.bool)
r[-x[-1]:] = True
return r
to_flag = groups['diff_col'].transform(g_func)
flagger = flagger.setFlags(field, to_flag, **kwargs)
to_flag = groups['diff_col'].transform(gFunc)
flagger = flagger.setFlags(field, to_flag[to_flag == True].index, **kwargs)
return data, flagger
......
......@@ -460,26 +460,25 @@ def customRolling(to_roll, winsz, func, roll_mask, min_periods=1, center=False,
"""
i_roll = to_roll.copy()
i_roll.index = np.arange(to_roll.shape[0])
i_roll.index = np.arange(to_roll.shape[0], dtype=np.int64)
if isinstance(winsz, str):
winsz = int(pd.Timedelta(winsz).total_seconds()*10**9)
winsz = np.int64(pd.Timedelta(winsz).total_seconds()*10**9)
indexer = FreqIndexer(window_size=winsz,
win_points=roll_mask,
index_array=to_roll.index.to_numpy(int),
index_array=to_roll.index.to_numpy(np.int64),
center=center,
closed=closed)
elif isinstance(winsz, int):
indexer = PeriodsIndexer(window_size=winsz,
win_points=roll_mask,
center=center,
closed=closed)
win_points=roll_mask,
center=center,
closed=closed)
i_roll = i_roll.rolling(indexer,
min_periods=min_periods,
center=center,
closed=closed).apply(func, raw=raw, engine=engine)
return pd.Series(i_roll.values, index=to_roll.index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment