From 3d55bdc592cb4ab1b27ddb48d6123d53f8acaeae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Sch=C3=A4fer?= <david.schaefer@ufz.de> Date: Mon, 28 Sep 2020 10:47:30 +0200 Subject: [PATCH] fixing numba type checking: on windows we need to specify the word size of integers to make numba happy --- saqc/funcs/spikes_detection.py | 14 +++++++------- saqc/lib/tools.py | 13 ++++++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/saqc/funcs/spikes_detection.py b/saqc/funcs/spikes_detection.py index e20a4960b..93a23c6d0 100644 --- a/saqc/funcs/spikes_detection.py +++ b/saqc/funcs/spikes_detection.py @@ -940,7 +940,7 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick to_roll = post_jumps.reindex(dataseries.index, method="bfill", tolerance=window, fill_value=False).dropna() # define spike testing function to roll with: - def spike_tester(chunk, thresh=thresh, tol=tolerance): + def spikeTester(chunk, thresh=thresh, tol=tolerance): # signum change!!! chunk_stair = (np.abs(chunk - chunk[-1]) < thresh)[::-1].cumsum() initial = np.searchsorted(chunk_stair, 2) @@ -957,21 +957,21 @@ def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kick engine=None if roll_mask.sum() > numba_kickin: engine = 'numba' - result = customRolling(to_roll, window, spike_tester, roll_mask, closed='both', engine=engine) + result = customRolling(to_roll, window, spikeTester, roll_mask, closed='both', engine=engine) + group_col = np.nancumsum(result) group_frame = pd.DataFrame({'group_col': group_col[:-1], 'diff_col': np.diff(group_col).astype(int)}, index=result.index[:-1]) - groups = group_frame.groupby('group_col') - def g_func(x): - r = np.array([False] * x.shape[0]) + def gFunc(x): + r = np.zeros(shape=x.shape[0], dtype=np.bool) r[-x[-1]:] = True return r - to_flag = groups['diff_col'].transform(g_func) - flagger = flagger.setFlags(field, to_flag, **kwargs) + to_flag = groups['diff_col'].transform(gFunc) + flagger = flagger.setFlags(field, to_flag[to_flag == True].index, **kwargs) return data, flagger diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 646ddec08..395e88e8b 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -460,26 +460,25 @@ def customRolling(to_roll, winsz, func, roll_mask, min_periods=1, center=False, """ i_roll = to_roll.copy() - i_roll.index = np.arange(to_roll.shape[0]) + i_roll.index = np.arange(to_roll.shape[0], dtype=np.int64) if isinstance(winsz, str): - winsz = int(pd.Timedelta(winsz).total_seconds()*10**9) + winsz = np.int64(pd.Timedelta(winsz).total_seconds()*10**9) indexer = FreqIndexer(window_size=winsz, win_points=roll_mask, - index_array=to_roll.index.to_numpy(int), + index_array=to_roll.index.to_numpy(np.int64), center=center, closed=closed) elif isinstance(winsz, int): indexer = PeriodsIndexer(window_size=winsz, - win_points=roll_mask, - center=center, - closed=closed) + win_points=roll_mask, + center=center, + closed=closed) i_roll = i_roll.rolling(indexer, min_periods=min_periods, center=center, closed=closed).apply(func, raw=raw, engine=engine) - return pd.Series(i_roll.values, index=to_roll.index) -- GitLab