From 64f848c84eceb730a2d00acf4f2ed2d1e8bfb741 Mon Sep 17 00:00:00 2001 From: David Schaefer <david.schaefer@ufz.de> Date: Fri, 14 Aug 2020 10:24:11 +0200 Subject: [PATCH] make the masking/unmasking considerably faster by moving the assignments to numpy arrays --- saqc/core/register.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/saqc/core/register.py b/saqc/core/register.py index 2863f1825..83c845e0a 100644 --- a/saqc/core/register.py +++ b/saqc/core/register.py @@ -171,13 +171,20 @@ class SaQCFunc(Func): return data_result, flagger_result def _maskData(self, data, flagger): + # TODO: this is heavily undertested to_mask = flagger.BAD if self.to_mask is None else self.to_mask mask = flagger.isFlagged(flag=to_mask, comparator='==') data = data.copy() - data[mask] = np.nan + for c in data.columns: + col_mask = mask[c].values + if np.any(col_mask): + col_data = data[c].values.astype(np.float64) + col_data[col_mask] = np.nan + data[c] = col_data return data def _unmaskData(self, data_old, flagger_old, data_new, flagger_new): + # TODO: this is heavily undertested to_mask = flagger_old.BAD if self.to_mask is None else self.to_mask mask_old = flagger_old.isFlagged(flag=to_mask, comparator="==") mask_new = flagger_new.isFlagged(flag=to_mask, comparator="==") @@ -190,8 +197,11 @@ class SaQCFunc(Func): if left.equals(right): # NOTE: Don't overwrite data, that was masked, but is not considered # flagged anymore and also respect newly set data on masked locations. - mask = mask_old[col] & mask_new[col] & data_new[col].isna() - data_new.loc[mask, col] = data_old.loc[mask, col] + mask = mask_old[col].values & mask_new[col].values & data_new[col].isna().values + if np.any(mask): + col_data = data_new[col].values + col_data[mask] = data_old[col].values[mask] + data_new[col] = col_data return data_new -- GitLab