Skip to content
Snippets Groups Projects
Commit f92fba5d authored by David Schäfer
Browse files

data is reduced to the fields needed by a test

parent 39dd1ce0
No related branches found
No related tags found
3 merge requests: !193 Release 1.4, !188 Release 1.4, !82 Perf improvements
...@@ -162,7 +162,8 @@ class SaQCFunc(Func): ...@@ -162,7 +162,8 @@ class SaQCFunc(Func):
if field not in flagger.getFlags(): if field not in flagger.getFlags():
flagger = flagger.merge(flagger.initFlags(data=pd.Series(name=field))) flagger = flagger.merge(flagger.initFlags(data=pd.Series(name=field)))
data_in = self._maskData(data, flagger) columns_in = data.columns.intersection([field])
data_in = self._maskData(data.loc[:, columns_in], flagger)
data_result, flagger_result = self.func(data_in, field, flagger, *self.args, **self.kwargs) data_result, flagger_result = self.func(data_in, field, flagger, *self.args, **self.kwargs)
...@@ -189,20 +190,20 @@ class SaQCFunc(Func): ...@@ -189,20 +190,20 @@ class SaQCFunc(Func):
mask_old = flagger_old.isFlagged(flag=to_mask, comparator="==") mask_old = flagger_old.isFlagged(flag=to_mask, comparator="==")
mask_new = flagger_new.isFlagged(flag=to_mask, comparator="==") mask_new = flagger_new.isFlagged(flag=to_mask, comparator="==")
for col, left in data_new.indexes.iteritems(): for col, right in data_new.indexes.iteritems():
if col not in mask_old: if col not in mask_old:
continue continue
right = mask_old[col].index left = mask_old[col].index
col_data = data_new[col].values
# NOTE: ignore columns with changed indices (assumption: harmonization) # NOTE: ignore columns with changed indices (assumption: harmonization)
if left.equals(right): if left.equals(right):
# NOTE: Don't overwrite data, that was masked, but is not considered # NOTE: Don't overwrite data, that was masked, but is not considered
# flagged anymore and also respect newly set data on masked locations. # flagged anymore and also respect newly set data on masked locations.
mask = mask_old[col].values & mask_new[col].values & data_new[col].isna().values mask = mask_old[col].values & mask_new[col].values & data_new[col].isna().values
if np.any(mask): if np.any(mask):
col_data = data_new[col].values
col_data[mask] = data_old[col].values[mask] col_data[mask] = data_old[col].values[mask]
data_new[col] = col_data data_old[col] = col_data
return data_new return data_old
# NOTE: # NOTE:
......
...@@ -79,7 +79,7 @@ def test_assignVariable(flagger): ...@@ -79,7 +79,7 @@ def test_assignVariable(flagger):
pdata, pflagger = SaQC(flagger, data).flagAll(var1).flagAll(var2).getResult() pdata, pflagger = SaQC(flagger, data).flagAll(var1).flagAll(var2).getResult()
pflags = pflagger.getFlags() pflags = pflagger.getFlags()
assert (pflags.columns == [var1, var2]).all() assert (set(pflags.columns) == {var1, var2})
assert pflagger.isFlagged(var2).empty assert pflagger.isFlagged(var2).empty
...@@ -105,7 +105,6 @@ def test_masking(data, flagger): ...@@ -105,7 +105,6 @@ def test_masking(data, flagger):
test if flagged values are excluded during the preceding tests test if flagged values are excluded during the preceding tests
""" """
flagger = flagger.initFlags(data) flagger = flagger.initFlags(data)
flags = flagger.getFlags()
var1 = 'var1' var1 = 'var1'
mn = min(data[var1]) mn = min(data[var1])
mx = max(data[var1]) / 2 mx = max(data[var1]) / 2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment