Skip to content
Snippets Groups Projects
Commit 6050609f authored by David Schäfer
Browse files

dmpflagger is passing the flagger tests

parent 0472dead
No related branches found
No related tags found
No related merge requests found
...@@ -72,7 +72,8 @@ class BaseFlagger(FlaggerTemplate): ...@@ -72,7 +72,8 @@ class BaseFlagger(FlaggerTemplate):
field = field or slice(None) field = field or slice(None)
flags = self._flags.copy() flags = self._flags.copy()
mask = self._locator2Mask(field, loc, iloc) mask = self._locator2Mask(field, loc, iloc)
return flags[field][mask] # return flags[field][mask]
return self._assureDtype(flags.loc[mask, field])
def _locator2Mask(self, field=None, loc=None, iloc=None): def _locator2Mask(self, field=None, loc=None, iloc=None):
# get a single locator # get a single locator
...@@ -98,7 +99,6 @@ class BaseFlagger(FlaggerTemplate): ...@@ -98,7 +99,6 @@ class BaseFlagger(FlaggerTemplate):
data=flag, index=this.index, data=flag, index=this.index,
name=field, dtype=self.categories) name=field, dtype=self.categories)
def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, **kwargs): def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, **kwargs):
flag = self.BAD if flag is None else flag flag = self.BAD if flag is None else flag
...@@ -142,19 +142,13 @@ class BaseFlagger(FlaggerTemplate): ...@@ -142,19 +142,13 @@ class BaseFlagger(FlaggerTemplate):
return flag return flag
def _assureDtype(self, flags, field=None): def _assureDtype(self, flags):
# in: df/ser, out: df/ser, affect only the minimal set of columns
if isinstance(flags, pd.Series): if isinstance(flags, pd.Series):
return flags.astype(self.categories) flags = flags.astype(self.categories)
if field is not None:
flags[field] = self._assureDtype(flags[field])
return flags return flags
if field is None: for c in flags.columns:
for c in flags: flags[c] = flags[c].astype(self.categories)
flags[c] = self._assureDtype(flags[c])
return flags return flags
def _isSelfCategoricalType(self, f) -> bool: def _isSelfCategoricalType(self, f) -> bool:
......
...@@ -36,6 +36,8 @@ class DmpFlagger(BaseFlagger): ...@@ -36,6 +36,8 @@ class DmpFlagger(BaseFlagger):
shell=True, check=False, stdout=subprocess.PIPE).stdout shell=True, check=False, stdout=subprocess.PIPE).stdout
self.project_version = version.decode().strip() self.project_version = version.decode().strip()
self.signature = ("flag", "comment", "cause", "force") self.signature = ("flag", "comment", "cause", "force")
self._flags = None
def initFlags(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame: def initFlags(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
check_isdf(data, 'data', allow_multiindex=False) check_isdf(data, 'data', allow_multiindex=False)
...@@ -43,41 +45,35 @@ class DmpFlagger(BaseFlagger): ...@@ -43,41 +45,35 @@ class DmpFlagger(BaseFlagger):
[data.columns, self.flags_fields], [data.columns, self.flags_fields],
names=[ColumnLevels.VARIABLES, ColumnLevels.FLAGS]) names=[ColumnLevels.VARIABLES, ColumnLevels.FLAGS])
flags = pd.DataFrame(data=self.UNFLAGGED, columns=colindex, index=data.index) flags = pd.DataFrame(data=self.UNFLAGGED, columns=colindex, index=data.index)
return self._assureDtype(flags) self._flags = self._assureDtype(flags)
return self
def setFlags(self, flags, field, loc=None, iloc=None, flag=None, force=False, comment='', cause='', **kwargs):
comment = json.dumps(dict(comment=comment, commit=self.project_version, test=kwargs.get("func_name", ""))) def _assureDtype(self, flags):
# call is redirected to self._writeFlags() flags_only = flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1)
return super().setFlags(flags, field, loc, iloc, flag, force, comment=comment, cause=cause) checked = super()._assureDtype(flags_only)
for col in checked.columns:
def clearFlags(self, flags, field, loc=None, iloc=None, **kwargs): flags.loc[:, (col, FlagFields.FLAG)] = checked[col]
# call is redirected to self._writeFlags()
kwargs.pop('cause', None), kwargs.pop('comment', None)
flags = super().clearFlags(flags, field, loc=loc, iloc=iloc,
cause=self.UNFLAGGED, comment=self.UNFLAGGED, **kwargs)
return flags return flags
def _writeFlags(self, flags, rowindex, field, flag, cause=None, comment=None, **kwargs): def getFlags(self, field=None, loc=None, iloc=None, **kwargs):
assert comment is not None and cause is not None field = field or slice(None)
flags.loc[rowindex, field] = flag, cause, comment mask = self._locator2Mask(field, loc, iloc)
return flags flags = self._flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1).copy()
return super()._assureDtype(flags.loc[mask, field])
def _reduceColumns(self, flags, field=None, loc=None, iloc=None, **kwargs): def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, comment='', cause='', **kwargs):
flags = flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1)
return flags
def _checkFlags(self, flags, **kwargs): flag = self.BAD if flag is None else flag
check_isdfmi(flags, argname='flags')
return flags
def _assureDtype(self, flags, field=None, **kwargs): comment = json.dumps({"comment": comment,
if isinstance(flags, pd.DataFrame) and isinstance(flags.columns, pd.MultiIndex): "commit": self.project_version,
if field is None: "test": kwargs.get("func_name", "")})
cols = [c for c in flags.columns if FlagFields.FLAG in c]
else: this = self.getFlags(field=field)
cols = [(field, FlagFields.FLAG)] other = self._broadcastFlags(field=field, flag=flag)
for c in cols: mask = self._locator2Mask(field, loc, iloc)
flags[c] = super()._assureDtype(flags[c]) if not force:
else: mask &= (this < other).values
flags = super()._assureDtype(flags, field)
return flags self._flags.loc[mask, field] = other[mask], cause, comment
return self
...@@ -41,8 +41,8 @@ DATASETS = [ ...@@ -41,8 +41,8 @@ DATASETS = [
] ]
TESTFLAGGERS = [ TESTFLAGGERS = [
# BaseFlagger(['NIL', 'GOOD', 'BAD']), BaseFlagger(['NIL', 'GOOD', 'BAD']),
# DmpFlagger(), DmpFlagger(),
SimpleFlagger() SimpleFlagger()
] ]
...@@ -76,12 +76,6 @@ def test_getFlags(data, flagger): ...@@ -76,12 +76,6 @@ def test_getFlags(data, flagger):
assert flags1.shape[0] == data.shape[0] assert flags1.shape[0] == data.shape[0]
assert flags1.name in data.columns assert flags1.name in data.columns
# all the same
# NOTE: doesn't make sense here
# flags2 = flagger.getFlags(flags[[field]]).squeeze()
# assert (flags0[field] == flags1).all()
# assert (flags0[field] == flags2).all()
@pytest.mark.parametrize('data', DATASETS) @pytest.mark.parametrize('data', DATASETS)
@pytest.mark.parametrize('flagger', TESTFLAGGERS) @pytest.mark.parametrize('flagger', TESTFLAGGERS)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment