Skip to content
Snippets Groups Projects
Commit 6050609f authored by David Schäfer
Browse files

dmpflagger is passing the flagger tests

parent 0472dead
No related branches found
No related tags found
No related merge requests found
......@@ -72,7 +72,8 @@ class BaseFlagger(FlaggerTemplate):
field = field or slice(None)
flags = self._flags.copy()
mask = self._locator2Mask(field, loc, iloc)
return flags[field][mask]
# return flags[field][mask]
return self._assureDtype(flags.loc[mask, field])
def _locator2Mask(self, field=None, loc=None, iloc=None):
# get a single locator
......@@ -98,7 +99,6 @@ class BaseFlagger(FlaggerTemplate):
data=flag, index=this.index,
name=field, dtype=self.categories)
def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, **kwargs):
flag = self.BAD if flag is None else flag
......@@ -142,19 +142,13 @@ class BaseFlagger(FlaggerTemplate):
return flag
def _assureDtype(self, flags, field=None):
# in: df/ser, out: df/ser, affect only the minimal set of columns
def _assureDtype(self, flags):
if isinstance(flags, pd.Series):
return flags.astype(self.categories)
if field is not None:
flags[field] = self._assureDtype(flags[field])
flags = flags.astype(self.categories)
return flags
if field is None:
for c in flags:
flags[c] = self._assureDtype(flags[c])
for c in flags.columns:
flags[c] = flags[c].astype(self.categories)
return flags
def _isSelfCategoricalType(self, f) -> bool:
......
......@@ -36,6 +36,8 @@ class DmpFlagger(BaseFlagger):
shell=True, check=False, stdout=subprocess.PIPE).stdout
self.project_version = version.decode().strip()
self.signature = ("flag", "comment", "cause", "force")
self._flags = None
def initFlags(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
check_isdf(data, 'data', allow_multiindex=False)
......@@ -43,41 +45,35 @@ class DmpFlagger(BaseFlagger):
[data.columns, self.flags_fields],
names=[ColumnLevels.VARIABLES, ColumnLevels.FLAGS])
flags = pd.DataFrame(data=self.UNFLAGGED, columns=colindex, index=data.index)
return self._assureDtype(flags)
def setFlags(self, flags, field, loc=None, iloc=None, flag=None, force=False, comment='', cause='', **kwargs):
comment = json.dumps(dict(comment=comment, commit=self.project_version, test=kwargs.get("func_name", "")))
# call is redirected to self._writeFlags()
return super().setFlags(flags, field, loc, iloc, flag, force, comment=comment, cause=cause)
def clearFlags(self, flags, field, loc=None, iloc=None, **kwargs):
# call is redirected to self._writeFlags()
kwargs.pop('cause', None), kwargs.pop('comment', None)
flags = super().clearFlags(flags, field, loc=loc, iloc=iloc,
cause=self.UNFLAGGED, comment=self.UNFLAGGED, **kwargs)
self._flags = self._assureDtype(flags)
return self
def _assureDtype(self, flags):
flags_only = flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1)
checked = super()._assureDtype(flags_only)
for col in checked.columns:
flags.loc[:, (col, FlagFields.FLAG)] = checked[col]
return flags
def _writeFlags(self, flags, rowindex, field, flag, cause=None, comment=None, **kwargs):
assert comment is not None and cause is not None
flags.loc[rowindex, field] = flag, cause, comment
return flags
def getFlags(self, field=None, loc=None, iloc=None, **kwargs):
field = field or slice(None)
mask = self._locator2Mask(field, loc, iloc)
flags = self._flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1).copy()
return super()._assureDtype(flags.loc[mask, field])
def _reduceColumns(self, flags, field=None, loc=None, iloc=None, **kwargs):
flags = flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1)
return flags
def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, comment='', cause='', **kwargs):
def _checkFlags(self, flags, **kwargs):
check_isdfmi(flags, argname='flags')
return flags
flag = self.BAD if flag is None else flag
def _assureDtype(self, flags, field=None, **kwargs):
if isinstance(flags, pd.DataFrame) and isinstance(flags.columns, pd.MultiIndex):
if field is None:
cols = [c for c in flags.columns if FlagFields.FLAG in c]
else:
cols = [(field, FlagFields.FLAG)]
for c in cols:
flags[c] = super()._assureDtype(flags[c])
else:
flags = super()._assureDtype(flags, field)
return flags
comment = json.dumps({"comment": comment,
"commit": self.project_version,
"test": kwargs.get("func_name", "")})
this = self.getFlags(field=field)
other = self._broadcastFlags(field=field, flag=flag)
mask = self._locator2Mask(field, loc, iloc)
if not force:
mask &= (this < other).values
self._flags.loc[mask, field] = other[mask], cause, comment
return self
......@@ -41,8 +41,8 @@ DATASETS = [
]
# Flagger instances supplied to the tests via
# @pytest.mark.parametrize('flagger', TESTFLAGGERS).
# NOTE(review): the two commented-out entries below duplicate the active
# entries that follow them; they look like the removed side of this diff
# rather than intentionally disabled code -- confirm and drop if so.
TESTFLAGGERS = [
# BaseFlagger(['NIL', 'GOOD', 'BAD']),
# DmpFlagger(),
BaseFlagger(['NIL', 'GOOD', 'BAD']),
DmpFlagger(),
SimpleFlagger()
]
......@@ -76,12 +76,6 @@ def test_getFlags(data, flagger):
assert flags1.shape[0] == data.shape[0]
assert flags1.name in data.columns
# all the same
# NOTE: doesn't make sense here
# flags2 = flagger.getFlags(flags[[field]]).squeeze()
# assert (flags0[field] == flags1).all()
# assert (flags0[field] == flags2).all()
@pytest.mark.parametrize('data', DATASETS)
@pytest.mark.parametrize('flagger', TESTFLAGGERS)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment