From dbdade94288c97d8e6842c7733e7ecd12c852b9b Mon Sep 17 00:00:00 2001 From: David Schaefer <david.schaefer@ufz.de> Date: Thu, 27 Jun 2019 17:03:20 +0200 Subject: [PATCH] various modifications --- saqc/flagger/baseflagger.py | 29 ++++++++++++++++------------- saqc/flagger/dmpflagger.py | 10 ++++------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/saqc/flagger/baseflagger.py b/saqc/flagger/baseflagger.py index d2577cbd6..f367250b3 100644 --- a/saqc/flagger/baseflagger.py +++ b/saqc/flagger/baseflagger.py @@ -41,14 +41,14 @@ class BaseFlagger: pandas data structures tend to behave unpredictively in assignments, especially if a multi column index is used """ - if flag is None: - flag = self.flags.max() - else: - self._checkFlag(flag) + flag = self.flags.max() if flag is None else self._checkFlag(flag) flags = flags.copy() - # NOTE: conversion of "flags frame to np.array is done here already, since return argument is just the array - # anyway. For applying mulitdimensional indexing on the DataFrame "flags", you would have to stack it first, - # with .stack() (try flags.stack[flags<flag] = flag and than unstack.) + # NOTE: + # conversion of 'flags' frame to np.array is done here already, + # since return argument is just the array anyway. For applying + # multidimensional indexing on the DataFrame 'flags', you would + # have to stack it first + # (try flags.stack[flags<flag] = flag and then unstack.) 
flags = flags.values flags[flags < flag] = flag @@ -57,20 +57,23 @@ class BaseFlagger: def initFlags(self, data: pd.DataFrame) -> pd.DataFrame: out = data.copy() out[:] = self.flags[0] - # astype conversion of return Dataframe performed seperately, because pd.DataFrame(...,dtype=self.flags) - # wont give you categorical flag objects: + # NOTE: + # astype conversion of return DataFrame performed + # separately, because pd.DataFrame(..., dtype=self.flags) + # won't give you categorical flag objects return out.astype(self.flags) def isFlagged(self, flags: ArrayLike, flag: T = None) -> ArrayLike: if flag is None: return pd.notnull(flags) & (flags > self.flags[0]) - self._checkFlag(flag) - return flags == flag + return flags == self._checkFlag(flag) def _checkFlag(self, flag): if flag not in self.flags: - raise ValueError(f"Invalid flag '{flag}'. " - f"Possible choices are {list(self.flags.categories)[1:]}") + raise ValueError( + f"Invalid flag '{flag}'. " + f"Possible choices are {list(self.flags.categories)[1:]}") + return flag def nextTest(self): pass diff --git a/saqc/flagger/dmpflagger.py b/saqc/flagger/dmpflagger.py index e1c6451cf..2ec2b2d44 100644 --- a/saqc/flagger/dmpflagger.py +++ b/saqc/flagger/dmpflagger.py @@ -36,10 +36,11 @@ class DmpFlagger(BaseFlagger): self.project_version = version.decode().strip() def initFlags(self, data, **kwargs): - columns = data.columns if isinstance(data, pd.DataFrame) else [data.name] + if isinstance(data, pd.Series): + data = data.to_frame() colindex = pd.MultiIndex.from_product( - [columns, self.flag_fields], + [data.columns, self.flag_fields], names=[ColumnLevels.VARIABLES, ColumnLevels.FLAGS]) out = pd.DataFrame(data=self.flags[0], @@ -53,10 +54,7 @@ class DmpFlagger(BaseFlagger): if not isinstance(flags, pd.DataFrame): raise TypeError - if flag is None: - flag = self.flags.max() - else: - self._checkFlag(flag) + flag = self.flags.max() if flag is None else self._checkFlag(flag) if Keywords.VERSION in comment: comment = 
comment.replace(Keywords.VERSION, self.project_version) -- GitLab