Skip to content
Snippets Groups Projects

Follow-Up Translations

Merged David Schäfer requested to merge translations into develop
Compare and Show latest version
16 files
+ 230
376
Compare changes
  • Side-by-side
  • Inline
Files
16
@@ -38,6 +38,18 @@ class DmpTranslator(Translator):
"BAD": BAD,
}
# Closed set of cause strings accepted for the `quality_cause` output column.
# The code that builds that column raises a `ValueError` for any cause that
# is neither the empty string nor a member of this set.
_QUALITY_CAUSES = {
"BATTERY_LOW",
"BELOW_MINIMUM",
"ABOVE_MAXIMUM",
"BELOW_OR_ABOVE_MIN_MAX",
"ISOLATED_SPIKE",
"DEFECTIVE_SENSOR",
"LEFT_CENSORED_DATA",
"RIGHT_CENSORED_DATA",
"OTHER",
}
def __init__(self):
super().__init__(forward=self._FORWARD)
@@ -59,8 +71,30 @@ class DmpTranslator(Translator):
Note
----
Could (and maybe should) be implemented as a method of `CallGraph`.
Currently we keep track of the computations done on a variable by
using the variable name as the key, but we also allow that name
(i.e. our key) to be changed through `tools.rename`. We work around
this issue in a somewhat hacky way. There are better ways to solve it
(e.g. global function pointers), but for the moment this has to do the trick.
"""
return [SaQCFunction(name="")] + [f for l, f in call_stack if l.field == field]
# backtrack name changes to check whether our field
# originally had another name
for sel, func in call_stack[::-1]:
if func.name == "tools.rename":
new_name = func.keywords.get("new_name") or func.args[3]
if new_name == field:
field = sel.field
out = [SaQCFunction(name="")]
for sel, func in call_stack:
if sel.field == field:
out.append(func)
# forward track name changes
if func.name == "tools.rename":
field = func.keywords.get("new_name") or func.args[3]
return out
def forward(self, flags: pd.DataFrame) -> Tuple[Flags, MaterializedGraph]:
"""
@@ -144,6 +178,11 @@ class DmpTranslator(Translator):
flag_call_history = self._getFieldFunctions(field, call_graph)
flag_pos = flags.history[field].idxmax()
comments, causes = [], []
# NOTE:
# Strangely enough, this loop withstood all my efforts
# to speed it up through vectorization - the simple
# loop always outperformed even careful `pd.DataFrame.apply`
# versions. The latest try is left as a comment below.
for p in flag_pos:
func = flag_call_history[p]
cause = func.keywords.get("cause", self.ARGUMENTS["cause"])
@@ -158,10 +197,45 @@ class DmpTranslator(Translator):
causes.append(cause)
comments.append(comment)
# DMP quality_cause needs some special care as only certain values
# and combinations are allowed.
# See: https://wiki.intranet.ufz.de/wiki/dmp/index.php/Qualit%C3%A4tsflags
causes = pd.Series(causes, index=flags[field].index)
causes[
(causes == self.ARGUMENTS["cause"]) & (flags[field] > GOOD)
] = "OTHER"
if not ((causes == "") | causes.isin(self._QUALITY_CAUSES)).all():
raise ValueError(
f"quality causes needs to be one of {self._QUALITY_CAUSES}"
)
var_flags = {
"quality_flag": tflags[field],
"quality_comment": pd.Series(comments, index=flags[field].index),
"quality_cause": pd.Series(causes, index=flags[field].index),
"quality_cause": causes,
}
out[field] = pd.DataFrame(var_flags)
return pd.concat(out, axis="columns")
# for field in tflags.columns:
# call_history = []
# for func in self._getFieldFunctions(field, call_graph):
# func_info = {
# "cause": func.keywords.get("cause", self.ARGUMENTS["cause"]),
# "comment": json.dumps({
# "test": func.name,
# "comment": func.keywords.get("comment", self.ARGUMENTS["comment"]),
# })
# }
# call_history.append(func_info)
# functions = pd.DataFrame(call_history)
# flag_pos = flags.history[field].idxmax()
# var_flags = {
# "quality_flag": tflags[field].reset_index(drop=True),
# "quality_comment": functions.loc[flag_pos, "comment"].reset_index(drop=True),
# "quality_cause": functions.loc[flag_pos, "cause"].reset_index(drop=True),
# }
# out[field] = pd.DataFrame(var_flags, index=flag_pos.index)
# return pd.concat(out, axis="columns")
Loading