Skip to content
Snippets Groups Projects
Commit b52ff7b4 authored by David Schäfer
Browse files

DmpTranslator: enforce valid quality_cause

parent f0267bee
No related branches found
No related tags found
2 merge requests: !271 Static expansion of regular expressions, !260 Follow-Up Translations
This commit is part of merge request !260. Comments created here will be created in the context of that merge request.
......@@ -22,6 +22,7 @@ from saqc.lib.types import MaterializedGraph
from saqc.core.translator.basetranslator import Translator, ForwardMap
class DmpTranslator(Translator):
"""
......@@ -38,6 +39,18 @@ class DmpTranslator(Translator):
"BAD": BAD,
}
_QUALITY_CAUSES = {
"BATTERY_LOW",
"BELOW_MINIMUM",
"ABOVE_MAXIMUM",
"BELOW_OR_ABOVE_MIN_MAX",
"ISOLATED_SPIKE",
"DEFECTIVE_SENSOR",
"LEFT_CENSORED_DATA",
"RIGHT_CENSORED_DATA",
"OTHER",
}
def __init__(self):
super().__init__(forward=self._FORWARD)
......@@ -178,10 +191,18 @@ class DmpTranslator(Translator):
causes.append(cause)
comments.append(comment)
# DMP quality_cause needs some special care as only certain values
# and combinations are allowed.
# See: https://wiki.intranet.ufz.de/wiki/dmp/index.php/Qualit%C3%A4tsflags
causes = pd.Series(causes, index=flags[field].index)
causes[(causes == self.ARGUMENTS["cause"]) & (flags[field] > GOOD)] = "OTHER"
if not ((causes == "") | causes.isin(self._QUALITY_CAUSES)).all():
raise ValueError(f"quality causes needs to be one of {self._QUALITY_CAUSES}")
var_flags = {
"quality_flag": tflags[field],
"quality_comment": pd.Series(comments, index=flags[field].index),
"quality_cause": pd.Series(causes, index=flags[field].index),
"quality_cause": causes,
}
out[field] = pd.DataFrame(var_flags)
return pd.concat(out, axis="columns")
......
......@@ -117,17 +117,36 @@ def test_dmpTranslator():
tflags.loc[:, ("var1", "quality_comment")]
== '{"test": "flagBar", "comment": "I did it"}'
).all(axis=None)
assert (
tflags.loc[:, ("var1", "quality_cause")]
== "OTHER"
).all(axis=None)
assert (tflags.loc[:, ("var2", "quality_flag")] == "BAD").all(axis=None)
assert (
tflags.loc[:, ("var2", "quality_comment")]
== '{"test": "flagFoo", "comment": ""}'
).all(axis=None)
assert (
tflags.loc[:, ("var2", "quality_cause")]
== "OTHER"
).all(axis=None)
assert (
tflags.loc[flags["var3"] == BAD, ("var3", "quality_comment")]
== '{"test": "flagInit", "comment": "initial flags"}'
).all(axis=None)
assert (
tflags.loc[flags["var3"] == BAD, ("var3", "quality_cause")]
== "OTHER"
).all(axis=None)
assert (
tflags.loc[flags["var3"] < DOUBTFUL, ("var3", "quality_cause")]
== ""
).all(axis=None)
def test_positionalTranslator():
......@@ -167,12 +186,12 @@ def test_positionalTranslatorIntegration():
def test_dmpTranslatorIntegration():
data = initData(3)
data = initData(1)
col = data.columns[0]
translator = DmpTranslator()
saqc = SaQC(data=data, translator=translator)
saqc = saqc.breaks.flagMissing(col).outliers.flagRange(col, min=3, max=10)
saqc = saqc.outliers.flagRange(col, min=3, max=10)
data, flags = saqc.getResult()
qflags = flags.xs("quality_flag", axis="columns", level=1)
......@@ -183,7 +202,7 @@ def test_dmpTranslatorIntegration():
assert qflags.isin(translator._forward.keys()).all(axis=None)
assert qfunc.isin({"", "breaks.flagMissing", "outliers.flagRange"}).all(axis=None)
assert (qcause == "").all(axis=None)
assert (qcause[qflags[col] == "BAD"] == "OTHER").all(axis=None)
round_trip = translator.backward(*translator.forward(flags))
......@@ -197,6 +216,22 @@ def test_dmpTranslatorIntegration():
flags.xs("quality_cause", axis="columns", level=1)
)
def test_dmpValidCause():
    """
    Check the handling of the `cause` argument by the `DmpTranslator`.

    A cause that is not expected to be accepted must make `getResult`
    raise a `ValueError`; an accepted cause must be reported as
    `quality_cause` for every value flagged `BAD`, while all other
    values keep an empty cause.
    """
    data = initData(1)
    field = data.columns[0]
    qc = SaQC(data=data, translator=DmpTranslator())

    # an unknown cause has to be rejected once the result is requested
    qc = qc.outliers.flagRange(field, min=3, max=10, cause="SOMETHING_STUPID")
    with pytest.raises(ValueError):
        qc.getResult()

    # a known cause has to show up for the flagged values only
    qc = qc.outliers.flagRange(
        field, min=3, max=10, cause="BELOW_OR_ABOVE_MIN_MAX"
    )
    _, flags = qc.getResult()
    quality_flags = flags.xs("quality_flag", axis="columns", level=1)
    quality_causes = flags.xs("quality_cause", axis="columns", level=1)

    is_bad = quality_flags[field] == "BAD"
    not_bad = quality_flags[field] != "BAD"
    assert (quality_causes[is_bad] == "BELOW_OR_ABOVE_MIN_MAX").all(axis=None)
    assert (quality_causes[not_bad] == "").all(axis=None)
def _buildupSaQCObjects():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment