diff --git a/saqc/core/modules/breaks.py b/saqc/core/modules/breaks.py
index 1edfad5197ca94000849d5c0249ef6ccd61c1fd0..bd8652849de0a7574087a1080918d115678eb603 100644
--- a/saqc/core/modules/breaks.py
+++ b/saqc/core/modules/breaks.py
@@ -37,3 +37,8 @@ class Breaks(ModuleBase):
         **kwargs
     ) -> saqc.SaQC:
         return self.defer("flagJumps", locals())
+
+    def flagSparseCross(
+        self, field, fields, min_valid, flag=BAD, **kwargs
+    ) -> saqc.SaQC:
+        return self.defer("flagSparseCross", locals())
diff --git a/saqc/funcs/breaks.py b/saqc/funcs/breaks.py
index e14d6826f6589e71c1c1b34c60c43dc1d15888a6..26a01b04aa6f9f99229b1b8dce78fb47d1d5f817 100644
--- a/saqc/funcs/breaks.py
+++ b/saqc/funcs/breaks.py
@@ -191,3 +191,64 @@ def flagJumps(
         flag=flag,
         **kwargs
     )
+
+
+@register(masking="all", module="breaks")
+def flagSparseCross(data, field, flags, fields, min_valid, flag=BAD, **kwargs):
+    """
+    Flag rows across multiple columns if too little valid data is present.
+
+    This works horizontally: the given columns are joined ("outer") into one
+    frame and compared row by row. If the number of columns holding valid
+    (non-NaN) data in a row is below `min_valid`, that row is flagged in
+    every given column.
+
+    Parameters
+    ----------
+    data :
+        Data container
+
+    field :
+        ignored - dummy parameter
+
+    flags :
+        Flags container
+
+    fields : list or iterable
+        The fields to cross-compare.
+
+    min_valid : int
+        Minimal number of columns, which must hold valid data, to consider
+        the row as valid. Must be greater than zero.
+
+    flag : float
+        The flag to set
+
+    Returns
+    -------
+    data :
+        The data container, passed through unchanged
+
+    flags :
+        The flags container, with `flag` set on all sparse rows
+
+    Raises
+    ------
+    ValueError
+        If `min_valid` is not greater than zero.
+    """
+    if min_valid <= 0:
+        raise ValueError(f"'min_valid' must be greater than zero, got {min_valid}")
+
+    # `fields` may be a one-shot iterable, but it is needed twice below
+    fields = list(fields)
+
+    # DataFrame.count() skips NaNs, so this is the number of valid columns
+    # per row of the joined frame
+    sparse: pd.Series = data[fields].to_df("outer").count(axis=1).lt(min_valid)
+    for c in fields:
+        # restrict the row mask to the index of the single column
+        m = sparse.reindex(data[c].index)
+        flags[m, c] = flag
+
+    return data, flags
diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py
index 7d6db45553cb147ce1225dc7140cc026f9a30100..e5e8af6c595e315a3b2dd1ecc1735aded2cf400e 100644
--- a/tests/funcs/test_functions.py
+++ b/tests/funcs/test_functions.py
@@ -1,10 +1,12 @@
 #! 
/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import pytest
 import dios
 import pandas as pd
 import numpy as np
 
+import saqc
 from saqc.funcs.noise import flagByStatLowPass
 from saqc.constants import *
 from saqc.core import initFlagsLike
@@ -316,3 +318,21 @@ def test_flagDriftFromNormal(dat):
     assert all(flags_norm["d3"] > UNFLAGGED)
     assert all(flags_ref["d3"] > UNFLAGGED)
     assert all(flags_scale["d3"] > UNFLAGGED)
+
+
+def test_flagSparseCross():
+    data = dios.example_DictOfSeries()
+    min_valid = 3
+
+    qc = saqc.SaQC(data=data)
+    qc = qc.breaks.flagSparseCross("dummy", fields=data.columns, min_valid=min_valid)
+
+    # toFrame() inserts NaNs at index positions that are not shared by all columns
+    flags = qc._flags.toFrame()
+    # rebuild the condition from flagSparseCross
+    sparse = data.to_df().count(axis=1) < min_valid
+
+    # rows with too few valid columns must be flagged wherever they exist ...
+    assert (flags[sparse].isna() | (flags[sparse] == BAD)).all(None)
+    # ... and all other rows must be left untouched
+    assert (flags[~sparse].isna() | (flags[~sparse] == UNFLAGGED)).all(None)