From 0df868c3a0084809b7d690ea6f5668c8efede2e2 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Mon, 5 Jul 2021 15:57:01 +0200
Subject: [PATCH] added function, module, test

---
 saqc/core/modules/breaks.py   |  5 +++
 saqc/funcs/breaks.py          | 57 +++++++++++++++++++++++++++++++++++
 tests/funcs/test_functions.py | 15 +++++++++
 3 files changed, 77 insertions(+)

diff --git a/saqc/core/modules/breaks.py b/saqc/core/modules/breaks.py
index 1edfad519..bd8652849 100644
--- a/saqc/core/modules/breaks.py
+++ b/saqc/core/modules/breaks.py
@@ -37,3 +37,8 @@ class Breaks(ModuleBase):
         **kwargs
     ) -> saqc.SaQC:
         return self.defer("flagJumps", locals())
+
+    def flagSparseCross(
+        self, field, fields, min_valid, flag=BAD, **kwargs
+    ) -> saqc.SaQC:
+        return self.defer("flagSparseCross", locals())
diff --git a/saqc/funcs/breaks.py b/saqc/funcs/breaks.py
index e14d6826f..26a01b04a 100644
--- a/saqc/funcs/breaks.py
+++ b/saqc/funcs/breaks.py
@@ -191,3 +191,60 @@ def flagJumps(
         flag=flag,
         **kwargs
     )
+
+
+@register(masking="all", module="breaks")
+def flagSparseCross(data, field, flags, fields, min_valid, flag=BAD, **kwargs):
+    """
+    Flag multiple columns horizontally if too few valid values are present.
+
+    This works row-wise. Multiple columns (one row) are compared at once and if
+    the number of columns that hold valid data is below `min_valid`, all columns
+    are flagged.
+
+    Parameters
+    ----------
+    data :
+        Data container
+
+    field :
+        ignored - dummy parameter
+
+    flags :
+        Flags container
+
+    fields : list or iterable
+        The fields to cross-compare.
+
+    min_valid : int
+        Minimal number of columns, which must be present, to consider the
+        row as valid. Must be greater than zero.
+
+    flag: float
+        The flag to set
+
+    Returns
+    -------
+    data :
+        The data container, returned as passed in
+
+    flags :
+        The flags container, with `flag` set on the sparse rows of `fields`
+
+    Raises
+    ------
+    ValueError
+        If `min_valid` is not greater than zero.
+    """
+    if min_valid <= 0:
+        raise ValueError("min_valid must be greater than zero")
+
+    # count the non-NaN values per row of the outer-joined frame; rows with
+    # fewer than `min_valid` of them get flagged in every column of `fields`
+    mask: pd.Series = data[fields].to_df("outer").count(axis=1).lt(min_valid)
+    for c in fields:
+        # align the row mask with the index of column `c` before flagging
+        m = mask.reindex(data[c].index)
+        flags[m, c] = flag
+
+    return data, flags
diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py
index 7d6db4555..e5e8af6c5 100644
--- a/tests/funcs/test_functions.py
+++ b/tests/funcs/test_functions.py
@@ -1,10 +1,12 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import pytest
 import dios
 import pandas as pd
 import numpy as np
 
+import saqc
 from saqc.funcs.noise import flagByStatLowPass
 from saqc.constants import *
 from saqc.core import initFlagsLike
@@ -316,3 +318,16 @@ def test_flagDriftFromNormal(dat):
     assert all(flags_norm["d3"] > UNFLAGGED)
     assert all(flags_ref["d3"] > UNFLAGGED)
     assert all(flags_scale["d3"] > UNFLAGGED)
+
+
+def test_flagSparseCross():
+    data = dios.example_DictOfSeries()
+
+    qc = saqc.SaQC(data=data)
+    qc = qc.breaks.flagSparseCross("dummy", fields=data.columns, min_valid=3)
+
+    # toFrame() insert NANs at index positions that are not shared by all columns
+    flags = qc._flags.toFrame()
+    # rebuild the condition from flagSparseCross
+    m = data.to_df().count(axis=1) < 3
+    assert (flags[m].isna() | (flags[m] == BAD)).all(None)
-- 
GitLab