From 0df868c3a0084809b7d690ea6f5668c8efede2e2 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Mon, 5 Jul 2021 15:57:01 +0200
Subject: [PATCH] added function, module, test

---
 saqc/core/modules/breaks.py   |  5 +++++
 saqc/funcs/breaks.py          | 40 +++++++++++++++++++++++++++++++++++
 tests/funcs/test_functions.py | 15 +++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/saqc/core/modules/breaks.py b/saqc/core/modules/breaks.py
index 1edfad519..bd8652849 100644
--- a/saqc/core/modules/breaks.py
+++ b/saqc/core/modules/breaks.py
@@ -37,3 +37,8 @@ class Breaks(ModuleBase):
         **kwargs
     ) -> saqc.SaQC:
         return self.defer("flagJumps", locals())
+
+    def flagSparseCross(
+        self, field, fields, min_valid, flag=BAD, **kwargs
+    ) -> saqc.SaQC:
+        return self.defer("flagSparseCross", locals())
diff --git a/saqc/funcs/breaks.py b/saqc/funcs/breaks.py
index e14d6826f..26a01b04a 100644
--- a/saqc/funcs/breaks.py
+++ b/saqc/funcs/breaks.py
@@ -191,3 +191,43 @@ def flagJumps(
         flag=flag,
         **kwargs
     )
+
+
+@register(masking="all", module="breaks")
+def flagSparseCross(data, field, flags, fields, min_valid, flag=BAD, **kwargs):
+    """
+    Flag multiple columns horizontally if too little valid data is present.
+
+    This works horizontally. Multiple columns (one row) are compared at once, and
+    if the number of columns that hold valid data is below `min_valid`, all columns
+    are flagged.
+
+    Parameters
+    ----------
+    data :
+        Data container
+
+    field :
+        ignored - dummy parameter
+
+    flags :
+        Flags container
+
+    fields : list or iterable
+        The fields to cross-compare.
+
+    min_valid : int
+        Minimum number of columns that must hold valid data for the
+        row to be considered valid.
+
+    flag : float
+        The flag to set
+    """
+    assert min_valid > 0
+
+    mask: pd.DataFrame = data[fields].to_df("outer").count(axis=1).lt(min_valid)
+    for c in fields:
+        m = mask.reindex(data[c].index)
+        flags[m, c] = flag
+
+    return data, flags
diff --git a/tests/funcs/test_functions.py b/tests/funcs/test_functions.py
index 7d6db4555..e5e8af6c5 100644
--- a/tests/funcs/test_functions.py
+++ b/tests/funcs/test_functions.py
@@ -1,10 +1,12 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
+import pytest
 
 import dios
 import pandas as pd
 import numpy as np
 
+import saqc
 from saqc.funcs.noise import flagByStatLowPass
 from saqc.constants import *
 from saqc.core import initFlagsLike
@@ -316,3 +318,16 @@ def test_flagDriftFromNormal(dat):
     assert all(flags_norm["d3"] > UNFLAGGED)
     assert all(flags_ref["d3"] > UNFLAGGED)
     assert all(flags_scale["d3"] > UNFLAGGED)
+
+
+def test_flagSparseCross():
+    data = dios.example_DictOfSeries()
+
+    qc = saqc.SaQC(data=data)
+    qc = qc.breaks.flagSparseCross("dummy", fields=data.columns, min_valid=3)
+
+    # toFrame() inserts NaNs at index positions that are not shared by all columns
+    flags = qc._flags.toFrame()
+    # rebuild the condition from flagSparseCross
+    m = data.to_df().count(axis=1) < 3
+    assert (flags[m].isna() | (flags[m] == BAD)).all(None)
-- 
GitLab