Skip to content
Snippets Groups Projects
Commit 3a7362c0 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

added function, module, test

parent 0835c4b2
No related branches found
No related tags found
No related merge requests found
Pipeline #30500 passed with stage
in 2 minutes
......@@ -37,3 +37,8 @@ class Breaks(ModuleBase):
**kwargs
) -> saqc.SaQC:
return self.defer("flagJumps", locals())
# Deferred-call wrapper for the "flagSparseCross" function: it hands the
# function name and the arguments of this call to `self.defer` and returns
# whatever `self.defer` returns.
#
# NOTE: `locals()` is evaluated as the very first statement, so it holds
# exactly the call arguments (self, field, fields, min_valid, flag, kwargs).
# Do not bind any other local name before that call, or it would be passed
# along as well.
def flagSparseCross(
self, field, fields, min_valid, flag=BAD, **kwargs
) -> saqc.SaQC:
return self.defer("flagSparseCross", locals())
......@@ -191,3 +191,43 @@ def flagJumps(
flag=flag,
**kwargs
)
@register(masking="all", module="breaks")
def flagSparseCross(data, field, flags, fields, min_valid, flag=BAD, **kwargs):
    """
    Flag rows across several columns if too few of them hold valid data.

    The check works horizontally: for every row, the number of columns in
    `fields` that hold a valid (non-NaN) value is counted. If that number is
    below `min_valid`, the row is flagged in all columns of `fields`.

    Parameters
    ----------
    data :
        Data container
    field :
        ignored - dummy parameter
    flags :
        Flags container
    fields : list or iterable
        The fields to cross-compare.
    min_valid : int
        Minimal number of columns that must hold valid data for a row to be
        considered valid. Must be greater than zero.
    flag : float
        The flag to set

    Returns
    -------
    data :
        The data container, passed through unchanged.
    flags :
        The flags container, with `flag` set on every sparse row of every
        column in `fields`.

    Raises
    ------
    ValueError
        If `min_valid` is not greater than zero.
    """
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with `-O`, which would silently disable the validation.
    if not min_valid > 0:
        raise ValueError(f"'min_valid' must be greater than zero, got {min_valid!r}")

    # `fields` is consumed twice below (column selection and the flagging
    # loop), so materialize it once to also support one-shot iterables.
    fields = list(fields)

    # Count the non-NaN cells per row over the selected columns; rows with
    # fewer than `min_valid` valid cells are the ones to flag. The result is
    # a boolean Series, indexed like the frame returned by `to_df("outer")`.
    sparse: pd.Series = data[fields].to_df("outer").count(axis=1).lt(min_valid)

    for column in fields:
        # Align the row mask to the index of this particular column before
        # using it to address the flags.
        column_mask = sparse.reindex(data[column].index)
        flags[column_mask, column] = flag

    return data, flags
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import dios
import pandas as pd
import numpy as np
import saqc
from saqc.funcs.noise import flagByStatLowPass
from saqc.constants import *
from saqc.core import initFlagsLike
......@@ -316,3 +318,17 @@ def test_flagDriftFromNormal(dat):
assert all(flags_norm["d3"] > UNFLAGGED)
assert all(flags_ref["d3"] > UNFLAGGED)
assert all(flags_scale["d3"] > UNFLAGGED)
def test_flagSparseCross():
    """Rows with fewer than `min_valid` valid columns get BAD, all others stay UNFLAGGED."""
    min_valid = 3
    data = dios.example_DictOfSeries()
    qc = saqc.SaQC(data=data)
    qc = qc.breaks.flagSparseCross(
        "dummy", fields=data.columns, min_valid=min_valid
    )

    # toFrame() inserts NaNs at index positions that are not shared by all columns
    flags = qc._flags.toFrame()

    # rebuild the condition from flagSparseCross
    sparse = data.to_df().count(axis=1) < min_valid

    # In sparse rows every cell is either missing in that column (NaN) or
    # flagged BAD. `axis=None` reduces over rows and columns at once; it is
    # passed by keyword because recent pandas no longer accepts it positionally.
    assert (flags[sparse].isna() | (flags[sparse] == BAD)).all(axis=None)

    # Rows with enough valid columns must not have been touched.
    assert (flags[~sparse].isna() | (flags[~sparse] == UNFLAGGED)).all(axis=None)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment