Skip to content
Snippets Groups Projects
Commit 0df868c3 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

added function, module, test

parent 0835c4b2
No related branches found
No related tags found
1 merge request!286Draft: flagSparseCross
Pipeline #30502 passed with stage
in 2 minutes and 2 seconds
......@@ -37,3 +37,8 @@ class Breaks(ModuleBase):
**kwargs
) -> saqc.SaQC:
return self.defer("flagJumps", locals())
# Method-style entry point for the `flagSparseCross` function.
# NOTE(review): presumably a method of `Breaks(ModuleBase)` (per the diff hunk
# header); the leading indentation was lost in this view - confirm.
# The method does no work itself: it hands the target function name and its
# own call arguments to `self.defer` and returns whatever `defer` returns
# (annotated as `saqc.SaQC`).
def flagSparseCross(
self, field, fields, min_valid, flag=BAD, **kwargs
) -> saqc.SaQC:
# `locals()` is evaluated before any other local name exists, so it holds
# exactly `self`, `field`, `fields`, `min_valid`, `flag` and `kwargs`.
# Do not introduce local variables above this line - they would be
# forwarded to `defer` as well.
return self.defer("flagSparseCross", locals())
......@@ -191,3 +191,43 @@ def flagJumps(
flag=flag,
**kwargs
)
@register(masking="all", module="breaks")
def flagSparseCross(data, field, flags, fields, min_valid, flag=BAD, **kwargs):
    """
    Flag rows across multiple columns if too few of them hold valid data.

    The check works horizontally: for every row (index position) the given
    columns are compared at once. If the number of columns holding valid
    (non-NA) data is below `min_valid`, that row is flagged in all of the
    given columns.

    Parameters
    ----------
    data :
        Data container
    field :
        ignored - dummy parameter
    flags :
        Flags container
    fields : list or iterable
        The fields to cross-compare. One-shot iterables are accepted; they
        are materialized once internally.
    min_valid : int
        Minimal number of columns, which must hold valid data, to consider
        the row as valid. Must be greater than zero.
    flag : float
        The flag to set

    Returns
    -------
    data :
        The data container, passed through unmodified.
    flags :
        The flags container with `flag` set on all sparse rows of `fields`.

    Raises
    ------
    ValueError
        If `min_valid` is not greater than zero.
    """
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently disable the validation.
    if not min_valid > 0:
        raise ValueError(f"'min_valid' must be greater than 0, got {min_valid!r}")

    # `fields` is consumed twice (column selection and the loop below), so a
    # one-shot iterator must be materialized first.
    fields = list(fields)

    # Per row: count the columns with non-NA values and mark rows that fall
    # below the threshold. The result is a boolean Series, not a DataFrame.
    mask: pd.Series = data[fields].to_df("outer").count(axis=1).lt(min_valid)

    for c in fields:
        # Align the row mask to the index of the individual column before
        # using it to address the flags of that column.
        m = mask.reindex(data[c].index)
        flags[m, c] = flag

    return data, flags
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import dios
import pandas as pd
import numpy as np
import saqc
from saqc.funcs.noise import flagByStatLowPass
from saqc.constants import *
from saqc.core import initFlagsLike
......@@ -316,3 +318,16 @@ def test_flagDriftFromNormal(dat):
assert all(flags_norm["d3"] > UNFLAGGED)
assert all(flags_ref["d3"] > UNFLAGGED)
assert all(flags_scale["d3"] > UNFLAGGED)
def test_flagSparseCross():
    """Rows with fewer than `min_valid` valid columns must be flagged BAD."""
    min_valid = 3
    data = dios.example_DictOfSeries()
    qc = saqc.SaQC(data=data).breaks.flagSparseCross(
        "dummy", fields=data.columns, min_valid=min_valid
    )

    # toFrame() inserts NaNs at index positions that are not shared by all columns
    flag_frame = qc._flags.toFrame()

    # rebuild the condition from flagSparseCross
    sparse_rows = data.to_df().count(axis=1).lt(min_valid)
    sparse_flags = flag_frame[sparse_rows]

    # every cell of a sparse row is either missing (NaN) or flagged BAD
    assert (sparse_flags.isna() | sparse_flags.eq(BAD)).all(axis=None)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment