Skip to content
Snippets Groups Projects
Commit e650f8b6 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

rewrote flagConstant to make it easier, faster, and fix bugs

parent 979b9ae2
No related branches found
No related tags found
1 merge request: !3 "rewrote flagConstant to make it easier, faster, and fix bugs"
Pipeline #2378 passed with stage
in 8 minutes and 29 seconds
......@@ -31,6 +31,7 @@ def main(config, data, flagger, outfile, nodata, fail):
nodata=nodata,
error_policy="raise" if fail else "warn",
)
[cols_out].to_csv(outfile, header=True, index=True)
if outfile:
flags = flagger_result.getFlags()
......
......@@ -11,21 +11,31 @@ from saqc.lib.tools import valueRange, slidingWindowIndices, retrieveTrustworthy
@register("constant")
def flagConstant(data, field, flagger, thresh, window, **kwargs):
"""
Flag values that are (semi-)constant.

:param data: dataframe
:param field: column in data
:param flagger: saqc flagger obj
:param thresh: largest difference between values that still counts as
constant (the comparisons below use ``<=`` / ``.le``)
:param window: sliding window; passed to ``rolling`` and converted with
``pd.tseries.frequencies.to_offset``
:return: tuple of ``data`` and the flagger returned by ``setFlags``
"""
# NOTE(review): this listing looks like a rendered diff in which removed and
# added lines are interleaved and indentation is lost (``data[field]`` is
# read twice, ``setFlags`` is called twice with different masks). Confirm
# which statements belong to the current version before relying on it.
d = data[field]
# find all constant values in forward search: a position counts as constant
# when the spread (max - min) of its rolling window is at most thresh
r = d.rolling(window=window)
mask = (r.max() - r.min()) <= thresh
# from here on ``window`` is the offset object, no longer the raw argument
window = pd.tseries.frequencies.to_offset(window)
# backward rolling for offset windows hack: reverse the mask and re-index it
# by the distance to the largest index value
bw = mask[::-1].copy()
bw.index = bw.index.max() - bw.index
col = data[field]
index = col.index
# boolean series aligned with the column; np.zeros_like with dtype=bool
# makes every entry start out as False
flagger_mask = pd.Series(data=np.zeros_like(col), index=index, name=field, dtype=bool)
# propagate the mask(!), backwards: True wherever the rolling window over the
# reversed mask contains at least one True
bwmask = bw.rolling(window=window).sum() > 0
# groupConsecutives is defined outside this listing; presumably it yields
# runs of consecutive equal values of the boolean series -- TODO confirm
for srs in groupConsecutives(col.diff().abs().le(thresh)):
# only groups whose entries are all True are handled
if np.all(srs):
# start is the index label one position before the group's first entry
start = index[index.get_loc(srs.index[0]) - 1]
stop = srs.index[-1]
# mark the span only if it covers at least ``window``
if stop - start >= window:
flagger_mask[start:stop] = True
# merge the backward mask, reversed again into the original order
mask |= bwmask[::-1].values
# flags are set from ``flagger_mask`` and then from ``mask``
flagger = flagger.setFlags(field, flagger_mask, **kwargs)
flagger = flagger.setFlags(field, mask, **kwargs)
return data, flagger
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment