Skip to content
Snippets Groups Projects
Commit a58391d8 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

Merge branch 'constantTestFix' into 'master'

rewrote flagConstant to make it easier, faster, and fix bugs

See merge request !3
parents 979b9ae2 e650f8b6
No related branches found
No related tags found
1 merge request: !3 rewrote flagConstant to make it easier, faster, and fix bugs
Pipeline #2379 passed with stage
in 6 minutes and 32 seconds
...@@ -31,6 +31,7 @@ def main(config, data, flagger, outfile, nodata, fail): ...@@ -31,6 +31,7 @@ def main(config, data, flagger, outfile, nodata, fail):
nodata=nodata, nodata=nodata,
error_policy="raise" if fail else "warn", error_policy="raise" if fail else "warn",
) )
[cols_out].to_csv(outfile, header=True, index=True)
if outfile: if outfile:
flags = flagger_result.getFlags() flags = flagger_result.getFlags()
......
...@@ -11,21 +11,31 @@ from saqc.lib.tools import valueRange, slidingWindowIndices, retrieveTrustworthy ...@@ -11,21 +11,31 @@ from saqc.lib.tools import valueRange, slidingWindowIndices, retrieveTrustworthy
@register("constant") @register("constant")
def flagConstant(data, field, flagger, thresh, window, **kwargs): def flagConstant(data, field, flagger, thresh, window, **kwargs):
  • Owner

    I'd love to get an introduction to this magic forward, backward, sidewards roller coaster

  • Please register or sign in to reply
"""
Flag values that are (semi-)constant.
:param data: dataframe
:param field: column in data
:param flagger: saqc flagger obj
:param thresh: the difference between the largest and smallest value within a window must not exceed this threshold for the values to count as constant
:param window: sliding window
"""
d = data[field]
# find all constant values in forward search
r = d.rolling(window=window)
mask = (r.max() - r.min()) <= thresh
window = pd.tseries.frequencies.to_offset(window) # backward rolling for offset windows hack
bw = mask[::-1].copy()
bw.index = bw.index.max() - bw.index
col = data[field] # propagate the mask(!), backwards
index = col.index bwmask = bw.rolling(window=window).sum() > 0
flagger_mask = pd.Series(data=np.zeros_like(col), index=index, name=field, dtype=bool)
for srs in groupConsecutives(col.diff().abs().le(thresh)): mask |= bwmask[::-1].values
if np.all(srs):
start = index[index.get_loc(srs.index[0]) - 1]
stop = srs.index[-1]
if stop - start >= window:
flagger_mask[start:stop] = True
flagger = flagger.setFlags(field, flagger_mask, **kwargs) flagger = flagger.setFlags(field, mask, **kwargs)
return data, flagger return data, flagger
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment