From 5d665fadc3549bd04746fa8ad926027cb64fdcad Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Thu, 19 Aug 2021 13:35:54 +0200 Subject: [PATCH] added a calibration plot to flag_pattern, because its hard to find a suitable threshold without any feedback. --- saqc/funcs/pattern.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/saqc/funcs/pattern.py b/saqc/funcs/pattern.py index 89ac4b354..070974288 100644 --- a/saqc/funcs/pattern.py +++ b/saqc/funcs/pattern.py @@ -182,7 +182,15 @@ def calculateDistanceByDTW( @flagging(masking="field", module="pattern") def flagPatternByDTW( - data, field, flags, ref_field, max_distance=0.0, normalize=True, flag=BAD, **kwargs + data, + field, + flags, + ref_field, + max_distance=0.0, + normalize=True, + plot=False, + flag=BAD, + **kwargs ): """Pattern Recognition via Dynamic Time Warping. @@ -219,6 +227,15 @@ def flagPatternByDTW( processing. The distances then refer to the mean distance per datapoint, expressed in the datas units. + plot: bool, default False + Show a calibration plot, which can be quite helpful to find the right threshold + for `max_distance`. It works best with `normalize=True`. Do not use in automatic + setups / pipelines. The plot shows three lines: + - data: the data the function was called on + - distances: the calculated distances by the algorithm + - indicator: has two distinct levels: `0` and the value of `max_distance`. + If `max_distance` is `0.0` it defaults to `1`. Wherever the + indicator is not `0`, the data will be flagged. Returns ------- @@ -260,5 +277,12 @@ def flagPatternByDTW( rolling = customRoller(minima, window=winsz) mask = rolling.sum() > 0 + if plot: + df = pd.DataFrame() + df["data"] = dat + df["distances"] = distances + df["indicator"] = mask.astype(float) * (max_distance or 1) + df.plot() + flags[mask, field] = flag return data, flags -- GitLab