diff --git a/docs/FunctionDescriptions.md b/docs/FunctionDescriptions.md index 2d53066540da9e74a8b93696cb3afdfa934fb5f4..2cd642d808c95bb9ade4ba5fadf38e268074ab37 100644 --- a/docs/FunctionDescriptions.md +++ b/docs/FunctionDescriptions.md @@ -11,7 +11,7 @@ Main documentation of the implemented functions, their purpose and parameters an - [clear](#clear) - [force](#force) - [sliding_outlier](#sliding_outlier) - - [mad](#mad) + - [spikes_simpleMad](#spikes_simplemad) - [spikes_Basic](#spikes_basic) - [Spikes_SpektrumBased](#spikes_spektrumbased) - [constant](#constant) @@ -188,22 +188,19 @@ with $` r, M, mad, z `$: data, data median, data median absolute deviation, `z`. See also: [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm -## mad +## spikes_simpleMad Flag outlier by simple median absolute deviation test. ``` -mad(length, z=3.5, freq=None) +spikes_simpleMad(winsz, z=3.5) ``` -| parameter | data type | default value | description | -| --------- | ----------- | ---- | ----------- | -| length | offset-string | `"1h"` | size of the sliding window, where the modified Z-score is applied on | -| z | float | `3.5` | z-parameter the modified Z-score | -| freq | | `None` | The frequency the data have | +| parameter | data type | default value | description | +| --------- | ----------- | ---- | ----------- | +| winsz | offset-string or int | `"1h"` | size of the sliding window, where the modified Z-score is applied on | +| z | float | `3.5` | z-parameter of the modified Z-score | -Parameter note: If freq is omitted, it is tried to infer the correct frequency. This is not fail save (!), because -if no frequency can be found a error is thrown, but even worse, also a wrong frequency could be assumed. The *modified Z-score* [1] is used to detect outlier. All values are flagged as outlier, if in any slice of thw sliding window, a value fulfill: @@ -997,4 +994,4 @@ Interpolation of an inserted equidistant frequency grid of sampling rate `freq`. 
, and the result gets assigned to the next grid point. * `"nearest_agg"`: all flags in the range (+/- freq/2) of a grid point get aggregated with the function passed to `agg_func` and assigned to it. - \ No newline at end of file + diff --git a/saqc/funcs/spike_detection.py b/saqc/funcs/spike_detection.py index ce66bcfa6b73a7f211dfd442e1ce759e3a2a88a9..33eaeb35b3c0689e3c71312b2c7b2155da74168c 100644 --- a/saqc/funcs/spike_detection.py +++ b/saqc/funcs/spike_detection.py @@ -144,7 +144,7 @@ def flagSpikes_slidingZscore( @register("spikes_simpleMad") -def flagSpikes_simpleMad(data, field, flagger, length, z=3.5, freq=None, **kwargs): +def flagSpikes_simpleMad(data, field, flagger, winsz, z=3.5, **kwargs): """ The function represents an implementation of the modyfied Z-score outlier detection method, as introduced here: [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm @@ -156,21 +156,14 @@ def flagSpikes_simpleMad(data, field, flagger, length, z=3.5, freq=None, **kwarg time raster with seconds precision. :param field: Fieldname of the Soil moisture measurements field in data. :param flagger: A flagger - object. (saqc.flagger.X) - :param length: Offset String. Denoting the windows size that that th "Z-scored" values have to lie in. + :param winsz: Offset String. Denoting the window size that the "Z-scored" values have to lie in. :param z: Float. The value the Z-score is tested against. Defaulting to 3.5 (Recommendation of [1]) - :param freq: Frequencie. """ d = data[field].copy() - freq = inferFrequency(d) if freq is None else freq - if freq is None: - raise ValueError( - "freqency cannot inferred, provide `freq` as a param to mad()." 
- ) - winsz = int(pd.to_timedelta(length) / freq) - median = d.rolling(window=winsz, center=True, closed="both").median() + median = d.rolling(window=winsz, closed="both").median() diff = abs(d - median) - mad = diff.rolling(window=winsz, center=True, closed="both").median() + mad = diff.rolling(window=winsz, closed="both").median() mask = (mad > 0) & (0.6745 * diff > z * mad) flagger = flagger.setFlags(field, mask, **kwargs)