diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a5633e7d2f3b94c6d028ec4482e3de98be2af71..cd0e053e33d0f6a25382b3f8999a17b706e6c48c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ This changelog starts with version 2.0.0. Basically all parts of the system, inc ### Removed ### Fixed - `flagOffset` bug with zero-valued threshold +- `flagCrossStatistics` bug with unaligned input variables - `plot` fixed data loss when using *dfilter* kwarg ## [2.0.1](https://git.ufz.de/rdm-software/saqc/-/tags/v2.0.1) - 2021-12-20 diff --git a/saqc/core/modules/outliers.py b/saqc/core/modules/outliers.py index c794dd4405378b75dc961f4dcedf30252e3ca6a8..1f979b8e7fcef70a8b4423f4a6129ae1e14fcb4d 100644 --- a/saqc/core/modules/outliers.py +++ b/saqc/core/modules/outliers.py @@ -116,4 +116,4 @@ class Outliers: flag: float = BAD, **kwargs, ) -> saqc.SaQC: - return self._defer("flagCrossStatistic", locals()) + return self._defer("flagCrossStatistics", locals()) diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index 50d1e1550a397167f96789abbf7a61a25472279c..8c164819f0f9d53be397b5ad753d0bd7d090055d 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -14,6 +14,7 @@ import numba import numpy as np import numpy.polynomial.polynomial as poly import pandas as pd +import warnings from dios import DictOfSeries from outliers import smirnov_grubbs @@ -1369,6 +1370,14 @@ def flagCrossStatistics( The quality flags of data Flags values may have changed relatively to the input flags. + + Notes + ----- + + The input variables don't necessarily have to be aligned. If the variables are unaligned, scoring + and flagging will only be performed on the subset of indices shared among all input variables. 
+ + References ---------- [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm @@ -1376,13 +1385,6 @@ def flagCrossStatistics( fields = toSequence(field) - for src in fields[1:]: - if (data[src].index != data[fields[0]].index).any(): - raise ValueError( - f"indices of '{fields[0]}' and '{src}' are not compatibble, " - "please resample all variables to a common (time-)grid" - ) - df = data[fields].loc[data[fields].index_of("shared")].to_df() if isinstance(method, str): @@ -1419,6 +1421,7 @@ def flagCrossStatistics( return data, flags for f in fields: - flags[mask[f], f] = flag + m = mask[f].reindex(index=flags[f].index, fill_value=False) + flags[m, f] = flag return data, flags