Skip to content
Snippets Groups Projects
Commit 21f74914 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

Merge z scoring methods

parent 9fce4593
No related branches found
No related tags found
1 merge request!700Merge z scoring methods
...@@ -57,7 +57,7 @@ SM2 ; shift(freq="15Min") ...@@ -57,7 +57,7 @@ SM2 ; shift(freq="15Min")
'SM(1|2)+' ; flagMissing() 'SM(1|2)+' ; flagMissing()
SM1 ; flagRange(min=10, max=60) SM1 ; flagRange(min=10, max=60)
SM2 ; flagRange(min=10, max=40) SM2 ; flagRange(min=10, max=40)
SM2 ; flagMAD(window="30d", z=3.5) SM2 ; flagZScore(window="30d", thresh=3.5, method='modified', center=False)
Dummy ; flagGeneric(field=["SM1", "SM2"], func=(isflagged(x) | isflagged(y))) Dummy ; flagGeneric(field=["SM1", "SM2"], func=(isflagged(x) | isflagged(y)))
``` ```
...@@ -98,7 +98,7 @@ saqc = (saqc ...@@ -98,7 +98,7 @@ saqc = (saqc
.flagMissing("SM(1|2)+", regex=True) .flagMissing("SM(1|2)+", regex=True)
.flagRange("SM1", min=10, max=60) .flagRange("SM1", min=10, max=60)
.flagRange("SM2", min=10, max=40) .flagRange("SM2", min=10, max=40)
.flagMAD("SM2", window="30d", z=3.5) .flagZScore("SM2", window="30d", thresh=3.5, method='modified', center=False)
.flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y)))) .flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y))))
``` ```
......
...@@ -3,4 +3,4 @@ varname ; test ...@@ -3,4 +3,4 @@ varname ; test
SM2 ; align(freq="15Min", method="nshift") SM2 ; align(freq="15Min", method="nshift")
SM2 ; flagMissing() SM2 ; flagMissing()
'SM(1|2)+' ; flagRange(min=10, max=60) 'SM(1|2)+' ; flagRange(min=10, max=60)
SM2 ; flagMAD(window="30d", z=3.5) SM2 ; flagZScore(window="30d", thresh=3.5, method='modified', center=False)
...@@ -3,5 +3,5 @@ SM2;align(freq="15Min", method="nshift");False ...@@ -3,5 +3,5 @@ SM2;align(freq="15Min", method="nshift");False
'.*';flagRange(min=10, max=60);False '.*';flagRange(min=10, max=60);False
SM2;flagMissing();False SM2;flagMissing();False
SM2;flagRange(min=10, max=60);False SM2;flagRange(min=10, max=60);False
SM2;flagMAD(window="30d", z=3.5);False SM2;flagZScore(window="30d", thresh=3.5, method='modified', center=False);False
Dummy;flag(func=(isflagged(SM1) | isflagged(SM2))) Dummy;flag(func=(isflagged(SM1) | isflagged(SM2)))
varname;test varname;test
#------;-------------------------- #------;--------------------------
SM2 ;flagRange(min=10, max=60) SM2 ;flagRange(min=10, max=60)
SM2 ;flagMAD(window="30d", z=3.5) SM2 ;flagZScore(window="30d", thresh=3.5, method="modified", center=False)
SM2 ;plot() SM2 ;plot()
\ No newline at end of file
varname;test varname;test
#------;-------------------------- #------;--------------------------
SM2 ;flagRange(min=-20, max=60) SM2 ;flagRange(min=-20, max=60)
SM2 ;flagMAD(window="30d", z=3.5) SM2 ;flagZScore(window="30d", thresh=3.5, method='modified', center=False)
SM2 ;plot() SM2 ;plot()
\ No newline at end of file
...@@ -2,8 +2,8 @@ varname;test ...@@ -2,8 +2,8 @@ varname;test
#------;-------------------------- #------;--------------------------
SM1;flagRange(min=10, max=60) SM1;flagRange(min=10, max=60)
SM2;flagRange(min=10, max=60) SM2;flagRange(min=10, max=60)
SM1;flagMAD(window="15d", z=3.5) SM1;flagZScore(window="15d", thresh=3.5, method='modified')
SM2;flagMAD(window="30d", z=3.5) SM2;flagZScore(window="30d", thresh=3.5, method='modified')
SM1;plot(path='../resources/temp/SM1processingResults') SM1;plot(path='../resources/temp/SM1processingResults')
SM2;plot(path='../resources/temp/SM2processingResults') SM2;plot(path='../resources/temp/SM2processingResults')
docs/resources/images/ZscorePopulation.png

793 KiB

SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
SPDX-License-Identifier: GPL-3.0-or-later
\ No newline at end of file
...@@ -24,6 +24,7 @@ from saqc import BAD, UNFLAGGED ...@@ -24,6 +24,7 @@ from saqc import BAD, UNFLAGGED
from saqc.core import DictOfSeries, Flags, flagging, register from saqc.core import DictOfSeries, Flags, flagging, register
from saqc.funcs.scores import _univarScoring from saqc.funcs.scores import _univarScoring
from saqc.lib.docs import DOC_TEMPLATES from saqc.lib.docs import DOC_TEMPLATES
from saqc.lib.rolling import windowRoller
from saqc.lib.tools import getFreqDelta, isflagged, toSequence from saqc.lib.tools import getFreqDelta, isflagged, toSequence
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -852,6 +853,13 @@ class OutliersMixin: ...@@ -852,6 +853,13 @@ class OutliersMixin:
---------- ----------
[1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
""" """
msg = """
The method `flagMAD` is deprecated and will be removed in verion 3.0 of saqc.
To achieve the same behavior use:
"""
call = f"qc.flagZScore(field={field}, window={window}, method='modified', thresh={z}, min_residuals={min_residuals}, min_periods={min_periods}, center={center})"
warnings.warn(f"{msg}`{call}`", DeprecationWarning)
self = self.flagZScore( self = self.flagZScore(
field, field,
...@@ -1226,68 +1234,53 @@ class OutliersMixin: ...@@ -1226,68 +1234,53 @@ class OutliersMixin:
---------- ----------
[1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
""" """
warnings.warn( msg = """
"The method `flagCrossStatistics` will be deprecated in a future version of saqc", The method `flagCrossStatistics` is deprecated and will be removed in verion 3.0 of saqc.
PendingDeprecationWarning, To achieve the same behavior use:
"""
new_method_string = {
"modZscore": "modified",
"Zscore": "standard",
np.mean: "standard",
np.median: "modified",
}
call = f"qc.flagZScore(field={field}, window=1, method={new_method_string[method]}, thresh={thresh}, axis=1)"
warnings.warn(f"{msg}`{call}`", DeprecationWarning)
return self.flagZScore(
field={field},
window=1,
method={new_method_string[method]},
thresh={thresh},
axis=1,
flag=flag,
) )
fields = toSequence(field) @register(
mask=["field"],
df = self._data[fields].to_pandas(how="inner") demask=["field"],
squeeze=["field"],
if isinstance(method, str): multivariate=True,
if method == "modZscore": docstring={"field": DOC_TEMPLATES["field"]},
MAD_series = df.subtract(df.median(axis=1), axis=0).abs().median(axis=1) )
diff_scores = (
(0.6745 * (df.subtract(df.median(axis=1), axis=0)))
.divide(MAD_series, axis=0)
.abs()
)
elif method == "Zscore":
diff_scores = (
df.subtract(df.mean(axis=1), axis=0)
.divide(df.std(axis=1), axis=0)
.abs()
)
else:
raise ValueError(method)
else:
try:
stat = getattr(df, method.__name__)(axis=1)
except AttributeError:
stat = df.aggregate(method, axis=1)
diff_scores = df.subtract(stat, axis=0).abs()
mask = diff_scores > thresh
if not mask.empty:
for f in fields:
m = mask[f].reindex(index=self._flags[f].index, fill_value=False)
self._flags[m, f] = flag
return self
@flagging()
def flagZScore( def flagZScore(
self: "SaQC", self: "SaQC",
field: str, field: Sequence[str],
method: Literal["standard", "modified"] = "standard",
window: str | int | None = None, window: str | int | None = None,
thresh: float = 3, thresh: float = 3,
min_residuals: int | None = None, min_residuals: int | None = None,
min_periods: int | None = None, min_periods: int | None = None,
model_func: Callable[[np.ndarray | pd.Series], float] = np.nanmean,
norm_func: Callable[[np.ndarray | pd.Series], float] = np.nanstd,
center: bool = True, center: bool = True,
axis: int = 0,
flag: float = BAD, flag: float = BAD,
**kwargs, **kwargs,
) -> "SaQC": ) -> "SaQC":
""" """
Flag data where its (rolling) Zscore exceeds a threshold. Flag data where its (rolling) Zscore exceeds a threshold.
The function implements flagging derived from a basic Zscore calculation. To handle non The function implements flagging derived from standard or modified Zscore calculation. To handle non
stationary data, the Zscoring can be applied with a rolling window. Therefore, the function stationary data, the Zscoring can be applied with a rolling window. Therefore, the function
allows for a minimum residual to be specified in order to mitigate overflagging in local allows for a minimum residual to be specified in order to mitigate overflagging in local
regimes of low variance. regimes of low variance.
...@@ -1297,51 +1290,164 @@ class OutliersMixin: ...@@ -1297,51 +1290,164 @@ class OutliersMixin:
Parameters Parameters
---------- ----------
window : window :
Size of the window. Either determined via an Offset String, denoting the windows temporal Size of the window. Either determined via an offset string, denoting the windows temporal
extension or by an integer, denoting the windows number of periods. ``NaN`` also count as extension or by an integer, denoting the windows number of periods. ``NaN`` also count as
periods. If ``None`` is passed, all data points share the same scoring window, which then periods. If ``None`` is passed, all data points share the same scoring window, which then
equals the whole data. equals the whole data.
method :
Which method to use for ZScoring:
* `"standard"`: standard Zscoring, using *mean* for the expectation and *standard deviation (std)* as scaling factor
* `"modified"`: modified Zscoring, using *median* as the expectation and *median absolute deviation (MAD)* as the scaling Factor
See notes section for detailed scoring formula
thresh : thresh :
Cutoff level for the Zscores, above which associated points are marked as outliers. Cutoff level for the Zscores, above which associated points are marked as outliers.
min_residuals : min_residuals :
Minimum residual value points must have to be considered outliers. Minimum residual value points must have to be considered outliers.
min_periods : min_periods :
Minimum number of valid measurements in a scoring window, to consider the resulting score valid. Minimum number of valid measurements in a scoring window, to consider the resulting score valid.
model_func : default mean
Function to calculate the center moment in every window.
norm_func : default std
Function to calculate the scaling for every window.
center : center :
Whether or not to center the target value in the scoring window. If ``False``, the Whether or not to center the target value in the scoring window. If ``False``, the
target value is the last value in the window. target value is the last value in the window.
axis :
Along which axis to calculate the scoring statistics:
* `0` (default) - calculate statistics along time axis
* `1` - calculate statistics over multiple variables
See Notes section for a visual clarification of the workings
of `axis` and `window`.
Notes Notes
----- -----
Steps of calculation:
The flag for :math:`x` is determined as follows:
1. Consider a window :math:`W` of successive points :math:`W = x_{1},...x_{w}`
containing the value :math:`y_{K}` which is to be checked. 1. Depending on ``window`` and ``axis``, the context population :math:`X` is collected (see pictures below)
(The index of :math:`K` depends on the selection of the parameter :py:attr:`center`.)
2. The "moment" :math:`M` for the window gets calculated via :math:`M=` :py:attr:`model_func` :math:`(W)`. * If ``axis=0``, any value is flagged in the context of those values of the same variable (``field``), that are
3. The "scaling" :math:`N` for the window gets calculated via :math:`N=` :py:attr:`norm_func` :math:`(W)`. in `window` range.
4. The "score" :math:`S` for the point :math:`x_{k}` gets calculated via :math:`S=(x_{k} - M) / N`. * If ``axis=1``, any value is flagged in the context of all values of all variables (``fields``), that are
5. Finally, :math:`x_{k}` gets flagged, if :math:`|S| >` :py:attr:`thresh` and in `window` range.
:math:`|M - x_{k}| >=` :py:attr:`min_residuals`. * If ``axis=1`` and ``window=1``, any value is flagged in the context of all values of all variables (``fields``),
that share the same timestamp.
.. figure:: /resources/images/ZscorePopulation.png
:class: with-border
2. Depending on ``method``, a score :math:`Z` is calculated for :math:`x` via :math:`Z = \\frac{|E(X) - X|}{S(X)}`
* ``method="standard"``: :math:`E(X)=mean(X)`, :math:`S(X)=std(X)`
* ``method="modified"``: :math:`E(X)=median(X)`, :math:`S(X)=MAD(X)`
3. :math:`x` is flagged, if :math:`Z >` ``thresh`` and :math:`|E(X) - x| >=` ``min_residuals``
""" """
datser = self._data[field]
if "norm_func" in kwargs or "model_func" in kwargs:
warnings.warn(
"Parameters norm_func and model_func are deprecated, use parameter method instead.\n"
'To model with mean and scale with standard deviation, use method="standard".\n'
'To model with median and scale with median absolute deviation (MAD) use method="modified".\n'
"Other/Custom model and scaling functions are not supported any more"
)
if (
"mean" in kwargs.get("model_func", "").__name__
or "std" in kwargs.get("norm_func", "").__name__
):
method = "standard"
elif (
"median" in kwargs.get("model_func", lambda x: x).__name__
or "median" in kwargs.get("norm_func", lambda x: x).__name__
):
method = "modified"
else:
raise ValueError(
"Support for scoring with functions not similar to either Zscore or modified Zscore is "
"not supported anymore"
)
dat = self._data[field].to_pandas(how="outer")
if min_residuals is None: if min_residuals is None:
min_residuals = 0 min_residuals = 0
score, model, _ = _univarScoring( if dat.empty:
datser, return self
window=window,
norm_func=norm_func, if min_periods is None:
model_func=model_func, min_periods = 0
center=center,
min_periods=min_periods, if window is None:
) if dat.notna().sum().sum() >= min_periods:
to_flag = (score.abs() > thresh) & ((model - datser).abs() >= min_residuals) if method == "standard":
self._flags[to_flag, field] = flag mod = pd.DataFrame(
{f: dat[f].mean() for f in dat.columns}, index=dat.index
)
norm = pd.DataFrame(
{f: dat[f].std() for f in dat.columns}, index=dat.index
)
else:
mod = pd.DataFrame(
{f: dat[f].median() for f in dat.columns}, index=dat.index
)
norm = pd.DataFrame(
{f: (dat[f] - mod[f]).abs().median() for f in dat.columns},
index=dat.index,
)
else:
return self
else: # window is not None
if axis == 0:
if method == "standard":
mod = dat.rolling(
window, center=center, min_periods=min_periods
).mean()
norm = dat.rolling(
window, center=center, min_periods=min_periods
).std()
else:
mod = dat.rolling(
window, center=center, min_periods=min_periods
).median()
norm = (
(mod - dat)
.abs()
.rolling(window, center=center, min_periods=min_periods)
.median()
)
else: # axis == 1:
if window == 1:
if method == "standard":
mod = dat.mean(axis=1)
norm = dat.std(axis=1)
else: # method == 'modified'
mod = dat.median(axis=1)
norm = (dat.subtract(mod, axis=0)).abs().median(axis=1)
else: # window > 1
if method == "standard":
mod = windowRoller(dat, window, "mean", min_periods, center)
norm = windowRoller(dat, window, "std", min_periods, center)
else: # method == 'modified'
mod = windowRoller(dat, window, "median", min_periods, center)
norm = windowRoller(
dat.subtract(mod, axis=0).abs(),
window,
"median",
min_periods,
center,
)
residuals = dat.subtract(mod, axis=0).abs()
score = residuals.divide(norm, axis=0)
to_flag = (score.abs() > thresh) & (residuals >= min_residuals)
for f in field:
self._flags[to_flag[f], f] = flag
return self return self
......
...@@ -6,10 +6,73 @@ from __future__ import annotations ...@@ -6,10 +6,73 @@ from __future__ import annotations
import functools import functools
import math import math
from typing import Literal
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from saqc.lib.tools import getFreqDelta
def windowRoller(
    data: pd.DataFrame,
    window,
    func: Literal["mean", "median", "std", "var", "sum"],
    min_periods: int = 0,
    center=True,
):
    """
    pandas-rolling style computation with 2 dimensional windows, ranging over all df columns.

    Every window spans ``window`` consecutive rows and *all* columns of ``data``;
    the NaN-aware numpy reduction ``np.nan<func>`` collapses each window to one scalar.

    * implements efficient 2d rolling in case of regular timestamps or integer defined window
    * else: dispatches to not optimized (no-numba) version in case of irregular timestamp

    Parameters
    ----------
    data :
        Frame to roll over. Rows are the rolling axis, all columns are part of every window.
    window :
        Window size, either a number of periods (int) or an offset string. With a
        regular index an offset string is converted to a number of periods by
        dividing it by the detected sampling interval.
    func :
        Name of the statistic to compute. ``"std"`` and ``"var"`` are evaluated
        with ``ddof=1``.
    min_periods :
        Minimum number of non-NaN values (counted over rows *and* columns) a window
        must contain, otherwise its result is set to NaN. ``None`` is treated as ``0``.
    center :
        If ``True`` the window is centered on the target row, otherwise the
        target row is the last row of the window.

    Returns
    -------
    pd.Series
        One value per row of ``data``, indexed like ``data`` and named ``"result"``.

    Raises
    ------
    ValueError
        If ``func`` is not one of the supported function names.
    """
    supportedFuncs = ["mean", "median", "std", "var", "sum"]
    if func not in supportedFuncs:
        raise ValueError(f'"func" has to be one of {supportedFuncs}. Got {func}.')

    # ``None`` means "no minimum"; normalise so the comparisons below cannot fail.
    if min_periods is None:
        min_periods = 0

    func_kwargs = {"ddof": 1} if func in ["std", "var"] else {}
    roll_func = getattr(np, "nan" + func)

    # NOTE(review): getFreqDelta presumably returns the sampling interval of a
    # regularly sampled index and None otherwise - confirm against saqc.lib.tools.
    regularFreq = getFreqDelta(data.index)
    vals = data.values

    if regularFreq is not None:
        if isinstance(window, str):
            window = int(pd.Timedelta(window) / pd.Timedelta(regularFreq))

        # Prepend ``window - 1`` all-NaN rows, so that every original row owns a
        # complete (trailing) window.
        ramp = np.full((window - 1, vals.shape[1]), np.nan)
        vals = np.concatenate([ramp, vals])
        if center:
            # Shift half a window upwards; the rows wrapping around to the end
            # are NaN rows taken from the ramp.
            vals = np.roll(vals, axis=0, shift=-int(window / 2))

        # sliding_window_view yields shape (n_rows, 1, window, n_cols). Only the
        # dummy axis 1 must be dropped: a bare ``squeeze()`` would also drop the
        # row/window/column axes whenever one of them has length 1 and break the
        # ``axis=(1, 2)`` reductions below.
        views = np.lib.stride_tricks.sliding_window_view(
            vals, (window, vals.shape[1])
        ).squeeze(axis=1)
        result = roll_func(views, axis=(1, 2), **func_kwargs)
        if min_periods > 0:
            invalid_wins = (~np.isnan(views)).sum(axis=(1, 2)) < min_periods
            result[invalid_wins] = np.nan
        out = pd.Series(result, index=data.index, name="result")
    else:  # regularFreq is None
        # Roll over the integer row positions and use them to slice the 2d
        # value block belonging to every window.
        i_ser = pd.Series(range(data.shape[0]), index=data.index, name="result")
        result = i_ser.rolling(window=window, center=center).apply(
            raw=True,
            func=lambda x: roll_func(
                data.values[x.astype(int), :], axis=(0, 1), **func_kwargs
            ),
        )
        if min_periods > 0:
            valid_counts = i_ser.rolling(window=window, center=center).apply(
                raw=True,
                func=lambda x: (~np.isnan(data.values[x.astype(int), :])).sum(),
            )
            invalid_wins = valid_counts < min_periods
            result[invalid_wins] = np.nan
        out = result
    return out
def removeRollingRamps( def removeRollingRamps(
data: pd.Series, data: pd.Series,
......
...@@ -55,7 +55,7 @@ DMP = [ ...@@ -55,7 +55,7 @@ DMP = [
"2016-04-01 00:05:48,3573.0,NIL,,,32.685,NIL,,,nan,nan,nan,nan\n", "2016-04-01 00:05:48,3573.0,NIL,,,32.685,NIL,,,nan,nan,nan,nan\n",
"2016-04-01 00:15:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3157,NIL,,\n", "2016-04-01 00:15:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3157,NIL,,\n",
"2016-04-01 00:20:42,3572.0,NIL,,,32.7428,NIL,,,nan,nan,nan,nan\n", "2016-04-01 00:20:42,3572.0,NIL,,,32.7428,NIL,,,nan,nan,nan,nan\n",
'2016-04-01 00:30:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3679,BAD,OTHER,"{""test"": ""flagMAD"", ""comment"": """"}"\n', '2016-04-01 00:30:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3679,BAD,OTHER,"{""test"": ""flagZScore"", ""comment"": """"}"\n',
"2016-04-01 00:35:37,3572.0,NIL,,,32.6186,NIL,,,nan,nan,nan,nan\n", "2016-04-01 00:35:37,3572.0,NIL,,,32.6186,NIL,,,nan,nan,nan,nan\n",
"2016-04-01 00:45:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3679,NIL,,\n", "2016-04-01 00:45:00,nan,nan,nan,nan,nan,nan,nan,nan,29.3679,NIL,,\n",
] ]
......
...@@ -32,7 +32,9 @@ def test_flagMad(spiky_data): ...@@ -32,7 +32,9 @@ def test_flagMad(spiky_data):
data = spiky_data[0] data = spiky_data[0]
field, *_ = data.columns field, *_ = data.columns
flags = initFlagsLike(data) flags = initFlagsLike(data)
qc = SaQC(data, flags).flagMAD(field, "1H", flag=BAD) qc = SaQC(data, flags).flagZScore(
field, window="1H", method="modified", thresh=3.5, flag=BAD
)
flag_result = qc.flags[field] flag_result = qc.flags[field]
test_sum = (flag_result[spiky_data[1]] == BAD).sum() test_sum = (flag_result[spiky_data[1]] == BAD).sum()
assert test_sum == len(spiky_data[1]) assert test_sum == len(spiky_data[1])
...@@ -127,34 +129,35 @@ def test_grubbs(dat): ...@@ -127,34 +129,35 @@ def test_grubbs(dat):
@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")]) @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
def test_flagCrossStatistics(dat): @pytest.mark.parametrize(
data1, characteristics = dat(initial_level=0, final_level=0, out_val=0) "parameters",
data2, characteristics = dat(initial_level=0, final_level=0, out_val=10) [("standard", 1), ("modified", 1), ("modified", 3), ("standard", "3h")],
fields = ["field1", "field2"] )
s1, s2 = data1["data"], data2["data"] def test_flagCrossStatistics(dat, parameters):
s1 = pd.Series(data=s1.values, index=s1.index) fields = [f"data{i}" for i in range(6)]
s2 = pd.Series(data=s2.values, index=s1.index) data = pd.DataFrame(
data = DictOfSeries(field1=s1, field2=s2) 0, columns=fields, index=pd.date_range("2000", freq="1h", periods=10)
)
bad_idx = (np.random.randint(0, 10), np.random.randint(0, 6))
data.iloc[bad_idx[0], bad_idx[1]] = 10
flags = initFlagsLike(data) flags = initFlagsLike(data)
qc = SaQC(data, flags).flagZScore(
fields, thresh=2, method=parameters[0], flag=BAD, axis=1, window=parameters[1]
)
with pytest.deprecated_call(): isflagged = qc.flags.to_pandas() > UNFLAGGED
qc = SaQC(data, flags).flagCrossStatistics( assert isflagged.iloc[bad_idx[0], bad_idx[1]]
fields, thresh=3, method=np.mean, flag=BAD assert isflagged.sum().sum() == 1
)
for field in fields:
isflagged = qc.flags[field] > UNFLAGGED
assert isflagged[characteristics["raise"]].all()
def test_flagZScores(): def test_flagZScoresUV():
np.random.seed(seed=1) np.random.seed(seed=1)
data = pd.Series( data = pd.DataFrame(
[np.random.normal() for k in range(100)], {"data": [np.random.normal() for k in range(100)]},
index=pd.date_range("2000", freq="1D", periods=100), index=pd.date_range("2000", freq="1D", periods=100),
name="data",
) )
data.iloc[[5, 80]] = 5 data.iloc[[5, 80], 0] = 5
data.iloc[[40]] = -6 data.iloc[[40], 0] = -6
qc = saqc.SaQC(data) qc = saqc.SaQC(data)
qc = qc.flagZScore("data", window=None) qc = qc.flagZScore("data", window=None)
...@@ -176,6 +179,39 @@ def test_flagZScores(): ...@@ -176,6 +179,39 @@ def test_flagZScores():
assert (qc.flags.to_pandas().iloc[[40, 80], 0] > 0).all() assert (qc.flags.to_pandas().iloc[[40, 80], 0] > 0).all()
def test_flagZScoresMV():
    """Check `flagZScore` on two variables with planted outliers.

    Covers the global window (``window=None``), the ``min_residuals`` cut-off,
    an offset window and an integer window.
    """
    np.random.seed(seed=1)
    # draw order matters for reproducibility: first all of "data", then "data2"
    columns = {}
    for name in ("data", "data2"):
        columns[name] = [np.random.normal() for k in range(100)]
    data = pd.DataFrame(
        columns, index=pd.date_range("2000", freq="1D", periods=100)
    )

    # plant the outliers
    data.iloc[[5, 80], 0] = 5
    data.iloc[[40], 0] = -6
    data.iloc[[60], 1] = 10

    # global scoring window, both variables
    qc = saqc.SaQC(data).flagZScore(["data", "data2"], window=None)
    flags = qc.flags.to_pandas()
    assert (flags.iloc[[5, 40, 80], 0] > 0).all()
    assert (flags.iloc[[60], 1] > 0).all()

    # a large minimum residual suppresses all flags
    qc = saqc.SaQC(data).flagZScore("data", window=None, min_residuals=10)
    flags = qc.flags.to_pandas()
    assert (flags[["data", "data2"]] < 0).all().all()

    # offset defined rolling window
    qc = saqc.SaQC(data).flagZScore(["data", "data2"], window="20D")
    flags = qc.flags.to_pandas()
    assert (flags.iloc[[40, 80], 0] > 0).all()

    # integer defined rolling window
    qc = saqc.SaQC(data).flagZScore("data", window=20)
    flags = qc.flags.to_pandas()
    assert (flags.iloc[[40, 80], 0] > 0).all()
@pytest.mark.parametrize("n", [1, 10]) @pytest.mark.parametrize("n", [1, 10])
@pytest.mark.parametrize("p", [1, 2]) @pytest.mark.parametrize("p", [1, 2])
@pytest.mark.parametrize("thresh", ["auto", 2]) @pytest.mark.parametrize("thresh", ["auto", 2])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment