Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • berntm/saqc
  • rdm-software/saqc
  • schueler/saqc
3 results
Show changes
......@@ -33,7 +33,7 @@ setup(
name=name,
version=versioneer.get_version(), # keep this line as it is
cmdclass=versioneer.get_cmdclass(), # keep this line as it is
author="Bert Palm, David Schaefer, Florian Gransee, Peter Luenenschloss",
author="David Schaefer, Bert Palm, Peter Luenenschloss",
author_email="david.schaefer@ufz.de",
description="A timeseries data quality control and processing tool/framework",
long_description=long_description,
......@@ -55,6 +55,9 @@ setup(
"scipy",
"typing_extensions",
],
extras_require={
"FM": ["momentfm"],
},
license_files=("LICENSE.md", "LICENSES/GPL-3.0-or-later.txt"),
entry_points={
"console_scripts": ["saqc=saqc.__main__:main"],
......
......@@ -13,7 +13,8 @@ import pandas as pd
import pytest
from saqc import BAD, FILTER_ALL, FILTER_NONE, UNFLAGGED, SaQC
from saqc.core import DictOfSeries, Flags, flagging, initFlagsLike, processing, register
from saqc.core import DictOfSeries, Flags, flagging, processing, register
from saqc.core.flags import initFlagsLike
from saqc.lib.types import OptionalNone
from tests.common import initData
......
......@@ -206,7 +206,7 @@ def test_set_flags(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]
@pytest.mark.parametrize("data", testdata)
def test_set_flags_with_mask(
data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]
data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]],
):
flags = Flags(data)
......@@ -253,7 +253,7 @@ def test_set_flags_with_mask(
@pytest.mark.parametrize("data", testdata)
def test_set_flags_with_index(
data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]
data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]],
):
flags = Flags(data)
......
#! /usr/bin/env python
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later
# -*- coding: utf-8 -*-
#! /usr/bin/env python
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import pytest
from saqc import SaQC
@pytest.fixture
def data():
    """Provide a two-column random frame on a 10-minute grid with NaN gaps.

    Columns ``d0`` and ``d1`` each hold 1000 values drawn with
    ``np.random.random``. Ten randomly drawn row positions (repetitions are
    possible) of the first column are overwritten with NaN.
    """
    n_rows = 1000
    columns = {}
    for k in range(2):
        columns["d" + str(k)] = np.random.random(n_rows)
    timestamps = pd.date_range("2000", freq="10min", periods=n_rows)
    frame = pd.DataFrame(columns, index=timestamps)
    # Only the first column (position 0) receives missing values.
    gap_positions = np.random.randint(0, n_rows, 10)
    frame.iloc[gap_positions, 0] = np.nan
    return frame
@pytest.mark.parametrize("field", ["d0", ["d1", "d0"]])
@pytest.mark.parametrize("ratio", [2, 4])
@pytest.mark.parametrize("context", [512, 256])
def test_fitFMmoment(data, field, ratio, context):
    """Smoke test: call ``fitMomentFM`` with a single field and a field list.

    Nothing about the result is asserted; the test passes as long as the
    call does not raise.
    """
    SaQC(data).fitMomentFM(field, ratio, context)
......@@ -7,10 +7,11 @@
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import pytest
from saqc import BAD, UNFLAGGED
from saqc.core import SaQC, initFlagsLike
from saqc import BAD, UNFLAGGED, SaQC
from saqc.core.flags import initFlagsLike
from tests.common import initData
......@@ -24,6 +25,15 @@ def data():
return constants_data
@pytest.fixture
def data_const_tail():
    """Provide an hourly single-column frame whose last five values are equal."""
    values = [1, 2, 3, 4, 5, 9, 9, 9, 9, 9]
    hourly_index = pd.date_range("2000", freq="1h", periods=10)
    return pd.DataFrame({"a": values}, index=hourly_index)
def test_constants_flagBasic(data):
field, *_ = data.columns
flags = initFlagsLike(data)
......@@ -35,6 +45,16 @@ def test_constants_flagBasic(data):
assert np.all(flagscol[25 + 1 :] == UNFLAGGED)
@pytest.mark.parametrize("window", [3, "3h", 5, "5h"])
def test_constants_tail(data_const_tail, window):
    """Check that exactly the constant tail of the series is flagged ``BAD``.

    For every parametrized ``window`` the last five flags must equal ``BAD``
    and all flags before them must equal ``UNFLAGGED``.
    """
    field = data_const_tail.columns[0]
    flagged = SaQC(data_const_tail).flagConstants(
        field, thresh=1, window=window, flag=BAD
    )
    column_flags = flagged._flags[field]
    assert np.all(column_flags[-5:] == BAD)
    assert np.all(column_flags[:-5] == UNFLAGGED)
def test_constants_flagVarianceBased(data):
field, *_ = data.columns
flags = initFlagsLike(data)
......
......@@ -10,9 +10,8 @@ import numpy as np
import pandas as pd
import pytest
import saqc
from saqc import BAD, DOUBTFUL, UNFLAGGED, SaQC
from saqc.core import DictOfSeries, initFlagsLike
from saqc import BAD, DOUBTFUL, UNFLAGGED, DictOfSeries, SaQC
from saqc.core.flags import initFlagsLike
from tests.common import initData
from tests.fixtures import char_dict, course_1 # noqa, todo: fix fixtures
......
......@@ -12,8 +12,9 @@ import numpy as np
import pandas as pd
import pytest
from saqc import BAD, UNFLAGGED, SaQC
from saqc.core import DictOfSeries, Flags, initFlagsLike, register
from saqc import BAD, UNFLAGGED, DictOfSeries, Flags, SaQC
from saqc.core import register
from saqc.core.flags import initFlagsLike
from saqc.funcs.generic import _execGeneric
from saqc.parsing.reader import _ConfigReader
from saqc.parsing.visitor import ConfigFunctionParser
......
......@@ -13,8 +13,8 @@ import pandas as pd
import pytest
import saqc
from saqc import BAD, UNFLAGGED
from saqc.core import DictOfSeries, SaQC, initFlagsLike
from saqc import BAD, UNFLAGGED, DictOfSeries, SaQC
from saqc.core.flags import initFlagsLike
from tests.fixtures import char_dict, course_1, course_2, course_3, course_4
......@@ -205,13 +205,16 @@ def test_flagZScoresMV():
assert (qc.flags.to_pandas().iloc[[40, 80], 0] > 0).all()
@pytest.mark.filterwarnings("ignore:Number of distinct clusters")
@pytest.mark.parametrize("n", [1, 10])
@pytest.mark.parametrize("p", [1, 2])
@pytest.mark.parametrize("thresh", ["auto", 2])
def test_flagUniLOF(spiky_data, n, p, thresh):
@pytest.mark.parametrize(
"cutoff", [{}, {"probability": 0.99}, {"thresh": "auto"}, {"thresh": 2}]
)
def test_flagUniLOF(spiky_data, n, p, cutoff):
data = spiky_data[0]
field, *_ = data.columns
qc = SaQC(data).flagUniLOF(field, n=n, p=p, thresh=thresh)
qc = SaQC(data).flagUniLOF(field, n=n, p=p, **cutoff)
flag_result = qc.flags[field]
test_sum = (flag_result.iloc[spiky_data[1]] == BAD).sum()
assert test_sum == len(spiky_data[1])
......
......@@ -6,11 +6,14 @@
# -*- coding: utf-8 -*-
import os
import numpy as np
import pandas as pd
import pytest
from saqc import BAD, UNFLAGGED, SaQC
from saqc.core import DictOfSeries, initFlagsLike
from saqc import BAD, UNFLAGGED, DictOfSeries, SaQC
from saqc.core.flags import initFlagsLike
from tests.common import initData
......@@ -24,6 +27,68 @@ def field(data):
return data.columns[0]
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_flagPlateau():
    """Check ``flagPlateau`` on the turbidity example data set.

    Every listed anomaly range must be flagged completely, and every stretch
    between two consecutive ranges must carry no flag at all.
    """
    # The path is resolved relative to the current working directory.
    csv_path = os.path.join(
        os.path.abspath(""), "docs/resources/data/turbidity_plateaus.csv"
    )
    raw = pd.read_csv(csv_path, parse_dates=[0], index_col=0)
    # Fill missing values: interpolate linearly, then pad forward and backward.
    filled = raw.interpolate("linear").ffill().bfill()
    qc = SaQC(filled).flagPlateau(
        "base3", min_length="10min", max_length="7d", granularity="20min"
    )
    # (start, stop) positions; the leading (0, 0) only anchors the first gap.
    anomalies = [
        (0, 0),
        (5313, 5540),
        (10000, 10200),
        (15000, 15500),
        (17000, 17114),
        (17790, 17810),
    ]
    is_flagged = qc["base3"].flags.to_pandas().squeeze() > 0
    for (_, previous_stop), (start, stop) in zip(anomalies, anomalies[1:]):
        assert is_flagged.iloc[start:stop].all()
        assert not is_flagged.iloc[previous_stop:start].any()
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_flagPlateau_long():
    """Check ``flagPlateau`` on a lengthened copy of the turbidity data.

    The flattened values of the example file, followed by ten further copies,
    are placed on a fresh 10-minute index. The listed anomaly ranges must be
    flagged completely and the stretches between them must stay unflagged.
    """
    # The path is resolved relative to the current working directory.
    csv_path = os.path.join(
        os.path.abspath(""), "docs/resources/data/turbidity_plateaus.csv"
    )
    raw = pd.read_csv(csv_path, parse_dates=[0], index_col=0)
    # Fill missing values: interpolate linearly, then pad forward and backward.
    filled = raw.interpolate("linear").ffill().bfill()
    # np.append without an axis flattens both arguments into one 1-d array.
    repeated = np.append(filled.values, [filled.values] * 10)
    series = pd.Series(
        repeated,
        index=pd.date_range("2000", freq="10min", periods=len(repeated)),
        name="base3",
    )
    qc = SaQC(series).flagPlateau(
        "base3", min_length="10min", max_length="7d", granularity="20min"
    )
    # (start, stop) positions; the leading (0, 0) only anchors the first gap.
    anomalies = [
        (0, 0),
        (5313, 5540),
        (10000, 10200),
        (15000, 15500),
        (17000, 17114),
        (17790, 17810),
    ]
    is_flagged = qc["base3"].flags.to_pandas().squeeze() > 0
    for (_, previous_stop), (start, stop) in zip(anomalies, anomalies[1:]):
        assert is_flagged.iloc[start:stop].all()
        assert not is_flagged.iloc[previous_stop:start].any()
@pytest.mark.parametrize("plot", [True, False])
@pytest.mark.parametrize("normalize", [True, False])
def test_flagPattern_dtw(plot, normalize):
......
......@@ -14,12 +14,33 @@ import pandas as pd
import pytest
import saqc
from saqc import UNFLAGGED, SaQC
from saqc.core import DictOfSeries, initFlagsLike
from saqc import UNFLAGGED, DictOfSeries, SaQC
from saqc.core.flags import initFlagsLike
from saqc.lib.ts_operators import linearInterpolation, polynomialInterpolation
from tests.fixtures import char_dict, course_3, course_5 # noqa, todo: fix fixtures
@pytest.mark.parametrize(
    ("window", "center", "expected"),
    [
        (1, True, [3, 2, 3, 2]),
        (2, False, [np.nan, 5, 5, 5]),
        (3, True, [np.nan, 8, 7, np.nan]),
        ("20min", True, [5, 5, 5, np.nan]),
    ],
)
def test_multivariateRolling(window, center, expected):
    """Check ``rolling`` with ``func="count"`` applied to several fields at once.

    The values written to the ``count`` target are compared with
    ``expected``; NaN entries are treated as equal to each other.
    """
    columns = {"a": [1, np.nan, 3, 4], "b": [1, 2, 3, 4], "c": [1, 2, 3, np.nan]}
    timestamps = pd.date_range("2000", periods=4, freq="10min")
    frame = pd.DataFrame(columns, index=timestamps)
    qc = saqc.SaQC(frame).rolling(
        ["a", "b", "c"], func="count", target="count", window=window, center=center
    )
    counts = qc.data["count"].values
    assert np.array_equal(counts, expected, equal_nan=True)
def test_rollingInterpolateMissing(course_5):
data, characteristics = course_5(periods=10, nan_slice=[5, 6])
field = data.columns[0]
......
......@@ -14,7 +14,7 @@ import pandas as pd
import pytest
from saqc import SaQC
from saqc.core import initFlagsLike
from saqc.core.flags import initFlagsLike
from saqc.funcs.resampling import (
_aggregationGrouper,
_constructAggregationReindexer,
......
......@@ -23,8 +23,8 @@ from hypothesis.strategies import (
)
from hypothesis.strategies._internal.types import _global_type_lookup
from saqc import BAD
from saqc.core import DictOfSeries, initFlagsLike
from saqc import BAD, DictOfSeries
from saqc.core.flags import initFlagsLike
from saqc.core.register import FUNC_MAP
MAX_EXAMPLES = 50
......