Skip to content
Snippets Groups Projects
Commit 953ff5b3 authored by David Schäfer's avatar David Schäfer
Browse files

Merge branch 'butterworth' into 'develop'

Add `fitButterworth`

See merge request !564
parents 907c3114 cfa4f98b
No related branches found
No related tags found
5 merge requests!685Release 2.4,!684Release 2.4,!567Release 2.2.1,!566Release 2.2,!564Add `fitButterworth`
Pipeline #124090 canceled with stages
......@@ -15,6 +15,7 @@ This changelog starts with version 2.0.0. Basically all parts of the system, inc
- translation of `dfilter`
- new generic function `clip`
- parameter `min_periods` to `SaQC.flagConstants`
- function `fitButterworth`
### Changed
- test function interface changed to `func(saqc: SaQC, field: str | Sequence[str], *args, **kwargs)`
- lib function `butterFilter` returns `NaN` for too-short series
......
......@@ -11,12 +11,14 @@ from typing import TYPE_CHECKING, Tuple, Union
import numpy as np
import pandas as pd
from typing_extensions import Literal
from dios import DictOfSeries
from saqc.core.flags import Flags
from saqc.core.register import register
from saqc.lib.tools import getFreqDelta
from saqc.lib.ts_operators import (
butterFilter,
polyRoller,
polyRollerIrregular,
polyRollerNoMissing,
......@@ -27,6 +29,18 @@ from saqc.lib.ts_operators import (
if TYPE_CHECKING:
from saqc.core.core import SaQC
_FILL_METHODS = Literal[
"linear",
"nearest",
"zero",
"slinear",
"quadratic",
"cubic",
"spline",
"barycentric",
"polynomial",
]
class CurvefitMixin:
@register(mask=["field"], demask=[], squeeze=[])
......@@ -61,7 +75,7 @@ class CurvefitMixin:
Parameters
----------
field : str
A column in flags and data.
A column in flags and data.
window : str, int
Size of the window you want to use for fitting. If an integer is passed,
......@@ -97,6 +111,54 @@ class CurvefitMixin:
)
return self
@register(mask=["field"], demask=[], squeeze=[])
def fitLowpassFilter(
self: "SaQC",
field: str,
cutoff: float | str,
nyq: float = 0.5,
filter_order: int = 2,
fill_method: _FILL_METHODS = "linear",
**kwargs,
):
"""
Fits the data using the butterworth filter.
Note
----
The data is expected to be regularly sampled.
Parameters
----------
field: str
A column in flags and data.
cutoff: {float, str}
The cutoff-frequency, either an offset freq string, or expressed in multiples of the sampling rate.
nyq: float
The niquist-frequency. expressed in multiples if the sampling rate.
fill_method: Literal[‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘spline’, ‘barycentric’, ‘polynomial’]
Fill method to be applied on the data before filtering (butterfilter cant
handle ''np.nan''). See documentation of pandas.Series.interpolate method for
details on the methods associated with the different keywords.
filter_type: Literal["lowpass", "highpass", "bandpass", "bandstop"]
The type of filter. Default is ‘lowpass’.
"""
self._data[field] = butterFilter(
self._data[field],
cutoff=cutoff,
nyq=nyq,
filter_order=filter_order,
fill_method=fill_method,
filter_type="lowpass",
)
return self
def _fitPolynomial(
data: DictOfSeries,
......
......@@ -459,7 +459,7 @@ def shift2Freq(
def butterFilter(
x, cutoff, nyq=0.5, filter_order=2, fill_method="linear", filter_type="low"
x, cutoff, nyq=0.5, filter_order=2, fill_method="linear", filter_type="lowpass"
):
"""
Applies butterworth filter.
......@@ -481,6 +481,8 @@ def butterFilter(
handle ''np.nan''). See documentation of pandas.Series.interpolate method for
details on the methods associated with the different keywords.
filter_type: Literal["lowpass", "highpass", "bandpass", "bandstop"]
The type of filter. Default is ‘lowpass’.
Returns
-------
......
......@@ -12,7 +12,6 @@ import numpy as np
import pandas as pd
import dios
from saqc.constants import BAD
from saqc.core import Flags
from saqc.core.history import History, createHistoryFromData
......
#! /usr/bin/env python
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# see test/functs/fixtures.py for global fixtures "course_..."
import pytest
import dios
from saqc import BAD, UNFLAGGED, SaQC
from saqc.core import initFlagsLike
from tests.fixtures import char_dict, course_1, course_2
@pytest.mark.filterwarnings("ignore: The fit may be poorly conditioned")
@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
def test_modelling_polyFit_forRegular(dat):
data, _ = dat(
freq="10min", periods=30, initial_level=0, final_level=100, out_val=-100
)
# add some nice sine distortion
data = data + 10 * np.sin(np.arange(0, len(data.indexes[0])))
data = dios.DictOfSeries(data)
flags = initFlagsLike(data)
qc1 = SaQC(data, flags).calculatePolynomialResiduals("data", 11, 2, numba=False)
qc2 = SaQC(data, flags).calculatePolynomialResiduals("data", 11, 2, numba=True)
assert (qc1.data["data"] - qc2.data["data"]).abs().max() < 10**-10
qc3 = SaQC(data, flags).calculatePolynomialResiduals(
"data", "110min", 2, numba=False
)
assert qc3.data["data"].equals(qc1.data["data"])
qc4 = SaQC(data, flags).calculatePolynomialResiduals(
"data", 11, 2, numba=True, min_periods=11
)
assert (qc4.data["data"] - qc2.data["data"]).abs().max() < 10**-10
data.iloc[13:16] = np.nan
qc5 = SaQC(data, flags).calculatePolynomialResiduals(
"data", 11, 2, numba=True, min_periods=9
)
assert qc5.data["data"].iloc[10:19].isna().all()
@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
def test_modelling_rollingMean_forRegular(dat):
data, _ = dat(
freq="10min", periods=30, initial_level=0, final_level=100, out_val=-100
)
data = dios.DictOfSeries(data)
flags = initFlagsLike(data)
qc = SaQC(data, flags)
qc.calculateRollingResiduals(
"data",
5,
func=np.mean,
min_periods=0,
center=True,
)
qc.calculateRollingResiduals(
"data",
5,
func=np.mean,
min_periods=0,
center=False,
)
@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_1")])
def test_modelling_mask(dat):
data, _ = dat()
data = dios.DictOfSeries(data)
flags = initFlagsLike(data)
qc = SaQC(data, flags)
field = "data"
# set flags everywhere to test unflagging
flags[:, field] = BAD
common = dict(field=field, mode="periodic")
result = qc.selectTime(**common, start="20:00", end="40:00", closed=False)
flagscol = result.flags[field]
m = (20 > flagscol.index.minute) | (flagscol.index.minute > 40)
assert all(result.flags[field][m] == UNFLAGGED)
assert all(result.data[field][m].isna())
result = qc.selectTime(**common, start="15:00:00", end="02:00:00")
flagscol = result.flags[field]
m = (15 <= flagscol.index.hour) & (flagscol.index.hour <= 2)
assert all(result.flags[field][m] == UNFLAGGED)
assert all(result.data[field][m].isna())
result = qc.selectTime(**common, start="03T00:00:00", end="10T00:00:00")
flagscol = result.flags[field]
m = (3 <= flagscol.index.hour) & (flagscol.index.hour <= 10)
assert all(result.flags[field][m] == UNFLAGGED)
assert all(result.data[field][m].isna())
mask_ser = pd.Series(False, index=data["data"].index)
mask_ser[::5] = True
data["mask_ser"] = mask_ser
flags = initFlagsLike(data)
result = SaQC(data, flags).selectTime(
"data", mode="selection_field", selection_field="mask_ser"
)
m = mask_ser
assert all(result.flags[field][m] == UNFLAGGED)
assert all(result.data[field][m].isna())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment