Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
......@@ -143,13 +143,13 @@ class InterpolationMixin:
field: str,
method: _SUPPORTED_METHODS,
order: int = 2,
limit: int = 2,
downgrade: bool = False,
limit: int | None = None,
extrapolate: Literal["forward", "backward", "both"] = None,
flag: float = UNFLAGGED,
**kwargs,
) -> "SaQC":
"""
Function to interpolate nan values in the data.
Function to interpolate nan values in data.
All interpolation methods from ``pandas.interpolate`` are available and are selected via the very same
keywords that you would pass to ``pd.Series.interpolate``'s ``method`` parameter.
......@@ -167,9 +167,11 @@ class InterpolationMixin:
If your selected interpolation method can be performed at different 'orders' - here you pass the desired
order.
limit : int, default 2
Maximum number of consecutive 'nan' values allowed for a gap to be interpolated. This really restricts the
interpolation to chunks, containing not more than `limit` successive nan entries.
limit : int or str, default None
Upper limit of missing index values (with respect to `freq`) to fill. The limit can either be expressed
as the number of consecutive missing values (integer) or as the temporal extent of the gaps to be filled
(offset string).
If `None` is passed, no limit is set.
flag : float or None, default UNFLAGGED
Flag that is set for interpolated values. If ``None``, no flags are set at all.
......@@ -187,8 +189,8 @@ class InterpolationMixin:
self._data[field],
method,
order=order,
inter_limit=limit,
downgrade_interpolation=downgrade,
gap_limit=limit,
extrapolate=extrapolate,
)
interpolated = self._data[field].isna() & inter_data.notna()
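A minimal usage sketch of the reworked signature (the hunk header omits the method's name; ``interpolateInvalid`` is assumed here):

import numpy as np
import pandas as pd
import saqc

s = pd.Series(
    [1.0, np.nan, np.nan, 4.0, 5.0],
    index=pd.date_range("2021-01-01", periods=5, freq="10min"),
)
qc = saqc.SaQC(pd.DataFrame({"x": s}))
# limit=None (the new default) fills gaps of any size;
# limit=2 would leave the two-NaN gap untouched instead
qc = qc.interpolateInvalid("x", method="time", limit=None)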
......@@ -210,15 +212,12 @@ class InterpolationMixin:
freq: str,
method: _SUPPORTED_METHODS,
order: int = 2,
limit: int = 2,
downgrade: bool = False,
limit: int | None = 2,
extrapolate: Literal["forward", "backward", "both"] = None,
**kwargs,
) -> "SaQC":
"""
Function to interpolate the data at regular (equidistant) timestamps (or Grid points).
Note, that the interpolation will only be calculated, for grid timestamps that have a preceding AND a succeeding
valid data value within "freq" range.
Function to interpolate the data at regular (equidistant) timestamps (or grid points).
Parameters
----------
......@@ -234,18 +233,22 @@ class InterpolationMixin:
The interpolation method you want to apply.
order : int, default 2
If there your selected interpolation method can be performed at different 'orders' - here you pass the desired
If your selected interpolation method can be performed at different 'orders' - here you pass the desired
order.
limit : int, default 2
Maximum number of consecutive 'nan' values allowed for a gap to be interpolated. This really restricts the
interpolation to chunks, containing not more than `limit` successive nan entries.
limit : int, optional
Upper limit of missing index values (with respect to `freq`) to fill. The limit can either be expressed
as the number of consecutive missing values (integer) or as the temporal extent of the gaps to be filled
(offset string).
If `None` is passed, no limit is set.
downgrade : bool, default False
If `True` and the interpolation can not be performed at current order, retry with a lower order.
This can happen, because the chosen ``method`` does not support the passed ``order``, or
simply because not enough values are present in a interval.
extrapolate : {'forward', 'backward', 'both'}, default None
Use this parameter to perform extrapolation instead of interpolation onto the trailing and/or leading chunks of
NaN values in the data series.
* 'None' (default) - perform interpolation
* 'forward'/'backward' - perform forward/backward extrapolation
* 'both' - perform forward and backward extrapolation
Returns
-------
......@@ -283,8 +286,8 @@ class InterpolationMixin:
data=datcol,
method=method,
order=order,
inter_limit=limit,
downgrade_interpolation=downgrade,
gap_limit=limit,
extrapolate=extrapolate,
)
# override falsely interpolated values:
......@@ -307,7 +310,7 @@ class InterpolationMixin:
"method": method,
"order": order,
"limit": limit,
"downgrade": downgrade,
"extrapolate": extrapolate,
**kwargs,
},
}
......
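A sketch of the new ``extrapolate`` option on the grid interpolation (the method is called ``interpolate`` in the tests further down); the behavior mirrors the parametrized ts_operators test at the end of this diff:

# interpolate onto the freq-grid; gaps of 3 or more grid points stay empty,
# but forward extrapolation additionally fills trailing NaNs
qc = qc.interpolate("x", freq="15min", method="time", limit=3, extrapolate="forward")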
......@@ -334,9 +334,10 @@ class ResamplingMixin:
"inverse_interpolation",
"match",
] = "match",
freq: Optional[str] = None,
drop: Optional[bool] = False,
squeeze: Optional[bool] = False,
freq: str | None = None,
drop: bool = False,
squeeze: bool = False,
overwrite: bool = False,
**kwargs,
) -> "SaQC":
"""
......@@ -394,6 +395,10 @@ class ResamplingMixin:
If set to `True`, the appended flags frame will be squeezed - resulting in function-specific flags information
getting lost.
overwrite: bool, default False
If set to True, the newly appended flags will overwrite existing flags. This might result in a loss of previous
flagging information.
Returns
-------
saqc.SaQC
......@@ -449,17 +454,24 @@ class ResamplingMixin:
raise ValueError(f"unknown method {method}")
history = self._flags.history[field].apply(dummy.index, func, func_kws)
if overwrite is False:
mask = _isflagged(self._flags[target], thresh=kwargs["dfilter"])
history._hist[mask] = np.nan
if squeeze:
history = history.squeeze(raw=True)
meta = {
"func": f"concatFlags({field})",
"args": (field, target),
"func": f"concatFlags",
"args": (field,),
"kwargs": {
"target": target,
"method": method,
"freq": freq,
"drop": drop,
"squeeze": squeeze,
"overwrite": overwrite,
**kwargs,
},
}
......
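The new ``overwrite`` switch in action, mirroring ``test_concatFlags`` further down:

qc = qc.flagRange(field="a", max=4)
qc = qc.flagRange(field="a", target="b", max=3)  # branch flags out to "b"
# project b's flags back onto a: overwrite=True lets them replace flags already
# set on a, while the default overwrite=False masks out already-flagged positions
qc = qc.concatFlags("b", target="a", method="match", overwrite=True)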
......@@ -234,6 +234,7 @@ class ToolsMixin:
xscope: Optional[slice] = None,
phaseplot: Optional[str] = None,
store_kwargs: Optional[dict] = None,
ax: mpl.axes.Axes | None = None,
ax_kwargs: Optional[dict] = None,
dfilter: float = FILTER_NONE,
**kwargs,
......@@ -297,7 +298,6 @@ class ToolsMixin:
"""
data, flags = self._data.copy(), self._flags.copy()
interactive = path is None
level = kwargs.get("flag", UNFLAGGED)
if dfilter < np.inf:
......@@ -309,9 +309,8 @@ class ToolsMixin:
if ax_kwargs is None:
ax_kwargs = {}
if interactive:
if not path:
mpl.use(_MPL_DEFAULT_BACKEND)
else:
mpl.use("Agg")
......@@ -324,13 +323,14 @@ class ToolsMixin:
history=history,
xscope=xscope,
phaseplot=phaseplot,
ax=ax,
ax_kwargs=ax_kwargs,
)
if interactive:
if ax is None and not path:
plt.show()
else:
if path:
if store_kwargs.pop("pickle", False):
with open(path, "wb") as f:
pickle.dump(fig, f)
......
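A sketch of what the new ``ax`` parameter enables - rendering several variables into one prepared figure (variable names are placeholders):

import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
# with ax given (and no path), plot draws into the passed axes
# instead of opening its own interactive window
qc.plot("x", ax=ax1, ax_kwargs={"ylabel": "x"})
qc.plot("y", ax=ax2, ax_kwargs={"ylabel": "y"})
fig.savefig("overview.png")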
......@@ -6,6 +6,8 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
import itertools
from typing import Optional, Union
......@@ -58,11 +60,12 @@ def makeFig(
field: str,
flags: Flags,
level: float,
max_gap: Optional[str] = None,
history: Union[Optional[Literal["valid", "complete"]], list] = "valid",
xscope: Optional[slice] = None,
phaseplot: Optional[str] = None,
ax_kwargs: Optional[dict] = None,
max_gap: str | None = None,
history: Literal["valid", "complete"] | None | list[str] = "valid",
xscope: slice | None = None,
phaseplot: str | None = None,
ax: mpl.axes.Axes | None = None,
ax_kwargs: dict | None = None,
):
"""
Returns a figure object, containing data graph with flag marks for field.
......@@ -152,9 +155,10 @@ def makeFig(
d = _insertBlockingNaNs(d, max_gap)
# figure composition
fig = mpl.pyplot.figure(constrained_layout=True, **FIG_KWARGS)
grid = fig.add_gridspec()
ax = fig.add_subplot(grid[0])
if ax is None:
fig = mpl.pyplot.figure(constrained_layout=True, **FIG_KWARGS)
grid = fig.add_gridspec()
ax = fig.add_subplot(grid[0])
_plotVarWithFlags(
ax,
......@@ -172,7 +176,7 @@ def makeFig(
)
plt.rcParams["font.size"] = default
return fig
return ax.figure
def _plotVarWithFlags(
......
......@@ -276,91 +276,129 @@ def meanQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
)
def interpolateNANs(
data, method, order=2, inter_limit=2, downgrade_interpolation=False
def _interpolWrapper(
x, order=1, method="time", limit_area="inside", limit_direction=None
):
"""
Helper that hands the input data back uninterpolated whenever the data configuration
does not provide enough valid values for the interpolation method at the selected degree.
"""
min_vals_dict = {
"nearest": 2,
"slinear": 2,
"quadratic": 3,
"cubic": 4,
"spline": order + 1,
"polynomial": order + 1,
"piecewise_polynomial": 2,
"pchip": 2,
"akima": 2,
"cubicspline": 2,
}
min_vals = min_vals_dict.get(method, 0)
if (x.size < 3) or (x.count() < min_vals):
return x
else:
return x.interpolate(
method=method,
order=order,
limit_area=limit_area,
limit_direction=limit_direction,
)
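# e.g. a cubic interpolation needs at least 4 valid values (min_vals_dict["cubic"] above):
#   _interpolWrapper(pd.Series([1.0, np.nan, np.nan, 4.0]), method="cubic")
# trips the x.count() < min_vals guard and hands the series back unchanged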
def interpolateNANs(data, method, order=2, gap_limit=2, extrapolate=None):
"""
The function interpolates nan-values (and nan-grids) in timeseries data. It can
be passed all the method keywords from the pd.Series.interpolate method and will
then apply these very methods. Note that the limit keyword really restricts
the interpolation to chunks, not containing more than "limit" nan entries (
the interpolation to gaps, not containing more than "limit" nan entries (
thereby not being identical to the "limit" keyword of pd.Series.interpolate).
:param data: pd.Series or np.array. The data series to be interpolated
:param method: String. Method keyword designating interpolation method to use.
:param order: Integer. If your desired interpolation method needs an order to be passed -
here you pass it.
:param inter_limit: Integer. Default = 2. Limit up to which consecutive nan - values in the data get
replaced by interpolation.
:param gap_limit: Integer or Offset String. Default = 2.
Number up to which consecutive nan-values in the data get
replaced by interpolated values.
Its default value suits an interpolation that will only apply to points of an
inserted frequency grid (regularization by interpolation).
Gaps wider than "limit" will NOT be interpolated at all.
:param downgrade_interpolation: Boolean. Default False. If True:
Gaps of size "limit" or greater will NOT be interpolated at all.
:param extrapolate: Str or None. Default None.
If 'forward'/'backward'/'both' is passed, leading and/or trailing chunks of NaN values
get filled by extrapolation instead of being left untouched.
:return:
"""
inter_limit = int(inter_limit)
data = pd.Series(data, copy=True)
gap_mask = data.isna().rolling(inter_limit, min_periods=0).sum() != inter_limit
if inter_limit == 2:
gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
# helper variable holding the numerical value of the gap limit, if it is numeric (avoids comparing str to int)
gap_check = np.nan if isinstance(gap_limit, str) else gap_limit
data = pd.Series(data, copy=True)
limit_area = "inside" if not extrapolate else "outside"
if gap_check is None:
# if there is actually no limit set for the gaps to be interpolated, generate a dummy mask for the gaps
gap_mask = pd.Series(True, index=data.index, name=data.name)
else:
gap_mask = (
gap_mask.replace(True, np.nan)
.fillna(method="bfill", limit=inter_limit)
.replace(np.nan, True)
.astype(bool)
)
if gap_check < 2:
# breaks execution down the line and is thus caught here, since it basically means "do nothing"
return data
else:
# if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right
# side of each too-large gap via a rolling count
gap_mask = data.rolling(gap_limit, min_periods=0).count() > 0
# correction for initial gap
if isinstance(gap_limit, int):
gap_mask.iloc[:gap_limit] = True
if gap_limit == 2:
# for the common case of gap_limit=2 (default "harmonisation"), we efficiently back propagate the False
# value to fill the whole too-large gap by a shift and a conjunction.
gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
else:
# If the gap limit is bigger, we use a flip-rolling combo to backpropagate the False values
gap_mask = ~(
(~gap_mask[::-1]).rolling(gap_limit, min_periods=0).sum() > 0
)[::-1]
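# worked example, gap_limit=2 on [NaN, 0, NaN, NaN, 4]:
#   rolling(2).count() > 0        -> [F, T, T, F, T]
#   initial-gap correction        -> [T, T, T, F, T]
#   & shift(-1, fill_value=True)  -> [T, T, F, F, T]
# i.e. both entries of the size-2 gap are masked out, as intended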
# memorizing the index for later reindexing
pre_index = data.index
if data[gap_mask].empty:
# drop the gaps that are too large with regard to the gap_limit from the data-to-be interpolated
data = data[gap_mask]
if data.empty:
return data
else:
data = data[gap_mask]
if method in ["linear", "time"]:
# in the case of linear interpolation, not much can go wrong/break, so this conditional branch finishes
# efficiently by just calling the pandas interpolation routine to fill the gaps remaining in the data:
data.interpolate(
method=method, inplace=True, limit=inter_limit - 1, limit_area="inside"
method=method,
inplace=True,
limit_area=limit_area,
limit_direction=extrapolate,
)
else:
dat_name = data.name
gap_mask = (~gap_mask).cumsum()
data = pd.merge(gap_mask, data, how="inner", left_index=True, right_index=True)
def _interpolWrapper(x, wrap_order=order, wrap_method=method):
if wrap_order < 0:
return x
elif x.count() > wrap_order:
try:
return x.interpolate(method=wrap_method, order=int(wrap_order))
except (NotImplementedError, ValueError):
warnings.warn(
f"Interpolation with method {method} is not supported at order "
f"{wrap_order}. and will be performed at order {wrap_order - 1}"
)
return _interpolWrapper(x, int(wrap_order - 1), wrap_method)
elif x.size < 3:
return x
else:
if downgrade_interpolation:
return _interpolWrapper(x, int(x.count() - 1), wrap_method)
else:
return x
data = data.groupby(data.columns[0]).transform(_interpolWrapper)
# squeezing the 1-dimensional frame resulting from groupby for consistency
# reasons
data = data.squeeze(axis=1)
data.name = dat_name
# if the interpolation method depends on more than just the left and right border points of a gap,
# it has to be applied to each data chunk separated by the too-big gaps individually.
# So we use the gap_mask to group the data into chunks and perform the interpolation on every chunk separately,
# via the .transform method of the grouper.
gap_mask = (~gap_mask).cumsum()[data.index]
chunk_groups = data.groupby(by=gap_mask)
data = chunk_groups.transform(
_interpolWrapper,
**{
"order": order,
"method": method,
"limit_area": limit_area,
"limit_direction": extrapolate,
},
)
# finally reinsert the dropped data gaps
data = data.reindex(pre_index)
return data
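Taken together, the reworked routine behaves as in the parametrized test at the end of this diff, e.g.:

from saqc.lib.ts_operators import interpolateNANs

s = pd.Series([np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan])
interpolateNANs(s, "linear", gap_limit=4, extrapolate=None)
# -> [NaN, 0, 1, 2, 3, 4, NaN]: the interior gap of three NaNs is filled,
#    leading/trailing NaNs stay, since limit_area is "inside" without extrapolate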
......@@ -599,10 +637,8 @@ def linearDriftModel(x, origin, target):
def linearInterpolation(data, inter_limit=2):
return interpolateNANs(data, "time", inter_limit=inter_limit)
return interpolateNANs(data, "time", gap_limit=inter_limit)
def polynomialInterpolation(data, inter_limit=2, inter_order=2):
return interpolateNANs(
data, "polynomial", inter_limit=inter_limit, order=inter_order
)
return interpolateNANs(data, "polynomial", gap_limit=inter_limit, order=inter_order)
......@@ -4,4 +4,4 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
__version__ = "2.2.1"
__version__ = "2.3"
......@@ -28,13 +28,13 @@ setup(
long_description_content_type="text/markdown",
url="https://git.ufz.de/rdm-software/saqc",
packages=find_packages(exclude=("tests", "docs")),
python_requires=">=3.7",
python_requires=">=3.8",
install_requires=[
"Click",
"dtw",
"matplotlib>=3.4",
"numba",
"numpy",
"numpy<1.24",
"outlier-utils",
"pyarrow",
"pandas>=1.2,<1.5",
......
......@@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
import pytest
from pandas.api.types import is_categorical_dtype, is_float_dtype
from saqc.core.history import History, createHistoryFromData
from tests.common import dummyHistory
......@@ -75,7 +76,10 @@ def check_invariants(hist):
assert isinstance(hist.hist, pd.DataFrame)
assert isinstance(hist.meta, list)
assert all(
[isinstance(dtype, (float, pd.CategoricalDtype)) for dtype in hist.hist.dtypes]
[
is_float_dtype(dtype) or is_categorical_dtype(dtype)
for dtype in hist.hist.dtypes
]
)
assert all([isinstance(e, dict) for e in hist.meta])
assert hist.columns is hist.hist.columns
......
......@@ -16,7 +16,7 @@ import pytest
from saqc.constants import BAD, DOUBTFUL, FILTER_NONE, UNFLAGGED
from saqc.core.core import SaQC
from saqc.core.flags import Flags
from saqc.core.translation import DmpScheme, PositionalScheme, TranslationScheme
from saqc.core.translation import DmpScheme, MappingScheme, PositionalScheme
from tests.common import initData
......@@ -27,7 +27,7 @@ def _genTranslators():
dtype(-1): BAD,
**{dtype(f * 10): float(f) for f in range(10)},
}
scheme = TranslationScheme(flags, {v: k for k, v in flags.items()})
scheme = MappingScheme(flags, {v: k for k, v in flags.items()})
yield flags, scheme
......@@ -60,7 +60,7 @@ def test_backwardTranslation():
for _, scheme in _genTranslators():
keys = tuple(scheme._backward.keys())
flags = _genFlags({field: np.array(keys)})
translated = scheme.backward(flags)
translated = scheme.toExternal(flags)
expected = set(scheme._backward.values())
assert not (set(translated[field]) - expected)
......@@ -72,7 +72,7 @@ def test_backwardTranslationFail():
# add a scheme-invalid value to the flags
flags = _genFlags({field: np.array(keys + (max(keys) + 1,))})
with pytest.raises(ValueError):
scheme.backward(flags)
scheme.toExternal(flags)
def test_dmpTranslator():
......@@ -94,7 +94,7 @@ def test_dmpTranslator():
{"func": "flagFoo", "kwargs": {"cause": "BELOW_OR_ABOVE_MIN_MAX"}}
)
tflags = scheme.backward(flags)
tflags = scheme.toExternal(flags)
assert set(tflags.columns.get_level_values(1)) == {
"quality_flag",
......@@ -137,7 +137,7 @@ def test_positionalTranslator():
flags[1::3, "var1"] = DOUBTFUL
flags[2::3, "var1"] = BAD
tflags = scheme.backward(flags)
tflags = scheme.toExternal(flags)
assert (tflags["var2"].replace(-9999, np.nan).dropna() == 90).all(axis=None)
assert (tflags["var1"].iloc[1::3] == 90210).all(axis=None)
assert (tflags["var1"].iloc[2::3] == 90002).all(axis=None)
......@@ -156,7 +156,7 @@ def test_positionalTranslatorIntegration():
for field in flags.columns:
assert flags[field].astype(str).str.match("^9[012]*$").all()
round_trip = scheme.backward(scheme.forward(flags))
round_trip = scheme.toExternal(scheme.toInternal(flags))
assert (flags.values == round_trip.values).all()
assert (flags.index == round_trip.index).all()
......@@ -183,7 +183,7 @@ def test_dmpTranslatorIntegration():
assert qfunc.isin({"", "flagMissing", "flagRange"}).all(axis=None)
assert (qcause[qflags[col] == "BAD"] == "OTHER").all(axis=None)
round_trip = scheme.backward(scheme.forward(flags))
round_trip = scheme.toExternal(scheme.toInternal(flags))
assert round_trip.xs("quality_flag", axis="columns", level=1).equals(qflags)
......@@ -276,8 +276,8 @@ def test_positionalMulitcallsPreserveState():
scheme = PositionalScheme()
flags1 = saqc1._flags
flags2 = saqc2._flags
tflags1 = scheme.backward(flags1).astype(str)
tflags2 = scheme.backward(flags2).astype(str)
tflags1 = scheme.toExternal(flags1).astype(str)
tflags2 = scheme.toExternal(flags2).astype(str)
for k in flags2.columns:
expected = tflags1[k].str.slice(start=1) * 2
......
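The rename running through these test hunks in short - ``backward``/``forward`` become ``toExternal``/``toInternal``, and ``TranslationScheme`` becomes ``MappingScheme`` (``flags`` stands in for a ``Flags`` object as built above):

scheme = PositionalScheme()
tflags = scheme.toExternal(flags)    # was: scheme.backward(flags)
iflags = scheme.toInternal(tflags)   # was: scheme.forward(tflags)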
......@@ -101,7 +101,9 @@ def test_flagSesonalRange(data, field):
flag=BAD,
)
qc = qc.flagRange(newfield, min=test["min"], max=test["max"], flag=BAD)
qc = qc.concatFlags(newfield, method="match", target=field, flag=BAD)
qc = qc.concatFlags(
newfield, method="match", target=field, flag=BAD, overwrite=True
)
qc = qc.dropField(newfield)
flagged = qc._flags[field] > UNFLAGGED
assert flagged.sum() == expected
......@@ -284,9 +286,10 @@ def test_transferFlags():
data = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]})
qc = saqc.SaQC(data)
qc = qc.flagRange("a", max=1.5)
qc = qc.transferFlags(["a", "a"], ["b", "c"])
assert np.all(qc.flags["b"].values == np.array([UNFLAGGED, BAD]))
assert np.all(qc.flags["c"].values == np.array([UNFLAGGED, BAD]))
with pytest.deprecated_call():
qc = qc.transferFlags(["a", "a"], ["b", "c"])
assert np.all(qc.flags["b"].values == np.array([UNFLAGGED, BAD]))
assert np.all(qc.flags["c"].values == np.array([UNFLAGGED, BAD]))
def test_flagJumps():
......
......@@ -124,7 +124,7 @@ def test_gridInterpolation(data, method, fill_history):
res = qc.interpolate(
field,
freq,
order=10,
order=9,
method=method,
downcast_interpolation=True,
)
......@@ -375,3 +375,55 @@ def test_harmSingleVarInterpolationShift(data, params, expected):
qc = qc.dropField(h_field)
assert qc.data[field].equals(pre_data[field])
assert qc.flags[field].equals(pre_flags[field])
def test_concatFlags():
index = pd.to_datetime(
[
"2020-01-01 00:00",
"2020-01-01 00:10",
"2020-01-01 00:30",
"2020-01-01 00:40",
"2020-01-01 01:00",
]
)
df = pd.DataFrame(
data={
"a": [
1,
2,
5,
4,
3,
]
},
index=index,
)
qc = SaQC(df)
qc = qc.flagRange(field="a", max=4)
# branch out to another variable
qc = qc.flagRange(field="a", target="b", max=3)
# bring the flags back again
qc_overwrite = qc.concatFlags("b", target="a", overwrite=True, squeeze=True)
hist_overwrite = qc_overwrite._flags.history["a"].hist.astype(float)
assert hist_overwrite[0].equals(
pd.Series([np.nan, np.nan, 255.0, np.nan, np.nan], index=index)
)
assert hist_overwrite[1].equals(
pd.Series([np.nan, np.nan, 255.0, 255.0, np.nan], index=index)
)
# bring the flags back again
qc_respect = qc.concatFlags("b", target="a", overwrite=False, squeeze=True)
hist_respect = qc_respect._flags.history["a"].hist.astype(float)
assert hist_respect[0].equals(
pd.Series([np.nan, np.nan, 255.0, np.nan, np.nan], index=index)
)
assert hist_respect[1].equals(
pd.Series([np.nan, np.nan, np.nan, 255.0, np.nan], index=index)
)
......@@ -2,6 +2,8 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
......@@ -11,7 +13,7 @@ import saqc
@pytest.mark.slow
def test_makeFig():
def test_makeFig(tmp_path):
# just testing for no errors to occur...
data = dios.DictOfSeries(
pd.Series(
......@@ -28,11 +30,11 @@ def test_makeFig():
)
# not interactive, no storing
dummy_path = ""
outfile = str(Path(tmp_path, "test.png")) # the filesystem's temp dir
d_saqc = d_saqc.plot(field="data", path="")
d_saqc = d_saqc.plot(field="data", path=dummy_path, history="valid", stats=True)
d_saqc = d_saqc.plot(field="data", path=dummy_path, history="complete")
d_saqc = d_saqc.plot(field="data", path=outfile)
d_saqc = d_saqc.plot(field="data", path=outfile, history="valid", stats=True)
d_saqc = d_saqc.plot(field="data", path=outfile, history="complete")
d_saqc = d_saqc.plot(
field="data", path=dummy_path, ax_kwargs={"ylabel": "data is data"}, stats=True
field="data", path=outfile, ax_kwargs={"ylabel": "data is data"}, stats=True
)
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations
import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_array_equal, assert_equal
from pandas.testing import assert_series_equal
import saqc.lib.ts_operators as tsops
from saqc.lib.ts_operators import interpolateNANs
def test_butterFilter():
......@@ -193,3 +195,66 @@ def test_rateOfChange(data, expected):
result = rateOfChange(data)
assert_series_equal(result, expected, check_names=False)
@pytest.mark.parametrize(
"limit,extrapolate,data,expected",
[
(
1,
None,
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
),
(
2,
"backward",
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[0, 0, np.nan, np.nan, np.nan, 4, np.nan],
),
(
2,
None,
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
),
(
3,
None,
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
),
(
3,
"forward",
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, np.nan, np.nan, np.nan, 4, 4],
),
(
4,
None,
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, 1, 2, 3, 4, np.nan],
),
(
4,
"both",
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, 1, 2, 3, 4, np.nan],
),
(
None,
None,
[np.nan, 0, np.nan, np.nan, np.nan, 4, np.nan],
[np.nan, 0, 1, 2, 3, 4, np.nan],
),
],
)
def test_interpolatNANs(limit, extrapolate, data, expected):
got = interpolateNANs(
pd.Series(data), gap_limit=limit, method="linear", extrapolate=extrapolate
)
assert got.equals(pd.Series(expected, dtype=float))
......@@ -2,8 +2,9 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
beautifulsoup4==4.11.1
hypothesis==6.61.0
Markdown==3.3.7
pytest==7.1.3
pytest-lazy-fixture==0.6.3
Markdown==3.3.7
beautifulsoup4==4.11.1
requests==2.27.1