-
David Schäfer authored
This reverts merge request !351
1b060f51
test_history.py 5.78 KiB
#!/usr/bin/env python
import pytest
import numpy as np
import pandas as pd
from saqc.core.history import History, createHistoryFromData
from tests.common import dummyHistory
# Reference data for #GH143 (combined backtrack), adjusted to the
# current implementation.
example1 = (
    # flags matrix (4 x 5); NaN marks entries without a flag
    np.array(
        [
            [0, np.nan, 50, 99, np.nan],
            [0, np.nan, 50, np.nan, 25],
            [0, 99, 99, 99, 25],
            [0, 99, np.nan, np.nan, 25],
        ],
        dtype=float,
    ),
    # expected from max(): one value per row of the flags matrix
    np.array([99, 25, 25, 25]),
)
# Reference data for #GH143.
example2 = (
    # flags matrix (4 x 4); NaN marks entries without a flag
    np.array(
        [
            [0, 99, np.nan, 0],
            [0, np.nan, 99, np.nan],
            [0, np.nan, np.nan, np.nan],
            [0, np.nan, np.nan, 0],
        ],
        dtype=float,
    ),
    # expected from max(): one value per row of the flags matrix
    np.array([0, 99, 0, 0]),
)
# Input matrices shared by the parametrized tests below: an empty array,
# constant arrays, an all-NaN array and two small integer-like grids
# (one of them with a single NaN).
data = [
    np.array([[]]),
    np.zeros((1, 1)),
    np.zeros((3, 4)),
    np.ones((3, 4)),
    np.full((3, 4), np.nan),
    np.array(
        [
            [0, 0, 0, 0],
            [0, 1, 2, 3],
            [0, 1, 2, 3],
        ]
    ),
    np.array(
        [
            [0, 0, 0, 0],
            [0, 1, np.nan, 3],
            [0, 1, 2, 3],
        ]
    ),
]
def check_invariants(hist):
    """
    Assert the structural invariants of a flags history (FH).

    This can be called for **any** FH.
    The assertions must hold in any case.

    Raises an ``AssertionError`` as soon as one invariant is violated.
    """
    # basics
    assert isinstance(hist, History)
    assert isinstance(hist.hist, pd.DataFrame)
    assert isinstance(hist.meta, list)
    # Every column has to be float or categorical. A column dtype is a dtype
    # object and never an instance of the builtin ``float``, so float columns
    # need a dtype-aware check instead of ``isinstance(dtype, float)``.
    assert all(
        isinstance(dtype, pd.CategoricalDtype) or pd.api.types.is_float_dtype(dtype)
        for dtype in hist.hist.dtypes
    )
    assert all(isinstance(e, dict) for e in hist.meta)
    # the FH exposes the very same axis objects as its underlying frame
    assert hist.columns is hist.hist.columns
    assert hist.index is hist.hist.index
    assert len(hist) == len(hist.columns) == len(hist.meta)

    # advanced
    # columns are numbered consecutively, starting at zero
    assert hist.columns.equals(pd.Index(range(len(hist))))
    assert isinstance(hist.max(), pd.Series)
def is_equal(hist1: History, hist2: History):
    """
    Tell whether two FH are (considered) equal.

    Only the underlying 'hist' frames are compared; 'meta' is ignored.
    """
    left = hist1.hist
    right = hist2.hist
    return left.equals(right)
@pytest.mark.parametrize("data", data + [None])
def test_init(data: "np.ndarray | None"):
    """A History constructed from just an index must satisfy all invariants."""
    # `data` only determines the index that is handed to the constructor
    df = pd.DataFrame(data, dtype=float)
    hist = History(df.index)
    check_invariants(hist)
@pytest.mark.parametrize("data", data + [None])
def test_createHistory(data: "np.ndarray | None"):
    """A History created from existing data keeps the shape of that data."""
    df = pd.DataFrame(data, dtype=float)
    # one independent dict per column; `[{}] * n` would put the *same* dict
    # object into every slot
    meta = [{} for _ in df.columns]
    hist = createHistoryFromData(df, meta)

    check_invariants(hist)

    # `None` has no shape, so the dimension checks only apply to real arrays
    if data is not None:
        assert len(hist.index) == data.shape[0]
        assert len(hist.columns) == data.shape[1]
@pytest.mark.parametrize("data", data + [None])
def test_copy(data):
    """Shallow and deep copies equal the source but are distinct objects."""
    # build a FH by appending the frame column by column
    frame = pd.DataFrame(data, dtype=float)
    hist = History(frame.index)
    for _label, column in frame.items():
        hist.append(column)

    shallow = hist.copy(deep=False)
    deep = hist.copy(deep=True)

    # both kinds of copy are new, but equal, objects
    for clone in (deep, shallow):
        check_invariants(clone)
        assert clone is not hist
        assert clone.hist is not hist.hist
        assert clone.meta is not hist.meta
        assert is_equal(clone, hist)

    assert deep is not shallow
    assert is_equal(deep, shallow)

    # underlying pandas data was only copied with deep=True
    assert shallow.hist.index is hist.hist.index
    assert deep.hist.index is not hist.hist.index
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("data", data + [None])
def test_reindex_trivial_cases(data, copy):
    """Reindex to the unchanged index and to an empty index."""
    frame = pd.DataFrame(data, dtype=float)
    orig = dummyHistory(hist=frame)

    for target in (frame.index, pd.Index([])):
        # work on a fresh copy so that both targets start from the same state
        ref = orig.copy()
        result = ref.reindex(target, copy=copy)

        # copy=True yields a new object, copy=False the very same one
        assert (result is not ref) if copy else (result is ref)
        check_invariants(result)
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("data", data + [None])
def test_reindex_missing_indicees(data, copy):
    """Reindex to an index that lacks the first and the last entry."""
    frame = pd.DataFrame(data, dtype=float)
    orig = dummyHistory(hist=frame)

    # drop the first and the last label
    target = frame.index[1:-1]
    result = orig.reindex(target, copy=copy)

    # copy=True yields a new object, copy=False the very same one
    assert (result is not orig) if copy else (result is orig)
    check_invariants(result)
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("data", data + [None])
def test_reindex_extra_indicees(data, copy):
    """Reindex to an index extended by five additional labels."""
    frame = pd.DataFrame(data, dtype=float)
    orig = dummyHistory(hist=frame)

    # five more labels, continuing right after the current index length
    size = len(frame.index)
    extra = pd.Index(range(size, size + 5))
    target = frame.index.append(extra)
    result = orig.reindex(target, copy=copy)

    # copy=True yields a new object, copy=False the very same one
    assert (result is not orig) if copy else (result is orig)
    check_invariants(result)
@pytest.mark.parametrize(
    "s, meta",
    [
        (pd.Series(0, index=range(6), dtype=float), None),
        (pd.Series(0, index=range(6), dtype=float), {}),
        (pd.Series(1, index=range(6), dtype=float), {"foo": "bar"}),
    ],
)
def test_append_with_meta(s, meta):
    """Appending stores an equal, but not identical, meta dict per column."""
    hist = History(s.index)
    hist.append(s, meta=meta)
    check_invariants(hist)

    # a missing meta is expected to show up as an empty dict
    expected = {} if meta is None else meta

    assert hist.meta[0] is not expected
    assert hist.meta == [expected]

    hist.append(s, meta=expected)
    check_invariants(hist)
    assert hist.meta == [expected, expected]
@pytest.fixture(scope="module")
def __hist():
    """
    Provide one FH with a six element index, shared by the whole module.

    The FH is filled by
     - test_append
     - test_append_force
    """
    index = pd.Index(range(6))
    return History(index=index)
# Each parametrized run appends one more series to the module-scoped FH
# resulting from the former run, so the cases below build on each other.
@pytest.mark.parametrize(
    "s, max_val",
    [
        (pd.Series(0, index=range(6), dtype=float), 0),
        (pd.Series(1, index=range(6), dtype=float), 1),
        (pd.Series(np.nan, index=range(6), dtype=float), 1),
        (pd.Series(0, index=range(6), dtype=float), 0),
    ],
)
def test_append_force(__hist, s, max_val):
    """After appending `s`, every entry of `max()` must equal `max_val`."""
    __hist.append(s)
    check_invariants(__hist)

    matches = __hist.max() == max_val
    assert matches.all()