From a59cbb9f15dcc8e109b91b60278631e401988b47 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Wed, 11 Mar 2020 16:41:57 +0100 Subject: [PATCH] improved tests --- dios/dios.py | 4 +- test/test_dflike.py | 4 +- test/test_dflike__setget__.py | 8 +- test/test_methods.py | 46 +++++------ test/test_setup.py | 144 ++++++++++++++++------------------ 5 files changed, 99 insertions(+), 107 deletions(-) diff --git a/dios/dios.py b/dios/dios.py index ba6fad5..0675e3d 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -53,7 +53,7 @@ def _is_bool_series(obj): return isinstance(obj, pd.Series) and obj.dtype == bool -def __inject_to_dios(): +def __monkey_patch_pandas(): def to_dios(self): return DictOfSeries(data=self) @@ -61,7 +61,7 @@ def __inject_to_dios(): pd.DataFrame.to_dios = to_dios -__inject_to_dios() +__monkey_patch_pandas() class DictOfSeries: diff --git a/test/test_dflike.py b/test/test_dflike.py index e0ec811..f68db0a 100644 --- a/test/test_dflike.py +++ b/test/test_dflike.py @@ -47,6 +47,6 @@ def test_dios_create(data, with_column_param): assert dios.columns.equals(df.columns) - for c in df.columns: - assert np.all(dios[c] == df[c].dropna()) + eq, msg = dios_eq_df(dios, df, with_msg=True) + assert eq, msg diff --git a/test/test_dflike__setget__.py b/test/test_dflike__setget__.py index 4ad250a..5a51868 100644 --- a/test/test_dflike__setget__.py +++ b/test/test_dflike__setget__.py @@ -5,15 +5,15 @@ import pytest def _test(res, exp): if isinstance(exp, pd.DataFrame): - t = dios_eq_df(res, exp, with_msg=True) - assert t[0], t[1] + eq, msg = dios_eq_df(res, exp, with_msg=True) + assert eq, msg else: assert type(exp) == type(res) if isinstance(exp, pd.Series): - t = diosSeries_eq_dfSeries(res, exp, with_msg=True) - assert t[0], t[1] + eq, msg = diosSeries_eq_dfSeries(res, exp, with_msg=True) + assert eq, msg # scalars else: diff --git a/test/test_methods.py b/test/test_methods.py index d8ab08a..df5a111 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -1,35 +1,35 @@ from .test_setup import * -def test_copy_copy_empty(getDtDiosAligned): - dios = getDtDiosAligned.copy() - shallow = dios.copy(deep=False) - deep = dios.copy(deep=True) - empty_w_cols = dios.copy_empty(columns=True) - empty_no_cols = dios.copy_empty(columns=False) - - assert dios is not shallow - assert dios is not deep - assert dios is not empty_w_cols - assert dios is not empty_no_cols +def test_copy_copy_empty(dios_): + di = dios_ + shallow = di.copy(deep=False) + deep = di.copy(deep=True) + empty_w_cols = di.copy_empty(columns=True) + empty_no_cols = di.copy_empty(columns=False) + + assert di is not shallow + assert di is not deep + assert di is not empty_w_cols + assert di is not empty_no_cols for attr in ['itype', '_itype', '_policy', ]: - dios_attr = getattr(dios, attr) + dios_attr = getattr(di, attr) for cop in [shallow, deep, empty_w_cols, empty_no_cols]: copy_attr = getattr(cop, attr) assert dios_attr == copy_attr - assert dios.columns.equals(shallow.columns) - assert dios.columns.equals(deep.columns) - assert dios.columns.equals(empty_w_cols.columns) - assert not dios.columns.equals(empty_no_cols.columns) - - for i in dios: - assert dios[i].index is shallow[i].index - assert dios[i].index is not deep[i].index - dios[i][0] = 999999 - assert dios[i][0] == shallow[i][0] - assert dios[i][0] != deep[i][0] + assert di.columns.equals(shallow.columns) + assert di.columns.equals(deep.columns) + assert di.columns.equals(empty_w_cols.columns) + assert not di.columns.equals(empty_no_cols.columns) + + for i in di: + assert di[i].index is shallow[i].index + assert di[i].index is not deep[i].index + di[i][0] = 999999 + assert di[i][0] == shallow[i][0] + assert di[i][0] != deep[i][0] @pytest.mark.parametrize('left', diosFromMatr(DATA_UNALIGNED)) diff --git a/test/test_setup.py b/test/test_setup.py index 57effb8..aaa5750 100644 --- a/test/test_setup.py +++ b/test/test_setup.py @@ -32,54 +32,70 @@ def dios_(): return dios__().copy() -def __fail(msg, with_msg, raiseassert): - if raiseassert: - if with_msg: - assert False, msg - assert False - if with_msg: - return False, msg - return False - +def diosSeries_eq_dfSeries(df_s, di_s, with_msg=False, df_s_name="di_s", di_s_name="df_s"): -def diosSeries_eq_dfSeries(diosser, dfser, with_msg=False, raiseassert=False): def fail(msg): - return __fail(msg, with_msg, raiseassert) + if with_msg: + return False, msg + return False + + assert isinstance(df_s, pd.Series) + assert isinstance(di_s, pd.Series) - assert isinstance(dfser, pd.Series) - assert isinstance(diosser, pd.Series) + if df_s.empty and not di_s.empty: + return fail(f"value mismatch: " + f"{df_s_name} is missing, but " + f"{di_s_name} == {di_s}") - s1, s2 = dfser, diosser - if s1.empty and not s2.empty: - return fail(f"df-ser is empty, but dios-ser == {s2}") + idiff = di_s.index.difference(df_s.index) + if not idiff.empty: + return fail(f"index mismatch: " + f"{di_s_name}.index: {di_s.index.to_list()}, " + f"{df_s_name}.index: {df_s.index.to_list()}, " + f"diff: {idiff.to_list()}") # compare series - for i in s1.index: - exp = s1.loc[i] + for i in df_s.index: + exp = df_s.loc[i] # Normally df-nans, from selecting are just not present values - # in a dios. But if a Nan was inserted in the dios it is a valid - # value, so we firstly try to access the value. + # in a dios. But if a Nan was inserted in dios on purpose, it is + # a valid value, so we try to access the value first. try: - val = s2.loc[i] + val = di_s.loc[i] except KeyError: + # nan in df, missing in dios -> OK if np.isnan(exp): continue - else: - return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] does not exist") - - if np.isnan(exp) and not np.isnan(val): - return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] == {val}") - if exp != val: - return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] == {val}") + # valid val in df, missing in dios -> FAIL + else: + return fail(f"value mismatch: " + f"{di_s_name}.loc[{i}] == {exp}, but " + f"{df_s_name}.loc[{i}] does not exist") + + # inf = np.isinf(exp) and np.isinf(val) + # sig = np.sign(exp) == np.sign(val) + # eq_nan = np.isnan(exp) and np.isnan(val) + # eq_inf = inf and sig + # eq_vals = exp == val + # eq = eq_nan or eq_inf or eq_vals + eq = np.equal(val, exp) + assert np.isscalar(eq) + + if not eq: + return fail(f"value mismatch: " + f"{di_s_name}.loc[{i}] == {exp}, but " + f"{df_s_name}.loc[{i}] == {val}") return True, "equal" if with_msg else True -def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raiseassert=False): +def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False): def fail(msg): - return __fail(msg, with_msg, raiseassert) + if with_msg: + return False, msg + return False assert isinstance(df, pd.DataFrame) assert isinstance(dios, DictOfSeries) @@ -87,62 +103,50 @@ def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raisea # check: dios has not more/other cols than df notmore = [c for c in dios if c not in df] if notmore: - return fail(f"dios has more columns, than df: {notmore}") + return fail(f"columns mismatch. " + f"dios: {dios.columns.to_list()}, " + f"df: {df.columns.to_list()}, " + f"diff: {notmore}") + # check: may df has empty cols and dios has no cols + # at this locations miss = [c for c in df if c not in dios] - # check: df has empty cols, if dios has missing cols if miss: if dios_dropped_empty_colums: + tmp = [] for c in miss: if not df[c].dropna().empty: - return fail(f"dios missing a column: {c}") + tmp += [c] + if tmp: + return fail(f"columns mismatch: " + f"dios missing column(s): {tmp}") else: - return fail(f"dios missing columns: {miss}") + return fail(f"columns mismatch: " + f"dios missing column(s): {miss}") - for c in df: - s1 = df[c] - s2 = dios[c] + cols = df.columns.intersection(dios.columns) - if s1.empty and not s2.empty: - return fail(f"df[{c}] is empty, but dios[{c}] == {s2}") - - # compare series - for i in s1.index: - exp = s1.loc[i] - - # Normally df-nans, from selecting are just not present values - # in a dios. But if a Nan was inserted in the dios it is a valid - # value, so we firstly try to access the value. - try: - val = s2.loc[i] - except KeyError: - if np.isnan(exp): - continue - else: - return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] does not exist") - - if np.isnan(exp) and not np.isnan(val): - return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] == {val}") - - if exp != val: - return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] == {val}") + for c in cols: + ok, m = diosSeries_eq_dfSeries(df[c], dios[c], di_s_name=f"di[{c}]", df_s_name=f"df[{c}]", with_msg=True) + if not ok: + return fail(m) return True, "equal" if with_msg else True # 0,1 -NICE_SLICE = [slice(None), slice(None,None,3)] +NICE_SLICE = [slice(None), slice(None, None, 3)] R_BLIST = [True, False, False, False, True] * 2 C_BLIST = [True, False, False, True] # 0,1, 2, 3 -R_LOC_SLICE = NICE_SLICE + [slice(2), slice(2,8)] +R_LOC_SLICE = NICE_SLICE + [slice(2), slice(2, 8)] # 4 5 6 -R_LOC_LIST = [[1], [3,4,5], pd.Series([3, 7])] +R_LOC_LIST = [[1], [3, 4, 5], pd.Series([3, 7])] # 7 8 9 R_LOC_BLIST = [R_BLIST, pd.Series(R_BLIST), pd.Series(R_BLIST).values] -C_LOC_SLICE = NICE_SLICE + [slice('b'), slice('b','c')] +C_LOC_SLICE = NICE_SLICE + [slice('b'), slice('b', 'c')] C_LOC_LIST = [['a'], ['a', 'c'], pd.Series(['a', 'c'])] C_LOC_BLIST = [C_BLIST, pd.Series(C_BLIST, index=list("abcd")), pd.Series(C_BLIST).values] @@ -166,7 +170,6 @@ C_LOC_INDEXER = C_LOC_SLICE + C_LOC_LIST + C_LOC_BLIST + EMPTYIDEXER R_iLOC_INDEXER = RC_iLOC_SLICE + R_iLOC_LIST + R_iLOC_BLIST C_iLOC_INDEXER = RC_iLOC_SLICE + C_iLOC_LIST + C_iLOC_BLIST - O = [[0, 0, 0], [0, 0, 0]] I = [[1, 1, 1], [1, 1, 1]] A = [[1, 2, 3], [4, 5, 6]] @@ -202,14 +205,3 @@ def diosFromMatr(mlist): l.append(DictOfSeries(m.copy())) return tuple(l) - -def _get_dios(ser, i): - dios = DictOfSeries() - for i in range(i): - dios[f'c{i}'] = ser.copy() * (i + 1) // 2 - return dios - - -@pytest.fixture() -def getDtDiosAligned(datetime_series): - return _get_dios(datetime_series, 5) -- GitLab