From a59cbb9f15dcc8e109b91b60278631e401988b47 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 11 Mar 2020 16:41:57 +0100
Subject: [PATCH] improved tests

---
 dios/dios.py                  |   4 +-
 test/test_dflike.py           |   4 +-
 test/test_dflike__setget__.py |   8 +-
 test/test_methods.py          |  46 +++++------
 test/test_setup.py            | 144 ++++++++++++++++------------------
 5 files changed, 99 insertions(+), 107 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index ba6fad5..0675e3d 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -53,7 +53,7 @@ def _is_bool_series(obj):
     return isinstance(obj, pd.Series) and obj.dtype == bool
 
 
-def __inject_to_dios():
+def __monkey_patch_pandas():
     def to_dios(self):
         return DictOfSeries(data=self)
 
@@ -61,7 +61,7 @@ def __inject_to_dios():
     pd.DataFrame.to_dios = to_dios
 
 
-__inject_to_dios()
+__monkey_patch_pandas()
 
 
 class DictOfSeries:
diff --git a/test/test_dflike.py b/test/test_dflike.py
index e0ec811..f68db0a 100644
--- a/test/test_dflike.py
+++ b/test/test_dflike.py
@@ -47,6 +47,6 @@ def test_dios_create(data, with_column_param):
 
     assert dios.columns.equals(df.columns)
 
-    for c in df.columns:
-        assert np.all(dios[c] == df[c].dropna())
+    eq, msg = dios_eq_df(dios, df, with_msg=True)
+    assert eq, msg
 
diff --git a/test/test_dflike__setget__.py b/test/test_dflike__setget__.py
index 4ad250a..5a51868 100644
--- a/test/test_dflike__setget__.py
+++ b/test/test_dflike__setget__.py
@@ -5,15 +5,15 @@ import pytest
 def _test(res, exp):
 
     if isinstance(exp, pd.DataFrame):
-        t = dios_eq_df(res, exp, with_msg=True)
-        assert t[0], t[1]
+        eq, msg = dios_eq_df(res, exp, with_msg=True)
+        assert eq, msg
 
     else:
         assert type(exp) == type(res)
 
         if isinstance(exp, pd.Series):
-            t = diosSeries_eq_dfSeries(res, exp, with_msg=True)
-            assert t[0], t[1]
+            eq, msg = diosSeries_eq_dfSeries(res, exp, with_msg=True)
+            assert eq, msg
 
         # scalars
         else:
diff --git a/test/test_methods.py b/test/test_methods.py
index d8ab08a..df5a111 100644
--- a/test/test_methods.py
+++ b/test/test_methods.py
@@ -1,35 +1,35 @@
 from .test_setup import *
 
 
-def test_copy_copy_empty(getDtDiosAligned):
-    dios = getDtDiosAligned.copy()
-    shallow = dios.copy(deep=False)
-    deep = dios.copy(deep=True)
-    empty_w_cols = dios.copy_empty(columns=True)
-    empty_no_cols = dios.copy_empty(columns=False)
-
-    assert dios is not shallow
-    assert dios is not deep
-    assert dios is not empty_w_cols
-    assert dios is not empty_no_cols
+def test_copy_copy_empty(dios_):
+    di = dios_
+    shallow = di.copy(deep=False)
+    deep = di.copy(deep=True)
+    empty_w_cols = di.copy_empty(columns=True)
+    empty_no_cols = di.copy_empty(columns=False)
+
+    assert di is not shallow
+    assert di is not deep
+    assert di is not empty_w_cols
+    assert di is not empty_no_cols
 
     for attr in ['itype', '_itype', '_policy', ]:
-        dios_attr = getattr(dios, attr)
+        dios_attr = getattr(di, attr)
         for cop in [shallow, deep, empty_w_cols, empty_no_cols]:
             copy_attr = getattr(cop, attr)
             assert dios_attr == copy_attr
 
-    assert dios.columns.equals(shallow.columns)
-    assert dios.columns.equals(deep.columns)
-    assert dios.columns.equals(empty_w_cols.columns)
-    assert not dios.columns.equals(empty_no_cols.columns)
-
-    for i in dios:
-        assert dios[i].index is shallow[i].index
-        assert dios[i].index is not deep[i].index
-        dios[i][0] = 999999
-        assert dios[i][0] == shallow[i][0]
-        assert dios[i][0] != deep[i][0]
+    assert di.columns.equals(shallow.columns)
+    assert di.columns.equals(deep.columns)
+    assert di.columns.equals(empty_w_cols.columns)
+    assert not di.columns.equals(empty_no_cols.columns)
+
+    for i in di:
+        assert di[i].index is shallow[i].index
+        assert di[i].index is not deep[i].index
+        di[i][0] = 999999
+        assert di[i][0] == shallow[i][0]
+        assert di[i][0] != deep[i][0]
 
 
 @pytest.mark.parametrize('left', diosFromMatr(DATA_UNALIGNED))
diff --git a/test/test_setup.py b/test/test_setup.py
index 57effb8..aaa5750 100644
--- a/test/test_setup.py
+++ b/test/test_setup.py
@@ -32,54 +32,70 @@ def dios_():
     return dios__().copy()
 
 
-def __fail(msg, with_msg, raiseassert):
-    if raiseassert:
-        if with_msg:
-            assert False, msg
-        assert False
-    if with_msg:
-        return False, msg
-    return False
-
+def diosSeries_eq_dfSeries(df_s, di_s, with_msg=False, df_s_name="di_s", di_s_name="df_s"):
 
-def diosSeries_eq_dfSeries(diosser, dfser, with_msg=False, raiseassert=False):
     def fail(msg):
-        return __fail(msg, with_msg, raiseassert)
+        if with_msg:
+            return False, msg
+        return False
+
+    assert isinstance(df_s, pd.Series)
+    assert isinstance(di_s, pd.Series)
 
-    assert isinstance(dfser, pd.Series)
-    assert isinstance(diosser, pd.Series)
+    if df_s.empty and not di_s.empty:
+        return fail(f"value mismatch: "
+                    f"{df_s_name} is missing, but "
+                    f"{di_s_name} == {di_s}")
 
-    s1, s2 = dfser, diosser
-    if s1.empty and not s2.empty:
-        return fail(f"df-ser is empty, but dios-ser == {s2}")
+    idiff = di_s.index.difference(df_s.index)
+    if not idiff.empty:
+        return fail(f"index mismatch: "
+                    f"{di_s_name}.index: {di_s.index.to_list()}, "
+                    f"{df_s_name}.index: {df_s.index.to_list()}, "
+                    f"diff: {idiff.to_list()}")
 
     # compare series
-    for i in s1.index:
-        exp = s1.loc[i]
+    for i in df_s.index:
+        exp = df_s.loc[i]
 
         # Normally df-nans, from selecting are just not present values
-        # in a dios. But if a Nan was inserted in the dios it is a valid
-        # value, so we firstly try to access the value.
+        # in a dios. But if a NaN was inserted into the dios on purpose, it
+        # is a valid value, so we try to access the value first.
         try:
-            val = s2.loc[i]
+            val = di_s.loc[i]
         except KeyError:
+            # nan in df, missing in dios -> OK
             if np.isnan(exp):
                 continue
-            else:
-                return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] does not exist")
-
-        if np.isnan(exp) and not np.isnan(val):
-            return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] == {val}")
 
-        if exp != val:
-            return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] == {val}")
+            # valid val in df, missing in dios -> FAIL
+            else:
+                return fail(f"value mismatch: "
+                            f"{df_s_name}.loc[{i}] == {exp}, but "
+                            f"{di_s_name}.loc[{i}] does not exist")
+
+        # Plain equality is used: NaN never equals NaN, so an explicit NaN
+        # on both sides is reported as a mismatch; same-signed infinities
+        # compare equal. A NaN-tolerant variant would instead be
+        #   eq = (np.isnan(exp) and np.isnan(val)) or exp == val
+        # The result must be a scalar; a non-scalar result can occur e.g.
+        # when the label `i` is duplicated in one of the indexes.
+        eq = np.equal(val, exp)
+        assert np.isscalar(eq)
+
+        if not eq:
+            return fail(f"value mismatch: "
+                        f"{df_s_name}.loc[{i}] == {exp}, but "
+                        f"{di_s_name}.loc[{i}] == {val}")
 
     return True, "equal" if with_msg else True
 
 
-def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raiseassert=False):
+def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False):
     def fail(msg):
-        return __fail(msg, with_msg, raiseassert)
+        if with_msg:
+            return False, msg
+        return False
 
     assert isinstance(df, pd.DataFrame)
     assert isinstance(dios, DictOfSeries)
@@ -87,62 +103,50 @@ def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raisea
     # check: dios has not more/other cols than df
     notmore = [c for c in dios if c not in df]
     if notmore:
-        return fail(f"dios has more columns, than df: {notmore}")
+        return fail(f"columns mismatch. "
+                    f"dios: {dios.columns.to_list()}, "
+                    f"df: {df.columns.to_list()}, "
+                    f"diff: {notmore}")
 
+    # check: df may have empty cols at locations where
+    # dios has no cols at all
     miss = [c for c in df if c not in dios]
-    # check: df has empty cols, if dios has missing cols
     if miss:
         if dios_dropped_empty_colums:
+            tmp = []
             for c in miss:
                 if not df[c].dropna().empty:
-                    return fail(f"dios missing a column: {c}")
+                    tmp += [c]
+            if tmp:
+                return fail(f"columns mismatch: "
+                            f"dios missing column(s): {tmp}")
         else:
-            return fail(f"dios missing columns: {miss}")
+            return fail(f"columns mismatch: "
+                        f"dios missing column(s): {miss}")
 
-    for c in df:
-        s1 = df[c]
-        s2 = dios[c]
+    cols = df.columns.intersection(dios.columns)
 
-        if s1.empty and not s2.empty:
-            return fail(f"df[{c}] is empty, but dios[{c}] == {s2}")
-
-        # compare series
-        for i in s1.index:
-            exp = s1.loc[i]
-
-            # Normally df-nans, from selecting are just not present values
-            # in a dios. But if a Nan was inserted in the dios it is a valid
-            # value, so we firstly try to access the value.
-            try:
-                val = s2.loc[i]
-            except KeyError:
-                if np.isnan(exp):
-                    continue
-                else:
-                    return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] does not exist")
-
-            if np.isnan(exp) and not np.isnan(val):
-                return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] == {val}")
-
-            if exp != val:
-                return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] == {val}")
+    for c in cols:
+        ok, m = diosSeries_eq_dfSeries(df[c], dios[c], di_s_name=f"di[{c}]", df_s_name=f"df[{c}]", with_msg=True)
+        if not ok:
+            return fail(m)
 
     return True, "equal" if with_msg else True
 
 
 # 0,1
-NICE_SLICE = [slice(None), slice(None,None,3)]
+NICE_SLICE = [slice(None), slice(None, None, 3)]
 R_BLIST = [True, False, False, False, True] * 2
 C_BLIST = [True, False, False, True]
 
 #              0,1,              2,     3
-R_LOC_SLICE = NICE_SLICE + [slice(2), slice(2,8)]
+R_LOC_SLICE = NICE_SLICE + [slice(2), slice(2, 8)]
 #              4      5       6
-R_LOC_LIST = [[1], [3,4,5], pd.Series([3, 7])]
+R_LOC_LIST = [[1], [3, 4, 5], pd.Series([3, 7])]
 #              7            8                  9
 R_LOC_BLIST = [R_BLIST, pd.Series(R_BLIST), pd.Series(R_BLIST).values]
 
-C_LOC_SLICE = NICE_SLICE + [slice('b'), slice('b','c')]
+C_LOC_SLICE = NICE_SLICE + [slice('b'), slice('b', 'c')]
 C_LOC_LIST = [['a'], ['a', 'c'], pd.Series(['a', 'c'])]
 C_LOC_BLIST = [C_BLIST, pd.Series(C_BLIST, index=list("abcd")), pd.Series(C_BLIST).values]
 
@@ -166,7 +170,6 @@ C_LOC_INDEXER = C_LOC_SLICE + C_LOC_LIST + C_LOC_BLIST + EMPTYIDEXER
 R_iLOC_INDEXER = RC_iLOC_SLICE + R_iLOC_LIST + R_iLOC_BLIST
 C_iLOC_INDEXER = RC_iLOC_SLICE + C_iLOC_LIST + C_iLOC_BLIST
 
-
 O = [[0, 0, 0], [0, 0, 0]]
 I = [[1, 1, 1], [1, 1, 1]]
 A = [[1, 2, 3], [4, 5, 6]]
@@ -202,14 +205,3 @@ def diosFromMatr(mlist):
         l.append(DictOfSeries(m.copy()))
     return tuple(l)
 
-
-def _get_dios(ser, i):
-    dios = DictOfSeries()
-    for i in range(i):
-        dios[f'c{i}'] = ser.copy() * (i + 1) // 2
-    return dios
-
-
-@pytest.fixture()
-def getDtDiosAligned(datetime_series):
-    return _get_dios(datetime_series, 5)
-- 
GitLab