"""Shared fixtures, comparison helpers and test data for DictOfSeries tests.

The module provides

* small reference series and factories building an equivalent
  ``pd.DataFrame`` / ``DictOfSeries`` pair,
* helpers that compare a ``DictOfSeries`` (or one of its series) against the
  pandas counterpart,
* lists of indexers, data matrices and operator names that are defined here
  at module level (presumably for parametrised tests -- confirm with callers).
"""
# NOTE: the import order is kept as in the original file on purpose: the star
# import comes first, so the explicit imports below always win on name clashes.
import pytest
from ..dios import *
from ..dios.operators import _OP1_MAP, _OP2_DIV_MAP, _OP2_ARITH_MAP, _OP2_BOOL_MAP, _OP2_COMP_MAP
import pandas as pd
import numpy as np
from copy import deepcopy
from pandas.tests.series.conftest import datetime_series

# Reference series; all share the default integer index 0..9.
a = pd.Series(range(0, 70, 7))
b = pd.Series(range(5, 15, 1))
c = pd.Series(range(7, 107, 10))
d = pd.Series(range(0, 10, 1))

s1, s2, s3, s4 = a, b, c, d


def df__():
    """Return a fresh ``pd.DataFrame`` with columns a, b, c, d (copies)."""
    return pd.DataFrame(dict(a=a.copy(), b=b.copy(), c=c.copy(), d=d.copy(), ))


def dios__():
    """Return a fresh ``DictOfSeries`` with columns a, b, c, d (copies)."""
    return DictOfSeries(dict(a=a.copy(), b=b.copy(), c=c.copy(), d=d.copy(), ))


@pytest.fixture
def df_():
    """Fixture: a copy of the reference DataFrame built by ``df__``."""
    return df__().copy()


@pytest.fixture
def dios_():
    """Fixture: a copy of the reference DictOfSeries built by ``dios__``."""
    return dios__().copy()


def __fail(msg, with_msg, raiseassert):
    """Report a failed comparison.

    Parameters
    ----------
    msg : str
        Description of the mismatch.
    with_msg : bool
        If true, the message is part of the result (or of the raised error).
    raiseassert : bool
        If true, raise instead of returning.

    Returns
    -------
    ``(False, msg)`` if ``with_msg`` is true, else ``False``.

    Raises
    ------
    AssertionError
        If ``raiseassert`` is true. The error carries ``msg`` only when
        ``with_msg`` is true.
    """
    if raiseassert:
        # Raise explicitly instead of ``assert False`` so the failure is not
        # silently dropped when Python runs with assertions disabled (-O).
        if with_msg:
            raise AssertionError(msg)
        raise AssertionError()
    if with_msg:
        return False, msg
    return False


def _values_equal(exp, val):
    """Return True if ``exp`` and ``val`` are both NaN or compare equal.

    ``np.isnan(val)`` is only evaluated when ``exp`` is NaN, which keeps the
    evaluation order of the original inline checks.
    """
    if np.isnan(exp):
        # NaN != NaN is always true, so two NaNs need an explicit check.
        return bool(np.isnan(val))
    return not (exp != val)


def diosSeries_eq_dfSeries(diosser, dfser, with_msg=False, raiseassert=False):
    """Compare a series taken from a dios with a series taken from a DataFrame.

    Every index label of ``dfser`` is looked up in ``diosser``. A label that
    is missing in ``diosser`` is accepted only if the value in ``dfser`` is
    NaN. Labels that exist only in ``diosser`` are not inspected.

    Parameters
    ----------
    diosser : pd.Series
        Series coming from the DictOfSeries side.
    dfser : pd.Series
        Series coming from the DataFrame side (the expectation).
    with_msg : bool, default False
        If true, return a ``(result, message)`` tuple instead of a bare bool.
    raiseassert : bool, default False
        If true, raise ``AssertionError`` on mismatch instead of returning.

    Returns
    -------
    ``True`` / ``False``, or ``(True, "equal")`` / ``(False, msg)`` when
    ``with_msg`` is true.
    """
    def fail(msg):
        return __fail(msg, with_msg, raiseassert)

    assert isinstance(dfser, pd.Series)
    assert isinstance(diosser, pd.Series)

    s1, s2 = dfser, diosser
    if s1.empty and not s2.empty:
        return fail(f"df-ser is empty, but dios-ser == {s2}")

    # compare series
    for i in s1.index:
        exp = s1.loc[i]
        # Normally df-NaNs that result from selecting are simply absent
        # values in a dios. But if a NaN was inserted into the dios it is a
        # valid value, so we first try to access the value.
        try:
            val = s2.loc[i]
        except KeyError:
            if np.isnan(exp):
                continue
            return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] does not exist")

        if not _values_equal(exp, val):
            return fail(f"df-ser.loc[{i}] == {exp}, but dios-ser.loc[{i}] == {val}")

    # Parenthesised on purpose: without the parentheses the conditional
    # expression binds only to the second tuple element.
    return (True, "equal") if with_msg else True


def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raiseassert=False):
    """Compare a DictOfSeries with a DataFrame column by column.

    The dios must not have columns the DataFrame lacks. Columns of the
    DataFrame that are missing in the dios are an error, unless
    ``dios_dropped_empty_colums`` is true and the DataFrame column holds only
    NaNs. For the common columns every index label of the DataFrame column is
    looked up in the dios column; a missing label is accepted only if the
    DataFrame value is NaN.

    Parameters
    ----------
    dios : DictOfSeries
        Object under test.
    df : pd.DataFrame
        The expectation.
    dios_dropped_empty_colums : bool, default False
        Accept columns missing in ``dios`` if they are all-NaN in ``df``.
    with_msg : bool, default False
        If true, return a ``(result, message)`` tuple instead of a bare bool.
    raiseassert : bool, default False
        If true, raise ``AssertionError`` on mismatch instead of returning.

    Returns
    -------
    ``True`` / ``False``, or ``(True, "equal")`` / ``(False, msg)`` when
    ``with_msg`` is true.
    """
    def fail(msg):
        return __fail(msg, with_msg, raiseassert)

    assert isinstance(df, pd.DataFrame)
    assert isinstance(dios, DictOfSeries)

    # check: dios has not more/other cols than df
    notmore = [c for c in dios if c not in df]
    if notmore:
        return fail(f"dios has more columns, than df: {notmore}")

    # check: df has empty cols, if dios has missing cols
    miss = [c for c in df if c not in dios]
    if miss:
        if not dios_dropped_empty_colums:
            return fail(f"dios missing columns: {miss}")
        for c in miss:
            if not df[c].dropna().empty:
                return fail(f"dios missing a column: {c}")

    for c in df:
        if c in miss:
            # Verified above to be all-NaN in df; there is no dios column to
            # compare against, so do not index the dios with it.
            continue

        s1 = df[c]
        s2 = dios[c]
        if s1.empty and not s2.empty:
            return fail(f"df[{c}] is empty, but dios[{c}] == {s2}")

        # compare series
        for i in s1.index:
            exp = s1.loc[i]
            # Normally df-NaNs that result from selecting are simply absent
            # values in a dios. But if a NaN was inserted into the dios it is
            # a valid value, so we first try to access the value.
            try:
                val = s2.loc[i]
            except KeyError:
                if np.isnan(exp):
                    continue
                return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] does not exist")

            if not _values_equal(exp, val):
                return fail(f"df.loc[{i}, {c}] == {exp}, but dios.loc[{i}, {c}] == {val}")

    # Parenthesised on purpose: without the parentheses the conditional
    # expression binds only to the second tuple element.
    return (True, "equal") if with_msg else True


# Boolean mask with one entry per row of the reference data (length 10).
BLIST = [True, False, False, False, True] * 2

LISTIDXER = [['a'], ['a', 'c'], pd.Series(['a', 'c'])]
BOOLIDXER = [BLIST, pd.Series(BLIST), df__() > 10]
SLICEIDXER = [slice(None), slice(4), slice(-3, -1), slice(-1, 3), slice(None, None, 3)]
MULTIIDXER = [df__() > 9, df__() != df__(), df__() == df__()]
EMPTYIDEXER = [[], pd.Series(), slice(3, 3), slice(3, -1), pd.DataFrame(), []]

INDEXERS = LISTIDXER + BOOLIDXER + SLICEIDXER + MULTIIDXER + EMPTYIDEXER

LOC_L = [slice(None), slice(2, 8), pd.Series(BLIST), BLIST, [6, 5], 2]
LOC_R = [slice(None), slice('a', 'c'), pd.Series([False, False, True, False], index=list("abcd")),
         [False, False, True, False], "a"] + LISTIDXER

ILIST = [[0], [1, 3], 2]
IEMPTY = [[], slice(3, 3), slice(3, -1), []]
ILOC_L = [slice(None), slice(2, 8), BLIST, ] + ILIST
ILOC_R = [slice(None), slice(1, 3), [False, False, True, False], ] + ILIST

FAIL_INDEXERS = [['z'], ['a', 'z'], pd.Series(['a', 'z']), BLIST, pd.DataFrame(dict(a=[1, 2, 3]))]

# Matrices whose inner lists all have the same length.
O = [[0, 0, 0], [0, 0, 0]]
I = [[1, 1, 1], [1, 1, 1]]
A = [[1, 2, 3], [4, 5, 6]]
B = [[0, 2, 2], [5, 5, 5]]
C = [[3, 2, 0], [1, 0, 3]]
D = [[6, 5, 4], [3, 2, 1]]
DATA_ALIGNED = [O, I, A, B, C, D]

# The outer lists could have different lengths, but this would
# make the checks too complicated.
# NOTE: the names O, I, A, B, C, D are rebound below; DATA_ALIGNED keeps
# referencing the matrices defined above.
EEE = [[], [], []]
O = [[0, 0], [0, 0, 0], [0, 0, 0, 0]]
I = [[1, 1, 1], [1, 1, 1], [1]]
A = [[1], [2, 3], [4, 5, 6]]
B = [[0, 2, 2], [5], [5, 5]]
C = [[3, 2, 0], [1, 0, 3], [0, 0, 0]]
D = [[6], [2], [9]]
DATA_UNALIGNED = [O, I, A, B, C, D, EEE]

# only use if a single matrix is used
ALL = DATA_ALIGNED + DATA_UNALIGNED

# Operator names, taken from the keys of the dios operator maps.
OPCOMP = list(_OP2_COMP_MAP)
OPNOCOMP = list(_OP2_ARITH_MAP) + list(_OP2_BOOL_MAP) + list(_OP2_DIV_MAP)
OP2 = OPCOMP + OPNOCOMP
OP1 = list(_OP1_MAP)


def diosFromMatr(mlist):
    """Return a tuple with one ``DictOfSeries`` per matrix in ``mlist``.

    Each matrix is converted with ``np.array`` first, which already creates a
    new array, so the inputs are never shared with the resulting objects.
    How ``DictOfSeries`` interprets the array is defined by its constructor.
    """
    return tuple(DictOfSeries(np.array(m)) for m in mlist)


def _get_dios(ser, i):
    """Build a ``DictOfSeries`` with ``i`` columns derived from ``ser``.

    Column ``c{k}`` (k = 0 .. i-1) holds ``ser.copy() * (k + 1) // 2``.
    """
    dios = DictOfSeries()
    # Separate loop variable, so the parameter ``i`` is not shadowed.
    for k in range(i):
        dios[f'c{k}'] = ser.copy() * (k + 1) // 2
    return dios


@pytest.fixture()
def getDtDiosAligned(datetime_series):
    """Fixture: five-column dios (c0..c4) built from ``datetime_series``."""
    return _get_dios(datetime_series, 5)