diff --git a/test/run_dios.py b/test/run_dios.py index bde77740a68f42faed4e283906899c7b371db1af..1409433a032a55aef8feef0eb1c85843cc5e24d0 100644 --- a/test/run_dios.py +++ b/test/run_dios.py @@ -1,5 +1,5 @@ -from ..dios import * +from dios import * import numpy as np if __name__ == '__main__': diff --git a/test/test__getitem__.py b/test/test__getitem__.py index d94ee4ed0964b57c592a398f4570660940dbda2f..2012b2e02b1bbd713a409c4f25efa6ca70261801 100644 --- a/test/test__getitem__.py +++ b/test/test__getitem__.py @@ -1,18 +1,15 @@ -from ..dios import * from .test_setup import * from pandas.core.dtypes.common import is_scalar -# s1 = pd.Series(range(10), index=range(10)) -# s2 = pd.Series(range(5, 10), index=range(5, 10)) -# s3 = pd.Series(range(1, 30, 2), index=range(1, 30, 2)) -# s4 = pd.Series(np.linspace(7, 13, 9), index=range(3, 12)) -# s1.name, s2.name, s3.name, s4.name = 'a', 'b', 'c', 'd' -# d1 = DictOfSeries(data=dict(a=s1.copy(), b=s2.copy(), c=s3.copy(), d=s4.copy())) -# -d1 = dios__() +unal1 = pd.Series(range(10), index=range(10)) +unal2 = pd.Series(range(5, 10), index=range(5, 10)) +unal3 = pd.Series(range(1, 30, 2), index=range(1, 30, 2)) +unal4 = pd.Series(np.linspace(7, 13, 9), index=range(3, 12)) +unal1.name, unal2.name, unal3.name, unal4.name = 'a', 'b', 'c', 'd' +d1 = DictOfSeries(data=dict(a=unal1.copy(), b=unal2.copy(), c=unal3.copy(), d=unal4.copy())) -@pytest.mark.parametrize(('idxer', 'exp'), [('a', s1), ('c', s3)]) +@pytest.mark.parametrize(('idxer', 'exp'), [('a', unal1), ('c', unal3)]) def test__getitem_single(idxer, exp): a = d1[idxer] b = d1.loc[:, idxer] @@ -22,31 +19,31 @@ def test__getitem_single(idxer, exp): assert (b == exp).all() -@pytest.mark.parametrize(('idxer', 'exp'), [((1, 'a'), s1), ((3, 'c'), s3)]) +@pytest.mark.parametrize(('idxer', 'exp'), [((1, 'a'), unal1), ((3, 'c'), unal3)]) def test__getitem_scalar_loc(idxer, exp): a = d1.loc[idxer] assert is_scalar(a) assert a == exp.loc[idxer[0]] -@pytest.mark.parametrize(('idxer', 'exp'), [(0, s1), (1, s2), (2, s3), (3, s4), - (-1, s4), (-2, s3), (-3, s2), (-4, s1)]) +@pytest.mark.parametrize(('idxer', 'exp'), [(0, unal1), (1, unal2), (2, unal3), (3, unal4), + (-1, unal4), (-2, unal3), (-3, unal2), (-4, unal1)]) def test__getitem_single_iloc(idxer, exp): a = d1.iloc[:, idxer] assert isinstance(a, pd.Series) assert (a == exp).all() -@pytest.mark.parametrize(('idxer', 'exp'), [((1, 0), s1), ((3, -2), s3), ((-1, -1), s4)]) +@pytest.mark.parametrize(('idxer', 'exp'), [((1, 0), unal1), ((3, -2), unal3), ((-1, -1), unal4)]) def test__getitem_scalar_iloc(idxer, exp): a = d1.iloc[idxer] assert is_scalar(a) assert a == exp.iloc[idxer[0]] -@pytest.mark.parametrize('idxer', ['x', '2', 1, None, ]) +@pytest.mark.parametrize('idxer', ['x', '2', 1000, None, ]) def test__getitem_single_fail(idxer): - with pytest.raises(KeyError): + with pytest.raises((KeyError, ValueError)): a = d1[idxer] print(idxer, a) @@ -63,14 +60,14 @@ def test__getitem_single_iloc_fail(idxer): a = d1.iloc[:, idxer] -@pytest.mark.parametrize('idxer', INDEXERS) -def test__getitem__(idxer): +@pytest.mark.parametrize('idxer', INDEXERS_NOBOOL) +def test__getitem_(idxer): d = d1[idxer] assert isinstance(d, DictOfSeries) @pytest.mark.parametrize('idxer', FAIL_INDEXERS) -def test__getitem__fail(idxer): +def test__getitem_fail(idxer): with pytest.raises((ValueError, KeyError)): d1[idxer] diff --git a/test/test_ops.py b/test/test__ops__.py similarity index 98% rename from test/test_ops.py rename to test/test__ops__.py index 1e835d40b06167bd18a157fc908f9b3285901f5c..67479b0230b7fc29b8dc09ee2bdec432565a17c5 100644 --- a/test/test_ops.py +++ b/test/test__ops__.py @@ -1,6 +1,5 @@ #!/usr/bin/env python from .test_setup import * -from ..dios.operators import * __author__ = "Bert Palm" diff --git a/test/test__setitem__.py b/test/test__setitem__.py index 28dfe6177a2d8ebff1106732e4ef5d97dc471d28..5ec57e6fc5faac7e5b8ac06de4b10d340d2a2bca 100644 --- a/test/test__setitem__.py +++ b/test/test__setitem__.py @@ -1,4 +1,3 @@ -from ..dios import * from .test_setup import * import pytest diff --git a/test/test_df_like.py b/test/test_dflike.py similarity index 98% rename from test/test_df_like.py rename to test/test_dflike.py index d706321cd06d77b2c3a01c7dbb16c981a30d6f57..a0b93a0673b8d5ee8e021c0dbb9c5b7baaaf17b9 100644 --- a/test/test_df_like.py +++ b/test/test_dflike.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import pytest -from ..dios import * +from .test_setup import * import pandas as pd from pandas.core.dtypes.common import is_dict_like, is_nested_list_like diff --git a/test/test_dflike__setget__.py b/test/test_dflike__setget__.py index 453a91490d48b2d66d8a4eb67e222f6b1afb0eda..bfd8640862f2e6bac77c960598827603c5ba4cc7 100644 --- a/test/test_dflike__setget__.py +++ b/test/test_dflike__setget__.py @@ -1,4 +1,3 @@ -from ..dios import * from .test_setup import * import pytest diff --git a/test/test_dios_old.py b/test/test_dios_old.py deleted file mode 100644 index 7e0a024df55c948aecd00435ba8324f2cf1ad0be..0000000000000000000000000000000000000000 --- a/test/test_dios_old.py +++ /dev/null @@ -1,306 +0,0 @@ -from ..dios import * -from ..profiling import * -import pandas as pd -import datetime as dt -import numpy as np -import pytest - -pytestmark = pytest.mark.skip - - -v0 = 'var0' -v1 = 'var1' -v2 = 'var2' -v3 = 'var3' -v4 = 'var4' -v5 = 'var5' -v6 = 'var6' -v7 = 'var7' -v8 = 'var8' -v9 = 'var9' - - -def gen_series(rows, randomize=True): - start = dt.datetime.strptime("2000-01-10 00:00:00", "%Y-%m-%d %H:%M:%S") - if randomize: - sec = np.random.randint(1, 10) - rows = np.random.randint(int(rows * 0.5), rows * 2) - start += pd.Timedelta(sec) - else: - sec = 10 - times = pd.date_range(periods=rows, start=start, freq=f'{sec}s') - d = np.random.randint(1, 9, rows) - return pd.Series(data=d, index=times) - - -def test_getitem(): - # prepare - begin = dt.datetime.strptime("2000-01-10 00:00:00", "%Y-%m-%d %H:%M:%S") - t0 = begin + pd.Timedelta('20s') - t1 = t0 + pd.Timedelta('50s') - dios_aligned = DictOfSeries() - dios_rand = DictOfSeries() - - # fill - vars = 10 - rows = 10 - for i in range(0, vars): - dios_aligned[f'var{i}'] = gen_series(rows, randomize=False) - dios_rand[f'var{i}'] = gen_series(rows, randomize=True) - - # testsets - var = [v0, [], [v1], [v0, v0], [v0, v2]] - tssl = [slice(None), slice(t0, None), slice(None, t1), slice(t0, t1), slice(t0, t0)] - ts = [t0, [], [t0], [t0, t0], [t0, t1]] - - for v in var: - print(v) - dios_rand[v] - - # use aligned dios for time stamps instead of time-ranges - for t in tssl: - print(t) - dios_rand[v] - - try: - dios_aligned[v1, v2] - except KeyError: - pass - - try: - dios_aligned[v1, v2, v3] - except KeyError: - pass - - -def test_setitem(): - # prepare - begin = dt.datetime.strptime("2000-01-10 00:00:00", "%Y-%m-%d %H:%M:%S") - t0 = begin + pd.Timedelta('30s') - t1 = t0 + pd.Timedelta('50s') - dios_aligned = DictOfSeries() - dios_aligned.name = 'aligned' - dios_rand = DictOfSeries() - dios_rand.name = 'rand' - - # fill - vars = 10 - rows = 100 - for i in range(0, vars): - dios_aligned[f'var{i}'] = gen_series(rows, randomize=False) - dios_rand[f'var{i}'] = gen_series(rows, randomize=True) - - # testsets - keys = [v0, [v1], [v0, v0], [v0, v2]] - tssl = [slice(None), slice(t0, None), slice(None, t1), slice(t0, t1), slice(t0, t0)] - scalars = [1, 'deadbeef'] - l = list(np.random.randint(0, 100, rows)) - dios = [dios_aligned.copy(), dios_rand.copy()] - for d in dios: - d.name = 'src-dios' - - # assign scalars - for val in scalars: - for v in keys: - print(v, '=', val) - dios_rand[v] = val - - # assign scalars - for val in scalars: - for t in tssl: - print(t, '=', val) - dios_rand[t] = val - - # assign list - for v in keys: - print(v, '=', l) - dios_aligned[v] = l - - # assign series - for v in keys: - print(v, '=', 'series') - dios_aligned[v] = dios_aligned[v4] - - # assign dios - for v in keys: - for d in dios: - print(f'{v} = dios[{v}]') - dios_aligned[v] = d[v] - - # probkeys = [[], slice(v0, v0), ] - # for v in probkeys: - # try: - # dios_aligned[v] = l - # except ValueError: - # pass - # else: - # raise AssertionError("should return an error") - - -def test_integrity(): - rows = 1000 - cols = 10 - df, _, _, dios = get_testset(1000, 10) - - v = var_prefix + str(np.random.randint(0, cols)) - t = find_index_range(dios) - t0, t1 = gen_random_timestamps(*t) - - # originals - dios_ = dios.copy(deep=False) - df_ = df.copy() - s_ = df[v].dropna() - - # identity - assert (dios_ == dios).all().all() - assert (dios_[v] == dios[v]).all().all() - assert dios_ is not dios - assert dios_[v] is not dios[v] - - # equal t0 df - assert dios[v] is not df[v] - assert (dios[v] == df[v].dropna()).all().all() - - # write - dios = dios_.copy() - s = s_.copy() - dios[t0:t1] = 4 - s[t0:t1] = 4 - assert (dios[v] == s).all() - - # write all - s = s_.copy() - dios = dios_.copy() - dios[v] = 111 - s[:] = 111 - assert (dios[v] == s).all() - - # multi variables - slice - df = df_.copy() - dios = dios_.copy() - li = [v0, v1, v2] - dios[t0:t1] = 222 - for x in li: - s = df[x].dropna() - s[t0:t1] = 222 - assert (dios[x] == s).all() - # on all - dios[t0:t1] = 222.111 - m = df.loc[t0:t1,:].notna() - df[m] = 222.111 - for x in df: - s = df[x].dropna() - assert (dios[x] == s).all() - - # multi variables - list - df = df_.copy() - dios = dios_.copy() - li = [v0, v5, v3, v9] - dios[t0:t1] = 333 - for x in li: - s = df[x].dropna() - s[t0:t1] = 333 - assert (dios[x] == s).all() - - # dios to dios - df = df_.copy() - dios = dios_.copy() - dios[v] = 444 - dios[v5] = dios[v] * 0.1 - s = df[v].dropna() - s[:] = 444 * 0.1 - assert (dios[v5] == s).all() - - -def test_foreach(): - # dios.foreach() is alias for dios.pipe() - - dios = DictOfSeries() - ser = pd.Series([1,4,5,6,3,5,5,3,2,1,13]) - dios['a'] = ser - dios['b'] = ser * 4 - - # return type of function: Series - d = dios.pipe(pd.Series.astype, float) - assert isinstance(d, DictOfSeries) - assert not d['a'].dtype == ser.dtype - assert (d['a'] == ser.astype(float)).all() - - # return type of function: scalar - d = dios.pipe(min) - assert isinstance(d, pd.Series) - assert d['a'] == ser.min() - assert d['b'] == ser.min() * 4 - - # return type of function: scalar - # but with squeeze - d = dios.copy() - del d['a'] - d = d.pipe(min) - assert isinstance(d, np.int64) - assert d == 4 - - # test inplace - d = dios.copy() - d1 = dios.copy() - d2 = d.pipe(pd.Series.drop_duplicates, inplace=False) - # original didtn change - assert d is not d1 and (d == d1).all() - # tests on d2 - assert d2 is not d - assert isinstance(d2, DictOfSeries) - assert (d2['a'] == ser.copy().drop_duplicates()).all() - assert (d2['b'] == (ser * 4 ).copy().drop_duplicates()).all() - - d3 = d.pipe(pd.Series.drop_duplicates, inplace=True) - assert d3 is None - # original did (!) change - assert d is not d1 - try: d == d1 - except ValueError: pass; - else: raise AssertionError - # check if gone well.. - assert (d['a'] == ser.copy().drop_duplicates()).all() - assert (d['b'] == (ser * 4 ).copy().drop_duplicates()).all() - - -def tmptest(): - # prepare - begin = dt.datetime.strptime("2000-01-10 00:00:00", "%Y-%m-%d %H:%M:%S") - t0 = begin + pd.Timedelta('20s') - t1 = t0 + pd.Timedelta('50s') - diosa = DictOfSeries() - diosr = DictOfSeries() - - # fill - vars = 10 - rows = 8 - for i in range(0, vars): - diosa[f'v{i}'] = gen_series(rows, randomize=False) - diosr[f'v{i}'] = gen_series(rows, randomize=True) - - diosa[[], slice(None)] = 99 - exit(9) - - print(diosa) - d = diosa[t0:t1, v1] - d = d * 1000 - print(d) - print() - print(diosa[t0:t1, v1]) - l1 = len(diosa[t0:t1, v1]) - l2 = len(d) - print(l1, l2) - diosa[t0:t1, v1] = d - print(diosa[v1]) - exit(3) - - -if __name__ == '__main__': - dios_options['disp_max_rows'] = 20 - - # tmptest() - test_getitem() - test_setitem() - test_integrity() - test_foreach() diff --git a/test/test_methods.py b/test/test_methods.py index 95dfb347f301550e560f510d54047410a562ee3c..60a2b297c47e3b04733edf0f3b74fc20cd5ebda8 100644 --- a/test/test_methods.py +++ b/test/test_methods.py @@ -24,9 +24,9 @@ def test_copy_copy_empty(getDtDiosAligned): @pytest.mark.parametrize('left', diosFromMatr(DATA_UNALIGNED)) +# we use comp ops just to get some noise in the data @pytest.mark.parametrize('op', OPCOMP) def test_all(left, op): - # we use comp ops just to get some noise in the data a = left ser = (op(a, a)).all() assert isinstance(ser, pd.Series) diff --git a/test/test_setup.py b/test/test_setup.py index b98ace0a58f1322e9f376a756e13406cde578c1d..29e9ffb4094dca67e3a80fd415ddd25318669c07 100644 --- a/test/test_setup.py +++ b/test/test_setup.py @@ -1,6 +1,6 @@ import pytest -from ..dios import * -from ..dios.operators import _OP1_MAP, _OP2_DIV_MAP, _OP2_ARITH_MAP, _OP2_BOOL_MAP, _OP2_COMP_MAP +from dios.dios import * +from dios.operators import OP_MAP, _OP1_MAP, _OP2_DIV_MAP, _OP2_ARITH_MAP, _OP2_BOOL_MAP, _OP2_COMP_MAP import pandas as pd import numpy as np from copy import deepcopy @@ -133,12 +133,13 @@ def dios_eq_df(dios, df, dios_dropped_empty_colums=False, with_msg=False, raisea BLIST = [True, False, False, False, True] * 2 LISTIDXER = [['a'], ['a', 'c'], pd.Series(['a', 'c'])] -BOOLIDXER = [BLIST, pd.Series(BLIST), df__() > 10] +BOOLIDXER = [BLIST, pd.Series(BLIST), pd.Series(BLIST).values] SLICEIDXER = [slice(None), slice(4), slice(-3, -1), slice(-1, 3), slice(None, None, 3)] -MULTIIDXER = [df__() > 9, df__() != df__(), df__() == df__()] +MULTIIDXER = [df__() > 9, df__() != df__(), df__() == df__(), df__() % 3 == 0] EMPTYIDEXER = [[], pd.Series(), slice(3, 3), slice(3, -1), pd.DataFrame(), []] INDEXERS = LISTIDXER + BOOLIDXER + SLICEIDXER + MULTIIDXER + EMPTYIDEXER +INDEXERS_NOBOOL = LISTIDXER + SLICEIDXER + MULTIIDXER + EMPTYIDEXER LOC_L = [slice(None), slice(2, 8), pd.Series(BLIST), BLIST, [6, 5], 2] LOC_R = [slice(None), slice('a', 'c'), pd.Series([False, False, True, False], index=list("abcd")), @@ -149,7 +150,7 @@ IEMPTY = [[], slice(3, 3), slice(3, -1), []] ILOC_L = [slice(None), slice(2, 8), BLIST, ] + ILIST ILOC_R = [slice(None), slice(1, 3), [False, False, True, False], ] + ILIST -FAIL_INDEXERS = [['z'], ['a', 'z'], pd.Series(['a', 'z']), BLIST, pd.DataFrame(dict(a=[1, 2, 3]))] +FAIL_INDEXERS = [['z'], ['a', 'z'], pd.Series(['a', 'z']), pd.DataFrame(dict(a=[1, 2, 3]))] O = [[0, 0, 0], [0, 0, 0]] I = [[1, 1, 1], [1, 1, 1]]