def is_iterator(obj):
    """Return whether *obj* is an iterator.

    Thin wrapper around pandas' ``is_iterator`` (imported in this module as
    ``_is_iterator``). It exists mainly to record a pitfall: the example in
    the pandas documentation claims that a list is reported as an iterator,
    but a list is merely iterable, so the actual result is False.

    >>> is_iterator([1, 2, 3])
    False
    >>> is_iterator(iter([1, 2, 3]))
    True
    """
    return _is_iterator(obj)
class _Indexer:
    """Common base of the indexer helpers; keeps handles on the indexed object."""

    def __init__(self, _dios):
        self._dios = _dios
        # short handle on the underlying key -> series mapping
        self._data = _dios._data


class _iLocIndexer(_Indexer):
    """Purely positional indexer, handed out by ``DictOfSeries.iloc``.

    Supported forms are ``dios.iloc[rows]`` (all columns) and
    ``dios.iloc[rows, cols]``. ``rows`` is passed unchanged to the ``.iloc``
    of every selected column. ``cols`` may be an integer, a slice of
    integers, or a list-like / iterator of integers, all interpreted as
    positions in the column order of the underlying mapping.
    """

    def __getitem__(self, key):
        """Return a new ``DictOfSeries`` holding the selected rows and columns."""
        rkey, cols = self._unpack_key(key)
        new = DictOfSeries()
        for c in cols:
            new[c] = self._data[c].iloc[rkey]
        return new

    def _unpack_key(self, key):
        """Split *key* into a row-indexer and a list of column keys.

        A tuple is read as ``(row-indexer, column-indexer)``; anything else
        is a row-indexer that applies to all columns.

        Raises:
            KeyError: on more than two indexers, on an unsupported
                column-indexer type, or on non-integer / out-of-bounds
                column positions.
            TypeError: if a column slice has non-integer components.
        """
        if not isinstance(key, tuple):
            # only a row-indexer was given, so work on all columns
            return key, list(self._data.keys())

        rkey, ckey, *fail = key
        if fail:
            raise KeyError("To many indexers")

        # an iterator can be consumed only once, but it is both validated
        # and resolved below, so materialize it first
        if is_iterator(ckey):
            ckey = list(ckey)

        if is_integer(ckey):
            self._check_keys([ckey])
            cols = self._integers_to_col_list([ckey])
        elif isinstance(ckey, slice):
            cols = self._col_slice_to_col_list(ckey)
        elif is_list_like(ckey):
            self._check_keys(ckey)
            cols = self._integers_to_col_list(ckey)
        else:
            raise KeyError(f"Type {type(ckey)} is not supported for indexing on columns.")
        return rkey, cols

    def _check_keys(self, keys):
        """Ensure every entry of *keys* is an integer position of an existing column.

        Raises:
            KeyError: if an entry is not an integer (booleans and floats are
                rejected as well, so they are not silently used as
                positions) or lies outside ``[-ncols, ncols)``.
        """
        bound = len(self._data)
        for k in keys:
            if not is_integer(k):
                raise KeyError(f"positional column indexers must be integers, passed was {k} of {type(k)}")
            if not -bound <= k < bound:
                raise KeyError("positional indexer(s) are out-of-bounds in columns")

    def _integers_to_col_list(self, ints):
        """Translate column positions to column keys.

        The keys are returned as a list in the requested order. A position
        requested more than once yields its key only once, because the
        resulting object cannot hold duplicate columns anyway.
        """
        klist = list(self._data.keys())
        # dict.fromkeys removes duplicates while keeping first-seen order
        return list(dict.fromkeys(klist[i] for i in ints))

    def _col_slice_to_col_list(self, sl):
        """Translate a positional column slice to a list of column keys.

        Omitted slice components (``None``) are allowed, so ``:``, ``1:``
        and ``:2`` all work.

        Raises:
            TypeError: if a given start, stop or step is not an integer.
        """
        for s in (sl.start, sl.stop, sl.step):
            if s is not None and not is_integer(s):
                raise TypeError(f"positional indexing with slice must be integers, passed was {s} of {type(s)}")
        return list(self._data.keys())[sl]