diff --git a/dios/locator.py b/dios/locator.py index 7b891ded593402bdcf3de971c8f9a0cbf2a3ac07..3da7ef91197ae443ed298bb31bf863891911931d 100644 --- a/dios/locator.py +++ b/dios/locator.py @@ -1,4 +1,5 @@ from dios.dios import * +from abc import abstractmethod class _Indexer: @@ -7,15 +8,6 @@ class _Indexer: self._data = _dios._data self._columns = _dios.columns - -class _LocIndexer(_Indexer): - - def __init__(self, _dios): - super().__init__(_dios) - # we can use set item here, as this - # also uses .loc for setting values - self._set_item = _dios._set_item - def __setitem__(self, key, val): keys, rkey, lowdim = self._unpack_key(key) ix, ixalign = self._unpack_rowkey(rkey) @@ -37,7 +29,7 @@ class _LocIndexer(_Indexer): # set series in new dios OR set values in # new series if ix is hashable (see above) for k in keys: - new[k] = self._get_item(self._data.loc[k], ix, ixalign=ixalign) + new[k] = self._get_item(k, ix, ixalign=ixalign) maby_set_series_name(new[k], k) # squeeze to series if a single label was given @@ -47,11 +39,6 @@ class _LocIndexer(_Indexer): return new - def _get_item(self, ser, ix, ixalign=False): - if ixalign: - ix = ser.index.intersection(ix.index) - return ser.loc[ix] - def _unpack_rowkey(self, rkey): align = False if is_dios_like(rkey) or is_nested_list_like(rkey): @@ -62,6 +49,37 @@ class _LocIndexer(_Indexer): rkey, align = rkey[rkey], True # kill `False` return rkey, align + @abstractmethod + def _unpack_key(self, key): + ... + + @abstractmethod + def _get_item(self, key, ix, ixalign=False): + ... + + @abstractmethod + def _set_item(self, ser, ix, val, ixalign=False): + ... + + +class _LocIndexer(_Indexer): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # we can use set item here, as this + # also uses .loc for setting values + + def _set_item(self, *args, **kwargs): + # we can use DictionaryOfSeries._set_item() + # here because it also uses .loc + self._dios._set_item(*args, **kwargs) + + def _get_item(self, key, ix, ixalign=False): + ser = self._data.loc[key] + if ixalign: + ix = ser.index.intersection(ix.index) + return ser.loc[ix] + def _unpack_key(self, key): lowdim = False if isinstance(key, tuple): @@ -70,10 +88,11 @@ class _LocIndexer(_Indexer): raise KeyError("To many indexers") if is_dios_like(ckey): raise ValueError("Cannot index with multidimensional key") - if is_hashable(ckey): - keys = [ckey] - lowdim = True + if is_bool_series(ckey): + keys = ckey.where(ckey).dropna().index.to_list() else: + if is_hashable(ckey): + ckey, lowdim = [ckey], True keys = self._data.loc[ckey].index.to_list() else: keys = self._columns.to_list() @@ -82,108 +101,44 @@ class _LocIndexer(_Indexer): class _iLocIndexer(_Indexer): - def __getitem__(self, key): - rkey, cols, lowdim = self._unpack_key(key) - if is_scalar(rkey[0]): - return self._series(rkey, cols, lowdim) - elif lowdim: - return self._scalar(rkey[0], cols[0]) - else: - new = self._dios.copy_empty() - for i, _ in enumerate(cols): - c, r = cols[i], rkey[i] - new[c] = self._data[c].iloc[r] - return new - - def _series(self, rkey, cols, lowdim): - if lowdim: - return self._scalar(rkey[0], cols[0]) - new = pd.Series() - for c in cols: - try: - new[c] = self._data[c].iloc[rkey] - except KeyError: - new[c] = np.nan - - def _scalar(self, r, c): - return self._data[c].iloc[r] - - def __setitem__(self, key, value): - ixs, keys, _ = self._unpack_key(key) - gen = self._unpack_value(keys, ixs, value) - for tup in gen: - self._set_item_positional(*tup) - raise NotImplemented + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - def _set_item_positional(self, key, ix, val): + def _set_item(self, key, ix, right, ixalign=False): + # we do use loc instead of iloc as we get real keys. + # this works, because keys keep sorted if they come + # from an index from and series (doesn't work with df) ser = self._data[key] - if isinstance(val, pd.Series): - index = ser.iloc[ix].index - index = index.intersection(val.index) - if not index.empty: - ser.loc[index] = val.loc[index].copy() - else: - ser.iloc[ix] = val + if ixalign: + ix = ser.index.intersection(ix.index) + if isinstance(right, pd.Series): + left = ser[ix] + right, ix = align_index_by_policy(left, right) + ser.iloc[ix] = right + + def _get_item(self, key, ix, ixalign=False): + ser = self._data.loc[key] + if ixalign: + ix = ser.index.intersection(ix.index) + return ser.iloc[ix] def _unpack_key(self, key): - # if we have a tuple, we have a rows- and a column-indexer - # if not, we only have a row-indexer and work on all columns lowdim = False if isinstance(key, tuple): - rkey, ckey, *fail = key + key, ckey, *fail = key if fail: raise KeyError("To many indexers") - - # prepare ckey - ckey = list(ckey) if is_iterator(ckey) else ckey - - # determine columns - if is_integer(ckey): - self._check_keys([ckey]) - cols = self._integers_to_col_list([ckey]) - lowdim = True - elif isinstance(ckey, slice): - cols = self._col_slice_to_col_list(ckey) - elif is_list_like(ckey) and not is_nested_list_like(ckey): - arr = np.array(ckey) - if is_bool_array(arr): - raise NotImplementedError - self._check_keys(ckey) - cols = self._integers_to_col_list(ckey) - elif isinstance(ckey, pd.Series): - raise NotImplementedError - elif is_bool_indexer(ckey): - raise NotImplementedError + if is_dios_like(ckey): + raise ValueError("Cannot index with multidimensional key") + if is_bool_series(ckey): + keys = ckey.where(ckey).dropna().index.to_list() else: - raise KeyError(f"{ckey} of type {type(ckey)}") + if is_integer(ckey): + ckey, lowdim = [ckey], True + keys = self._data.iloc[ckey].index.to_list() else: - cols = list(self._data.index) - rkey = key - - # blowup - rkey = [rkey] * len(cols) - return rkey, cols, lowdim - - def _check_keys(self, keys): - bound = len(self._data) - for k in keys: - if not is_integer(k): - raise ValueError(f"{type(k)} is not integer") - if k not in range(-bound, bound): - raise KeyError("positional indexer(s) are out-of-bounds in columns") - - def _integers_to_col_list(self, ints): - klist = list(self._data.index) - ks = set() - for i in ints: - ks.add(klist[i]) - return list(ks) - - def _col_slice_to_col_list(self, sl): - for s in [sl.start, sl.stop, sl.step]: - if not is_integer(s): - raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}") - return list(self._data.index)[sl] + keys = self._columns.to_list() + return keys, key, lowdim def _unpack_value(keys, ix, val): diff --git a/test/test__getitem__.py b/test/test__getitem__.py index 52dd8130ac69981de3ac5e4422c40f3645985cf4..5eab0d82c673089cd7129262c74418f88feaea13 100644 --- a/test/test__getitem__.py +++ b/test/test__getitem__.py @@ -56,7 +56,7 @@ def test__getitem_single_loc_fail(idxer): @pytest.mark.parametrize('idxer', [-5, 99, 'a', '2', None, ]) def test__getitem_single_iloc_fail(idxer): - with pytest.raises(KeyError): + with pytest.raises((KeyError, IndexError, TypeError)): a = d1.iloc[:, idxer] diff --git a/test/test__setitem__.py b/test/test__setitem__.py index 40e79f0556eb6611ff025b86f2656271e99370ac..1c497c545125e6cf26b955f91fd2ec0216d2cb7a 100644 --- a/test/test__setitem__.py +++ b/test/test__setitem__.py @@ -33,40 +33,6 @@ def test__setitem_scalar_loc(idxer, exp): assert ((d[c] == 99) == exp[i]).all() -@pytest.mark.parametrize(('idxer', 'exp'), [(0, s1), (1, s2), (2, s3), (3, s4), - (-1, s4), (-2, s3), (-3, s2), (-4, s1)]) -def test__setitem_single_iloc(idxer, exp): - a = d1.iloc[:, idxer] - assert isinstance(a, pd.Series) - assert (a == exp).all() - - -@pytest.mark.parametrize(('idxer', 'exp'), [((1, 0), s1), ((3, -2), s3), ((-1, -1), s4)]) -def test__setitem_scalar_iloc(idxer, exp): - a = d1.iloc[idxer] - assert is_scalar(a) - assert a == exp.iloc[idxer[0]] - - -@pytest.mark.parametrize('idxer', ['x', '2', 1, None, ]) -def test__setitem_single_fail(idxer): - with pytest.raises(KeyError): - a = d1[idxer] - print(idxer, a) - - -@pytest.mark.parametrize('idxer', ['x', '2', 1, None, ]) -def test__setitem_single_loc_fail(idxer): - with pytest.raises((KeyError, TypeError)): - a = d1.loc[:, idxer] - - -@pytest.mark.parametrize('idxer', [-5, 99, 'a', '2', None, ]) -def test__setitem_single_iloc_fail(idxer): - with pytest.raises(KeyError): - a = d1.iloc[:, idxer] - - BLIST = [True, False, False, True] LISTIDXER = [['a'], ['a', 'c'], pd.Series(['a', 'c'])] @@ -77,10 +43,16 @@ EMPTYIDEXER = [[], pd.Series(), slice(3, 3), slice(3, -1), DictOfSeries()] INDEXERS = LISTIDXER + BOOLIDXER + SLICEIDXER + MULTIIDXER + EMPTYIDEXER +LOC_L = [slice(None), slice(2, 8), pd.Series(BLIST, index=[1, 4, 8, 9]), ] +LOC_R = [slice(None), slice('a', 'c'), pd.Series(BLIST, index=list("baxd")), BLIST, ] + LISTIDXER +VALS = [99, pd.Series(range(4,10), index=range(4,10)), DictOfSeries(dict(a=[33,33,33], b=range(30,90,2)))] -@pytest.mark.parametrize('idxer', INDEXERS) -def test__setitem__(idxer): - d = d1[idxer] +@pytest.mark.parametrize('idxerL', LOC_L) +@pytest.mark.parametrize('idxerR', LOC_R) +@pytest.mark.parametrize('val', VALS) +def test__setitem__loc(idxerL, idxerR, val): + d = d1.copy() + d.loc[idxerL, idxerR] = val assert isinstance(d, DictOfSeries)