From 7d8e8ef67347fe30a25bbe530ede07dc32d02738 Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Mon, 16 Mar 2020 00:34:32 +0100 Subject: [PATCH] fixed setitem value=dios, added .values --- dios/dios.py | 31 +++++++++++++++++++++++++----- dios/indexer.py | 50 +++++++++++++++++++++++++++--------------------- test/run_dios.py | 3 ++- 3 files changed, 56 insertions(+), 28 deletions(-) diff --git a/dios/dios.py b/dios/dios.py index 5ce12b8..8169647 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -140,6 +140,9 @@ class DictOfSeries: for c in self.columns[self._data.isna()]: self._insert(c, e.copy()) + self._data.name = '_data' + self._data.index.name = 'columns' + def _init_insert_data(self, data, columns): """ Insert items of a iterable in self""" @@ -186,6 +189,13 @@ class DictOfSeries: @columns.setter def columns(self, cols): self._data.index = cols + if not self._data.index.is_unique: + raise ValueError("columns index must have unique values") + + @property + def values(self): + s = self._fast_apply('values') + return s.values @property def indexes(self): @@ -325,11 +335,22 @@ class DictOfSeries: self._data.at[k][s.index] = s def _setitem_dios(self, data, value): - keys = self.columns.intersection(data.columns) - for k in keys: - r = value[k] - idx = data._data.at[k].index.intersection(r.index) - self._data.at[k][idx] = r[idx] + if len(data) != len(self.columns): + raise ValueError(f"length of axis 1 of input array does not match length of selected " + f"columns, input length {len(data)}, self length {len(self.columns)}") + + for i, k in enumerate(data): + l, r = data._data.at[k], value[value.columns[i]] + toset = l.index.intersection(r.index) + if not toset.empty: + self._data.at[k][toset] = r[toset] + + # no drop - set Nans + # todrop = l.index.difference(r.index) + # if not todrop.empty: + # optimisation: drop performance depends on + # index length, even if todrop is empty + # self._data.at[k].drop(todrop, inplace=True) @property def loc(self): diff --git a/dios/indexer.py b/dios/indexer.py index afe9425..8eb5ea9 100644 --- a/dios/indexer.py +++ b/dios/indexer.py @@ -19,9 +19,9 @@ _is_bool_indexer = ccom.is_bool_indexer class _Indexer: - def __init__(self, _dios): - self._dios = _dios - self._data = _dios._data + def __init__(self, obj): + self.obj = obj + self._data = obj._data def _unpack_key(self, key): @@ -80,12 +80,17 @@ class _LocIndexer(_Indexer): # .loc[non-scalar, non-scalar] -> dios else: - new = self._dios.copy_empty(columns=False) + new = self.obj.copy_empty(columns=False) new._data = data return new def __setitem__(self, key, value): + # todo: value is dios-like + # if indexed self is dios: dios-val align+cut (df fill Nans) + # if indexed self is dios: ser-val treat as iterable (length must match) pass to .loc + # if indexed self is ser: dios-val raise ValueError + # if indexed self is ser: ser-val align+cut (df fill Nans to this series) rowkey, colkey = self._unpack_key(key) if _is_dios_like(rowkey) or _is_dios_like(colkey): @@ -94,23 +99,23 @@ class _LocIndexer(_Indexer): # .loc[any, scalar] if _is_hashable(colkey): # .loc[dont-care, new-scalar] = val - if colkey not in self._dios.columns: - self._dios._insert(colkey, value) + if colkey not in self.obj.columns: + self.obj._insert(colkey, value) else: self._data.at[colkey].loc[rowkey] = value # .loc[any, non-scalar] else: i = None + data = self._data.loc[colkey] try: - for i, s in enumerate(self._data.loc[colkey]): + for i, s in enumerate(data): s.loc[rowkey] = value except Exception as e: - raise type(e)(f"failed for " - f"column {self._data.loc[colkey].index[i] if i is not None else '?'}: " - f"" + str(e)) from e + c = data.index[i] if i is not None else '?' + raise type(e)(f"failed for column {c}: " + str(e) ) from e # ############################################################################# @@ -150,12 +155,13 @@ class _iLocIndexer(_Indexer): # .iloc[non-int, non-int] -> dios else: - new = self._dios.copy_empty(columns=False) + new = self.obj.copy_empty(columns=False) new._data = data return new def __setitem__(self, key, value): + # todo: value is dios-like see loc rowkey, colkey = self._unpack_key(key) if _is_dios_like(rowkey) or _is_dios_like(colkey): raise ValueError("Cannot index with multidimensional key") @@ -167,15 +173,15 @@ class _iLocIndexer(_Indexer): # .iloc[any, non-int] else: i = None + data = self._data.iloc[colkey] try: - for i, s in enumerate(self._data.iloc[colkey]): + for i, s in enumerate(data): s.iloc[rowkey] = value except Exception as e: - raise type(e)(f"failed for " - f"column {self._data.loc[colkey].index[i] if i is not None else '?'}: " - f"" + str(e)) from e + c = data.index[i] if i is not None else '?' + raise type(e)(f"failed for column {c}: " f"" + str(e)) from e # ############################################################################# @@ -286,14 +292,14 @@ class _aLocIndexer(_Indexer): c = colkeys[0] new = self._data.at[c].loc[rowkeys[0]] else: - new = pd.Series(index=self._dios.iytpe.min_pdindex) + new = pd.Series(index=self.obj.itype.min_pdindex) else: data = pd.Series(dtype='O', index=colkeys) for i, c in enumerate(data.index): data.at[c] = self._data.at[c].loc[rowkeys[i]] - new = DictOfSeries(data=data, itype=self._dios.itype, - cast_policy=self._dios._policy, + new = DictOfSeries(data=data, itype=self.obj.itype, + cast_policy=self.obj._policy, fastpath=True) except Exception as e: @@ -313,7 +319,7 @@ class _aLocIndexer(_Indexer): # dios / df if _is_dios_like(key): - colkey = self._dios.columns.intersection(key.columns) + colkey = self.obj.columns.intersection(key.columns) rowkey = [self._data.at[c].index.intersection(key[c].index) for c in colkey] else: @@ -321,17 +327,17 @@ class _aLocIndexer(_Indexer): # handle gratefully: scalar if _is_hashable(colkey): - colkey = [colkey] if colkey in self._dios.columns else [] + colkey = [colkey] if colkey in self.obj.columns else [] lowdim = True # column-alignable: dios, only align on columns, ignore rows elif _is_dios_like(colkey): - colkey = self._dios.columns.intersection(colkey.columns) + colkey = self.obj.columns.intersection(colkey.columns) # column-alignable: list-like, filter only existing columns elif _is_list_like_not_nested(colkey) and not _is_bool_indexer(colkey): colkey = colkey.values if isinstance(colkey, pd.Series) else colkey - colkey = self._dios.columns.intersection(colkey) + colkey = self.obj.columns.intersection(colkey) # not alignable # fall back to .loc (boolean list/series, slice(..), ... diff --git a/test/run_dios.py b/test/run_dios.py index 6b9a08e..8160be4 100644 --- a/test/run_dios.py +++ b/test/run_dios.py @@ -7,7 +7,8 @@ if __name__ == '__main__': dios_options[OptsFields.mixed_itype_warn_policy] = Opts.itype_warn print(dios_options) - # df = pd.DataFrame(columns=range(1000)) + df = pd.DataFrame(columns=range(1000)) + pd.Series() # print(df) # exit(99) -- GitLab