diff --git a/dios/dios.py b/dios/dios.py index 4a31acc50a542a0ace4c54f15a02fb81dd44d99e..7a6638aa87fc3021c574c7cc2a5a10bdd2948d22 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -72,7 +72,7 @@ class DictOfSeries: def __init__(self, data=None, columns=None, itype=MixedItype, downcast_policy='save'): - self._data = pd.Series() + self._data = pd.Series(dtype='O') # We need to keep track of the index-type (itype) of every new Series. # If the itypes differ between different series, slicing will almost always fail # (eg. a datetime-like slice cannot work on a numeric index and vice versa). @@ -189,7 +189,7 @@ class DictOfSeries: new = self._getitem_bool_listlike(key) else: # work on columns - new = self.copy_empty() + new = self.copy_empty(columns=False) new._data = self._data[key] return new @@ -198,7 +198,7 @@ class DictOfSeries: if key == slice(None): return self.copy() - new = self.copy_empty() + new = self.copy_empty(columns=False) for k in self.columns: new._data.at[k] = self._data.at[k][key] return new @@ -210,7 +210,7 @@ class DictOfSeries: if not is_bool_indexer(key[k]): raise ValueError("Must pass DictOfSeries with boolean values only") - new = self.copy_empty() + new = self.copy_empty(columns=True) for k in keys: ser = self._data.at[k] boolser = key[k] @@ -220,7 +220,7 @@ class DictOfSeries: return new def _getitem_bool_listlike(self, key): - new = self.copy_empty() + new = self.copy_empty(columns=False) for k in self.columns: new._data.at[k] = self._data.at[k].loc[key] return new @@ -348,7 +348,7 @@ class DictOfSeries: return self.copy(deep=True) def copy(self, deep=True): - new = self.copy_empty() + new = DictOfSeries(itype=self.itype) # We use `_data` here, because all checks are already done. # So this should be much faster, especially, because we use the underlying dict for # getting and setting the values, instead of ``__setitem__`` and ``__getitem__``. @@ -356,10 +356,8 @@ class DictOfSeries: new._data.at[i] = self._data.at[i].copy(deep=deep) return new - def copy_empty(self): - new = DictOfSeries() - new._itype = self.itype - return new + def copy_empty(self, columns=True): + return DictOfSeries(columns=self.columns if columns is True else None, itype=self.itype) def to_df(self): return self._data.apply(lambda s: s).transpose() @@ -399,7 +397,7 @@ class DictOfSeries: pass if need_dios: - dios = self.copy_empty() + dios = self.copy_empty(columns=False) for i, c in enumerate(self.columns): dios[c] = pd.Series(new[i]) new = dios @@ -408,7 +406,7 @@ class DictOfSeries: return new def _op1(self, op): - new = self.copy_empty() + new = self.copy_empty(columns=False) try: for k in self.columns: new[k] = op(self[k]) @@ -448,7 +446,7 @@ class DictOfSeries: else: raise NotImplementedError - new = self if inplace else self.copy_empty() + new = self if inplace else self.copy_empty(columns=True) try: for k, val in gen(): new[k] = val @@ -479,7 +477,8 @@ class DictOfSeries: def pprint(dios, max_rows=10, max_cols=2, delim=' '): if dios.empty: - return "Empty DictionaryOfSeries" + return f"Empty DictionaryOfSeries\n" \ + f"Columns: {dios.columns.to_list()}" sstr = [] cols = list(dios.columns) diff --git a/dios/locator.py b/dios/locator.py index e0eff837f4b8fca9d299121fd1ebfce4619f02e4..332c53093294892a4efc43aaed46fdd722866e1b 100644 --- a/dios/locator.py +++ b/dios/locator.py @@ -49,7 +49,7 @@ class _LocIndexer(_Indexer): if is_hashable(rowkey): new = data else: - new = self._dios.copy_empty() + new = self._dios.copy_empty(columns=False) new._data = data return new @@ -100,7 +100,7 @@ class _iLocIndexer(_Indexer): if is_integer(rowkey): new = data else: - new = self._dios.copy_empty() + new = self._dios.copy_empty(columns=False) new._data = data return new @@ -139,9 +139,57 @@ class _aLocIndexer(_Indexer): raise NotImplementedError def __getitem__(self, key): - # todo - raise NotImplementedError + if is_dios_like(key): + raise NotImplementedError + else: + rowkey, colkey = self._unpack_key(key) + + # filter column key + + # make column-slice from scalar + if is_hashable(colkey): + colkey = [colkey] if colkey in self._dios.columns else [] + + # pd.Series(a=True, b=False, x=True), columns:[a,b,c,d,] -> [a,] + elif is_bool_series(colkey): + colkey = self._dios.columns.intersection(colkey[colkey].index).to_list() + # filter only existing columns from list + elif is_list_like_not_nested(colkey): + colkey = [c for c in self._dios.columns if c in colkey] + + else: + colkey = self._data.loc[colkey].index.to_list() + + # filter row key + + # make row-slice from scalar + if is_hashable(rowkey): + rowkey = [slice(rowkey, rowkey)] * len(colkey) + + # pd.Series(1=True, 4=False, 12=True) + # align every series, in columns + elif is_bool_series(rowkey): + rowkey = rowkey[rowkey] # kill False + rkeys = [] + for c in colkey: + rkeys += [self._data.at[c].index.intersection(rowkey.index)] + rowkey = rkeys + + # filter only existing rows from list + elif is_list_like_not_nested(rowkey): + rkeys = [] + for c in colkey: + rkeys += [self._data.at[c].index.intersection(rowkey)] + rowkey = rkeys + + else: + rowkey = [rowkey] * len(colkey) + + new = self._dios.copy_empty(columns=False) + for i, c in enumerate(colkey): + new._data.at[c] = self._dios.loc[rowkey[i], c] + return new # ############################################################################# diff --git a/dios/options.py b/dios/options.py index 89d24308c4947ac759a192891fd0897c28213fe4..7d202525dd2bdc0ac5d4bedd4a4fd592be3fb4b1 100644 --- a/dios/options.py +++ b/dios/options.py @@ -81,7 +81,7 @@ dios_options = { def align_dioslikes(self, other, nan=np.nan, policy=None): - new = self.copy_empty() + new = self.copy_empty(columns=False) for k in self.columns: left = self.at[k] if k not in other: