diff --git a/dios/dios.py b/dios/dios.py index 65a3c4f88445cb33107f4029e3f898a8612ed4bf..cd5e7e151e7c729aa93a442bc6ca8571511cbab3 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -101,7 +101,7 @@ class DictOfSeries: self._data = data else: # it is significantly faster, to provide an index and fill it, - # than to successively build the index by adding series's + # than to successively build the index by adding data self._data = pd.Series(dtype='O', index=columns) else: @@ -416,13 +416,10 @@ class DictOfSeries: if `method` == `indexed` (default): every column is represented by a own index and corresponding values - if `method` == `aligned`: + if `method` == `aligned` [2]: one(!) global index is generated and values from a column appear at the corresponding index-location. - Note: the (common-) params are directly passed to pd.DataFrame.to_string(..) - under the hood - Common params : - max_cols: not more column than `max_cols` are printed [1] - max_rows: see `min_rows` [1] @@ -440,7 +437,10 @@ class DictOfSeries: that this should differ from `na_rep`, otherwise you cannot differ missing- from NaN- values. Notes: - [1] defaults to the corresponding value in `dios_options` + [1]: defaults to the corresponding value in `dios_options` + [2]: the common-params are directly passed to pd.DataFrame.to_string(..) + under the hood, if method is `aligned` + """ if self.empty: return _empty_repr(self) @@ -505,18 +505,36 @@ class DictOfSeries: raise ValueError(axis) return new + def _fast_apply(self, attr, **kwargs): + data = pd.Series(dtype='O', index=self.columns) + call = callable(getattr(pd.Series, attr)) + for c in self.columns: + s = self._data.at[c] + data.at[c] = getattr(s, attr)(**kwargs) if call else getattr(s, attr) + return data + @property def dtypes(self): - s = pd.Series(index=self.columns, dtype='O') - for k in self.columns: - s.at[k] = self._data.at[k].dtype - return s + return self._fast_apply('dtype') def astype(self, dtype, copy=True, errors='raise'): - new = self.copy_empty(columns=True) if copy else self - for k in self.columns: - new._data.at[k] = self._data.at[k].astype(dtype=dtype, copy=copy, errors=errors) - return new + data = self._fast_apply('astype', dtype=dtype, copy=copy, errors=errors) + return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) + + def isna(self): + data = self._fast_apply('isna') + return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) + + def notna(self): + data = self._fast_apply('notna') + return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) + + def dropna(self, inplace=False): + data = self._fast_apply('dropna', inplace=False) # never pass inplace=True + if inplace: + self._data = data + else: + return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) def memory_usage(self, index=True, deep=False): mem = 0