From c014a0df4b155bff60780732040baa9ca1a851d2 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Thu, 12 Mar 2020 14:43:30 +0100
Subject: [PATCH] fine new properties / methods

---
 dios/dios.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index 0675e3d..8117f3c 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -13,7 +13,6 @@ import numpy as np
 import operator as op
 
 import functools as ftools
-import itertools
 
 import pandas.core.dtypes.common as dcom
 
@@ -127,9 +126,8 @@ class DictOfSeries:
         if data is not None:
             self._init_insert_data(data, columns)
 
-        # NOTE: self._data contain nans at locations
-        # where no data was present, but a column-name
-        # was given
+        # self._data contain nans at locations, where no
+        # data was present, but a column-name was given
         if itype is None:
             self._itype = _find_least_common_itype(self._data.dropna())
 
@@ -189,6 +187,48 @@ class DictOfSeries:
     def columns(self, cols):
         self._data.index = cols
 
+    @property
+    def indexes(self):
+        """ Return a pd.Series with the indexes of all columns. """
+        return self._fast_apply('index')
+
+    def index_of(self, method='all'):
+        """ Return a single index with indices from all columns.
+
+        Parameters
+        ----------
+        method: string
+            - 'all' : get all indices from all columns
+            - 'union' : alias for 'all'
+            - 'shared' : get indices that are present in every column
+            - 'intersection' : alias for 'shared'
+            - 'uniques' : get indices that are only present in a single column
+            - 'non-uniques' : get indices that are present in more than one column
+
+        Note
+        ----
+        The returned index never contains duplicates.
+        """
+
+        indexes = self.indexes
+        if len(indexes) <= 1:
+            return indexes.squeeze()
+
+        if method in ['union', 'all']:
+            res = ftools.reduce(pd.Index.union, indexes)
+        elif method in ['intersection', 'shared']:
+            res = ftools.reduce(pd.Index.intersection, indexes)
+        elif method in ['uniques', 'non-uniques']:
+            res = ftools.reduce(pd.Index.append, indexes)
+            res = res.value_counts(sort=False, dropna=False)
+            if method == 'uniques':
+                res = res[res == 1].index
+            else:
+                res = res[res > 1].index
+        else:
+            raise ValueError(method)
+        return res if res.is_unique else res.unique()
+
     @property
     def itype(self):
         return self._itype
@@ -352,6 +392,16 @@ class DictOfSeries:
 
         raise ValueError(axis)
 
+    @property
+    def size(self):
+        """ Return the summed length of all columns. """
+        return self.lengths.sum()
+
+    @property
+    def lengths(self):
+        """ Return a pd.Series with the length of every column. """
+        return self._data.apply(len)
+
     def __len__(self):
         return len(self.columns)
 
@@ -486,8 +536,8 @@ class DictOfSeries:
                     if not isinstance(d, pd.Series):
                         d = pd.Series(d)
                     data.at[c] = d
-                new = DictOfSeries(data=data, itype=MixedItype, fastpath=True)
-                new._itype = new.__find_least_common_itype()
+                itype = _find_least_common_itype(data)
+                new = DictOfSeries(data=data, itype=itype, fastpath=True)
             else:
                 new = pd.Series(data=new, index=self.columns)
         else:
-- 
GitLab