From c014a0df4b155bff60780732040baa9ca1a851d2 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Thu, 12 Mar 2020 14:43:30 +0100
Subject: [PATCH] fine new properties / methods

---
 dios/dios.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index 0675e3d..8117f3c 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -13,7 +13,6 @@ import numpy as np
 import operator as op
 
 import functools as ftools
-import itertools
 
 import pandas.core.dtypes.common as dcom
 
@@ -127,9 +126,8 @@ class DictOfSeries:
             if data is not None:
                 self._init_insert_data(data, columns)
 
-            # NOTE: self._data contain nans at locations
-            # where no data was present, but a column-name
-            # was given
+            # self._data contains NaNs at locations where no
+            # data was present, but a column name was given
 
             if itype is None:
                 self._itype = _find_least_common_itype(self._data.dropna())
@@ -189,6 +187,48 @@ class DictOfSeries:
     def columns(self, cols):
         self._data.index = cols
 
+    @property
+    def indexes(self):
+        """ Return a pd.Series with the index of each column. """
+        return self._fast_apply('index')
+
+    def index_of(self, method='all'):
+        """ Return a single index with indices from all columns.
+
+        Parameters
+        ----------
+        method: string
+            - 'all' : get all indices from all columns
+            - 'union' : alias for 'all'
+            - 'shared' : get indices that are present in every column
+            - 'intersection' : alias for 'shared'
+            - 'uniques' : get indices that are only present in a single column
+            - 'non-uniques' : get indices that are present in more than one column
+
+        Raises
+        ------
+        ValueError : if `method` is none of the names listed above
+
+        Note
+        ----
+        With more than one column, the returned index never contains duplicates.
+        """
+        indexes = self.indexes
+        if len(indexes) <= 1:  # NOTE: `method` is not evaluated on this path
+            return indexes.squeeze()
+
+        if method in ('union', 'all'):
+            res = ftools.reduce(pd.Index.union, indexes)
+        elif method in ('intersection', 'shared'):
+            res = ftools.reduce(pd.Index.intersection, indexes)
+        elif method in ('uniques', 'non-uniques'):
+            res = ftools.reduce(pd.Index.append, indexes)  # concat, keep repeats
+            res = res.value_counts(sort=False, dropna=False)  # label -> count
+            res = res[res == 1].index if method == 'uniques' else res[res > 1].index
+        else:
+            raise ValueError(method)
+        return res if res.is_unique else res.unique()
+
     @property
     def itype(self):
         return self._itype
@@ -352,6 +392,14 @@ class DictOfSeries:
 
         raise ValueError(axis)
 
+    @property
+    def size(self):
+        return self.lengths.sum()  # sum over the per-entry lengths of self._data
+
+    @property
+    def lengths(self):
+        return self._data.apply(len)  # len() of every entry held in self._data
+
     def __len__(self):
         return len(self.columns)
 
@@ -486,8 +534,8 @@ class DictOfSeries:
                     if not isinstance(d, pd.Series):
                         d = pd.Series(d)
                     data.at[c] = d
-                new = DictOfSeries(data=data, itype=MixedItype, fastpath=True)
-                new._itype = new.__find_least_common_itype()
+                itype = _find_least_common_itype(data)
+                new = DictOfSeries(data=data, itype=itype, fastpath=True)
             else:
                 new = pd.Series(data=new, index=self.columns)
         else:
-- 
GitLab