From 0432299cf59b8de00cedc5f8b44c5d4de62ea042 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 15 Apr 2020 02:44:32 +0200
Subject: [PATCH] simplified init a bit

---
 dios/dios.py | 65 +++++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index ad16abd..032113b 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -100,20 +100,23 @@ class DictOfSeries:
             self._policy = cast_policy
 
             cols = pd.Index([] if columns is None else columns)
-            self._data = pd.Series(dtype='O', index=cols.unique())
+            if not cols.is_unique:
+                raise ValueError("columns must be unique")
+            self._data = pd.Series(dtype='O', index=cols)
 
             if data is not None:
-                self._init_insert_data(data, index=index)
+                self._init_insert_data(data, columns, index)
 
-            # self._data contain nans at locations, where no
-            # data was present, but a column-name was given
+            # Note: self._data still contains NaNs at all positions
+            # where a column name was given but no data was present
 
             if itype is None:
                 self._itype = _find_least_common_itype(self._data.dropna())
                 if not self._itype.unique:
                     _throw_MixedItype_err_or_warn(self.itype)
 
-        # insert empty columns
+        # finalise data: insert empty
+        # columns at nan positions
         if self._data.hasnans:
             e = pd.Series(dtype='O', index=index)
             for c in self.columns[self._data.isna()]:
@@ -121,41 +124,41 @@ class DictOfSeries:
 
         self._data.index.name = 'columns'
 
-    def _init_insert_data(self, data, index=None):
+    def _init_insert_data(self, data, columns, index):
         """ Insert items of a iterable in self"""
 
-        data = list(data) if _is_iterator(data) else data
-        cols_empty = self.columns.empty
+        if _is_iterator(data):
+            data = list(data)
 
-        if isinstance(data, dict) or _is_dios_like(data):
-            for k in data:
-                if cols_empty or k in self.columns:
-                    s = pd.Series(data[k], index=index)
-                    self._insert(k, s)
+        if _is_dios_like(data) or isinstance(data, dict):
+            if columns is None:
+                pass  # data is dict-like
+            else:
+                data = {k: data[k] for k in data if k in columns}
 
         elif isinstance(data, pd.Series):
             name = data.name or 0
-            if not cols_empty:
-                # overwrite by columns
-                if data.name is None or data.name not in self.columns:
-                    name = self.columns[0]
-            s = pd.Series(data, index=index)
-            self._insert(name, s)
+            if columns is not None and len(columns) > 0:
+                name = self.columns[0]
+            data = {name: data}
+
+        elif _is_nested_list_like(data):
+            if columns is None:
+                data = {i: d for i, d in enumerate(data)}
+            elif len(data) == len(columns):
+                data = dict(zip(self.columns, data))
+            else:
+                raise ValueError(f"{len(columns)} columns passed, data implies {len(data)} columns")
 
         elif _is_list_like(data):
-            data = data if _is_nested_list_like(data) else [data]
-
-            if cols_empty:
-                self._data = pd.Series(dtype='O', index=pd.RangeIndex(len(data)))
+            name = 0 if columns is None or len(columns) < 1 else self.columns[0]
+            data = {name: data}
 
-            elif len(data) != len(self.columns):
-                raise ValueError(f"length of passed values is {len(data)}, columns imply {len(self.columns)}")
-
-            for i, c in enumerate(self.columns):
-                s = pd.Series(data[i], index=index)
-                self._insert(c, s)
         else:
-            raise ValueError(f"data must be some kind of iterable, type {type(data)} was given")
+            raise TypeError("data type not understood")
+
+        for k in data:
+            self._insert(k, pd.Series(data[k], index=index))
 
     def _insert(self, col, val):
         """Insert a fresh new value as pd.Series into self"""
@@ -167,7 +170,7 @@ class DictOfSeries:
                 raise ValueError(f"Cannot insert frame-like with more than one column")
 
         elif val is None:
-            val = pd.Series(val)
+            val = pd.Series()
 
         elif not isinstance(val, pd.Series):
             raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")
-- 
GitLab