From 20cac5df6e3ff5d43b1595ec63b57c2f459c25b3 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Mon, 13 Apr 2020 03:12:30 +0200
Subject: [PATCH] Add index to init; make insert in dios more strict.

---
 dios/dios.py | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/dios/dios.py b/dios/dios.py
index 63c08bd..ad16abd 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -66,7 +66,7 @@ class DictOfSeries:
         Policy to use for down-casting an itype.
     """
 
-    def __init__(self, data=None, columns=None, itype=None, cast_policy='save', fastpath=False):
+    def __init__(self, data=None, columns=None, index=None, itype=None, cast_policy='save', fastpath=False):
 
         # we are called internally
         if fastpath:
@@ -82,22 +82,28 @@ class DictOfSeries:
                 self._data = pd.Series(dtype='O', index=columns)
         else:
 
+            if index is not None and not isinstance(index, pd.Index):
+                index = pd.Index(index)
+
             # itype=None means infer the itype by the data, so we first set to the highest
             # possible itype, then insert data, then find the best-fitting itype.
-            if itype is None:
+            if itype is None and index is None:
                 self._itype = ObjItype
             else:
-                self._itype = get_itype(itype)
+                if index is not None:
+                    self._itype = get_itype(index)
+                if itype is not None:
+                    self._itype = get_itype(itype)
 
             if cast_policy not in _CAST_POLICIES:
                 raise ValueError(f"downcast_policy must be one of {_CAST_POLICIES}")
             self._policy = cast_policy
 
-            index = pd.Index([] if columns is None else columns)
-            self._data = pd.Series(dtype='O', index=index.unique())
+            cols = pd.Index([] if columns is None else columns)
+            self._data = pd.Series(dtype='O', index=cols.unique())
 
             if data is not None:
-                self._init_insert_data(data)
+                self._init_insert_data(data, index=index)
 
             # self._data contain nans at locations, where no
             # data was present, but a column-name was given
@@ -109,14 +115,13 @@ class DictOfSeries:
 
         # insert empty columns
         if self._data.hasnans:
-            e = pd.Series(dtype='O')
+            e = pd.Series(dtype='O', index=index)
             for c in self.columns[self._data.isna()]:
                 self._insert(c, e.copy())
 
-        self._data.name = '_data'
         self._data.index.name = 'columns'
 
-    def _init_insert_data(self, data):
+    def _init_insert_data(self, data, index=None):
         """ Insert items of a iterable in self"""
 
         data = list(data) if _is_iterator(data) else data
@@ -125,7 +130,8 @@ class DictOfSeries:
         if isinstance(data, dict) or _is_dios_like(data):
             for k in data:
                 if cols_empty or k in self.columns:
-                    self._insert(k, data[k])
+                    s = pd.Series(data[k], index=index)
+                    self._insert(k, s)
 
         elif isinstance(data, pd.Series):
             name = data.name or 0
@@ -133,7 +139,8 @@ class DictOfSeries:
                 # overwrite by columns
                 if data.name is None or data.name not in self.columns:
                     name = self.columns[0]
-            self._insert(name, data)
+            s = pd.Series(data, index=index)
+            self._insert(name, s)
 
         elif _is_list_like(data):
             data = data if _is_nested_list_like(data) else [data]
@@ -145,7 +152,8 @@ class DictOfSeries:
                 raise ValueError(f"length of passed values is {len(data)}, columns imply {len(self.columns)}")
 
             for i, c in enumerate(self.columns):
-                self._insert(c, data[i])
+                s = pd.Series(data[i], index=index)
+                self._insert(c, s)
         else:
             raise ValueError(f"data must be some kind of iterable, type {type(data)} was given")
 
@@ -157,9 +165,13 @@ class DictOfSeries:
             val = val.squeeze()
             if not isinstance(val, pd.Series):
                 raise ValueError(f"Cannot insert frame-like with more than one column")
-        elif not isinstance(val, pd.Series):
+
+        elif val is None:
             val = pd.Series(val)
 
+        elif not isinstance(val, pd.Series):
+            raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")
+
         val = cast_to_itype(val, self.itype, policy=self._policy).copy(deep=True)
         val.name = col
         self._data.at[col] = val
-- 
GitLab