diff --git a/dios/dios.py b/dios/dios.py index 63c08bdb8bae21a11613a1d29c8d315bc7b86a93..ad16abdb2c717d51842a0ac7696b62a04c7712b8 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -66,7 +66,7 @@ class DictOfSeries: Policy to use for down-casting an itype. """ - def __init__(self, data=None, columns=None, itype=None, cast_policy='save', fastpath=False): + def __init__(self, data=None, columns=None, index=None, itype=None, cast_policy='save', fastpath=False): # we are called internally if fastpath: @@ -82,22 +82,28 @@ class DictOfSeries: self._data = pd.Series(dtype='O', index=columns) else: + if index is not None and not isinstance(index, pd.Index): + index = pd.Index(index) + # itype=None means infer the itype by the data, so we first set to the highest # possible itype, then insert data, then find the best-fitting itype. - if itype is None: + if itype is None and index is None: self._itype = ObjItype else: - self._itype = get_itype(itype) + if index is not None: + self._itype = get_itype(index) + if itype is not None: + self._itype = get_itype(itype) if cast_policy not in _CAST_POLICIES: raise ValueError(f"downcast_policy must be one of {_CAST_POLICIES}") self._policy = cast_policy - index = pd.Index([] if columns is None else columns) - self._data = pd.Series(dtype='O', index=index.unique()) + cols = pd.Index([] if columns is None else columns) + self._data = pd.Series(dtype='O', index=cols.unique()) if data is not None: - self._init_insert_data(data) + self._init_insert_data(data, index=index) # self._data contain nans at locations, where no # data was present, but a column-name was given @@ -109,14 +115,13 @@ class DictOfSeries: # insert empty columns if self._data.hasnans: - e = pd.Series(dtype='O') + e = pd.Series(dtype='O', index=index) for c in self.columns[self._data.isna()]: self._insert(c, e.copy()) - self._data.name = '_data' self._data.index.name = 'columns' - def _init_insert_data(self, data): + def _init_insert_data(self, data, index=None): """ Insert items of a iterable in self""" data = list(data) if _is_iterator(data) else data @@ -125,7 +130,8 @@ class DictOfSeries: if isinstance(data, dict) or _is_dios_like(data): for k in data: if cols_empty or k in self.columns: - self._insert(k, data[k]) + s = pd.Series(data[k], index=index) + self._insert(k, s) elif isinstance(data, pd.Series): name = data.name or 0 @@ -133,7 +139,8 @@ class DictOfSeries: # overwrite by columns if data.name is None or data.name not in self.columns: name = self.columns[0] - self._insert(name, data) + s = pd.Series(data, index=index) + self._insert(name, s) elif _is_list_like(data): data = data if _is_nested_list_like(data) else [data] @@ -145,7 +152,8 @@ class DictOfSeries: raise ValueError(f"length of passed values is {len(data)}, columns imply {len(self.columns)}") for i, c in enumerate(self.columns): - self._insert(c, data[i]) + s = pd.Series(data[i], index=index) + self._insert(c, s) else: raise ValueError(f"data must be some kind of iterable, type {type(data)} was given") @@ -157,9 +165,13 @@ class DictOfSeries: val = val.squeeze() if not isinstance(val, pd.Series): raise ValueError(f"Cannot insert frame-like with more than one column") - elif not isinstance(val, pd.Series): + + elif val is None: val = pd.Series(val) + elif not isinstance(val, pd.Series): + raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}") + val = cast_to_itype(val, self.itype, policy=self._policy).copy(deep=True) val.name = col self._data.at[col] = val