diff --git a/dios/dios.py b/dios/dios.py index ad16abdb2c717d51842a0ac7696b62a04c7712b8..032113bbd31da85796ac8f3c956ae209bead787b 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -100,20 +100,23 @@ class DictOfSeries: self._policy = cast_policy cols = pd.Index([] if columns is None else columns) - self._data = pd.Series(dtype='O', index=cols.unique()) + if not cols.is_unique: + raise ValueError("columns must be unique") + self._data = pd.Series(dtype='O', index=cols) if data is not None: - self._init_insert_data(data, index=index) + self._init_insert_data(data, columns, index) - # self._data contain nans at locations, where no - # data was present, but a column-name was given + # Note: self._data still contain nans at all positions + # where no data was present, but a column-name was given if itype is None: self._itype = _find_least_common_itype(self._data.dropna()) if not self._itype.unique: _throw_MixedItype_err_or_warn(self.itype) - # insert empty columns + # finalise data: insert empty + # columns at nan positions if self._data.hasnans: e = pd.Series(dtype='O', index=index) for c in self.columns[self._data.isna()]: @@ -121,41 +124,41 @@ class DictOfSeries: self._data.index.name = 'columns' - def _init_insert_data(self, data, index=None): + def _init_insert_data(self, data, columns, index): """ Insert items of a iterable in self""" - data = list(data) if _is_iterator(data) else data - cols_empty = self.columns.empty + if _is_iterator(data): + data = list(data) - if isinstance(data, dict) or _is_dios_like(data): - for k in data: - if cols_empty or k in self.columns: - s = pd.Series(data[k], index=index) - self._insert(k, s) + if _is_dios_like(data) or isinstance(data, dict): + if columns is None: + pass # data is dict-like + else: + data = {k: data[k] for k in data if k in columns} elif isinstance(data, pd.Series): name = data.name or 0 - if not cols_empty: - # overwrite by columns - if data.name is None or data.name not in self.columns: - name = self.columns[0] - s = pd.Series(data, index=index) - self._insert(name, s) + if columns is not None and len(columns) > 0: + name = self.columns[0] + data = {name: data} + + elif _is_nested_list_like(data): + if columns is None: + data = {i: d for i, d in enumerate(data)} + elif len(data) == len(columns): + data = dict(zip(self.columns, data)) + else: + raise ValueError(f"{len(columns)} columns passed, data implies {len(data)} columns") elif _is_list_like(data): - data = data if _is_nested_list_like(data) else [data] - - if cols_empty: - self._data = pd.Series(dtype='O', index=pd.RangeIndex(len(data))) + name = 0 if columns is None or len(columns) < 1 else self.columns[0] + data = {name: data} - elif len(data) != len(self.columns): - raise ValueError(f"length of passed values is {len(data)}, columns imply {len(self.columns)}") - - for i, c in enumerate(self.columns): - s = pd.Series(data[i], index=index) - self._insert(c, s) else: - raise ValueError(f"data must be some kind of iterable, type {type(data)} was given") + raise TypeError("data type not understood") + + for k in data: + self._insert(k, pd.Series(data[k], index=index)) def _insert(self, col, val): """Insert a fresh new value as pd.Series into self""" @@ -167,7 +170,7 @@ class DictOfSeries: raise ValueError(f"Cannot insert frame-like with more than one column") elif val is None: - val = pd.Series(val) + val = pd.Series() elif not isinstance(val, pd.Series): raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")