Skip to content
Snippets Groups Projects
Commit ca31885e authored by Bert Palm's avatar Bert Palm 🎇
Browse files

create like df

parent 6140bd50
No related branches found
No related tags found
No related merge requests found
from .operators import OP_MAP as _OP_MAP
from .lib import *
from .lib import _CAST_POLICIES, _itype_le, _itype_lt, _throw_MixedItype_err_or_warn
from .lib import (
_CAST_POLICIES,
_itype_le, _itype_lt,
_throw_MixedItype_err_or_warn,
_find_least_common_itype,
)
import pandas as pd
import numpy as np
......@@ -105,10 +110,8 @@ class DictOfSeries:
self._data = pd.Series(dtype='O', index=columns)
else:
self._data = pd.Series(dtype='O', index=pd.Index([]))
# itype=None means infer the itype by the data, so we first set to the highest
# possible itype, then insert data, then find the best-fitting.
# possible itype, then insert data, then find the best-fitting itype.
if itype is None:
self._itype = MixedItype
else:
......@@ -118,53 +121,53 @@ class DictOfSeries:
raise ValueError(f"downcast_policy must be one of {_CAST_POLICIES}")
self._policy = cast_policy
if columns is not None and not _is_list_like_not_nested(columns):
raise TypeError("'columns' must be some kind of list-like collection.")
index = pd.Index([] if columns is None else columns)
self._data = pd.Series(dtype='O', index=index.unique())
if data is not None:
self._init_insert_data(data, columns)
# NOTE: self._data contains NaNs at every location
# where a column name was given but no data
# was present
if itype is None:
self._itype = self.__find_least_common_itype()
self._itype = _find_least_common_itype(self._data.dropna())
if not self._itype.unique:
_throw_MixedItype_err_or_warn(self.itype)
# insert empty series for requested columns
if columns is not None:
# insert empty columns
if self._data.hasnans:
e = pd.Series(dtype='O')
for c in columns:
if fastpath or c not in self.columns:
self._insert(c, e.copy())
for c in self.columns[self._data.isna()]:
self._insert(c, e.copy())
def _init_insert_data(self, data, columns):
def incols(c):
return c in columns if columns is not None else True
""" Insert items of a iterable in self"""
data = list(data) if _is_iterator(data) else data
if isinstance(data, dict) or _is_dios_like(data):
for k in data:
if incols(k):
if columns is None or k in self.columns:
self._insert(k, data[k])
elif _is_list_like(data): # also Series !
data = data if _is_nested_list_like(data) else [data]
if columns is None:
for i, d in enumerate(data):
self._insert(i, d)
else:
if len(data) != len(columns):
raise ValueError(f"length of passed values is {len(data)}, columns imply {len(columns)}")
if self.columns.empty:
self._data = pd.Series(dtype='O', index=pd.RangeIndex(len(data)))
elif len(data) != len(self.columns):
raise ValueError(f"length of passed values is {len(data)}, columns imply {len(self.columns)}")
for i, c in enumerate(columns):
self._insert(c, data[i])
for i, c in enumerate(self.columns):
self._insert(c, data[i])
else:
raise ValueError(f"data must be some kind of iterable, type {type(data)} was given")
def _insert(self, col, val):
"""Insert a fresh new value into self"""
"""Insert a fresh new value as pd.Series into self"""
val = list(val) if _is_iterator(val) else val
if _is_dios_like(val):
......@@ -183,8 +186,8 @@ class DictOfSeries:
return self._data.index
@columns.setter
def columns(self, newindex):
self._data.index = newindex
def columns(self, cols):
self._data.index = cols
@property
def itype(self):
......@@ -205,20 +208,6 @@ class DictOfSeries:
except Exception as e:
raise type(e)(f"Column {c}: " + str(e)) from e
def __find_least_common_itype(self):
    """Pick one itype that fits the index of every series held in ``self._data``.

    The candidates ``NumItype``, ``FloatItype``, ``IntItype`` and ``DtItype``
    are tried in exactly that order. A candidate fits when
    ``_itype_le(t, candidate)`` is truthy for the itype ``t`` of every
    series index. The last fitting candidate is returned.

    Returns ``MixedItype`` when ``self._data`` yields no series or when no
    candidate fits.
    """
    index_types = [get_itype(ser.index) for ser in self._data]
    result = MixedItype
    if not index_types:
        return result
    # No early exit on purpose: a later fitting candidate overrides an earlier one.
    for candidate in (NumItype, FloatItype, IntItype, DtItype):
        if all(_itype_le(t, candidate) for t in index_types):
            result = candidate
    return result
def __getitem__(self, key):
""" dios[key] -> dios/series """
key = list(key) if _is_iterator(key) else key
......
......@@ -135,6 +135,21 @@ def _itype_le(a, b):
return is_itype_like(a, b)
def _find_least_common_itype(iterable_of_series):
    """Pick one itype that fits the index of every series in the iterable.

    The candidates ``NumItype``, ``FloatItype``, ``IntItype`` and ``DtItype``
    are tried in exactly that order. A candidate fits when
    ``_itype_le(t, candidate)`` is truthy for the itype ``t`` of every
    series index. The last fitting candidate is returned.

    Parameters
    ----------
    iterable_of_series
        Iterable of objects with an ``index`` attribute; it is consumed once.

    Returns
    -------
    ``MixedItype`` when the iterable is empty or when no candidate fits,
    otherwise the last fitting candidate.
    """
    index_types = [get_itype(ser.index) for ser in iterable_of_series]
    result = MixedItype
    if not index_types:
        return result
    # No early exit on purpose: a later fitting candidate overrides an earlier one.
    for candidate in (NumItype, FloatItype, IntItype, DtItype):
        if all(_itype_le(t, candidate) for t in index_types):
            result = candidate
    return result
################################################################################
# Casting
......
......@@ -7,8 +7,6 @@ from pandas.core.dtypes.common import is_dict_like, is_nested_list_like
import numpy as np
from copy import deepcopy
pytestmark = pytest.mark.skip
__author__ = "Bert Palm"
__email__ = "bert.palm@ufz.de"
__copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
......@@ -16,10 +14,13 @@ __copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - U
arr = np.random.rand(8)
TESTDATA = [
None, # empty
arr.copy(), # list
# np.array([arr.copy(), arr.copy(), arr.copy()]), # nested list
dict(a=arr.copy(), b=arr.copy()), # dict
None, # empty # 0
[1], # 1
arr.copy(), # 2
np.array([arr.copy(), arr.copy(), arr.copy()]), # 3 - nested list
range(4), # 4
dict(a=arr.copy(), b=arr.copy()), # 5 dict
pd.DataFrame(dict(a=arr.copy(), b=arr.copy())) # 6 df
]
......@@ -27,12 +28,10 @@ TESTDATA = [
@pytest.mark.parametrize("with_column_param", [False, True])
def test_dios_create(data, with_column_param):
if is_dict_like(data) and with_column_param:
# giving column names in dict-keys and in columns-parameter is special in df
pytest.skip()
data_copy0 = deepcopy(data)
data_copy1 = deepcopy(data)
# create columns list
if with_column_param:
df = pd.DataFrame(data=data_copy0)
col = [f"new_{c}" for c in df]
......@@ -43,16 +42,11 @@ def test_dios_create(data, with_column_param):
# nested lists are handled differently by df and dios
data_copy1 = data_copy1.transpose()
df = pd.DataFrame(data=data_copy1, columns=col)
dios = DictOfSeries(data=data_copy0, columns=col)
assert len(dios.columns) == len(df.columns)
assert np.all(dios.values == df.values)
df = pd.DataFrame(data=data_copy0, columns=col)
dios = DictOfSeries(data=data_copy1, columns=col)
# df columns may not be strings, but dios'es are always
columns = [str(c) for c in df.columns]
assert list(dios.columns) == columns
assert dios.columns.equals(df.columns)
for c in df.columns:
assert np.all(dios[str(c)] == df[c])
assert np.all(dios[c] == df[c].dropna())
......@@ -5,15 +5,24 @@ def test_copy_copy_empty(getDtDiosAligned):
dios = getDtDiosAligned.copy()
shallow = dios.copy(deep=False)
deep = dios.copy(deep=True)
empty = dios.copy_empty()
empty_w_cols = dios.copy_empty(columns=True)
empty_no_cols = dios.copy_empty(columns=False)
assert dios is not shallow
assert dios is not deep
assert dios is not empty
assert dios is not empty_w_cols
assert dios is not empty_no_cols
assert dios.itype == shallow.itype
assert dios.itype == deep.itype
assert dios.itype == empty.itype
for attr in ['itype', '_itype', '_policy', ]:
dios_attr = getattr(dios, attr)
for cop in [shallow, deep, empty_w_cols, empty_no_cols]:
copy_attr = getattr(cop, attr)
assert dios_attr == copy_attr
assert dios.columns.equals(shallow.columns)
assert dios.columns.equals(deep.columns)
assert dios.columns.equals(empty_w_cols.columns)
assert not dios.columns.equals(empty_no_cols.columns)
for i in dios:
assert dios[i].index is shallow[i].index
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment