Skip to content
Snippets Groups Projects
Commit 60dff935 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

iloc done

parent f810894d
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
......@@ -88,9 +88,7 @@ class DictOfSeries:
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
self._itype = None
with reraise("param itype: "):
self.itype = get_itype(itype)
self.itype = get_itype(itype)
if downcast_policy not in CAST_POLICIES:
raise ValueError(f"downcast_policy must be one of {CAST_POLICIES}")
......@@ -105,20 +103,19 @@ class DictOfSeries:
def __init_insert_data__(self, data):
    """Insert the constructor's `data` into self, key by key.

    Accepted forms: another DictOfSeries, dict-like (one column per key),
    nested list-like (one column per inner list, keyed by position), or a
    flat list-like (stored as single column '0').

    :raises ValueError: if `data` is of an unsupported type.
    """
    if isinstance(data, DictOfSeries):
        g = ((k, data[k]) for k in data)
    else:
        # materialize iterators once, so the type checks below don't consume them
        data = list(data) if is_iterator(data) else data
        if is_dict_like(data):
            g = ((k, data[k]) for k in data)
        elif is_nested_list_like(data):
            # one column per inner list, keyed by its stringified position
            g = ((str(i), d) for i, d in enumerate(data))
        elif is_list_like(data):
            g = [('0', data)]
        else:
            raise ValueError(f"init with data of type {type(data)} is not possible.")
    # route every pair through __setitem__ so itype casting/checks apply
    for k, val in g:
        self[k] = val
    return
@property
......@@ -171,15 +168,13 @@ class DictOfSeries:
f"You are hereby warned!")
def __cast_all(self, itype):
    """Cast every column's index to `itype`, re-raising any failure with the
    offending column name prefixed to the message."""
    k = '?'  # placeholder so the except-clause has a name even before the loop runs
    try:
        for k in self.columns:
            casted = cast_to_itype(self._data[k], itype, policy=self._policy)
            self._data[k] = casted
    except Exception as e:
        raise type(e)(f"Column {k}: " + str(e)) from e

def _check_keys(self, keys):
    """Raise a KeyError listing every entry of `keys` that is not a column."""
    missing = [k for k in keys if k not in self.columns]
    if missing:
        raise KeyError(f"{missing} not in index")
def __getitem__(self, key):
"""
......@@ -196,6 +191,7 @@ class DictOfSeries:
new = self._get_item(key)
else:
raise KeyError(key)
# all other cases
else:
keys, ixs = self._get_keys_and_indexer(key)
new = self.copy_empty()
......@@ -205,7 +201,7 @@ class DictOfSeries:
return new
def _get_item(self, key):
    """Return the column `key` as its underlying pd.Series (no copy)."""
    return self._data[key]
def __setitem__(self, key, value):
......@@ -229,20 +225,31 @@ class DictOfSeries:
return
else:
k, i = [key], [slice(None)]
# all other cases
else:
k, i = self._get_keys_and_indexer(key)
gen = self._setitem(k, i, value)
gen = self._yield_tuple_to_set(k, i, value)
for tup in gen:
self._set_item(*tup)
def _setitem(self, keys, ixs, val):
"""Return a generator that yield (key, indexer, value) for all keys"""
if is_iterator(val):
val = list(val)
def _set_item(self, key, ix, val):
    """Set `val` (scalar, list, or series) on column `key` at indexer `ix`."""
    target = self._data[key]
    if not is_series_like(val):
        target[ix] = val
        return
    # series value: only write rows whose labels appear both in the
    # selected slice and in val's index
    selected = target[ix]
    shared = selected.index.intersection(val.index)
    if not shared.empty:
        selected.loc[shared] = val.loc[shared].copy()
def _yield_tuple_to_set(self, keys, ixs, val):
"""Return a generator that yield (key, indexer, value) for all keys"""
val = list(val) if is_iterator(val) else val
diosl, dfl, nlistl = is_dios_like(val), is_dataframe_like(val), is_nested_list_like(val)
if diosl or dfl or nlistl and len(val) != len(keys):
if (diosl or dfl or nlistl) and len(val) != len(keys):
raise ValueError(f"could not broadcast input array with length {len(val)}"
f" into dios of length {len(keys)}")
......@@ -258,19 +265,8 @@ class DictOfSeries:
else:
yield key, ix, val
def _set_item(self, key, ix, val):
"Set a value (scalar or list or series)"
ser = self._data[key]
if is_series_like(val):
left = ser[ix]
index = left.index.intersection(val.index)
if not index.empty:
left.loc[index] = val.loc[index].copy()
else:
ser[ix] = val
def _insert(self, key, val):
""""""
"""Insert a fresh new value into self"""
if isinstance(val, DictOfSeries):
val = val.squeeze()
elif is_list_like(val) and not is_nested_list_like(val):
......@@ -283,19 +279,19 @@ class DictOfSeries:
self._data[key] = val.copy(deep=True)
def _get_keys_and_indexer(self, key):
""" Determine keys and indexer
""" Determine keys and indexer by type of key. This does not deal
with single label-access, only higher dimension objects are handled..
Notes:
Which keys we get, depends on the policy in dios_options
Which keys we get, may depend on the policy in dios_options
"""
err_bool = "only boolen values are allowed"
keys = None
indexers = None
blowup = False
# prevent consuming of a generator
if is_iterator(key):
key = list(key)
key = list(key) if is_iterator(key) else key
if isinstance(key, slice):
keys = self.columns
......@@ -445,92 +441,74 @@ class DictOfSeries:
new._itype = self.itype
return new
def _op1(self, op):
    """Apply the unary operator `op` column-wise and return a new dios.

    Any failure is re-raised as the same exception type, prefixed with the
    operator's symbol looked up in OP_MAP.
    """
    new = self.copy_empty()
    try:
        for k in self:
            new[k] = op(self._data[k])
    except Exception as e:
        raise type(e)(f"'{OP_MAP[op]} dios' failed: " + str(e)) from e
    return new
def _op2(self, other, op, inplace=False):
new = self.copy_empty()
# with index
if isinstance(other, (self.__class__, pd.DataFrame)):
if set(other) != set(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for k in self:
left, right = self._data[k], other[k]
l, r = left.align(right, join='inner')
val = op(l, r)
new._data[k] = val
elif isinstance(other, pd.Series):
for k in self:
left, right = self._data[k], other
l, r = left.align(right, join='inner')
val = op(l, r)
new._data[k] = val
# no index
elif is_dict_like(other):
if set(other) != set(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for k in self:
new._data[k] = op(self._data[k], other[k])
elif is_nested_list_like(other):
if len(other) != len(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for i, k in enumerate(self):
new._data[k] = op(self._data[k], other[i])
def _op2(self, op, other, inplace=False):
    """Apply the binary operator `op` between self and `other`, column-wise.

    `other` may be a dios/DataFrame (matched by key, index-aligned), a
    pd.Series (index-aligned against every column), dict-like (matched by
    key), nested list-like (matched by position), or a scalar/flat
    list-like (broadcast to every column).

    :returns: self if `inplace`, else a fresh dios with the results.
    :raises ValueError: if keys/length of `other` do not match.
    """
    def raiseif(cond, s='length'):
        if cond:
            raise ValueError(f"{s} does not match, {s} left: {len(self)}, {s} right: {len(other)} keys")

    def gen():
        # indexed others -> align on the inner index before applying op
        if isinstance(other, (self.__class__, pd.DataFrame)):
            raiseif(set(other) != set(self), '#keys')
            for k in self.columns:
                left, right = self._data[k], other[k]
                yield k, op(*(left.align(right, join='inner')))
        elif isinstance(other, pd.Series):
            for k in self.columns:
                left, right = self._data[k], other
                yield k, op(*(left.align(right, join='inner')))
        # non-indexed others -> match by key, position, or broadcast
        elif is_dict_like(other):
            raiseif(set(other) != set(self), '#keys')
            for k in self.columns:
                yield k, op(self._data[k], other[k])
        elif is_nested_list_like(other):
            raiseif(len(other) != len(self), 'length')
            for i, k in enumerate(self.columns):
                yield k, op(self._data[k], other[i])
        elif is_scalar(other) or is_list_like(other):
            for k in self.columns:
                yield k, op(self._data[k], other)
        else:
            raise NotImplementedError

    new = self if inplace else self.copy_empty()
    try:
        for k, val in gen():
            new[k] = val
    except Exception as e:
        raise type(e)(f"'dios {OP_MAP[op]} other' failed: " + str(e)) from e
    return new
# unary operators
__neg__ = partialmethod(_op1, op.neg)
__abs__ = partialmethod(_op1, op.abs)
__invert__ = partialmethod(_op1, op.inv)

# comparison operators
__eq__ = partialmethod(_op2, op.eq)
__ne__ = partialmethod(_op2, op.ne)
__le__ = partialmethod(_op2, op.le)
__ge__ = partialmethod(_op2, op.ge)
__lt__ = partialmethod(_op2, op.lt)
__gt__ = partialmethod(_op2, op.gt)

# arithmetic operators
__add__ = partialmethod(_op2, op.add)
__sub__ = partialmethod(_op2, op.sub)
__mul__ = partialmethod(_op2, op.mul)
__mod__ = partialmethod(_op2, op.mod)
__truediv__ = partialmethod(_op2, op.truediv)
__floordiv__ = partialmethod(_op2, op.floordiv)
__pow__ = partialmethod(_op2, op.pow)

# bitwise/boolean operators
__and__ = partialmethod(_op2, op.and_)
__or__ = partialmethod(_op2, op.or_)
__xor__ = partialmethod(_op2, op.xor)
def squeeze(self):
    """Return the single column as a plain pd.Series if the dios holds
    exactly one column, otherwise return self unchanged."""
    return self[self.columns[0]] if len(self) == 1 else self
def memory_usage(self, index=True, deep=False):
mem = 0
......@@ -585,54 +563,21 @@ class DictOfSeries:
return None
return news.squeeze()
# def __find_least_common_itype(self):
# def all_itypes_le(itypes, super_itype):
# for itype in itypes:
# if itype_le(itype, super_itype):
# continue
# return False
# return True
#
# itypes = []
# for k in self.columns:
# itypes.append(get_itype(self._data[k].index))
#
# if not itypes:
# return None
#
# found = None
#
# # check supertypes
# super_itypes = [MixedItype, NumericItype]
# for super_itype in super_itypes:
# if all_itypes_le(itypes, super_itype):
# found = super_itype
# continue
# break
# assert found, "At least this should be MixedItype"
#
# # check base types
# single_itypes = [DatetimeItype, IntegerItype, FloatItype]
# for single_itype in single_itypes:
# if all_itypes_le(itypes, single_itype):
# found = single_itype
# break
# return found
#
class _Indexer:
def __init__(self, _dios):
self._dios = _dios
# short handles
self._data = _dios._data
self._yield_tuple_to_set = _dios._yield_tuple_to_set
class _LocIndexer(_Indexer):
def __init__(self, _dios):
    """Label-based (.loc) indexer bound to the dios `_dios`."""
    super().__init__(_dios)
    # shortcut handles to owner methods used by this indexer
    # NOTE(review): confirm the owning dios still defines _check_keys —
    # the surrounding history is ambiguous about whether this handle is current
    self._check_keys = _dios._check_keys
    self._set_item = _dios._set_item
    self._get_keys_and_indexer = _dios._get_keys_and_indexer
def __getitem__(self, key):
rkey, cols = self._unpack_key(key)
......@@ -642,15 +587,10 @@ class _LocIndexer(_Indexer):
return new
def __setitem__(self, key, value):
    """Label-based assignment: resolve `key` to (row-indexers, columns) and
    delegate the per-column writes to the owner's _set_item."""
    ixs, keys = self._unpack_key(key)
    gen = self._yield_tuple_to_set(keys, ixs, value)
    for tup in gen:
        self._set_item(*tup)
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
......@@ -661,23 +601,26 @@ class _LocIndexer(_Indexer):
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
ckey = list(ckey) if is_iterator(ckey) else ckey
# determine columns
if is_dataframe_like(ckey) or is_nested_list_like(ckey) or is_dios_like(ckey):
raise ValueError("Cannot index with multidimensional key")
if isinstance(ckey, str):
self._check_keys([ckey])
cols = [ckey]
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
self._check_keys(ckey)
cols = ckey
else:
raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
try:
# list and bool list like
cols, _ = self._get_keys_and_indexer(key)
except Exception:
raise
else:
cols = self._data.keys()
rkey = key
# blowup
rkey = [rkey] * len(cols)
return rkey, cols
def _col_slice_to_col_list(self, cslice):
......@@ -689,9 +632,9 @@ class _LocIndexer(_Indexer):
start = keys.index(cslice.start) if cslice.start is not None else None
stop = keys.index(cslice.stop) if cslice.stop is not None else None
except ValueError:
raise KeyError("The slice start label or the slice stop label is not present in the columns.")
if not is_integer(cslice.step) and cslice.step > 0:
raise TypeError("The step parameter of the slice must be positive integer.")
raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
if not is_integer(cslice.step) or cslice.step <= 0:
return []
return keys[slice(start, stop + 1, cslice.step)]
......@@ -705,9 +648,24 @@ class _iLocIndexer(_Indexer):
return new
def __setitem__(self, key, value):
    """Positional assignment: resolve `key` to (row-indexers, columns) and
    write per column with positional (iloc) semantics."""
    ixs, keys = self._unpack_key(key)
    gen = self._yield_tuple_to_set(keys, ixs, value)
    for tup in gen:
        self._set_item_positional(*tup)
def _set_item_positional(self, key, ix, val):
    """Set `val` on column `key` at the positional indexer `ix`."""
    target = self._data[key]
    if not is_series_like(val):
        target.iloc[ix] = val
        return
    # series value: restrict the write to labels present both in the
    # positional selection and in val's index
    shared = target.iloc[ix].index.intersection(val.index)
    if not shared.empty:
        target.loc[shared] = val.loc[shared].copy()
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
# if not, we only have a row-indexer and work on all columns
......@@ -717,8 +675,7 @@ class _iLocIndexer(_Indexer):
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
ckey = list(ckey) if is_iterator(ckey) else ckey
# determine columns
if is_integer(ckey):
......@@ -726,11 +683,18 @@ class _iLocIndexer(_Indexer):
cols = self._integers_to_col_list([ckey])
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
elif is_list_like(ckey) and not is_nested_list_like(ckey):
arr = np.array(ckey)
if is_bool_array(arr):
raise NotImplementedError
self._check_keys(ckey)
cols = self._integers_to_col_list(ckey)
elif is_series_like(ckey):
raise NotImplementedError
elif is_bool_indexer(ckey):
raise NotImplementedError
else:
raise KeyError(f"Type {type(ckey)} is not supported for indexing on columns.")
raise KeyError(f"{ckey} of type {type(ckey)}")
else:
cols = self._data.keys()
rkey = key
......@@ -739,6 +703,8 @@ class _iLocIndexer(_Indexer):
def _check_keys(self, keys):
    """Validate positional column keys: every key must be an integer
    within [-ncols, ncols)."""
    bound = len(self._data)
    for k in keys:
        if not is_integer(k):
            raise ValueError(f"{type(k)} is not integer")
        if not -bound <= k < bound:
            raise KeyError("positional indexer(s) are out-of-bounds in columns")
......@@ -752,5 +718,5 @@ class _iLocIndexer(_Indexer):
def _col_slice_to_col_list(self, sl):
    """Resolve a positional slice over columns to the list of selected
    column keys.

    :raises TypeError: if any slice component is not an integer.

    NOTE(review): None components are rejected too, so open slices like
    [:] raise here — confirm that strictness is intended.
    """
    for s in [sl.start, sl.stop, sl.step]:
        if not is_integer(s):
            raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
    return list(self._data.keys())[sl]
......@@ -5,12 +5,12 @@ import contextlib
import operator as op
@contextlib.contextmanager
def reraise(prefix="", postfix=""):
    """Context manager that re-raises any exception from its block as the
    same exception type, with `prefix` and `postfix` wrapped around the
    original message (the original is chained as the cause)."""
    try:
        yield
    except Exception as err:
        wrapped = type(err)(prefix + str(err) + postfix)
        raise wrapped from err
# @contextlib.contextmanager
# def reraise(prefix="", postfix=""):
# try:
# yield
# except Exception as e:
# raise type(e)(prefix + str(e) + postfix) from e
......
......@@ -5,12 +5,13 @@ import numpy as np
if __name__ == '__main__':
# dios_options[Options.mixed_itype_policy] = 'error'
df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
df[[True, False]]
# df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
# df[[True, False]]
dios = DictOfSeries(data=[234.54, 5, 5, 4, np.nan, 5, 4, 5])
dios = abs(-dios)
dios = abs(~dios)
print(all(dios == dios))
dtser = pd.Series([2,4,4123,122,4], index=pd.date_range(freq='1d', periods=5, start='2000-01-01'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment