Commit 38a9b89f authored by Bert Palm 🎇

hardcore reduce

parent 1bc35760
Merge request: !2 Develop
@@ -190,7 +190,7 @@ class DictOfSeries:
else:
# work on columns
new = self.copy_empty()
new._data = self._data.loc[key]
new._data = self._data[key]
return new
def _slice(self, key):
@@ -200,10 +200,7 @@ class DictOfSeries:
new = self.copy_empty()
for k in self.columns:
# we cannot use loc here, because s.loc[:4]
# is inclusive, whereas s[:4] isn't :(
new._data.at[k] = self._data.at[k][key]
return new
def _getitem_bool_dios(self, key):
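The comment retained in this hunk points at a real pandas subtlety: label-based slicing via .loc is end-inclusive, while positional slicing is not. A minimal standalone illustration (plain pandas, not part of this commit):

import pandas as pd

s = pd.Series(range(6))    # default RangeIndex 0..5
print(len(s.loc[:4]))      # 5 -> label slice, end-INCLUSIVE (label 4 kept)
print(len(s.iloc[:4]))     # 4 -> positional slice, end-exclusive
# s[:4] has historically behaved like the positional variant, which is
# exactly what the comment above warns about.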
@@ -225,8 +222,7 @@ class DictOfSeries:
def _getitem_bool_listlike(self, key):
new = self.copy_empty()
for k in self.columns:
ser = self._data.at[k]
new._data.at[k] = ser.loc[key]
new._data.at[k] = self._data.at[k].loc[key]
return new
def __setitem__(self, key, value):
@@ -235,25 +231,20 @@ class DictOfSeries:
if isinstance(key, tuple):
raise KeyError(f"{key}. tuples are not allowed")
elif is_hashable(key) and key not in self.columns:
self._insert(key, value)
return
data = self.__getitem__(key)
elif is_hashable(key):
if isinstance(value, pd.Series):
self._insert(key, value)
else:
self._data.at[key].loc[:] = value
if isinstance(data, pd.Series):
# key must be a scalar
assert is_hashable(key)
data.loc[:] = value
self._data.at[key] = data
else:
data = self.__getitem__(key)
assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"
elif isinstance(data, self.__class__):
for k in data.columns:
s = data._data.at[k]
s.loc[:] = value
self._data.at[k] = s
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
s[:] = value
self._data.at[k].loc[s.index] = s
@property
def loc(self):
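The __setitem__ branch above fills the sub-selection returned by __getitem__ and then writes it back by index labels. A small standalone sketch of that write-back pattern (variable names are illustrative, not from the library):

import pandas as pd

col = pd.Series([10, 20, 30, 40], index=list("abcd"))
sub = col.loc[["b", "d"]].copy()   # stand-in for what __getitem__ hands back
sub[:] = 99                        # broadcast the new value onto the selection
col.loc[sub.index] = sub           # aligned write-back into the stored column
# col is now: a=10, b=99, c=30, d=99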
@@ -337,7 +328,6 @@ class DictOfSeries:
return item in self.columns.copy()
def __delitem__(self, key):
# is safe w.r.t. the 'indexing bug', see hacking.md
del self._data[key]
def __copy__(self):
@@ -477,14 +467,17 @@ class DictOfSeries:
def pprint(dios, max_rows=10, max_cols=2, delim=' '):
sstr = []
cols = list(dios.columns)
if dios.empty:
return "Empty DictionaryOfSeries"
sstr = []
cols = list(dios.columns)
for c in dios.columns:
sstr.append(dios[c].to_string(max_rows=max_rows).split('\n'))
if dios[c].empty:
sstr.append(['no data'])
else:
sstr.append(dios[c].to_string(max_rows=max_rows).split('\n'))
maxlen = max([len(x) for x in sstr])
......
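The pprint hunk renders each column with Series.to_string() and substitutes 'no data' for empty columns. How those per-column line blocks are combined lies outside this hunk; the following only sketches the general idea, with made-up data:

blocks = [
    ["col_a", "0    1.0", "1    2.0"],   # a column rendered by Series.to_string()
    ["no data"],                         # an empty column, per the new branch above
]
maxlen = max(len(b) for b in blocks)
padded = [b + [""] * (maxlen - len(b)) for b in blocks]
for row in zip(*padded):
    print("  ".join(f"{cell:<10}" for cell in row))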
@@ -7,16 +7,8 @@ class _Indexer:
self._dios = _dios
self._data = _dios._data
def _unpack_key(self, key):
# #############################################################################
class _LocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
@@ -29,96 +21,54 @@ class _LocIndexer(_Indexer):
if is_dios_like(rowkey) or is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
return rowkey, colkey
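_unpack_key normalizes whatever lands in loc/iloc into a (rowkey, colkey) pair. A standalone sketch of that normalization (not the library code, just the same idea):

def unpack(key):
    if isinstance(key, tuple):
        if len(key) > 2:
            raise KeyError("Too many indexers")
        rowkey, colkey = key
    else:
        rowkey, colkey = key, slice(None)   # a bare key addresses rows in all columns
    return rowkey, colkey

assert unpack("x") == ("x", slice(None))
assert unpack((slice(None), "a")) == (slice(None), "a")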
# #############################################################################
class _LocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
rowkey, colkey = self._unpack_key(key)
data = self._data.loc[colkey]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if isinstance(data, pd.Series):
# .loc[any, scalar] - got a single row indexed series
if is_hashable(colkey):
new = data.loc[rowkey]
# .loc[scalar, any]
elif is_hashable(rowkey):
# we do not overwrite data directly, so we may get
# a better-fitting series dtype
new = pd.Series(index=type(data.index)([]))
for k in data.index:
s = data.at[k]
new.at[k] = s.loc[rowkey]
# .iloc[:, any] - simple low-cost optimization
elif isinstance(rowkey, slice) and rowkey == slice(None):
new = self._dios.copy_empty()
new._data = data.copy()
# .loc[any, scalar]
if is_hashable(colkey):
new = data.loc[rowkey]
# .loc[non-scalar, non-scalar]
# .loc[any, non-scalar]
else:
for k in data.index:
data.at[k] = data.at[k].loc[rowkey]
if is_hashable(rowkey):
new = data
else:
new = self._dios.copy_empty()
for k in data.index:
new._data.at[k] = data.at[k].loc[rowkey]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
new._data = data
return new
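The getitem path above branches on whether self._data.loc[colkey] yields a single column or the series-of-series holder, and then applies the rowkey per column. A standalone sketch of that dispatch in plain pandas (holder is an illustrative stand-in for the internal self._data):

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3]), "b": pd.Series([4, 5, 6])})
single = holder.loc["a"]                             # scalar colkey -> one pd.Series
sub = holder.loc[["a", "b"]]                         # list colkey   -> Series of Series
rows = {k: sub.at[k].loc[0:1] for k in sub.index}    # rowkey applied column by column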
def __setitem__(self, key, value):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
rowkey, colkey = key
if isinstance(rowkey, tuple):
raise KeyError(f"{key}. tuples are not allowed.")
rowkey, colkey = self._unpack_key(key)
# .loc[any, scalar]
if is_hashable(colkey):
# .loc[don't-care, new-scalar] = val
# if an unknown colkey is given, we insert it and ignore the rowkey
if is_hashable(colkey) and colkey not in self._dios.columns:
if colkey not in self._dios.columns:
self._dios._insert(colkey, value)
return
else:
rowkey, colkey = key, slice(None)
# get .loc[any,any] - we use key(!) here
data = self.__getitem__(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
# .loc[scalar, any]
if is_hashable(rowkey):
# .loc[scalar, scalar]
if is_hashable(colkey):
s = self._data.at[colkey]
s.at[rowkey] = value
self._data.at[colkey] = s
# .loc[scalar, non-scalar] - column-labeled series
else:
data.loc[:] = value
for k in data.index:
s = self._data.at[k]
s.at[rowkey] = data.at[k]
self._data.at[k] = s
# .loc[non-scalar, scalar] - single row-labeled series
elif is_hashable(colkey):
data.loc[rowkey] = value
self._data.at[colkey] = data
# .loc[non-scalar, non-scalar]
elif isinstance(data, self._dios.__class__):
for k in data.columns:
s = data._data.at[k]
s.loc[rowkey] = value
self._data.at[k] = s
self._data.at[colkey].loc[rowkey] = value
# .loc[any, non-scalar]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
for s in self._data.loc[colkey]:
s.loc[rowkey] = value
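The reduced setter simply iterates the selected column series and assigns through .loc, without copying anything back. That works because the holder stores the series objects themselves, so in-place mutation is already visible. A plain-pandas sketch of why (holder is an illustrative name, not library code):

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3]), "b": pd.Series([4, 5, 6])})
for s in holder.loc[["a", "b"]]:     # the selected column series objects
    s.loc[0:1] = 0                   # mutate them in place
assert holder.at["a"].tolist() == [0, 0, 3]   # the change shows up in the holder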
# #############################################################################
@@ -130,97 +80,36 @@ class _iLocIndexer(_Indexer):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
rowkey, colkey = key
else:
rowkey, colkey = key, slice(None)
if isinstance(rowkey, tuple):
raise KeyError(f"{key}. tuples are not allowed.")
if is_dios_like(rowkey) or is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
rowkey, colkey = self._unpack_key(key)
data = self._data.iloc[colkey]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if isinstance(data, pd.Series):
# .iloc[any, int] - got a single row indexed series
if is_integer(colkey):
new = data.iloc[rowkey]
# .iloc[int, any]
elif is_integer(rowkey):
# we do not overwrite data directly, so we may get
# a better-fitting series dtype
new = pd.Series(index=type(data.index)([]))
for k in data.index:
s = data.at[k]
new.at[k] = s.iloc[rowkey]
# .iloc[:, any] - simple low-cost optimization
elif isinstance(rowkey, slice) and rowkey == slice(None):
new = self._dios.copy_empty()
new._data = data.copy()
# .iloc[any, scalar]
if is_integer(colkey):
new = data.iloc[rowkey]
# .iloc[non-int, non-int]
# .iloc[any, non-scalar]
else:
for k in data.index:
data.at[k] = data.at[k].iloc[rowkey]
if is_integer(rowkey):
new = data
else:
new = self._dios.copy_empty()
for k in data.index:
new._data.at[k] = data.at[k].iloc[rowkey]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
new._data = data
return new
def __setitem__(self, key, value):
if isinstance(key, tuple):
rowkey, colkey = key
else:
rowkey, colkey = key, slice(None)
# get .iloc[any,any] - we use key(!) here
data = self.__getitem__(key)
rowkey, colkey = self._unpack_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
# .iloc[scalar, any]
if is_integer(rowkey):
# .iloc[scalar, scalar]
if is_integer(colkey):
s = self._data.iat[colkey]
s.iat[rowkey] = value
self._data.iat[colkey] = s
# .iloc[scalar, non-scalar] - column-labeled series
else:
data.iloc[:] = value
for k in data.index:
s = self._data.at[k]
s.iat[rowkey] = data.at[k]
self._data.at[k] = s
# .iloc[non-scalar, scalar] - single row-labeled series
elif is_integer(colkey):
data.iloc[rowkey] = value
self._data.iat[colkey] = data
# .iloc[non-scalar, non-scalar]
elif isinstance(data, self._dios.__class__):
for k in data.columns:
s = data._data.at[k]
s.iloc[rowkey] = value
self._data.at[k] = s
# .iloc[any, scalar]
if is_integer(colkey):
self._data.iat[colkey].iloc[rowkey] = value
# .iloc[any, non-scalar]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
for s in self._data.iloc[colkey]:
s.iloc[rowkey] = value
# #############################################################################
@@ -262,9 +151,7 @@ class _AtIndexer(_Indexer):
self._check_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".at[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
s = self._data.at[key[1]]
s.at[key[0]] = value
self._data.at[key[1]] = s
self._data.at[key[1]].at[key[0]] = value
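The reduced .at setter chains two .at lookups: the key is (row-label, column-name), so key[1] picks the stored column series and key[0] the row inside it. Since .at on an object-dtype series returns the stored object itself, no write-back is needed. A small sketch of the same idea with an illustrative holder:

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3])})
holder.at["a"].at[2] = 99                     # chained .at hits the stored series directly
assert holder.at["a"].tolist() == [1, 2, 99]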
# #############################################################################
@@ -289,9 +176,7 @@ class _iAtIndexer(_Indexer):
self._check_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
s = self._data.iat[key[1]]
s.iat[key[0]] = value
self._data.iat[key[1]] = s
self._data.iat[key[1]].iat[key[0]] = value
# #############################################################################
......
from .test_setup import *
@@ -3,37 +3,37 @@ from test.test_setup import *
import pytest
def _test(val, exp):
def _test(res, exp):
if isinstance(exp, pd.DataFrame):
assert isinstance(val, DictOfSeries)
assert isinstance(res, DictOfSeries)
if val.empty:
if res.empty:
for c in exp:
assert exp[c].dropna().empty
return
assert (val.columns == exp.columns).all()
assert (res.columns == exp.columns).all()
for c in exp:
l = val[c]
l = res[c]
r = exp[c].dropna()
assert isinstance(l, pd.Series)
assert isinstance(r, pd.Series)
assert (l == r).all()
else:
assert type(exp) == type(val)
assert type(exp) == type(res)
if isinstance(exp, pd.Series):
assert (val == exp.dropna()).all()
assert (res == exp.dropna()).all()
else:
assert val == exp
assert res == exp
@pytest.mark.parametrize('idxer', INDEXERS)
def test_dflike__getitem__(df_, dios_, idxer):
print(idxer)
exp = df_[idxer]
val = dios_[idxer]
_test(val, exp)
res = dios_[idxer]
_test(res, exp)
@pytest.mark.parametrize('locL', LOC_L)
@@ -42,8 +42,8 @@ def test_dflike__get_loc__(df_, dios_, locL, locR):
print(locL)
print(locR)
exp = df_.loc[locL, locR]
val = dios_.loc[locL, locR]
_test(val, exp)
res = dios_.loc[locL, locR]
_test(res, exp)
@pytest.mark.parametrize('ilocL', ILOC_L)
@@ -52,29 +52,45 @@ def test_dflike__get_iloc__(df_, dios_, ilocL, ilocR):
print(ilocL)
print(ilocR)
exp = df_.iloc[ilocL, ilocR]
val = dios_.iloc[ilocL, ilocR]
# _test(val, exp)
res = dios_.iloc[ilocL, ilocR]
_test(res, exp)
if isinstance(exp, pd.DataFrame):
assert isinstance(val, DictOfSeries)
if val.empty:
for c in exp:
assert exp[c].dropna().empty
return
VALS = [99, ]
assert (val.columns == exp.columns).all()
for c in exp:
l = val[c]
r = exp[c].dropna()
assert isinstance(l, pd.Series)
assert isinstance(r, pd.Series)
assert (l == r).all()
else:
assert type(exp) == type(val)
@pytest.mark.parametrize('idxer', INDEXERS)
@pytest.mark.parametrize('val', VALS)
def test_dflike__setitem__(df_, dios_, idxer, val):
print(idxer)
exp = df_
res = dios_
exp[idxer] = val
res[idxer] = val
_test(res, exp)
if isinstance(exp, pd.Series):
assert (val == exp.dropna()).all()
else:
assert val == exp
@pytest.mark.parametrize('locL', LOC_L)
@pytest.mark.parametrize('locR', LOC_R)
@pytest.mark.parametrize('val', VALS)
def test_dflike__set_loc__(df_, dios_, locL, locR, val):
print(locL)
print(locR)
exp = df_
res = dios_
exp.loc[locL, locR] = val
res.loc[locL, locR] = val
_test(res, exp)
@pytest.mark.parametrize('ilocL', ILOC_L)
@pytest.mark.parametrize('ilocR', ILOC_R)
@pytest.mark.parametrize('val', VALS)
def test_dflike__set_iloc__(df_, dios_, ilocL, ilocR, val):
print(ilocL)
print(ilocR)
exp = df_
res = dios_
exp.iloc[ilocL, ilocR] = val
res.iloc[ilocL, ilocR] = val
_test(res, exp)