Skip to content
Snippets Groups Projects
Commit 26cc1ebf authored by Bert Palm's avatar Bert Palm 🎇
Browse files

loc done

parent d57bcfe9
No related branches found
No related tags found
1 merge request!2Develop
......@@ -283,8 +283,8 @@ class DictOfSeries:
# bool indexer
# ------------
# bool indexers always work on rows, so they need to have
# an index wo which we can align to. This is necessary
# because we can hold series of different indices.
# an index to which we can align. This is necessary
# because we could hold series of different lengths/indexes.
if is_bool_indexer(key):
if not isinstance(key, pd.Series):
raise ValueError("Must pass Series with boolean values only")
......@@ -321,10 +321,12 @@ class DictOfSeries:
@property
def loc(self):
    """Return a ``_LocIndexer`` bound to this object (``obj.loc[...]``)."""
    # The indexer class is imported at call time, inside the property.
    from dios.locator import _LocIndexer as indexer_cls
    return indexer_cls(self)
@property
def iloc(self):
    """Return an ``_iLocIndexer`` bound to this object (``obj.iloc[...]``)."""
    # The indexer class is imported at call time, inside the property.
    from dios.locator import _iLocIndexer as indexer_cls
    return indexer_cls(self)
def __str__(self):
......@@ -552,202 +554,3 @@ class DictOfSeries:
if len(news) == 0:
return None
return news.squeeze()
class _Indexer:
    """Common base of the ``loc``/``iloc`` accessors.

    Stores the owning object and aliases two of its private members
    so that the subclasses can reach them directly.
    """

    def __init__(self, _dios):
        # the owner itself
        self._dios = _dios
        # aliases for the owner's column store and its value-unpacking helper
        self._data, self._unpack_value = _dios._data, _dios._unpack_value
class _LocIndexer(_Indexer):
    """Label-based accessor: ``obj.loc[rows]`` or ``obj.loc[rows, cols]``.

    Rows are resolved per column with ``Series.loc``; columns are looked up
    by label in ``self._data.index``.
    """

    def __init__(self, _dios):
        super().__init__(_dios)
        self._set_item = _dios._set_item

    def __getitem__(self, key):
        """Select by label.

        Returns
        -------
        - whatever ``Series.loc`` yields for a single (string) column key,
        - a ``pd.Series`` indexed by column name for a scalar row key,
        - otherwise a new container from ``copy_empty()`` holding the
          selected part of every selected column.
        """
        rkey, cols, lowdim = self._unpack_key(key)
        if not cols:
            # Nothing selected (e.g. an empty column list). ``rkey`` is empty
            # as well, so ``rkey[0]`` below would raise IndexError.
            return self._dios.copy_empty()
        if is_scalar(rkey[0]):
            return self._series(rkey, cols, lowdim)
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = self._dios.copy_empty()
        for c, r in zip(cols, rkey):
            new[c] = self._data[c].loc[r]
        return new

    def _series(self, rkey, cols, lowdim):
        """Look up one row label in several columns.

        ``rkey`` is the per-column list built by ``_unpack_key``. Columns
        that do not contain the label contribute ``np.nan``.
        """
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = pd.Series()
        # Use the per-column row key; passing the whole blown-up list to
        # ``.loc`` would be a list-like (multi-row) lookup.
        for c, r in zip(cols, rkey):
            try:
                new[c] = self._data[c].loc[r]
            except KeyError:
                new[c] = np.nan
        return new

    def _scalar(self, r, c):
        """Return ``self._data[c].loc[r]``."""
        return self._data[c].loc[r]

    def __setitem__(self, key, value):
        """Assign ``value`` to the selection described by ``key``."""
        ixs, keys, _ = self._unpack_key(key)
        for tup in self._unpack_value(keys, ixs, value):
            self._set_item(*tup)

    def _unpack_key(self, key):
        """Split ``key`` into per-column row keys and a column list.

        Returns
        -------
        (rkey, cols, lowdim)
            ``rkey`` is the row indexer repeated once per selected column,
            ``cols`` the selected column labels and ``lowdim`` is True only
            if the column key was a single string.

        Raises
        ------
        KeyError
            If the tuple holds more than two indexers, or a column slice
            bound is not a column label.
        ValueError
            If the column key is nested list-like or dios-like.
        """
        # if we have a tuple, we have a rows- and a column-indexer
        # if not, we only have a row-indexer and work on all columns
        lowdim = False
        if isinstance(key, tuple):
            rkey, ckey, *fail = key
            if fail:
                raise KeyError("To many indexers")

            # an iterator can only be consumed once, so materialize it
            ckey = list(ckey) if is_iterator(ckey) else ckey

            # determine columns
            if is_nested_list_like(ckey) or is_dios_like(ckey):
                raise ValueError("Cannot index with multidimensional key")
            if isinstance(ckey, str):
                cols = [ckey]
                lowdim = True
            elif isinstance(ckey, slice):
                cols = self._col_slice_to_col_list(ckey)
            else:
                # list, boolean-list or series
                cols, *_ = self._dios._unpack_key(ckey)
        else:
            cols = list(self._data.index)
            rkey = key

        # blowup: one row key per column
        rkey = [rkey] * len(cols)
        return rkey, cols, lowdim

    def _col_slice_to_col_list(self, cslice):
        """Translate a label slice over the columns into a list of labels.

        Both slice bounds are inclusive, see:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels

        A missing step means 1; a non-integer or non-positive step selects
        nothing.
        """
        keys = list(self._data.index)
        try:
            start = keys.index(cslice.start) if cslice.start is not None else None
            stop = keys.index(cslice.stop) if cslice.stop is not None else None
        except ValueError:
            raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
        step = 1 if cslice.step is None else cslice.step
        if not is_integer(step) or step <= 0:
            return []
        # make the stop label inclusive; an open end stays open
        if stop is not None:
            stop += 1
        return keys[start:stop:step]
class _iLocIndexer(_Indexer):
    """Position-based accessor: ``obj.iloc[rows]`` or ``obj.iloc[rows, cols]``.

    Rows are resolved per column with ``Series.iloc``; columns are addressed
    by their integer position in ``self._data.index``.
    """

    def __getitem__(self, key):
        """Select by position.

        Returns
        -------
        - whatever ``Series.iloc`` yields for a single integer column key,
        - a ``pd.Series`` indexed by column name for a scalar row key,
        - otherwise a new container from ``copy_empty()`` holding the
          selected part of every selected column.
        """
        rkey, cols, lowdim = self._unpack_key(key)
        if not cols:
            # Nothing selected (e.g. an empty column list). ``rkey`` is empty
            # as well, so ``rkey[0]`` below would raise IndexError.
            return self._dios.copy_empty()
        if is_scalar(rkey[0]):
            return self._series(rkey, cols, lowdim)
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = self._dios.copy_empty()
        for c, r in zip(cols, rkey):
            new[c] = self._data[c].iloc[r]
        return new

    def _series(self, rkey, cols, lowdim):
        """Look up one row position in several columns.

        ``rkey`` is the per-column list built by ``_unpack_key``. Columns
        that are too short for the position contribute ``np.nan``.
        """
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = pd.Series()
        # Use the per-column row key; passing the whole blown-up list to
        # ``.iloc`` would be a list-like (multi-row) lookup.
        for c, r in zip(cols, rkey):
            try:
                new[c] = self._data[c].iloc[r]
            except (KeyError, IndexError):
                # an out-of-bounds position raises IndexError with ``.iloc``
                new[c] = np.nan
        return new

    def _scalar(self, r, c):
        """Return ``self._data[c].iloc[r]``."""
        return self._data[c].iloc[r]

    def __setitem__(self, key, value):
        """Assign ``value`` by position, then raise ``NotImplementedError``.

        NOTE(review): as in the original, the error is raised *after* the
        assignments were applied (the original used ``raise NotImplemented``,
        which fails with a TypeError). Whether it should move to the top or
        be dropped needs confirmation from the author.
        """
        ixs, keys, _ = self._unpack_key(key)
        for tup in self._unpack_value(keys, ixs, value):
            self._set_item_positional(*tup)
        raise NotImplementedError

    def _set_item_positional(self, key, ix, val):
        """Write ``val`` into the rows at position ``ix`` of column ``key``.

        A ``pd.Series`` value is aligned on the index first: only labels
        present in both the selected rows and ``val`` are written.
        """
        ser = self._data[key]
        if isinstance(val, pd.Series):
            index = ser.iloc[ix].index
            index = index.intersection(val.index)
            if not index.empty:
                ser.loc[index] = val.loc[index].copy()
        else:
            ser.iloc[ix] = val

    def _unpack_key(self, key):
        """Split ``key`` into per-column row keys and a column list.

        Returns
        -------
        (rkey, cols, lowdim)
            ``rkey`` is the row indexer repeated once per selected column,
            ``cols`` the selected column labels and ``lowdim`` is True only
            if the column key was a single integer.

        Raises
        ------
        KeyError
            More than two indexers, an out-of-bounds column position, or an
            unsupported column key type.
        ValueError
            A non-integer entry in a list-like column key.
        TypeError
            A column slice with non-integer components.
        NotImplementedError
            Boolean column keys.
        """
        # if we have a tuple, we have a rows- and a column-indexer
        # if not, we only have a row-indexer and work on all columns
        lowdim = False
        if isinstance(key, tuple):
            rkey, ckey, *fail = key
            if fail:
                raise KeyError("To many indexers")

            # an iterator can only be consumed once, so materialize it
            ckey = list(ckey) if is_iterator(ckey) else ckey

            # determine columns
            if is_integer(ckey):
                self._check_keys([ckey])
                cols = self._integers_to_col_list([ckey])
                lowdim = True
            elif isinstance(ckey, slice):
                cols = self._col_slice_to_col_list(ckey)
            elif is_list_like(ckey) and not is_nested_list_like(ckey):
                arr = np.array(ckey)
                if is_bool_array(arr):
                    raise NotImplementedError
                self._check_keys(ckey)
                cols = self._integers_to_col_list(ckey)
            # NOTE(review): a pd.Series is list-like, so it is presumably
            # already handled by the branch above - confirm the intended order.
            elif isinstance(ckey, pd.Series):
                raise NotImplementedError
            elif is_bool_indexer(ckey):
                raise NotImplementedError
            else:
                raise KeyError(f"{ckey} of type {type(ckey)}")
        else:
            cols = list(self._data.index)
            rkey = key

        # blowup: one row key per column
        rkey = [rkey] * len(cols)
        return rkey, cols, lowdim

    def _check_keys(self, keys):
        """Validate that every entry of ``keys`` is an in-bounds column position.

        Raises
        ------
        ValueError
            If an entry is not an integer.
        KeyError
            If an entry lies outside ``[-len(columns), len(columns))``.
        """
        bound = len(self._data)
        for k in keys:
            if not is_integer(k):
                raise ValueError(f"{type(k)} is not integer")
            if k not in range(-bound, bound):
                raise KeyError("positional indexer(s) are out-of-bounds in columns")

    def _integers_to_col_list(self, ints):
        """Map column positions to column labels.

        Duplicates are dropped; the labels keep the order in which they were
        first requested (a plain ``set`` would return them in arbitrary order).
        """
        klist = list(self._data.index)
        return list(dict.fromkeys(klist[i] for i in ints))

    def _col_slice_to_col_list(self, sl):
        """Apply a positional slice to the list of column labels.

        Every slice component must be an integer or None (an omitted
        component, as in ``1:`` or ``:``).
        """
        for s in (sl.start, sl.stop, sl.step):
            if s is not None and not is_integer(s):
                raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
        return list(self._data.index)[sl]
from dios.dios import *
class _Indexer:
    """Common base of the ``loc``/``iloc`` accessors.

    Stores the owning object and aliases its ``columns`` and ``_data``
    members so that the subclasses can reach them directly.
    """

    def __init__(self, _dios):
        # the owner itself
        self._dios = _dios
        # aliases for the owner's column labels and column store
        self.columns, self._data = _dios.columns, _dios._data
        # alias left disabled, as in the original:
        # self._unpack_value = _dios._unpack_value
class _LocIndexer(_Indexer):
    """Label-based accessor: ``obj.loc[rows]`` or ``obj.loc[rows, cols]``.

    Columns are selected with ``self._data.loc``; rows are then resolved
    per column with ``Series.loc``.
    """

    def __init__(self, _dios):
        super().__init__(_dios)
        self._set_item = _dios._set_item

    def _series(self, rkey, cols, lowdim):
        """Look up one row label in several columns.

        ``rkey`` holds one row key per entry of ``cols``. Columns that do
        not contain the label contribute ``np.nan``.
        """
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = pd.Series()
        # Use the per-column row key; passing the whole list to ``.loc``
        # would be a list-like (multi-row) lookup.
        for c, r in zip(cols, rkey):
            try:
                new[c] = self._data[c].loc[r]
            except KeyError:
                new[c] = np.nan
        return new

    def _scalar(self, r, c):
        """Return ``self._data[c].loc[r]``."""
        return self._data[c].loc[r]

    def __setitem__(self, key, value):
        """Assign by label - not implemented yet.

        An empty selection is a no-op. Any other selection raises
        ``NotImplementedError``, because the original body was an unfinished
        stub that either crashed while unpacking or silently did nothing.
        """
        data, rkey, lowdim = self._getdata(key)
        if data.empty:
            return
        raise NotImplementedError("setting values with .loc is not implemented yet")

    def __getitem__(self, key):
        """Select by label.

        Returns
        -------
        - ``data.loc[rows]`` of the single column for a hashable column key,
        - a ``pd.Series`` indexed by column name for a hashable row key,
        - otherwise a new container from ``copy_empty()`` holding the
          selected part of every selected column.

        An empty selection yields an empty Series named after the row key
        (hashable row key) or an empty container.
        """
        data, rkey, lowdim = self._getdata(key)
        colseries = is_hashable(rkey)

        if data.empty:
            if colseries:
                # Work on a copy: ``data`` may be the internal column store
                # or a stored column, which must be neither renamed nor
                # handed out to the caller.
                empty = data.copy()
                empty.name = rkey
                return empty
            return self._dios.copy_empty()

        if lowdim:
            return data.loc[rkey]

        new = pd.Series() if colseries else self._dios.copy_empty()
        for s in data.index:
            new[s] = data[s].loc[rkey]
        return new

    def _getdata(self, key):
        """Split ``key`` and pre-select the columns.

        Returns
        -------
        (data, rkey, lowdim)
            ``data`` is ``self._data`` restricted by the column key (all of
            it if no column key was given), ``rkey`` the row indexer and
            ``lowdim`` is True only if the column key is hashable.

        Raises
        ------
        KeyError
            If the tuple holds more than two indexers.
        ValueError
            If the column key is dios-like.
        """
        lowdim = False
        if isinstance(key, tuple):
            key, ckey, *fail = key
            if fail:
                raise KeyError("To many indexers")
            if is_dios_like(ckey):
                raise ValueError("Cannot index with multidimensional key")
            if is_hashable(ckey):
                lowdim = True
            # errors of the column lookup propagate unchanged
            data = self._data.loc[ckey]
        else:
            data = self._data
        return data, key, lowdim

    def _col_slice_to_col_list(self, cslice):
        """Translate a label slice over the columns into a list of labels.

        Both slice bounds are inclusive, see:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels

        A missing step means 1; a non-integer or non-positive step selects
        nothing.
        """
        keys = list(self._data.index)
        try:
            start = keys.index(cslice.start) if cslice.start is not None else None
            stop = keys.index(cslice.stop) if cslice.stop is not None else None
        except ValueError:
            raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
        step = 1 if cslice.step is None else cslice.step
        if not is_integer(step) or step <= 0:
            return []
        # make the stop label inclusive; an open end stays open
        if stop is not None:
            stop += 1
        return keys[start:stop:step]
class _iLocIndexer(_Indexer):
    """Position-based accessor: ``obj.iloc[rows]`` or ``obj.iloc[rows, cols]``.

    Rows are resolved per column with ``Series.iloc``; columns are addressed
    by their integer position in ``self._data.index``.
    """

    def __getitem__(self, key):
        """Select by position.

        Returns
        -------
        - whatever ``Series.iloc`` yields for a single integer column key,
        - a ``pd.Series`` indexed by column name for a scalar row key,
        - otherwise a new container from ``copy_empty()`` holding the
          selected part of every selected column.
        """
        rkey, cols, lowdim = self._unpack_key(key)
        if not cols:
            # Nothing selected (e.g. an empty column list). ``rkey`` is empty
            # as well, so ``rkey[0]`` below would raise IndexError.
            return self._dios.copy_empty()
        if is_scalar(rkey[0]):
            return self._series(rkey, cols, lowdim)
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = self._dios.copy_empty()
        for c, r in zip(cols, rkey):
            new[c] = self._data[c].iloc[r]
        return new

    def _series(self, rkey, cols, lowdim):
        """Look up one row position in several columns.

        ``rkey`` is the per-column list built by ``_unpack_key``. Columns
        that are too short for the position contribute ``np.nan``.
        """
        if lowdim:
            return self._scalar(rkey[0], cols[0])
        new = pd.Series()
        # Use the per-column row key; passing the whole blown-up list to
        # ``.iloc`` would be a list-like (multi-row) lookup.
        for c, r in zip(cols, rkey):
            try:
                new[c] = self._data[c].iloc[r]
            except (KeyError, IndexError):
                # an out-of-bounds position raises IndexError with ``.iloc``
                new[c] = np.nan
        return new

    def _scalar(self, r, c):
        """Return ``self._data[c].iloc[r]``."""
        return self._data[c].iloc[r]

    def __setitem__(self, key, value):
        """Assign ``value`` by position, then raise ``NotImplementedError``.

        The value-unpacking helper is taken from the owning object, because
        the base class in this file does not alias it on the indexer.

        NOTE(review): as in the original, the error is raised *after* the
        assignments were applied (the original used ``raise NotImplemented``,
        which fails with a TypeError). Whether it should move to the top or
        be dropped needs confirmation from the author.
        """
        ixs, keys, _ = self._unpack_key(key)
        for tup in self._dios._unpack_value(keys, ixs, value):
            self._set_item_positional(*tup)
        raise NotImplementedError

    def _set_item_positional(self, key, ix, val):
        """Write ``val`` into the rows at position ``ix`` of column ``key``.

        A ``pd.Series`` value is aligned on the index first: only labels
        present in both the selected rows and ``val`` are written.
        """
        ser = self._data[key]
        if isinstance(val, pd.Series):
            index = ser.iloc[ix].index
            index = index.intersection(val.index)
            if not index.empty:
                ser.loc[index] = val.loc[index].copy()
        else:
            ser.iloc[ix] = val

    def _unpack_key(self, key):
        """Split ``key`` into per-column row keys and a column list.

        Returns
        -------
        (rkey, cols, lowdim)
            ``rkey`` is the row indexer repeated once per selected column,
            ``cols`` the selected column labels and ``lowdim`` is True only
            if the column key was a single integer.

        Raises
        ------
        KeyError
            More than two indexers, an out-of-bounds column position, or an
            unsupported column key type.
        ValueError
            A non-integer entry in a list-like column key.
        TypeError
            A column slice with non-integer components.
        NotImplementedError
            Boolean column keys.
        """
        # if we have a tuple, we have a rows- and a column-indexer
        # if not, we only have a row-indexer and work on all columns
        lowdim = False
        if isinstance(key, tuple):
            rkey, ckey, *fail = key
            if fail:
                raise KeyError("To many indexers")

            # an iterator can only be consumed once, so materialize it
            ckey = list(ckey) if is_iterator(ckey) else ckey

            # determine columns
            if is_integer(ckey):
                self._check_keys([ckey])
                cols = self._integers_to_col_list([ckey])
                lowdim = True
            elif isinstance(ckey, slice):
                cols = self._col_slice_to_col_list(ckey)
            elif is_list_like(ckey) and not is_nested_list_like(ckey):
                arr = np.array(ckey)
                if is_bool_array(arr):
                    raise NotImplementedError
                self._check_keys(ckey)
                cols = self._integers_to_col_list(ckey)
            # NOTE(review): a pd.Series is list-like, so it is presumably
            # already handled by the branch above - confirm the intended order.
            elif isinstance(ckey, pd.Series):
                raise NotImplementedError
            elif is_bool_indexer(ckey):
                raise NotImplementedError
            else:
                raise KeyError(f"{ckey} of type {type(ckey)}")
        else:
            cols = list(self._data.index)
            rkey = key

        # blowup: one row key per column
        rkey = [rkey] * len(cols)
        return rkey, cols, lowdim

    def _check_keys(self, keys):
        """Validate that every entry of ``keys`` is an in-bounds column position.

        Raises
        ------
        ValueError
            If an entry is not an integer.
        KeyError
            If an entry lies outside ``[-len(columns), len(columns))``.
        """
        bound = len(self._data)
        for k in keys:
            if not is_integer(k):
                raise ValueError(f"{type(k)} is not integer")
            if k not in range(-bound, bound):
                raise KeyError("positional indexer(s) are out-of-bounds in columns")

    def _integers_to_col_list(self, ints):
        """Map column positions to column labels.

        Duplicates are dropped; the labels keep the order in which they were
        first requested (a plain ``set`` would return them in arbitrary order).
        """
        klist = list(self._data.index)
        return list(dict.fromkeys(klist[i] for i in ints))

    def _col_slice_to_col_list(self, sl):
        """Apply a positional slice to the list of column labels.

        Every slice component must be an integer or None (an omitted
        component, as in ``1:`` or ``:``).
        """
        for s in (sl.start, sl.stop, sl.step):
            if s is not None and not is_integer(s):
                raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
        return list(self._data.index)[sl]
......@@ -5,10 +5,9 @@ import numpy as np
if __name__ == '__main__':
# dios_options[OptsFields.mixed_itype_policy] = 'error'
df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1y', start='2000-01-01'))
# df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1y', start='2000-01-01'))
# df[[True, False]]
df1 = pd.DataFrame(dict(a=range(5), b=range(0,50,10)))
df2 = pd.DataFrame(dict(b=[99], a=[888732727]), index=range(3,8))
d1 = DictOfSeries(df1)
......@@ -17,12 +16,12 @@ if __name__ == '__main__':
df1.to_string()
d = DictOfSeries(dict(a=[1,2], b=[12,38,32,32,323], ss=[2,23,3,2,3,], z=pd.Series([1,2,3], index=list("abc"))))
d['ss'].index = df.index
# d=DictOfSeries(df)
d1[:] = d2
print(d)
print(d1)
print(df1)
print(d, type(d))
d = d.loc[:,:]
print(d, type(d))
a = d.loc[:,'a']
print(a, type(a))
x = d.loc[1,['a', 'ss', 'z']]
print(x, type(x))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment