Skip to content
Snippets Groups Projects
Commit 8912d28a authored by Bert Palm's avatar Bert Palm 🎇
Browse files

aloc finished

parent d9b288ab
No related branches found
No related tags found
No related merge requests found
......@@ -33,34 +33,7 @@ Unlike the example says, return lists False, not True
"""
from pandas.core.dtypes.common import is_iterator as _is_iterator
def _is_list_like_not_nested(obj):
    """Return True if `obj` is list-like, but not a nested list-like.

    Both checks are delegated to the module-level `_is_list_like` and
    `_is_nested_list_like` helpers.
    """
    # guard: anything that is not list-like can never qualify
    if not _is_list_like(obj):
        return False
    return not _is_nested_list_like(obj)
def _is_dios_like(obj):
    """Return True if `obj` is a DictOfSeries or a pd.DataFrame.

    Interface a dios-like object is expected to offer (author's notes):
      - it has `columns`, which is some kind of pd.Index
      - iterating the object iterates its columns
      - `x in obj` checks whether x is one of the columns
      - `obj[key]` gives a pd.Series
      - `obj.squeeze()` gives a pd.Series if len(obj) == 1
    """
    return isinstance(obj, (DictOfSeries, pd.DataFrame))
def _is_bool_series(obj):
    """Return True if `obj` is a pd.Series whose dtype is bool."""
    # guard: only a pd.Series can be a boolean series
    if not isinstance(obj, pd.Series):
        return False
    return obj.dtype == bool
def __monkey_patch_pandas():
    """Attach a `to_dios()` method to pd.Series and pd.DataFrame.

    The attached method wraps the pandas object in a DictOfSeries by
    passing it as the `data` argument.
    """

    def to_dios(self):
        return DictOfSeries(data=self)

    # the very same function object is installed on both pandas types
    for pandas_type in (pd.Series, pd.DataFrame):
        setattr(pandas_type, "to_dios", to_dios)


# executed on import: the patch is a module-level side effect
__monkey_patch_pandas()
from typing import Union, Any
class DictOfSeries:
......@@ -153,7 +126,10 @@ class DictOfSeries:
if columns is None or k in self.columns:
self._insert(k, data[k])
elif _is_list_like(data): # also Series !
elif isinstance(data, pd.Series):
self._insert(data.name or 0, data)
elif _is_list_like(data):
data = data if _is_nested_list_like(data) else [data]
if self.columns.empty:
......@@ -301,17 +277,16 @@ class DictOfSeries:
def _getitem_bool_dios(self, key):
    """Select items by a boolean dios-like and drop un-selected indices.

    Only columns present in both `self` and `key` are processed. For each
    of them, the rows where `key` is True are intersected with the rows
    that exist in our own series, and only those rows are kept.

    Parameters
    ----------
    key : dios-like (DictOfSeries or pd.DataFrame) holding boolean values

    Returns
    -------
    A new object created by ``self.copy_empty(columns=True)`` and filled
    with the selected rows of the shared columns.

    Raises
    ------
    ValueError
        If `key` is not a boolean dios-like, or one of its columns is not
        a boolean indexer.
    """
    # validate first, so no result object is allocated for an invalid key
    if not _is_bool_dios_like(key):
        raise ValueError("Must pass DictOfSeries with boolean values only")

    new = self.copy_empty(columns=True)
    for k in self.columns.intersection(key.columns):
        dat = self._data.at[k]
        val = key[k]
        if not _is_bool_indexer(val):
            raise ValueError("Must pass DictOfSeries with boolean values only")
        # align rows: keep only the True-labelled rows that exist in `dat`
        idx = val[val].index.intersection(dat.index)
        new._data.at[k] = dat[idx]
    return new
def _getitem_bool_listlike(self, key):
......@@ -874,3 +849,48 @@ def _to_aligned_df(dios, no_value=' '):
df.loc[nandict[c], c] = np.nan
return df
def _is_list_like_not_nested(obj):
    """Return True if `obj` is list-like, but not a nested list-like.

    Both checks are delegated to the module-level `_is_list_like` and
    `_is_nested_list_like` helpers.
    """
    # guard: anything that is not list-like can never qualify
    if not _is_list_like(obj):
        return False
    return not _is_nested_list_like(obj)
def _is_dios_like(obj) -> bool:
    """Return True if `obj` is a DictOfSeries or a pd.DataFrame.

    Interface a dios-like object is expected to offer (author's notes):
      - it has `columns`, which is some kind of pd.Index
      - iterating the object iterates its columns
      - `x in obj` checks whether x is one of the columns
      - `obj[key]` gives a pd.Series
      - `obj.squeeze()` gives a pd.Series if len(obj) == 1
    """
    return isinstance(obj, (DictOfSeries, pd.DataFrame))
def _is_bool_series(obj) -> bool:
    """Return True if `obj` is a pd.Series whose dtype is bool."""
    # guard: only a pd.Series can be a boolean series
    if not isinstance(obj, pd.Series):
        return False
    return obj.dtype == bool
def _is_bool_dios_like(obj) -> bool:
    """Check whether `obj` is a dios-like that holds boolean values only.

    Anything that is not dios-like is rejected. If every entry of
    `obj.dtypes` equals bool the answer is True. Otherwise, if at least
    one dtype is object ('O'), `_is_bool_indexer` is applied through
    `obj.apply` and the reduced result is returned. In every other case
    the answer is False.
    """
    if not _is_dios_like(obj):
        return False

    column_dtypes = obj.dtypes
    everything_is_bool = (column_dtypes == bool).all()
    if everything_is_bool:
        return True

    # object columns may still hold boolean values, so inspect them
    any_object_dtype = (column_dtypes == 'O').any()
    return obj.apply(_is_bool_indexer).all() if any_object_dtype else False
def to_dios(obj) -> DictOfSeries:
    """Create a DictOfSeries from `obj` by passing it as `data`."""
    dios = DictOfSeries(data=obj)
    return dios
def __monkey_patch_pandas():
    """Attach a `to_dios()` method to pd.Series and pd.DataFrame.

    The attached method wraps the pandas object in a DictOfSeries by
    passing it as the `data` argument.
    """

    def to_dios(self):
        return DictOfSeries(data=self)

    # the very same function object is installed on both pandas types
    for pandas_type in (pd.Series, pd.DataFrame):
        setattr(pandas_type, "to_dios", to_dios)


# executed on import: the patch is a module-level side effect
__monkey_patch_pandas()
......@@ -3,11 +3,13 @@ from .dios import (
_is_dios_like,
_is_bool_series,
_is_list_like_not_nested,
_is_bool_dios_like,
_is_iterator)
import pandas as pd
import pandas.core.common as ccom
import pandas.core.dtypes.common as dcom
_is_list_like = dcom.is_list_like
_is_nested_list_like = dcom.is_nested_list_like
_is_scalar = dcom.is_scalar
......@@ -19,7 +21,7 @@ _is_bool_indexer = ccom.is_bool_indexer
class _Indexer:
def __init__(self, obj):
def __init__(self, obj: DictOfSeries):
self.obj = obj
self._data = obj._data
......@@ -132,7 +134,7 @@ class _LocIndexer(_Indexer):
except Exception as e:
c = data.index[i] if i is not None else '?'
raise type(e)(f"failed for column {c}: " + str(e) ) from e
raise type(e)(f"failed for column {c}: " + str(e)) from e
# #############################################################################
......@@ -259,96 +261,121 @@ class _aLocIndexer(_Indexer):
def __setitem__(self, key, value):
    """Set `value` on the rows and columns selected by `key`.

    `key` is resolved by `_unpack_key_aloc` into a list of row indexers
    and the matching column labels. How `value` is applied depends on
    its type:

    - dios-like (DictOfSeries / pd.DataFrame): only columns present in
      both the selection and `value` are written; rows are aligned by
      `Series.loc`.
    - pd.Series: assigned to every selected column through `Series.loc`,
      which aligns the rows.
    - nested list-like: one item per selected column, no alignment.
    - anything else: assigned as is to every selected column.

    Columns whose row selection is empty are skipped.

    Raises
    ------
    ValueError
        If `value` is a nested list-like whose outer length differs from
        the number of selected columns.
    Exception
        Errors raised while selecting the rows of a column are re-raised
        with the same type and a "failed for column ..." prefix.
    """
    rowkeys, colkeys, _ = self._unpack_key_aloc(key)

    # Look row indexers up by column label, not by position: the dios-like
    # branch below shrinks (and possibly reorders) `colkeys`, after which
    # positions no longer match `rowkeys`.
    rowkey_of = dict(zip(colkeys, rowkeys))

    def iter_self(colkeys, pos=True):
        # Yield (series, row indexer, position or column label) for every
        # column in `colkeys` that has a non-empty row selection.
        c = '?'
        try:
            for i, c in enumerate(colkeys):
                dat = self._data.at[c]
                rk = rowkey_of[c]
                if len(dat.loc[rk]) == 0:
                    continue
                yield dat, rk, i if pos else c
        except Exception as e:
            raise type(e)(f"failed for column {c}: " + str(e)) from e

    # align columns, for rows use series.loc to align
    if _is_dios_like(value):
        colkeys = value.columns.intersection(colkeys)
        for dat, rk, c in iter_self(colkeys, pos=False):
            dat.loc[rk] = value[c]

    # align rows by using series.loc
    elif isinstance(value, pd.Series):
        for dat, rk, _ in iter_self(colkeys):
            dat.loc[rk] = value

    # no alignment: one value item per selected column
    elif _is_nested_list_like(value):
        if len(colkeys) != len(value):
            raise ValueError(f"shape mismatch: values array of shape "
                             f"(.., {len(value)}) could not "
                             f"be broadcast to indexing result of "
                             f"shape (.., {len(colkeys)})")
        for dat, rk, i in iter_self(colkeys):
            dat.loc[rk] = value[i]

    # no alignment: the same value for every selected column
    else:
        for dat, rk, _ in iter_self(colkeys):
            dat.loc[rk] = value
def _unpack_key_aloc(self, key):
"""
Return a list of row indexer and a list of existing(!) column labels.
Both list always have the same length and also could be empty together.
"""
# if a single column-key is given, we will
# return a single Series, instead of a dios
Note:
The items of the row indexer list should be passed to pd.Series.loc[]
"""
# if a single column-key is given, the caller may
# want to return a single Series, instead of a dios
lowdim = False
# multi-dim (var I) depend on the set method
if _is_dios_like(key):
def keys_from_bool_dios_like(key):
    """Derive row/column keys from a boolean dios-like `key`.

    Columns are those shared by our object and `key`. For each of them
    the row indexer is the intersection of our index with the labels
    where `key` is True. Uses `self` and `lowdim` from the enclosing
    scope.

    Raises ValueError if `key` is not a boolean dios-like.
    """
    if not _is_bool_dios_like(key):
        raise ValueError("Must pass dios-like key with boolean "
                         "values only if passed as single indexer")

    def rows_for(col):
        mask = key[col]
        return self._data.at[col].index.intersection(mask[mask].index)

    shared_cols = self.obj.columns.intersection(key.columns)
    row_indexers = [rows_for(col) for col in shared_cols]
    return row_indexers, shared_cols, lowdim
# bool dios / df
if self._use_bool_dios:
# todo: use a _is_bool_dioslike() helper function,
# that check for dtype==bool for each series or
# dtype of pd.Dataframe
colkey = self.obj.columns.intersection(key.columns)
rowkey = []
for c in colkey:
b = key[c]
if not _is_bool_indexer(b):
raise ValueError("Must pass dios-like key with boolean "
"values only if passed as single indexer")
rowkey += [self._data.at[c].index.intersection(b[b].index)]
# align any dios-like
else:
colkey = self.obj.columns.intersection(key.columns)
rowkey = [self._data.at[c].index.intersection(key[c].index) for c in colkey]
def keys_from_dios_like(key):
    """Derive row/column keys by aligning with a dios-like `key`.

    Columns are those shared by our object and `key`. For each of them
    the row indexer is the intersection of both indexes. Uses `self`
    and `lowdim` from the enclosing scope.
    """
    shared_cols = self.obj.columns.intersection(key.columns)
    row_indexers = []
    for col in shared_cols:
        own_index = self._data.at[col].index
        row_indexers.append(own_index.intersection(key[col].index))
    return row_indexers, shared_cols, lowdim
def keys_from_nested_list(key):
    """Derive row/column keys from a nested list-like `key`.

    The i-th item of `key` is used as the row labels for the i-th
    column; labels that do not exist in that column are dropped by the
    index intersection. Uses `self` and `lowdim` from the enclosing
    scope.

    Raises ValueError if the outer length of `key` differs from the
    number of columns.
    """
    # a Series of list-likes is reduced to its raw values
    key = key.values if isinstance(key, pd.Series) else key
    if len(key) != len(self.obj.columns):
        raise ValueError("nested arrays outer length must have same length as columns.")
    colkey = self.obj.columns
    # enumerate pairs every column label with the position of its row key
    rowkey = [
        self._data.at[k].index.intersection(key[i])
        for i, k in enumerate(colkey)
    ]
    return rowkey, colkey, lowdim
rowkey, colkey = self._unpack_key(key)
# handle multi-dim keys
if isinstance(key, tuple):
rowkey, colkey = self._unpack_key(key)
# .aloc[any, ...]
# The ellipsis is meant for dios only to indicate
# that alignment of dios is requested, instead of
# using (and checking) it as boolean dios
if colkey is Ellipsis:
if _is_dios_like(rowkey):
return keys_from_dios_like(rowkey)
if _is_nested_list_like(rowkey):
return keys_from_nested_list(rowkey)
colkey = slice(None)
# multi-dim (var II)
if colkey is Ellipsis:
if _is_dios_like(rowkey):
colkey = self.obj.columns.intersection(rowkey.columns)
rowkey = [self._data.at[c].index.intersection(rowkey[c].index) for c in colkey]
return rowkey, colkey, lowdim
# (I) .aloc[dios] -> defaults to (III)
# (II) .aloc(booldios=False)[dios] or
# (III) .aloc(booldios=True)[dios]
elif _is_dios_like(key):
if self._use_bool_dios:
return keys_from_bool_dios_like(key)
else:
colkey = slice(None)
return keys_from_dios_like(key)
elif _is_nested_list_like(key):
return keys_from_nested_list(key)
# a single row indexer (not multi-dim)
# or just some random crap was given
else:
rowkey, colkey = self._unpack_key(key)
# if we come here no more multi-dim keys are allowed
elif _is_dios_like(rowkey):
raise ValueError("Could not index with multi-dimensional "
"row key, if column key is not Ellipsis.")
elif _is_dios_like(colkey):
raise ValueError("Could not index with multi-dimensional "
"column key.")
# all multi-dim indexer was already handled
if _is_dios_like(rowkey) or _is_nested_list_like(rowkey):
raise ValueError("Could not index with multi-dimensional row key"
", if column key is given and is not Ellipsis.")
elif _is_dios_like(colkey) or _is_nested_list_like(colkey):
raise ValueError("Could not index with multi-dimensional column key.")
# handle gratefully: scalar
if _is_hashable(colkey):
......@@ -356,12 +383,11 @@ class _aLocIndexer(_Indexer):
lowdim = True
# column-alignable: list-like, filter only existing columns
elif _is_list_like_not_nested(colkey) and not _is_bool_indexer(colkey):
elif _is_list_like(colkey) and not _is_bool_indexer(colkey):
colkey = colkey.values if isinstance(colkey, pd.Series) else colkey
colkey = self.obj.columns.intersection(colkey)
# not alignable
# fall back to .loc (boolean list/series, slice(..), ...
# not alignable, fall back to .loc (boolean list/series, slice(..), etc.
else:
colkey = self._data.loc[colkey].index
......@@ -381,11 +407,12 @@ class _aLocIndexer(_Indexer):
# handle gratefully: list-like, filter only existing rows
# NOTE: dios.aloc[series.index] is processed here
elif _is_list_like_not_nested(rowkey) and not _is_bool_indexer(rowkey):
elif _is_list_like(rowkey) and not _is_bool_indexer(rowkey):
rowkey = [self._data.at[c].index.intersection(rowkey) for c in colkey]
# not alignable
# fallback to .loc (processed by caller) - (eg. slice(..), boolean list-like, ...)
# the rowkey is processed by .loc someway in
# the calling function - (eg. slice(..), boolean list-like, etc.)
else:
rowkey = [rowkey] * len(colkey)
......@@ -439,5 +466,3 @@ class _iAtIndexer(_Indexer):
if _is_dios_like(value) or _is_nested_list_like(value):
raise TypeError(".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
self._data.iat[key[1]].iat[key[0]] = value
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment