From 0b51b1770f9f2584ced7b523ac7eab26ca0d54cb Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Sat, 21 Mar 2020 02:28:34 +0100 Subject: [PATCH] some fixes, that came across during integrating dios in saqc --- __init__.py | 3 ++ dios/indexer.py | 114 +++++++++++++++++++++--------------------------- 2 files changed, 52 insertions(+), 65 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..944c1c9 --- /dev/null +++ b/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python + +from .dios import * diff --git a/dios/indexer.py b/dios/indexer.py index 983a1e1..f1a4c43 100644 --- a/dios/indexer.py +++ b/dios/indexer.py @@ -90,12 +90,9 @@ class _Indexer: set_(data, value, iter=True) else: - if _is_list_like(value): - if _is_hashable(colkey) or _is_hashable(rowkey): - pass - else: - raise ValueError("broadcast array-like to multiple columns " - "is not allowed, use '.aloc' for that.") + if _is_list_like(value) and len(data) > 1: + raise ValueError("broadcast array-like to multiple columns " + "is not allowed, use '.aloc' for that.") set_(data, value) @@ -223,14 +220,6 @@ class _iLocIndexer(_Indexer): if _is_dios_like(rowkey) or _is_dios_like(colkey): raise ValueError("Cannot index with multidimensional key") - def set_(data, value, iter=False): - c = "?" - try: - for i, c in enumerate(data.index): - data.at[c].iloc[rowkey] = value[i] if iter else value - except Exception as e: - raise type(e)(f"failed for column {c}: " + str(e)) from e - # .iloc[any, int] = Any if _is_integer(colkey): if _is_dios_like(value) or _is_nested_list_like(value): @@ -259,6 +248,13 @@ class _aLocIndexer(_Indexer): self._usebool = True def __call__(self, usebool=True): + """ We are called if the user want to set `usebool=False', which make + boolean alignable indexer treat as non-boolean alignable indexer. 
+ + Explanation: A boolean dios indexer aligns its indices with the indices + of the receiving dios, just as a non-boolean dios indexer would. + Additionally, all rows with False values are dropped. To disable + that, `usebool=False` can be given.""" self._usebool = usebool return self @@ -338,8 +334,7 @@ class _aLocIndexer(_Indexer): def keys_from_bool_dios_like(key): if not _is_bool_dios_like(key): - raise ValueError("Must pass dios-like key with boolean " - "values only if passed as single indexer") + raise ValueError("Must pass dios-like key with boolean values only.") colkey = self.obj.columns.intersection(key.columns) rowkey = [] for c in colkey: @@ -352,56 +347,30 @@ class _aLocIndexer(_Indexer): rowkey = [self._data.at[c].index.intersection(key[c].index) for c in colkey] return rowkey, colkey, lowdim - def keys_from_nested_list(key): - key = key.values if isinstance(key, pd.Series) else key - if len(key) != len(self.obj.columns): - raise ValueError("nested arrays outer length must have same langth than columns.") - colkey = self.obj.columns - rowkey = [] - for i, k in colkey: - rowkey.append(self._data.at[k].index.intersection(key[i])) - return rowkey, colkey, lowdim + rowkey, colkey = self._unpack_key(key) - # handle multi-dim keys - if isinstance(key, tuple): - rowkey, colkey = self._unpack_key(key) - # .aloc[any, ...] 
- # The ellipsis is meant for dios only to indicate - # that alignment of dios is requested, instead of - # using (and checking) it as boolean dios - if colkey is Ellipsis: - if _is_dios_like(rowkey): - return keys_from_dios_like(rowkey) - if _is_nested_list_like(rowkey): - return keys_from_nested_list(rowkey) - colkey = slice(None) - - # (I) .aloc[dios] -> defaults to (III) - # (II) .aloc(booldios=False)[dios] or - # (III) .aloc(booldios=True)[dios] - elif _is_dios_like(key): - if self._usebool: - return keys_from_bool_dios_like(key) - else: - return keys_from_dios_like(key) + if _is_dios_like(colkey) or _is_nested_list_like(colkey): + raise ValueError("Could not index with multi-dimensional column key.") - elif _is_nested_list_like(key): - return keys_from_nested_list(key) + # giving the ellipsis as column key, is an alias + # for giving `usebool=False`. see self.__call__() + if colkey is Ellipsis: + self._usebool = False + colkey = slice(None) - # a single row indexer (not multi-dim) - # or just some random crap was given - else: - rowkey, colkey = self._unpack_key(key) + # .aloc[dios] + if _is_dios_like(rowkey): - # all multi-dim indexer was already handled - if _is_dios_like(rowkey) or _is_nested_list_like(rowkey): - raise ValueError("Could not index with multi-dimensional row key" - ", if column key is given and is not Ellipsis.") - elif _is_dios_like(colkey) or _is_nested_list_like(colkey): - raise ValueError("Could not index with multi-dimensional column key.") + if not _is_null_slice(colkey): + raise ValueError(f"Could not index with a dios-like indexer as rowkey," + f"and a column key of that type {type(colkey)}") + if self._usebool: + return keys_from_bool_dios_like(rowkey) + else: + return keys_from_dios_like(rowkey) # handle gracefully: scalar - if _is_hashable(colkey): + elif _is_hashable(colkey): colkey = [colkey] if colkey in self.obj.columns else [] lowdim = True @@ -422,12 +391,26 @@ class _aLocIndexer(_Indexer): if len(colkey) == 0: # (!) 
`if not colkey:` fails for pd.Index return [], [], lowdim - # and now... No.1... the larch... - # and now... filter row key + rowkey = self._get_rowkey(rowkey, colkey) + + return rowkey, colkey, lowdim + + def _get_rowkey(self, rowkey, colkey, depth=0): + + if _is_nested_list_like(rowkey) and depth == 0: + rowkey = rowkey.values if isinstance(rowkey, pd.Series) else rowkey + if len(rowkey) != len(colkey): + raise ValueError("Nested arrays indexer must have same (outer) " + "length than the number of selected columns.") + indexer = [] + for i, c in enumerate(colkey): + # recurse to get the row indexer from inner element + indexer += self._get_rowkey(rowkey[i], [c], depth=depth+1) + rowkey = indexer # row-alignable: pd.Series(), align rows to every series in colkey (columns) - if isinstance(rowkey, pd.Series): - if _is_bool_indexer(rowkey) and self._usebool: + elif isinstance(rowkey, pd.Series): + if self._usebool and _is_bool_indexer(rowkey): rowkey = [self._data.at[c].index.intersection(rowkey[rowkey].index) for c in colkey] else: rowkey = [self._data.at[c].index.intersection(rowkey.index) for c in colkey] @@ -447,7 +430,7 @@ class _aLocIndexer(_Indexer): else: rowkey = [rowkey] * len(colkey) - return rowkey, colkey, lowdim + return rowkey # ############################################################################# @@ -497,3 +480,4 @@ class _iAtIndexer(_Indexer): if _is_dios_like(value) or _is_nested_list_like(value): raise TypeError(".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.") self._data.iat[key[1]].iat[key[0]] = value + -- GitLab