From 0b51b1770f9f2584ced7b523ac7eab26ca0d54cb Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Sat, 21 Mar 2020 02:28:34 +0100
Subject: [PATCH] some fixes that came up while integrating dios in saqc

---
 __init__.py     |   3 ++
 dios/indexer.py | 114 +++++++++++++++++++++---------------------------
 2 files changed, 52 insertions(+), 65 deletions(-)
 create mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..944c1c9
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python
+
+from .dios import *
diff --git a/dios/indexer.py b/dios/indexer.py
index 983a1e1..f1a4c43 100644
--- a/dios/indexer.py
+++ b/dios/indexer.py
@@ -90,12 +90,9 @@ class _Indexer:
             set_(data, value, iter=True)
 
         else:
-            if _is_list_like(value):
-                if _is_hashable(colkey) or _is_hashable(rowkey):
-                    pass
-                else:
-                    raise ValueError("broadcast array-like to multiple columns "
-                                     "is not allowed, use '.aloc' for that.")
+            if _is_list_like(value) and len(data) > 1:
+                raise ValueError("broadcast array-like to multiple columns "
+                                 "is not allowed, use '.aloc' for that.")
             set_(data, value)
 
 
@@ -223,14 +220,6 @@ class _iLocIndexer(_Indexer):
         if _is_dios_like(rowkey) or _is_dios_like(colkey):
             raise ValueError("Cannot index with multidimensional key")
 
-        def set_(data, value, iter=False):
-            c = "?"
-            try:
-                for i, c in enumerate(data.index):
-                    data.at[c].iloc[rowkey] = value[i] if iter else value
-            except Exception as e:
-                raise type(e)(f"failed for column {c}: " + str(e)) from e
-
         # .iloc[any, int] = Any
         if _is_integer(colkey):
             if _is_dios_like(value) or _is_nested_list_like(value):
@@ -259,6 +248,13 @@ class _aLocIndexer(_Indexer):
         self._usebool = True
 
     def __call__(self, usebool=True):
+        """ Called when the user wants to set `usebool=False`, which makes a
+        boolean alignable indexer be treated as a non-boolean alignable indexer.
+
+        Explanation: A boolean dios indexer aligns its indices with the indices
+        of the receiving dios, just as a non-boolean dios indexer would.
+        Additionally, all rows with False values are dropped. To disable
+        that, `usebool=False` can be given."""
         self._usebool = usebool
         return self
 
@@ -338,8 +334,7 @@ class _aLocIndexer(_Indexer):
 
         def keys_from_bool_dios_like(key):
             if not _is_bool_dios_like(key):
-                raise ValueError("Must pass dios-like key with boolean "
-                                 "values only if passed as single indexer")
+                raise ValueError("Must pass dios-like key with boolean values only.")
             colkey = self.obj.columns.intersection(key.columns)
             rowkey = []
             for c in colkey:
@@ -352,56 +347,30 @@ class _aLocIndexer(_Indexer):
             rowkey = [self._data.at[c].index.intersection(key[c].index) for c in colkey]
             return rowkey, colkey, lowdim
 
-        def keys_from_nested_list(key):
-            key = key.values if isinstance(key, pd.Series) else key
-            if len(key) != len(self.obj.columns):
-                raise ValueError("nested arrays outer length must have same langth than columns.")
-            colkey = self.obj.columns
-            rowkey = []
-            for i, k in colkey:
-                rowkey.append(self._data.at[k].index.intersection(key[i]))
-            return rowkey, colkey, lowdim
+        rowkey, colkey = self._unpack_key(key)
 
-        # handle multi-dim keys
-        if isinstance(key, tuple):
-            rowkey, colkey = self._unpack_key(key)
-            # .aloc[any, ...]
-            # The ellipsis is meant for dios only to indicate
-            # that alignment of dios is requested, instead of
-            # using (and checking) it as boolean dios
-            if colkey is Ellipsis:
-                if _is_dios_like(rowkey):
-                    return keys_from_dios_like(rowkey)
-                if _is_nested_list_like(rowkey):
-                    return keys_from_nested_list(rowkey)
-                colkey = slice(None)
-
-        # (I)   .aloc[dios] -> defaults to (III)
-        # (II)  .aloc(booldios=False)[dios] or
-        # (III) .aloc(booldios=True)[dios]
-        elif _is_dios_like(key):
-            if self._usebool:
-                return keys_from_bool_dios_like(key)
-            else:
-                return keys_from_dios_like(key)
+        if _is_dios_like(colkey) or _is_nested_list_like(colkey):
+            raise ValueError("Could not index with multi-dimensional column key.")
 
-        elif _is_nested_list_like(key):
-            return keys_from_nested_list(key)
+        # Giving the Ellipsis as column key is an alias
+        # for giving `usebool=False`; see self.__call__()
+        if colkey is Ellipsis:
+            self._usebool = False
+            colkey = slice(None)
 
-        # a single row indexer (not multi-dim)
-        # or just some random crap was given
-        else:
-            rowkey, colkey = self._unpack_key(key)
+        # .aloc[dios]
+        if _is_dios_like(rowkey):
 
-        # all multi-dim indexer was already handled
-        if _is_dios_like(rowkey) or _is_nested_list_like(rowkey):
-            raise ValueError("Could not index with multi-dimensional row key"
-                             ", if column key is given and is not Ellipsis.")
-        elif _is_dios_like(colkey) or _is_nested_list_like(colkey):
-            raise ValueError("Could not index with multi-dimensional column key.")
+            if not _is_null_slice(colkey):
+                raise ValueError(f"Could not index with a dios-like indexer as rowkey,"
+                                 f"and a column key of that type {type(colkey)}")
+            if self._usebool:
+                return keys_from_bool_dios_like(rowkey)
+            else:
+                return keys_from_dios_like(rowkey)
 
         # handle gracefully: scalar
-        if _is_hashable(colkey):
+        elif _is_hashable(colkey):
             colkey = [colkey] if colkey in self.obj.columns else []
             lowdim = True
 
@@ -422,12 +391,26 @@ class _aLocIndexer(_Indexer):
         if len(colkey) == 0:  # (!) `if not colkey:` fails for pd.Index
             return [], [], lowdim
 
-        # and now... No.1... the larch...
-        # and now... filter row key
+        rowkey = self._get_rowkey(rowkey, colkey)
+
+        return rowkey, colkey, lowdim
+
+    def _get_rowkey(self, rowkey, colkey, depth=0):
+
+        if _is_nested_list_like(rowkey) and depth == 0:
+            rowkey = rowkey.values if isinstance(rowkey, pd.Series) else rowkey
+            if len(rowkey) != len(colkey):
+                raise ValueError("Nested arrays indexer must have same (outer) "
+                                 "length than the number of selected columns.")
+            indexer = []
+            for i, c in enumerate(colkey):
+                # recurse to get the row indexer from inner element
+                indexer += self._get_rowkey(rowkey[i], [c], depth=depth+1)
+            rowkey = indexer
 
         # row-alignable: pd.Series(), align rows to every series in colkey (columns)
-        if isinstance(rowkey, pd.Series):
-            if _is_bool_indexer(rowkey) and self._usebool:
+        elif isinstance(rowkey, pd.Series):
+            if self._usebool and _is_bool_indexer(rowkey):
                 rowkey = [self._data.at[c].index.intersection(rowkey[rowkey].index) for c in colkey]
             else:
                 rowkey = [self._data.at[c].index.intersection(rowkey.index) for c in colkey]
@@ -447,7 +430,7 @@ class _aLocIndexer(_Indexer):
         else:
             rowkey = [rowkey] * len(colkey)
 
-        return rowkey, colkey, lowdim
+        return rowkey
 
 
 # #############################################################################
@@ -497,3 +480,4 @@ class _iAtIndexer(_Indexer):
         if _is_dios_like(value) or _is_nested_list_like(value):
             raise TypeError(".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
         self._data.iat[key[1]].iat[key[0]] = value
+
-- 
GitLab