From d9b288ab7f2c35c9a30a9924c12973bf2538f3a5 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 18 Mar 2020 18:25:14 +0100
Subject: [PATCH] texttexttex

---
 Readme.md            | 42 ++++++++++++++++-------------
 dios/dios.py         | 64 ++++++++++++++++++++++++++++----------------
 test/test_methods.py |  6 ++---
 3 files changed, 68 insertions(+), 44 deletions(-)

diff --git a/Readme.md b/Readme.md
index 0f99304..7bf6344 100644
--- a/Readme.md
+++ b/Readme.md
@@ -49,6 +49,17 @@ are scalars the stored element itself is returned. In all other cases a dios is
 For more pandas-like indexing magic and the differences between the indexers, 
 see the pandas documentation.
 
+**multi-dimensional indexer**
+
+`dios[boolean dios-like]` (as single key) - a dios accepts a boolean multi-indexer (boolean pd.DataFrame 
+or boolean Dios). Columns and rows of the multi-indexer are aligned with the dios. 
+This means that only matching columns are selected/written; the same applies to rows. 
+Rows or whole columns that are missing in the indexer but present in the Dios are dropped, 
+but empty columns are preserved, so that the resulting Dios always has the same 
+column dimension as the initial Dios. 
+This is similar to how pd.DataFrame handles a multi-indexer, except that pd.DataFrame 
+fills in np.nan at missing locations and columns.
+
 **setting values**
 
 Setting values with `di[]` and `.loc[]`, `.iloc[]` and `.at[]`, `.iat[]` work like in pandas. 
@@ -57,40 +68,35 @@ values can be:
 - *scalars*: these are broadcast to the selected positions
 - *nested lists*: the outer list must match selected columns length, the inner lists lengths must match selected rows.
 - *normal lists* : columns key must be a scalar(!), the list is passed down, and set to the underlying series.
-- *pd.Series*: columns key must be a scalar(!), the series is passed down, and set to the underlying series, 
-where it is aligned. 
+- *pd.Series*: columns key must be a scalar(!), the series is passed down, and set to the underlying series
+in the dios, where both are aligned. 
 
 Examples:
 
 - `dios.loc[2:5, 'a'] = [1,2,3]` is the same as `a=dios['a']; a.loc[2:5]=[1,2,3]`
 - `dios.loc[2:5, :] = 99` : set 99 on rows 2 to 5 on all columns
 
-**multi-dimensional indexing**
-
-`dios[BoolDiosLike]` -  dios accept boolean multi-indexer (boolean pd.Dataframe 
-or boolean Dios). Columns and rows from the multi-indexer align with the dios. 
-This means that only matching columns are selected/written, the same apply for rows. 
-Rows or whole columns that are missing in the indexer, but are present in the Dios are dropped, 
-but empty columns are preserved, with the effect that the resulting Dios always have the same 
-column dimension than the initial Dios. 
-This is a similar behavior to pd.DataFrame handling of multi-indexer, despite that pd.DataFrame 
-fill np.nans at missing locations and columns.
-
 **special indexer `.aloc`**
 
 Additional to the pandas like indexers we have a `.aloc[..]` (align locator) indexing method. 
 Unlike `.iloc` and `.loc` indexers and/or values fully align if possible and 1D-array-likes
 can be broadcast to multiple columns at once. Also this method handle missing indexer-items gratefully. 
+It is used like `.loc`, so a single row-indexer (`.aloc[row-indexer]`) or a tuple of row-indexer and 
+column-indexer (`.aloc[row-indexer, column-indexer]`) can be given.
 
 *Alignable indexer* are:
 - `.aloc[pd.Series]` : only common indices are used in each column
-- `.aloc[bool-dios]` (as single key) : only matching columns and matching indices are used
+- `.aloc[boolean dios-like]` (as single key) : works the same as `di[boolean dios-like]` (see above)
+
+```
+only matching columns and matching indices are used
 if the value is `True` (Values that are `False` are dropped and handled as they would be missing)
-In contrast to `di[BoolDiosLike]` (see above), missing rows are **not** filled with nan's, instead 
-they are dropped on selection operations and ignored on setting operations. Nevertheless empty columns
-are still preserved.
-- `.aloc[dios, ...]` (dios-like, **Ellipsis**) : "`...`" is not a placeholder, it refer to the ellipsis object. 
+In contrast to *normal* indexing with `di[boolean dios-like]` (see above), missing rows are **not** 
+filled with NaNs; instead they are dropped on selection operations and ignored on setting operations.
+Nevertheless, empty columns are still preserved.
+- `.aloc[dios-like, ...]` (dios-like, **Ellipsis**) : "`...`" is not a placeholder, it refers to the Ellipsis object. 
 Full align -> use only matching columns and indices. Alternatively, `.aloc(booldios=False)[dios]` can be used. 
+```
 
 *Indexer* that are handled grateful:
  - `.aloc[list]` (lists or any iterable obj) : only present labels/positions are used
diff --git a/dios/dios.py b/dios/dios.py
index f999db6..3cdf86a 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -263,10 +263,16 @@ class DictOfSeries:
         key = list(key) if _is_iterator(key) else key
         if isinstance(key, tuple):
             raise KeyError("tuples are not allowed")
-        elif _is_hashable(key):
+
+        if _is_hashable(key):
+            # NOTE: we use copy here to prevent index
+            # changes, that could result in an invalid
+            # itype. A shallow copy is not sufficient.
+
             # work on columns, return series
-            return self._data.at[key]
-        elif _is_dios_like(key):
+            return self._data.at[key].copy()
+
+        if _is_dios_like(key):
             # work on rows and columns
             new = self._getitem_bool_dios(key)
         elif isinstance(key, slice):
@@ -279,6 +285,7 @@ class DictOfSeries:
             # work on columns
             data = self._data.loc[key]
             new = DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
+
         return new
 
     def _slice(self, key):
@@ -292,27 +299,18 @@ class DictOfSeries:
         return new
 
     def _getitem_bool_dios(self, key):
-        """ Select items by a boolean dios-like drop un-selected indices.
-
-        todo: Desired behaivior: fill nan's at un-selected indices. but with this
-         we cannot set values properly (in the current implementation), because
-         we use __getitem__ in __setitem__ and cannot decide where the nans come from
-         i) from data or ii) from prior indexing :/"""
+        """ Select items by a boolean dios-like drop un-selected indices. """
 
         new = self.copy_empty(columns=True)
 
-        for k in self.columns:
+        for k in self.columns.intersection(key.columns):
             dat = self._data.at[k]
-
-            if k in key.columns:
-                val = key[k]
-                if not _is_bool_indexer(val):
-                    raise ValueError("Must pass DictOfSeries with boolean values only")
-                # align rows
-                idx = val[val].index.intersection(dat.index)
-                new._data.at[k] = dat[idx]
-            else:
-                new._insert(k, pd.Series(dtype='O'))
+            val = key[k]
+            if not _is_bool_indexer(val):
+                raise ValueError("Must pass DictOfSeries with boolean values only")
+            # align rows
+            idx = val[val].index.intersection(dat.index)
+            new._data.at[k] = dat[idx]
 
         return new
 
@@ -341,10 +339,15 @@ class DictOfSeries:
             assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"
 
             # special cases
+            # (I)   Dios (ok)
+            # (II)  list-like (fail)
+            # (III) nested lists-like (ok)
+            # NOTE: pd.Series are considered list-like and also fail
+            # if they don't hold list-like objects
             if _is_dios_like(value):
                 self._setitem_dios(data, value)
             elif _is_list_like(value):
-                self._setitem_listlike(data, value)
+                self._setitem_nested_listlike(data, value)
 
             # default case
             else:
@@ -353,7 +356,9 @@ class DictOfSeries:
                     s[:] = value
                     self._data.at[k][s.index] = s
 
-    def _setitem_listlike(self, data, value):
+    def _setitem_nested_listlike(self, data, value):
+        # nested series, eg. `pd.Series([[1,2], [4,4]], dtype='O')`
+        value = value.values if isinstance(value, pd.Series) else value
         if not _is_nested_list_like(value):
             raise ValueError(f"1D array-like value could not be broadcast to "
                              f"indexing result of shape (.., {len(data.columns)})")
@@ -371,10 +376,20 @@ class DictOfSeries:
     def _setitem_dios(self, data, value):
         """ Write values from a dios-like to self.
 
-        No justification or alignment on columns, but on indices.
+        No justification or alignment of columns, but of indices.
         If value has missing indices, nan's are inserted at that
         locations, just like `series.loc[:]=val` or `df[:]=val` do.
 
+        Eg.
+        di1[::2] = di[::3]   ->   di[::2]
+
+            x |        x |            x |
+        ===== |     ==== |       ====== |
+        0   x |     0  z |       0    z |
+        2   x |  =  3  z |   ->  2  NaN |
+        4   x |     6  z |       4  NaN |
+        6   x |                  6    z |
+
         Parameter
         ----------
         data : dios
@@ -391,6 +406,9 @@ class DictOfSeries:
 
         for i, k in enumerate(data):
             dat = data._data.at[k]
+            # .loc cannot handle empty series
+            if dat.empty:
+                continue
             val = value[value.columns[i]]
             dat.loc[:] = val
             self._data.at[k].loc[dat.index] = dat
diff --git a/test/test_methods.py b/test/test_methods.py
index 0cb25a6..840909c 100644
--- a/test/test_methods.py
+++ b/test/test_methods.py
@@ -25,9 +25,9 @@ def test_copy_copy_empty(dios_aligned):
     assert not di.columns.equals(empty_no_cols.columns)
 
     for i in di:
-        assert di[i].index is shallow[i].index
-        assert di[i].index is not deep[i].index
-        di[i][0] = 999999
+        assert di._data[i].index is shallow._data[i].index
+        assert di._data[i].index is not deep._data[i].index
+        di._data[i][0] = 999999
         assert di[i][0] == shallow[i][0]
         assert di[i][0] != deep[i][0]
 
-- 
GitLab