From 9c5413d396b9812de516d5b24b32fc23a6b5ed0c Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 12 Feb 2020 14:49:10 +0100
Subject: [PATCH] added .iloc

---
 dios/__init__.py  |   3 --
 dios/dios.py      | 119 +++++++++++++++++++++++++++++++++++++++-------
 tests/run_dios.py |   4 +-
 3 files changed, 105 insertions(+), 21 deletions(-)

diff --git a/dios/__init__.py b/dios/__init__.py
index 8875553..2866f42 100644
--- a/dios/__init__.py
+++ b/dios/__init__.py
@@ -1,6 +1,3 @@
 
 from dios.dios import *
 
-from dios.profiling.generate_testsets import get_testset, var_prefix
-
-from dios.profiling.performance import gen_random_timestamps, find_index_range
diff --git a/dios/dios.py b/dios/dios.py
index c41bc33..85d773c 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -8,14 +8,22 @@ import datetime as dt
 from collections import OrderedDict
 from pandas.core.dtypes.common import (
     is_list_like,
-    is_iterator,
     is_scalar,
-    is_number,
     is_integer,
 )
+from pandas.core.dtypes.common import is_iterator as _is_iterator
 from pandas.core.indexing import need_slice
 
 
+def is_iterator(obj):
+    """ Thin wrapper around pandas' `is_iterator`, kept only to warn that its documentation is wrong.
+    Unlike the example there says,
+    >>is_iterator([1, 2, 3])
+    returns False, not True, for lists.
+    """
+    return _is_iterator(obj)
+
+
 class DictOfSeries:
     """
     DictionaryOfSeries is a collection of pd.Series's which aim to be as close as possible similar to
@@ -198,6 +206,9 @@ class DictOfSeries:
         if isinstance(value, DictOfSeries):
             self._setitem_dios(keys, slicer, value)
         else:
+            if is_iterator(value):
+                value = list(value)
+
             for k in keys:
                 self._setitem(k, value, slicer)
 
@@ -228,18 +239,19 @@ class DictOfSeries:
             return
 
         sl = sl or slice(None)
+        item = self._data[key]
 
         # label, scalar: dios['a'] = 3.9 or
         # slice, scalar: dios[0:3] = 4.0
         if is_scalar(val):
-            self._data[key][sl] = val
+            item[sl] = val
 
         # label,  list: dios['a'] = [0.0, 0.3, 0.0]
         # sclice, list: dios[0:3] = [0.0, 0.3, 0.0]
         elif is_list_like(val):
-            # ensure same size
-            if len(self._data[key]) == len(val):
-                self._data[key][sl] = val
+            # ensure same size  # FIXME: is this necessary? Wouldn't pd.Series raise a ValueError anyway?
+            if len(item[sl]) == len(val):
+                item[sl] = val
             else:
                 raise ValueError(f'Length of values does not match length of index for the key {key}')
         else:
@@ -255,6 +267,10 @@ class DictOfSeries:
     def loc(self):
         return _LocIndexer(self)
 
+    @property
+    def iloc(self):
+        return _iLocIndexer(self)
+
     def __str__(self):
         return self.__repr__()
 
@@ -496,17 +512,38 @@ class DictOfSeries:
         return news.squeeze()
 
 
-class _LocIndexer:
-
+class _Indexer:
     def __init__(self, _dios):
         self._dios = _dios
         # short handles
         self._data = _dios._data
+
+
+class _LocIndexer(_Indexer):
+
+    def __init__(self, _dios):
+        super().__init__(_dios)
         self._check_keys = _dios._check_keys
 
     def __getitem__(self, key):
-        # if we have a tuple, we have rows and columns
-        # if not we have only rows and work on all columns
+        rkey, cols = self._unpack_key(key)
+        new = DictOfSeries()
+        for c in cols:
+            new[c] = self._data[c].loc[rkey]
+        return new
+
+    def __setitem__(self, key, value):
+        rkey, cols = self._unpack_key(key)
+        # todo: dios -> dios_to_dios, -> series
+        #       scalar, -> automatically
+        #       series, -> automatically
+        #       list_like -> check length
+        for c in cols:
+            self._data[c].loc[rkey] = value
+
+    def _unpack_key(self, key):
+        # if we have a tuple, we have a rows- and a column-indexer
+        # if not, we only have a row-indexer and work on all columns
         if isinstance(key, tuple):
             rkey, ckey, *fail = key
             if fail:
@@ -530,12 +567,7 @@ class _LocIndexer:
         else:
             cols = self._data.keys()
             rkey = key
-
-        # pass the row-key directly to pd.Series.loc[row-key]
-        new = DictOfSeries()
-        for c in cols:
-            new[c] = self._data[c].loc[rkey]
-        return new
+        return rkey, cols
 
     def _col_slice_to_col_list(self, cslice):
         """ see here:
@@ -552,3 +584,58 @@ class _LocIndexer:
         return keys[slice(start, stop + 1, cslice.step)]
 
 
+class _iLocIndexer(_Indexer):
+    """ Purely positional (integer based) indexer, handed out by `DictOfSeries.iloc`. """
+
+    def __getitem__(self, key):
+        """ Return a new DictOfSeries with `series.iloc[rkey]` of every selected column. """
+        rkey, cols = self._unpack_key(key)
+        new = DictOfSeries()
+        for c in cols:
+            new[c] = self._data[c].iloc[rkey]
+        return new
+
+    def _unpack_key(self, key):
+        """ Split `key` into (row-indexer, column names); a non-tuple key selects all columns. """
+        if isinstance(key, tuple):
+            rkey, ckey, *fail = key
+            if fail:
+                raise KeyError("To many indexers")
+            # materialize one-shot iterators, as ckey is walked more than once below
+            if is_iterator(ckey):
+                ckey = list(ckey)
+
+            # determine columns
+            if is_integer(ckey):
+                self._check_keys([ckey])
+                cols = self._integers_to_col_list([ckey])
+            elif isinstance(ckey, slice):
+                cols = self._col_slice_to_col_list(ckey)
+            elif is_list_like(ckey):
+                self._check_keys(ckey)
+                cols = self._integers_to_col_list(ckey)
+            else:
+                raise KeyError(f"Type {type(ckey)} is not supported for indexing on columns.")
+        else:
+            cols = self._data.keys()
+            rkey = key
+        return rkey, cols
+
+    def _check_keys(self, keys):
+        """ Raise a KeyError if a position in `keys` lies outside of the available columns. """
+        bound = len(self._data)
+        for k in keys:
+            if k not in range(-bound, bound):
+                raise KeyError("positional indexer(s) are out-of-bounds in columns")
+
+    def _integers_to_col_list(self, ints):
+        """ Map positions to column names: duplicates dropped, requested order kept. """
+        klist = list(self._data.keys())
+        return list(OrderedDict.fromkeys(klist[i] for i in ints))
+
+    def _col_slice_to_col_list(self, sl):
+        """ Map a positional slice to column names; open (None) bounds/step are allowed. """
+        for s in [sl.start, sl.stop, sl.step]:
+            if s is not None and not is_integer(s):
+                raise TypeError(f"positional indexing with slice must be integers, passed was {s} of {type(s)}")
+        return list(self._data.keys())[sl]
diff --git a/tests/run_dios.py b/tests/run_dios.py
index 9cac320..83fa9be 100644
--- a/tests/run_dios.py
+++ b/tests/run_dios.py
@@ -9,8 +9,8 @@ if __name__ == '__main__':
     dios_options[Options.allow_mixed_indextypes] = True
 
     a = dios.loc[:]
-    # df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
-    # a = df.loc["2000-01-02":]
+    df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
+    a = df.iloc[:,0]
     print(a)
     exit(4)
 
-- 
GitLab