iloc done

c1c8dea7 · Bert Palm · 95f0ba0b · c1c8dea7
Commit c1c8dea7 authored 5 years ago by Bert Palm 🎇
--- a/dios/locator.py
+++ b/dios/locator.py
@@ -8,6 +8,9 @@ class _Indexer:
        self._data = _dios._data


+# #############################################################################
+
+
 class _LocIndexer(_Indexer):

    def __init__(self, *args, **kwargs):
@@ -23,6 +26,8 @@ class _LocIndexer(_Indexer):

        if isinstance(rowkey, tuple):
            raise KeyError(f"{key}. tuples are not allowed.")
+        if is_dios_like(rowkey) or is_dios_like(colkey):
+            raise ValueError("Cannot index with multidimensional key")

        data = self._dios[colkey]

@@ -34,11 +39,15 @@ class _LocIndexer(_Indexer):
        elif isinstance(data, pd.Series):
            new = data.loc[rowkey]

+        # .loc[any, non-scalar]
        elif isinstance(data, self._dios.__class__):
-            # .loc[scalar, any]
+
+            # .loc[scalar, non-scalar]
            if is_hashable(rowkey):
+                # we do not overwrite data directly, so that we may get
+                # a better fitting series dtype
                new = pd.Series(index=type(data.columns)([]))
-                for k in data.columns:
+                for k in data.index:
                    s = data._data.at[k]
                    new.at[k] = s.loc[rowkey]

@@ -63,7 +72,7 @@ class _LocIndexer(_Indexer):
            if isinstance(rowkey, tuple):
                raise KeyError(f"{key}. tuples are not allowed.")

-            # .loc[-,new-scalar] = val
+            # .loc[dont-care, new-scalar] = val
            # if a unknown colkey was given, we insert it and ignore rowkey
            if is_hashable(colkey) and colkey not in self._dios.columns:
                self._dios._insert(colkey, value)
@@ -77,23 +86,27 @@ class _LocIndexer(_Indexer):
        if is_dios_like(value) or is_nested_list_like(value):
            raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")

-        # .loc[scalar, any] or .loc[any, scalar]
-        if isinstance(data, pd.Series):
+        # .loc[scalar, any]
+        if is_hashable(rowkey):
+
+            # .loc[scalar, scalar]
+            if is_hashable(colkey):
+                s = self._data.at[colkey]
+                s.at[rowkey] = value
+                self._data.at[colkey] = s
+
            # .loc[scalar, non-scalar] - column-labeled series
-            if is_hashable(rowkey):
+            else:
                data.loc[:] = value
                for k in data.index:
-                    s = data._data.at[k]
+                    s = self._data.at[k]
                    s.at[rowkey] = data.at[k]
                    self._data.at[k] = s

-            # .loc[non-scalar, scalar] - (normal) row-labeled series
-            elif is_hashable(colkey):
-                data.loc[rowkey] = value
-                self._data.at[colkey] = data
-
-            else:
-                raise AssertionError(f"getitem returned data of type {type(data)}")
+        # .loc[non-scalar, scalar] - single row-labeled series
+        elif is_hashable(colkey):
+            data.loc[rowkey] = value
+            self._data.at[colkey] = data

        # .loc[non-scalar, non-scalar]
        elif isinstance(data, self._dios.__class__):
@@ -102,11 +115,11 @@ class _LocIndexer(_Indexer):
                s.loc[rowkey] = value
                self._data.at[k] = s

-        # .loc[scalar, scalar]
        else:
-            s = self._data.at[colkey]
-            s.at[rowkey] = value
-            self._data.at[colkey] = s
+            raise AssertionError(f"getitem returned data of type {type(data)}")
+
+
+# #############################################################################


 class _iLocIndexer(_Indexer):
@@ -114,42 +127,104 @@ class _iLocIndexer(_Indexer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

-    def _set_item(self, key, ix, right, ixalign=False):
-        # we do use loc instead of iloc as we get real keys.
-        # this works, because keys keep sorted if they come
-        # from an series-index (doesn't work with df-index)
-        ser = self._data.loc[key]
-        if ixalign:
-            ix = ser.index.intersection(ix.index)
-        if isinstance(right, pd.Series):
-            left = ser[ix]
-            right, ix = align_index_by_policy(left, right)
-        ser.iloc[ix] = right
-        self._data.loc[key] = ser
-
-    def _get_item(self, key, ix, ixalign=False):
-        ser = self._data.loc[key]
-        if ixalign:
-            ix = ser.index.intersection(ix.index)
-        return ser.iloc[ix]
-
-    def _unpack_key(self, key):
-        lowdim = False
+    def __getitem__(self, key):
        if isinstance(key, tuple):
-            key, ckey, *fail = key
-            if fail:
+            if len(key) > 2:
                raise KeyError("To many indexers")
-            if is_dios_like(ckey):
-                raise ValueError("Cannot index with multi-dimensional key")
-            if is_bool_series(ckey):
-                keys = ckey.where(ckey).dropna().index.to_list()
+            rowkey, colkey = key
+        else:
+            rowkey, colkey = key, slice(None)
+
+        if isinstance(rowkey, tuple):
+            raise KeyError(f"{key}. tuples are not allowed.")
+        if is_dios_like(rowkey) or is_dios_like(colkey):
+            raise ValueError("Cannot index with multidimensional key")
+
+        if is_bool_indexer(colkey):
+            data = self._dios[colkey]._data
+        else:
+            data = self._data.iloc[colkey]
+
+        # in any case data is a series now,
+        # either a column-indexed series of series,
+        # or a simple single row-indexed series (of values)
+        if isinstance(data, pd.Series):
+
+            # .iloc[any, int]   - got a single row indexed series
+            if is_integer(colkey):
+                new = data.iloc[rowkey]
+
+            # .iloc[int, any]
+            if is_integer(colkey):
+                # we do not overwrite data directly, so that we may get
+                # a better fitting series dtype
+                new = pd.Series(index=type(data.index)([]))
+                for k in data.index:
+                    s = data.at[k]
+                    new.at[k] = s.iloc[rowkey]
+
+            # .iloc[:, any] - simple low-cost optimization
+            elif isinstance(rowkey, slice) and rowkey == slice(None):
+                new = self._dios.copy_empty()
+                new._data = data.copy()
+
+            # .iloc[non-int, non-int]
+            else:
+                new = self._dios.copy_empty()
+                for k in data.index:
+                    new._data.at[k] = data.at[k].iloc[rowkey]
+        else:
+            raise AssertionError(f"getitem returned data of type {type(data)}")
+
+        return new
+
+    def __setitem__(self, key, value):
+        if isinstance(key, tuple):
+            rowkey, colkey = key
+        else:
+            rowkey, colkey = key, slice(None)
+
+        # get .iloc[any,any] - we use key(!) here
+        data = self.__getitem__(key)
+
+        if is_dios_like(value) or is_nested_list_like(value):
+            raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
+
+        # .iloc[scalar, any]
+        if is_integer(rowkey):
+
+            # .iloc[scalar, scalar]
+            if is_integer(colkey):
+                s = self._data.iat[colkey]
+                s.iat[rowkey] = value
+                self._data.iat[colkey] = s
+
+            # .iloc[scalar, non-scalar] - column-labeled series
            else:
-                if is_integer(ckey):
-                    ckey, lowdim = [ckey], True
-                keys = self._data.iloc[ckey].index.to_list()
+                data.iloc[:] = value
+                for k in data.index:
+                    s = self._data.at[k]
+                    s.iat[rowkey] = data.at[k]
+                    self._data.at[k] = s
+
+        # .iloc[non-scalar, scalar] - single row-labeled series
+        elif is_integer(colkey):
+            data.iloc[rowkey] = value
+            self._data.iat[colkey] = data
+
+        # .iloc[non-scalar, non-scalar]
+        elif isinstance(data, self._dios.__class__):
+            for k in data.columns:
+                s = data._data.at[k]
+                s.iloc[rowkey] = value
+                self._data.at[k] = s
+
        else:
-            keys = self._columns.to_list()
-        return keys, key, lowdim
+            raise AssertionError(f"getitem returned data of type {type(data)}")
+
+
+
+# #############################################################################


 class _aLocIndexer(_Indexer):
@@ -166,6 +241,9 @@ class _aLocIndexer(_Indexer):
        return item


+# #############################################################################
+
+
 class _AtIndexer(_Indexer):

    def __init__(self, *args, **kwargs):
@@ -190,6 +268,9 @@ class _AtIndexer(_Indexer):
        self._data.at[key[1]] = s


+# #############################################################################
+
+
 class _iAtIndexer(_Indexer):

    def __init__(self, *args, **kwargs):
@@ -214,6 +295,9 @@ class _iAtIndexer(_Indexer):
        self._data.iat[key[1]] = s


+# #############################################################################
+
+
 def _unpack_value(keys, ix, val):
    """Return a generator that yield (column key, corresponding value, value-align(bool) )
    for all columns.