Commit 38a9b89f authored by Bert Palm 🎇

hardcore reduce

parent 1bc35760
Merge request: !2 Develop
@@ -190,7 +190,7 @@ class DictOfSeries:
else:
# work on columns
new = self.copy_empty()
new._data = self._data.loc[key]
new._data = self._data[key]
return new
def _slice(self, key):
@@ -200,10 +200,7 @@ class DictOfSeries:
new = self.copy_empty()
for k in self.columns:
# we cannot use loc here, because s.loc[:4]
# is inclusive, whereas s[:4] isn't :(
new._data.at[k] = self._data.at[k][key]
return new
def _getitem_bool_dios(self, key):
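The comment retained in this hunk points at a real pandas subtlety: label-based slicing via .loc is end-inclusive, while positional slicing is not. A minimal standalone illustration (plain pandas, not part of this commit):

import pandas as pd

s = pd.Series(range(6))    # default RangeIndex 0..5
print(len(s.loc[:4]))      # 5 -> label slice, end-INCLUSIVE (label 4 kept)
print(len(s.iloc[:4]))     # 4 -> positional slice, end-exclusive
# s[:4] has historically behaved like the positional variant, which is
# exactly what the comment above warns about.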
@@ -225,8 +222,7 @@ class DictOfSeries:
def _getitem_bool_listlike(self, key):
new = self.copy_empty()
for k in self.columns:
ser = self._data.at[k]
new._data.at[k] = ser.loc[key]
new._data.at[k] = self._data.at[k].loc[key]
return new
def __setitem__(self, key, value):
@@ -235,25 +231,20 @@ class DictOfSeries:
if isinstance(key, tuple):
raise KeyError(f"{key}. tuples are not allowed")
elif is_hashable(key) and key not in self.columns:
self._insert(key, value)
return
data = self.__getitem__(key)
elif is_hashable(key):
if isinstance(value, pd.Series):
self._insert(key, value)
else:
self._data.at[key].loc[:] = value
if isinstance(data, pd.Series):
# key must be a scalar
assert is_hashable(key)
data.loc[:] = value
self._data.at[key] = data
else:
data = self.__getitem__(key)
assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"
elif isinstance(data, self.__class__):
for k in data.columns:
s = data._data.at[k]
s.loc[:] = value
self._data.at[k] = s
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
s[:] = value
self._data.at[k].loc[s.index] = s
@property
def loc(self):
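The __setitem__ branch above fills the sub-selection returned by __getitem__ and then writes it back by index labels. A small standalone sketch of that write-back pattern (variable names are illustrative, not from the library):

import pandas as pd

col = pd.Series([10, 20, 30, 40], index=list("abcd"))
sub = col.loc[["b", "d"]].copy()   # stand-in for what __getitem__ hands back
sub[:] = 99                        # broadcast the new value onto the selection
col.loc[sub.index] = sub           # aligned write-back into the stored column
# col is now: a=10, b=99, c=30, d=99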
@@ -337,7 +328,6 @@ class DictOfSeries:
return item in self.columns.copy()
def __delitem__(self, key):
# is safe w.r.t. the 'indexing bug', see hacking.md
del self._data[key]
def __copy__(self):
@@ -477,14 +467,17 @@ class DictOfSeries:
def pprint(dios, max_rows=10, max_cols=2, delim=' '):
sstr = []
cols = list(dios.columns)
if dios.empty:
return "Empty DictionaryOfSeries"
sstr = []
cols = list(dios.columns)
for c in dios.columns:
sstr.append(dios[c].to_string(max_rows=max_rows).split('\n'))
if dios[c].empty:
sstr.append(['no data'])
else:
sstr.append(dios[c].to_string(max_rows=max_rows).split('\n'))
maxlen = max([len(x) for x in sstr])
......
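The pprint hunk renders each column with Series.to_string() and substitutes 'no data' for empty columns. How those per-column line blocks are combined lies outside this hunk; the following only sketches the general idea, with made-up data:

blocks = [
    ["col_a", "0    1.0", "1    2.0"],   # a column rendered by Series.to_string()
    ["no data"],                         # an empty column, per the new branch above
]
maxlen = max(len(b) for b in blocks)
padded = [b + [""] * (maxlen - len(b)) for b in blocks]
for row in zip(*padded):
    print("  ".join(f"{cell:<10}" for cell in row))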
@@ -7,16 +7,8 @@ class _Indexer:
self._dios = _dios
self._data = _dios._data
def _unpack_key(self, key):
# #############################################################################
class _LocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
@@ -29,96 +21,54 @@ class _LocIndexer(_Indexer):
if is_dios_like(rowkey) or is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
return rowkey, colkey
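_unpack_key normalizes whatever lands in loc/iloc into a (rowkey, colkey) pair. A standalone sketch of that normalization (not the library code, just the same idea):

def unpack(key):
    if isinstance(key, tuple):
        if len(key) > 2:
            raise KeyError("Too many indexers")
        rowkey, colkey = key
    else:
        rowkey, colkey = key, slice(None)   # a bare key addresses rows in all columns
    return rowkey, colkey

assert unpack("x") == ("x", slice(None))
assert unpack((slice(None), "a")) == (slice(None), "a")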
# #############################################################################
class _LocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
rowkey, colkey = self._unpack_key(key)
data = self._data.loc[colkey]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if isinstance(data, pd.Series):
# .loc[any, scalar] - got a single row indexed series
if is_hashable(colkey):
new = data.loc[rowkey]
# .loc[scalar, any]
elif is_hashable(rowkey):
# we do not overwrite data directly, so we may get
# a better-fitting series dtype
new = pd.Series(index=type(data.index)([]))
for k in data.index:
s = data.at[k]
new.at[k] = s.loc[rowkey]
# .iloc[:, any] - simple low-cost optimization
elif isinstance(rowkey, slice) and rowkey == slice(None):
new = self._dios.copy_empty()
new._data = data.copy()
# .loc[any, scalar]
if is_hashable(colkey):
new = data.loc[rowkey]
# .loc[non-scalar, non-scalar]
# .loc[any, non-scalar]
else:
for k in data.index:
data.at[k] = data.at[k].loc[rowkey]
if is_hashable(rowkey):
new = data
else:
new = self._dios.copy_empty()
for k in data.index:
new._data.at[k] = data.at[k].loc[rowkey]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
new._data = data
return new
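The getitem path above branches on whether self._data.loc[colkey] yields a single column or the series-of-series holder, and then applies the rowkey per column. A standalone sketch of that dispatch in plain pandas (holder is an illustrative stand-in for the internal self._data):

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3]), "b": pd.Series([4, 5, 6])})
single = holder.loc["a"]                             # scalar colkey -> one pd.Series
sub = holder.loc[["a", "b"]]                         # list colkey   -> Series of Series
rows = {k: sub.at[k].loc[0:1] for k in sub.index}    # rowkey applied column by column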
def __setitem__(self, key, value):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
rowkey, colkey = key
if isinstance(rowkey, tuple):
raise KeyError(f"{key}. tuples are not allowed.")
rowkey, colkey = self._unpack_key(key)
# .loc[any, scalar]
if is_hashable(colkey):
# .loc[don't-care, new-scalar] = val
# if an unknown colkey is given, we insert it and ignore the rowkey
if is_hashable(colkey) and colkey not in self._dios.columns:
if colkey not in self._dios.columns:
self._dios._insert(colkey, value)
return
else:
rowkey, colkey = key, slice(None)
# get .loc[any,any] - we use key(!) here
data = self.__getitem__(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
# .loc[scalar, any]
if is_hashable(rowkey):
# .loc[scalar, scalar]
if is_hashable(colkey):
s = self._data.at[colkey]
s.at[rowkey] = value
self._data.at[colkey] = s
# .loc[scalar, non-scalar] - column-labeled series
else:
data.loc[:] = value
for k in data.index:
s = self._data.at[k]
s.at[rowkey] = data.at[k]
self._data.at[k] = s
# .loc[non-scalar, scalar] - single row-labeled series
elif is_hashable(colkey):
data.loc[rowkey] = value
self._data.at[colkey] = data
# .loc[non-scalar, non-scalar]
elif isinstance(data, self._dios.__class__):
for k in data.columns:
s = data._data.at[k]
s.loc[rowkey] = value
self._data.at[k] = s
self._data.at[colkey].loc[rowkey] = value
# .loc[any, non-scalar]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
for s in self._data.loc[colkey]:
s.loc[rowkey] = value
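The reduced setter simply iterates the selected column series and assigns through .loc, without copying anything back. That works because the holder stores the series objects themselves, so in-place mutation is already visible. A plain-pandas sketch of why (holder is an illustrative name, not library code):

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3]), "b": pd.Series([4, 5, 6])})
for s in holder.loc[["a", "b"]]:     # the selected column series objects
    s.loc[0:1] = 0                   # mutate them in place
assert holder.at["a"].tolist() == [0, 0, 3]   # the change shows up in the holder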
# #############################################################################
@@ -130,97 +80,36 @@ class _iLocIndexer(_Indexer):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
rowkey, colkey = key
else:
rowkey, colkey = key, slice(None)
if isinstance(rowkey, tuple):
raise KeyError(f"{key}. tuples are not allowed.")
if is_dios_like(rowkey) or is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
rowkey, colkey = self._unpack_key(key)
data = self._data.iloc[colkey]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if isinstance(data, pd.Series):
# .iloc[any, int] - got a single row indexed series
if is_integer(colkey):
new = data.iloc[rowkey]
# .iloc[int, any]
elif is_integer(rowkey):
# we do not overwrite data directly, so we may get
# a better-fitting series dtype
new = pd.Series(index=type(data.index)([]))
for k in data.index:
s = data.at[k]
new.at[k] = s.iloc[rowkey]
# .iloc[:, any] - simple low-cost optimization
elif isinstance(rowkey, slice) and rowkey == slice(None):
new = self._dios.copy_empty()
new._data = data.copy()
# .iloc[any, scalar]
if is_integer(colkey):
new = data.iloc[rowkey]
# .iloc[non-int, non-int]
# .iloc[any, non-scalar]
else:
for k in data.index:
data.at[k] = data.at[k].iloc[rowkey]
if is_integer(rowkey):
new = data
else:
new = self._dios.copy_empty()
for k in data.index:
new._data.at[k] = data.at[k].iloc[rowkey]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
new._data = data
return new
def __setitem__(self, key, value):
if isinstance(key, tuple):
rowkey, colkey = key
else:
rowkey, colkey = key, slice(None)
# get .iloc[any,any] - we use key(!) here
data = self.__getitem__(key)
rowkey, colkey = self._unpack_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
# .iloc[scalar, any]
if is_integer(rowkey):
# .iloc[scalar, scalar]
if is_integer(colkey):
s = self._data.iat[colkey]
s.iat[rowkey] = value
self._data.iat[colkey] = s
# .iloc[scalar, non-scalar] - column-labeled series
else:
data.iloc[:] = value
for k in data.index:
s = self._data.at[k]
s.iat[rowkey] = data.at[k]
self._data.at[k] = s
# .iloc[non-scalar, scalar] - single row-labeled series
elif is_integer(colkey):
data.iloc[rowkey] = value
self._data.iat[colkey] = data
# .iloc[non-scalar, non-scalar]
elif isinstance(data, self._dios.__class__):
for k in data.columns:
s = data._data.at[k]
s.iloc[rowkey] = value
self._data.at[k] = s
# .iloc[any, scalar]
if is_integer(colkey):
self._data.iat[colkey].iloc[rowkey] = value
# .iloc[any, non-scalar]
else:
raise AssertionError(f"getitem returned data of type {type(data)}")
for s in self._data.iloc[colkey]:
s.iloc[rowkey] = value
# #############################################################################
@@ -262,9 +151,7 @@ class _AtIndexer(_Indexer):
self._check_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".at[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
s = self._data.at[key[1]]
s.at[key[0]] = value
self._data.at[key[1]] = s
self._data.at[key[1]].at[key[0]] = value
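The reduced .at setter chains two .at lookups: the key is (row-label, column-name), so key[1] picks the stored column series and key[0] the row inside it. Since .at on an object-dtype series returns the stored object itself, no write-back is needed. A small sketch of the same idea with an illustrative holder:

import pandas as pd

holder = pd.Series({"a": pd.Series([1, 2, 3])})
holder.at["a"].at[2] = 99                     # chained .at hits the stored series directly
assert holder.at["a"].tolist() == [1, 2, 99]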
# #############################################################################
@@ -289,9 +176,7 @@ class _iAtIndexer(_Indexer):
self._check_key(key)
if is_dios_like(value) or is_nested_list_like(value):
raise TypeError(".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.")
s = self._data.iat[key[1]]
s.iat[key[0]] = value
self._data.iat[key[1]] = s
self._data.iat[key[1]].iat[key[0]] = value
# #############################################################################
......
from .test_setup import *
@@ -3,37 +3,37 @@ from test.test_setup import *
import pytest
def _test(val, exp):
def _test(res, exp):
if isinstance(exp, pd.DataFrame):
assert isinstance(val, DictOfSeries)
assert isinstance(res, DictOfSeries)
if val.empty:
if res.empty:
for c in exp:
assert exp[c].dropna().empty
return
assert (val.columns == exp.columns).all()
assert (res.columns == exp.columns).all()
for c in exp:
l = val[c]
l = res[c]
r = exp[c].dropna()
assert isinstance(l, pd.Series)
assert isinstance(r, pd.Series)
assert (l == r).all()
else:
assert type(exp) == type(val)
assert type(exp) == type(res)
if isinstance(exp, pd.Series):
assert (val == exp.dropna()).all()
assert (res == exp.dropna()).all()
else:
assert val == exp
assert res == exp
@pytest.mark.parametrize('idxer', INDEXERS)
def test_dflike__getitem__(df_, dios_, idxer):
print(idxer)
exp = df_[idxer]
val = dios_[idxer]
_test(val, exp)
res = dios_[idxer]
_test(res, exp)
@pytest.mark.parametrize('locL', LOC_L)
@@ -42,8 +42,8 @@ def test_dflike__get_loc__(df_, dios_, locL, locR):
print(locL)
print(locR)
exp = df_.loc[locL, locR]
val = dios_.loc[locL, locR]
_test(val, exp)
res = dios_.loc[locL, locR]
_test(res, exp)
@pytest.mark.parametrize('ilocL', ILOC_L)
@@ -52,29 +52,45 @@ def test_dflike__get_iloc__(df_, dios_, ilocL, ilocR):
print(ilocL)
print(ilocR)
exp = df_.iloc[ilocL, ilocR]
val = dios_.iloc[ilocL, ilocR]
# _test(val, exp)
res = dios_.iloc[ilocL, ilocR]
_test(res, exp)
if isinstance(exp, pd.DataFrame):
assert isinstance(val, DictOfSeries)
if val.empty:
for c in exp:
assert exp[c].dropna().empty
return
VALS = [99, ]
assert (val.columns == exp.columns).all()
for c in exp:
l = val[c]
r = exp[c].dropna()
assert isinstance(l, pd.Series)
assert isinstance(r, pd.Series)
assert (l == r).all()
else:
assert type(exp) == type(val)
@pytest.mark.parametrize('idxer', INDEXERS)
@pytest.mark.parametrize('val', VALS)
def test_dflike__setitem__(df_, dios_, idxer, val):
print(idxer)
exp = df_
res = dios_
exp[idxer] = val
res[idxer] = val
_test(res, exp)
if isinstance(exp, pd.Series):
assert (val == exp.dropna()).all()
else:
assert val == exp
@pytest.mark.parametrize('locL', LOC_L)
@pytest.mark.parametrize('locR', LOC_R)
@pytest.mark.parametrize('val', VALS)
def test_dflike__set_loc__(df_, dios_, locL, locR, val):
print(locL)
print(locR)
exp = df_
res = dios_
exp.loc[locL, locR] = val
res.loc[locL, locR] = val
_test(res, exp)
@pytest.mark.parametrize('ilocL', ILOC_L)
@pytest.mark.parametrize('ilocR', ILOC_R)
@pytest.mark.parametrize('val', VALS)
def test_dflike__set_iloc__(df_, dios_, ilocL, ilocR, val):
print(ilocL)
print(ilocR)
exp = df_
res = dios_
exp.iloc[ilocL, ilocR] = val
res.iloc[ilocL, ilocR] = val
_test(res, exp)