Skip to content
Snippets Groups Projects
Commit 60dff935 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

iloc done

parent f810894d
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
......@@ -88,9 +88,7 @@ class DictOfSeries:
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
self._itype = None
with reraise("param itype: "):
self.itype = get_itype(itype)
self.itype = get_itype(itype)
if downcast_policy not in CAST_POLICIES:
raise ValueError(f"downcast_policy must be one of {CAST_POLICIES}")
......@@ -105,20 +103,19 @@ class DictOfSeries:
def __init_insert_data__(self, data):
    """Insert the constructor's `data` into self, key by key.

    Accepted forms: another DictOfSeries, dict-like (one column per key),
    nested list-like (one column per inner list, keyed by position), or a
    flat list-like (stored as single column '0').

    :raises ValueError: if `data` is of an unsupported type.
    """
    if isinstance(data, DictOfSeries):
        g = ((k, data[k]) for k in data)
    else:
        # materialize iterators once, so the type checks below don't consume them
        data = list(data) if is_iterator(data) else data
        if is_dict_like(data):
            g = ((k, data[k]) for k in data)
        elif is_nested_list_like(data):
            # one column per inner list, keyed by its stringified position
            g = ((str(i), d) for i, d in enumerate(data))
        elif is_list_like(data):
            g = [('0', data)]
        else:
            raise ValueError(f"init with data of type {type(data)} is not possible.")
    # route every pair through __setitem__ so itype casting/checks apply
    for k, val in g:
        self[k] = val
    return
@property
......@@ -171,15 +168,13 @@ class DictOfSeries:
f"You are hereby warned!")
def __cast_all(self, itype):
    """Cast every column's index to `itype`, re-raising any failure with the
    offending column name prefixed to the message."""
    k = '?'  # placeholder so the except-clause has a name even before the loop runs
    try:
        for k in self.columns:
            casted = cast_to_itype(self._data[k], itype, policy=self._policy)
            self._data[k] = casted
    except Exception as e:
        raise type(e)(f"Column {k}: " + str(e)) from e

def _check_keys(self, keys):
    """Raise a KeyError listing every entry of `keys` that is not a column."""
    missing = [k for k in keys if k not in self.columns]
    if missing:
        raise KeyError(f"{missing} not in index")
def __getitem__(self, key):
"""
......@@ -196,6 +191,7 @@ class DictOfSeries:
new = self._get_item(key)
else:
raise KeyError(key)
# all other cases
else:
keys, ixs = self._get_keys_and_indexer(key)
new = self.copy_empty()
......@@ -205,7 +201,7 @@ class DictOfSeries:
return new
def _get_item(self, key):
    """Return the column `key` as its underlying pd.Series (no copy)."""
    return self._data[key]
def __setitem__(self, key, value):
......@@ -229,20 +225,31 @@ class DictOfSeries:
return
else:
k, i = [key], [slice(None)]
# all other cases
else:
k, i = self._get_keys_and_indexer(key)
gen = self._setitem(k, i, value)
gen = self._yield_tuple_to_set(k, i, value)
for tup in gen:
self._set_item(*tup)
def _setitem(self, keys, ixs, val):
"""Return a generator that yield (key, indexer, value) for all keys"""
if is_iterator(val):
val = list(val)
def _set_item(self, key, ix, val):
    """Set `val` (scalar, list, or series) on column `key` at indexer `ix`."""
    target = self._data[key]
    if not is_series_like(val):
        target[ix] = val
        return
    # series value: only write rows whose labels appear both in the
    # selected slice and in val's index
    selected = target[ix]
    shared = selected.index.intersection(val.index)
    if not shared.empty:
        selected.loc[shared] = val.loc[shared].copy()
def _yield_tuple_to_set(self, keys, ixs, val):
"""Return a generator that yield (key, indexer, value) for all keys"""
val = list(val) if is_iterator(val) else val
diosl, dfl, nlistl = is_dios_like(val), is_dataframe_like(val), is_nested_list_like(val)
if diosl or dfl or nlistl and len(val) != len(keys):
if (diosl or dfl or nlistl) and len(val) != len(keys):
raise ValueError(f"could not broadcast input array with length {len(val)}"
f" into dios of length {len(keys)}")
......@@ -258,19 +265,8 @@ class DictOfSeries:
else:
yield key, ix, val
def _set_item(self, key, ix, val):
"Set a value (scalar or list or series)"
ser = self._data[key]
if is_series_like(val):
left = ser[ix]
index = left.index.intersection(val.index)
if not index.empty:
left.loc[index] = val.loc[index].copy()
else:
ser[ix] = val
def _insert(self, key, val):
""""""
"""Insert a fresh new value into self"""
if isinstance(val, DictOfSeries):
val = val.squeeze()
elif is_list_like(val) and not is_nested_list_like(val):
......@@ -283,19 +279,19 @@ class DictOfSeries:
self._data[key] = val.copy(deep=True)
def _get_keys_and_indexer(self, key):
""" Determine keys and indexer
""" Determine keys and indexer by type of key. This does not deal
with single label-access, only higher dimension objects are handled..
Notes:
Which keys we get, depends on the policy in dios_options
Which keys we get, may depend on the policy in dios_options
"""
err_bool = "only boolen values are allowed"
keys = None
indexers = None
blowup = False
# prevent consuming of a generator
if is_iterator(key):
key = list(key)
key = list(key) if is_iterator(key) else key
if isinstance(key, slice):
keys = self.columns
......@@ -445,92 +441,74 @@ class DictOfSeries:
new._itype = self.itype
return new
def _op1(self, op):
    """Apply the unary operator `op` column-wise and return a new dios.

    Any failure is re-raised as the same exception type, prefixed with the
    operator's symbol looked up in OP_MAP.
    """
    new = self.copy_empty()
    try:
        for k in self:
            new[k] = op(self._data[k])
    except Exception as e:
        raise type(e)(f"'{OP_MAP[op]} dios' failed: " + str(e)) from e
    return new
def _op2(self, other, op, inplace=False):
new = self.copy_empty()
# with index
if isinstance(other, (self.__class__, pd.DataFrame)):
if set(other) != set(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for k in self:
left, right = self._data[k], other[k]
l, r = left.align(right, join='inner')
val = op(l, r)
new._data[k] = val
elif isinstance(other, pd.Series):
for k in self:
left, right = self._data[k], other
l, r = left.align(right, join='inner')
val = op(l, r)
new._data[k] = val
# no index
elif is_dict_like(other):
if set(other) != set(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for k in self:
new._data[k] = op(self._data[k], other[k])
elif is_nested_list_like(other):
if len(other) != len(self):
raise ValueError(f"keys does not match, left: {len(self)} keys, right: {len(other)} keys")
for i, k in enumerate(self):
new._data[k] = op(self._data[k], other[i])
def _op2(self, op, other, inplace=False):
    """Apply the binary operator `op` between self and `other`, column-wise.

    `other` may be a dios/DataFrame (matched by key, index-aligned), a
    pd.Series (index-aligned against every column), dict-like (matched by
    key), nested list-like (matched by position), or a scalar/flat
    list-like (broadcast to every column).

    :returns: self if `inplace`, else a fresh dios with the results.
    :raises ValueError: if keys/length of `other` do not match.
    """
    def raiseif(cond, s='length'):
        if cond:
            raise ValueError(f"{s} does not match, {s} left: {len(self)}, {s} right: {len(other)} keys")

    def gen():
        # indexed others -> align on the inner index before applying op
        if isinstance(other, (self.__class__, pd.DataFrame)):
            raiseif(set(other) != set(self), '#keys')
            for k in self.columns:
                left, right = self._data[k], other[k]
                yield k, op(*(left.align(right, join='inner')))
        elif isinstance(other, pd.Series):
            for k in self.columns:
                left, right = self._data[k], other
                yield k, op(*(left.align(right, join='inner')))
        # non-indexed others -> match by key, position, or broadcast
        elif is_dict_like(other):
            raiseif(set(other) != set(self), '#keys')
            for k in self.columns:
                yield k, op(self._data[k], other[k])
        elif is_nested_list_like(other):
            raiseif(len(other) != len(self), 'length')
            for i, k in enumerate(self.columns):
                yield k, op(self._data[k], other[i])
        elif is_scalar(other) or is_list_like(other):
            for k in self.columns:
                yield k, op(self._data[k], other)
        else:
            raise NotImplementedError

    new = self if inplace else self.copy_empty()
    try:
        for k, val in gen():
            new[k] = val
    except Exception as e:
        raise type(e)(f"'dios {OP_MAP[op]} other' failed: " + str(e)) from e
    return new
# unary operators
__neg__ = partialmethod(_op1, op.neg)
__abs__ = partialmethod(_op1, op.abs)
__invert__ = partialmethod(_op1, op.inv)

# comparison operators
__eq__ = partialmethod(_op2, op.eq)
__ne__ = partialmethod(_op2, op.ne)
__le__ = partialmethod(_op2, op.le)
__ge__ = partialmethod(_op2, op.ge)
__lt__ = partialmethod(_op2, op.lt)
__gt__ = partialmethod(_op2, op.gt)

# arithmetic operators
__add__ = partialmethod(_op2, op.add)
__sub__ = partialmethod(_op2, op.sub)
__mul__ = partialmethod(_op2, op.mul)
__mod__ = partialmethod(_op2, op.mod)
__truediv__ = partialmethod(_op2, op.truediv)
__floordiv__ = partialmethod(_op2, op.floordiv)
__pow__ = partialmethod(_op2, op.pow)

# bitwise/boolean operators
__and__ = partialmethod(_op2, op.and_)
__or__ = partialmethod(_op2, op.or_)
__xor__ = partialmethod(_op2, op.xor)
def squeeze(self):
    """Return the single column as a plain pd.Series if the dios holds
    exactly one column, otherwise return self unchanged."""
    return self[self.columns[0]] if len(self) == 1 else self
def memory_usage(self, index=True, deep=False):
mem = 0
......@@ -585,54 +563,21 @@ class DictOfSeries:
return None
return news.squeeze()
# def __find_least_common_itype(self):
# def all_itypes_le(itypes, super_itype):
# for itype in itypes:
# if itype_le(itype, super_itype):
# continue
# return False
# return True
#
# itypes = []
# for k in self.columns:
# itypes.append(get_itype(self._data[k].index))
#
# if not itypes:
# return None
#
# found = None
#
# # check supertypes
# super_itypes = [MixedItype, NumericItype]
# for super_itype in super_itypes:
# if all_itypes_le(itypes, super_itype):
# found = super_itype
# continue
# break
# assert found, "At least this should be MixedItype"
#
# # check base types
# single_itypes = [DatetimeItype, IntegerItype, FloatItype]
# for single_itype in single_itypes:
# if all_itypes_le(itypes, single_itype):
# found = single_itype
# break
# return found
#
class _Indexer:
def __init__(self, _dios):
self._dios = _dios
# short handles
self._data = _dios._data
self._yield_tuple_to_set = _dios._yield_tuple_to_set
class _LocIndexer(_Indexer):
def __init__(self, _dios):
    """Label-based (.loc) indexer bound to the dios `_dios`."""
    super().__init__(_dios)
    # shortcut handles to owner methods used by this indexer
    # NOTE(review): confirm the owning dios still defines _check_keys —
    # the surrounding history is ambiguous about whether this handle is current
    self._check_keys = _dios._check_keys
    self._set_item = _dios._set_item
    self._get_keys_and_indexer = _dios._get_keys_and_indexer
def __getitem__(self, key):
rkey, cols = self._unpack_key(key)
......@@ -642,15 +587,10 @@ class _LocIndexer(_Indexer):
return new
def __setitem__(self, key, value):
    """Label-based assignment: resolve `key` to (row-indexers, columns) and
    delegate the per-column writes to the owner's _set_item."""
    ixs, keys = self._unpack_key(key)
    gen = self._yield_tuple_to_set(keys, ixs, value)
    for tup in gen:
        self._set_item(*tup)
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
......@@ -661,23 +601,26 @@ class _LocIndexer(_Indexer):
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
ckey = list(ckey) if is_iterator(ckey) else ckey
# determine columns
if is_dataframe_like(ckey) or is_nested_list_like(ckey) or is_dios_like(ckey):
raise ValueError("Cannot index with multidimensional key")
if isinstance(ckey, str):
self._check_keys([ckey])
cols = [ckey]
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
self._check_keys(ckey)
cols = ckey
else:
raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
try:
# list and bool list like
cols, _ = self._get_keys_and_indexer(key)
except Exception:
raise
else:
cols = self._data.keys()
rkey = key
# blowup
rkey = [rkey] * len(cols)
return rkey, cols
def _col_slice_to_col_list(self, cslice):
......@@ -689,9 +632,9 @@ class _LocIndexer(_Indexer):
start = keys.index(cslice.start) if cslice.start is not None else None
stop = keys.index(cslice.stop) if cslice.stop is not None else None
except ValueError:
raise KeyError("The slice start label or the slice stop label is not present in the columns.")
if not is_integer(cslice.step) and cslice.step > 0:
raise TypeError("The step parameter of the slice must be positive integer.")
raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
if not is_integer(cslice.step) or cslice.step <= 0:
return []
return keys[slice(start, stop + 1, cslice.step)]
......@@ -705,9 +648,24 @@ class _iLocIndexer(_Indexer):
return new
def __setitem__(self, key, value):
    """Positional assignment: resolve `key` to (row-indexers, columns) and
    write per column with positional (iloc) semantics."""
    ixs, keys = self._unpack_key(key)
    gen = self._yield_tuple_to_set(keys, ixs, value)
    for tup in gen:
        self._set_item_positional(*tup)
def _set_item_positional(self, key, ix, val):
    """Set `val` on column `key` at the positional indexer `ix`."""
    target = self._data[key]
    if not is_series_like(val):
        target.iloc[ix] = val
        return
    # series value: restrict the write to labels present both in the
    # positional selection and in val's index
    shared = target.iloc[ix].index.intersection(val.index)
    if not shared.empty:
        target.loc[shared] = val.loc[shared].copy()
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
# if not, we only have a row-indexer and work on all columns
......@@ -717,8 +675,7 @@ class _iLocIndexer(_Indexer):
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
ckey = list(ckey) if is_iterator(ckey) else ckey
# determine columns
if is_integer(ckey):
......@@ -726,11 +683,18 @@ class _iLocIndexer(_Indexer):
cols = self._integers_to_col_list([ckey])
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
elif is_list_like(ckey) and not is_nested_list_like(ckey):
arr = np.array(ckey)
if is_bool_array(arr):
raise NotImplementedError
self._check_keys(ckey)
cols = self._integers_to_col_list(ckey)
elif is_series_like(ckey):
raise NotImplementedError
elif is_bool_indexer(ckey):
raise NotImplementedError
else:
raise KeyError(f"Type {type(ckey)} is not supported for indexing on columns.")
raise KeyError(f"{ckey} of type {type(ckey)}")
else:
cols = self._data.keys()
rkey = key
......@@ -739,6 +703,8 @@ class _iLocIndexer(_Indexer):
def _check_keys(self, keys):
    """Validate positional column keys: every key must be an integer
    within [-ncols, ncols)."""
    bound = len(self._data)
    for k in keys:
        if not is_integer(k):
            raise ValueError(f"{type(k)} is not integer")
        if not -bound <= k < bound:
            raise KeyError("positional indexer(s) are out-of-bounds in columns")
......@@ -752,5 +718,5 @@ class _iLocIndexer(_Indexer):
def _col_slice_to_col_list(self, sl):
    """Resolve a positional slice over columns to the list of selected
    column keys.

    :raises TypeError: if any slice component is not an integer.

    NOTE(review): None components are rejected too, so open slices like
    [:] raise here — confirm that strictness is intended.
    """
    for s in [sl.start, sl.stop, sl.step]:
        if not is_integer(s):
            raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
    return list(self._data.keys())[sl]
......@@ -5,12 +5,12 @@ import contextlib
import operator as op
@contextlib.contextmanager
def reraise(prefix="", postfix=""):
    """Context manager that re-raises any exception from its block as the
    same exception type, with `prefix` and `postfix` wrapped around the
    original message (the original is chained as the cause)."""
    try:
        yield
    except Exception as err:
        wrapped = type(err)(prefix + str(err) + postfix)
        raise wrapped from err
# @contextlib.contextmanager
# def reraise(prefix="", postfix=""):
# try:
# yield
# except Exception as e:
# raise type(e)(prefix + str(e) + postfix) from e
......
......@@ -5,12 +5,13 @@ import numpy as np
if __name__ == '__main__':
# dios_options[Options.mixed_itype_policy] = 'error'
df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
df[[True, False]]
# df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1d', start='2000-01-01'))
# df[[True, False]]
dios = DictOfSeries(data=[234.54, 5, 5, 4, np.nan, 5, 4, 5])
dios = abs(-dios)
dios = abs(~dios)
print(all(dios == dios))
dtser = pd.Series([2,4,4123,122,4], index=pd.date_range(freq='1d', periods=5, start='2000-01-01'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment