loc done

26cc1ebf · Bert Palm · d57bcfe9 · 26cc1ebf · 26cc1ebf · 26cc1ebf
Commit 26cc1ebf authored 5 years ago by Bert Palm 🎇
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -283,8 +283,8 @@ class DictOfSeries:
        # bool indexer
        # ------------
        # bool indexer always work on rows, so they need to have
-        # an index wo which we can align to. This is necessary
-        # because we can hold series of different indices.
+        # an index to which we can align. This is necessary
+        # because we could hold series of different lengths/indexes.
        if is_bool_indexer(key):
            if not isinstance(key, pd.Series):
                raise ValueError("Must pass Series with boolean values only")
@@ -321,10 +321,12 @@ class DictOfSeries:

    @property
    def loc(self):
+        from dios.locator import _LocIndexer  # deferred import: dios.locator itself does `from dios.dios import *`
        return _LocIndexer(self)

    @property
    def iloc(self):
+        from dios.locator import _iLocIndexer  # deferred import: dios.locator itself does `from dios.dios import *`
        return _iLocIndexer(self)

    def __str__(self):
@@ -552,202 +554,3 @@ class DictOfSeries:
        if len(news) == 0:
            return None
        return news.squeeze()
-
-
-class _Indexer:
-    def __init__(self, _dios):
-        self._dios = _dios
-        self._data = _dios._data
-        self._unpack_value = _dios._unpack_value
-
-
-class _LocIndexer(_Indexer):
-
-    def __init__(self, _dios):
-        super().__init__(_dios)
-        self._set_item = _dios._set_item
-
-    def __getitem__(self, key):
-        rkey, cols, lowdim = self._unpack_key(key)
-        if is_scalar(rkey[0]):
-            return self._series(rkey, cols, lowdim)
-        elif lowdim:
-            return self._scalar(rkey[0], cols[0])
-        else:
-            new = self._dios.copy_empty()
-            for i, _ in enumerate(cols):
-                c, r = cols[i], rkey[i]
-                new[c] = self._data[c].loc[r]
-            return new
-
-    def _series(self, rkey, cols, lowdim):
-        if lowdim:
-            return self._scalar(rkey[0], cols[0])
-        new = pd.Series()
-        for c in cols:
-            try:
-                new[c] = self._data[c].loc[rkey]
-            except KeyError:
-                new[c] = np.nan
-
-    def _scalar(self, r, c):
-        return self._data[c].loc[r]
-
-    def __setitem__(self, key, value):
-        ixs, keys, _ = self._unpack_key(key)
-        gen = self._unpack_value(keys, ixs, value)
-        for tup in gen:
-            self._set_item(*tup)
-
-    def _unpack_key(self, key):
-        # if we have a tuple, we have a rows- and a column-indexer
-        # if not, we only have a row-indexer and work on all columns
-        lowdim = False
-        if isinstance(key, tuple):
-            rkey, ckey, *fail = key
-            if fail:
-                raise KeyError("To many indexers")
-
-            # prepare ckey
-            ckey = list(ckey) if is_iterator(ckey) else ckey
-
-            # determine columns
-            if is_nested_list_like(ckey) or is_dios_like(ckey):
-                raise ValueError("Cannot index with multidimensional key")
-            if isinstance(ckey, str):
-                cols = [ckey]
-                lowdim = True
-            elif isinstance(ckey, slice):
-                cols = self._col_slice_to_col_list(ckey)
-            else:
-                try:
-                    # list, boolean-list or series
-                    cols, *_ = self._dios._unpack_key(ckey)
-                except Exception as e:
-                    raise e
-        else:
-            cols = list(self._data.index)
-            rkey = key
-        # blowup
-        rkey = [rkey] * len(cols)
-        return rkey, cols, lowdim
-
-    def _col_slice_to_col_list(self, cslice):
-        """ see here:
-        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
-        """
-        keys = list(self._data.index)
-        try:
-            start = keys.index(cslice.start) if cslice.start is not None else None
-            stop = keys.index(cslice.stop) if cslice.stop is not None else None
-        except ValueError:
-            raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
-        if not is_integer(cslice.step) or cslice.step <= 0:
-            return []
-        return keys[slice(start, stop + 1, cslice.step)]
-
-
-class _iLocIndexer(_Indexer):
-
-    def __getitem__(self, key):
-        rkey, cols, lowdim = self._unpack_key(key)
-        if is_scalar(rkey[0]):
-            return self._series(rkey, cols, lowdim)
-        elif lowdim:
-            return self._scalar(rkey[0], cols[0])
-        else:
-            new = self._dios.copy_empty()
-            for i, _ in enumerate(cols):
-                c, r = cols[i], rkey[i]
-                new[c] = self._data[c].iloc[r]
-            return new
-
-    def _series(self, rkey, cols, lowdim):
-        if lowdim:
-            return self._scalar(rkey[0], cols[0])
-        new = pd.Series()
-        for c in cols:
-            try:
-                new[c] = self._data[c].iloc[rkey]
-            except KeyError:
-                new[c] = np.nan
-
-    def _scalar(self, r, c):
-        return self._data[c].iloc[r]
-
-    def __setitem__(self, key, value):
-        ixs, keys, _ = self._unpack_key(key)
-        gen = self._unpack_value(keys, ixs, value)
-        for tup in gen:
-            self._set_item_positional(*tup)
-        raise NotImplemented
-
-    def _set_item_positional(self, key, ix, val):
-        ser = self._data[key]
-        if isinstance(val, pd.Series):
-            index = ser.iloc[ix].index
-            index = index.intersection(val.index)
-            if not index.empty:
-                ser.loc[index] = val.loc[index].copy()
-        else:
-            ser.iloc[ix] = val
-
-    def _unpack_key(self, key):
-        # if we have a tuple, we have a rows- and a column-indexer
-        # if not, we only have a row-indexer and work on all columns
-        lowdim = False
-        if isinstance(key, tuple):
-            rkey, ckey, *fail = key
-            if fail:
-                raise KeyError("To many indexers")
-
-            # prepare ckey
-            ckey = list(ckey) if is_iterator(ckey) else ckey
-
-            # determine columns
-            if is_integer(ckey):
-                self._check_keys([ckey])
-                cols = self._integers_to_col_list([ckey])
-                lowdim = True
-            elif isinstance(ckey, slice):
-                cols = self._col_slice_to_col_list(ckey)
-            elif is_list_like(ckey) and not is_nested_list_like(ckey):
-                arr = np.array(ckey)
-                if is_bool_array(arr):
-                    raise NotImplementedError
-                self._check_keys(ckey)
-                cols = self._integers_to_col_list(ckey)
-            elif isinstance(ckey, pd.Series):
-                raise NotImplementedError
-            elif is_bool_indexer(ckey):
-                raise NotImplementedError
-            else:
-                raise KeyError(f"{ckey} of type {type(ckey)}")
-        else:
-            cols = list(self._data.index)
-            rkey = key
-
-        # blowup
-        rkey = [rkey] * len(cols)
-        return rkey, cols, lowdim
-
-    def _check_keys(self, keys):
-        bound = len(self._data)
-        for k in keys:
-            if not is_integer(k):
-                raise ValueError(f"{type(k)} is not integer")
-            if k not in range(-bound, bound):
-                raise KeyError("positional indexer(s) are out-of-bounds in columns")
-
-    def _integers_to_col_list(self, ints):
-        klist = list(self._data.index)
-        ks = set()
-        for i in ints:
-            ks.add(klist[i])
-        return list(ks)
-
-    def _col_slice_to_col_list(self, sl):
-        for s in [sl.start, sl.stop, sl.step]:
-            if not is_integer(s):
-                raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
-        return list(self._data.index)[sl]
--- a/dios/locator.py
+++ b/dios/locator.py
+from dios.dios import *
+
+class _Indexer:
+    # Common base of the loc/iloc accessor classes: keeps a reference to the
+    # owning object together with its `columns` and `_data` attributes.
+    def __init__(self, _dios):
+        self._dios = _dios
+        self.columns = _dios.columns
+        self._data = _dios._data
+        # NOTE(review): the binding below is disabled, yet
+        # _iLocIndexer.__setitem__ still calls `self._unpack_value`.
+        # self._unpack_value = _dios._unpack_value
+
+
+class _LocIndexer(_Indexer):
+    """Label-based accessor returned by ``DictOfSeries.loc``.
+
+    A key is either a row indexer alone (applied to every column) or a
+    ``(rows, columns)`` tuple. Columns are selected with ``.loc`` on the
+    ``_data`` container, rows with ``.loc`` on each selected series.
+    """
+
+    def __init__(self, _dios):
+        super().__init__(_dios)
+        self._set_item = _dios._set_item
+
+    def _series(self, rkey, cols, lowdim):
+        """Collect one value per column into a new ``pd.Series``.
+
+        ``rkey`` holds one row label per entry of ``cols`` (the ``lowdim``
+        branch reads ``rkey[0]`` for ``cols[0]``). A column that lacks its
+        label contributes ``np.nan``. With ``lowdim`` set, only the scalar at
+        ``(rkey[0], cols[0])`` is returned.
+        """
+        if lowdim:
+            return self._scalar(rkey[0], cols[0])
+        new = pd.Series()
+        # look up each column with its own row label instead of the whole list
+        for c, r in zip(cols, rkey):
+            try:
+                new[c] = self._data[c].loc[r]
+            except KeyError:
+                new[c] = np.nan
+        # the result used to be built and then dropped (implicit ``None``)
+        return new
+
+    def _scalar(self, r, c):
+        """Return the value at row label ``r`` of column ``c``."""
+        return self._data[c].loc[r]
+
+    def __setitem__(self, key, value):
+        """Assign ``value`` to the selection described by ``key``.
+
+        Assignment through ``.loc`` is not implemented yet: an empty
+        selection is a no-op, any other selection raises
+        ``NotImplementedError``.
+        """
+        # _getdata returns three values; unpacking two always raised
+        # ValueError before anything else could happen.
+        data, _, _ = self._getdata(key)
+        if data.empty:
+            return
+        # fail loudly instead of silently discarding the assignment
+        raise NotImplementedError("setting values via .loc is not implemented")
+
+    def __getitem__(self, key):
+        """Return the rows (and optionally columns) selected by ``key``.
+
+        A hashable row key yields a ``pd.Series`` with one entry per selected
+        column; any other row key yields a new container created by
+        ``copy_empty()``. If the column key was a single hashable label, the
+        row key is applied directly to that column.
+        """
+        data, rkey, lowdim = self._getdata(key)
+        # slices are hashable on Python >= 3.12 but never address a single row
+        colseries = is_hashable(rkey) and not isinstance(rkey, slice)
+
+        if data.empty:
+            if colseries:
+                # rename a copy: ``data`` may be the internal container or a
+                # stored column, which must not be renamed in place
+                data = data.copy()
+                data.name = rkey
+                return data  # an empty Series
+            return self._dios.copy_empty()
+
+        if lowdim:
+            return data.loc[rkey]
+
+        new = pd.Series() if colseries else self._dios.copy_empty()
+        for s in data.index:
+            new[s] = data[s].loc[rkey]
+        return new
+
+    def _getdata(self, key):
+        """Split ``key`` and pre-select the columns.
+
+        Returns ``(data, rkey, lowdim)``: the column selection taken from
+        ``_data``, the row indexer, and whether the column key was a single
+        hashable label.
+
+        Raises ``KeyError`` for more than two indexers and ``ValueError`` for
+        a dios-like column key.
+        """
+        lowdim = False
+        if isinstance(key, tuple):
+            key, ckey, *fail = key
+            if fail:
+                raise KeyError("To many indexers")
+            if is_dios_like(ckey):
+                raise ValueError("Cannot index with multidimensional key")
+            # a slice selects several columns even where it is hashable
+            if is_hashable(ckey) and not isinstance(ckey, slice):
+                lowdim = True
+            data = self._data.loc[ckey]
+        else:
+            data = self._data
+        return data, key, lowdim
+
+    def _col_slice_to_col_list(self, cslice):
+        """Translate a label slice over the columns into a list of columns.
+
+        Both endpoints are inclusive, see here:
+        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
+
+        A missing step means 1; a non-integer or non-positive step selects
+        nothing. Raises ``KeyError`` if a given endpoint is not a column.
+        """
+        keys = list(self._data.index)
+        try:
+            start = keys.index(cslice.start) if cslice.start is not None else None
+            # +1 makes the stop label inclusive; an open end stays open
+            stop = keys.index(cslice.stop) + 1 if cslice.stop is not None else None
+        except ValueError:
+            raise KeyError("The slice start label, or the slice stop label, is not present in columns.")
+        step = 1 if cslice.step is None else cslice.step
+        if not is_integer(step) or step <= 0:
+            return []
+        return keys[start:stop:step]
+
+
+class _iLocIndexer(_Indexer):
+    """Position-based accessor returned by ``DictOfSeries.iloc``.
+
+    A key is either a row indexer alone (applied to every column) or a
+    ``(rows, columns)`` tuple whose column part is an integer, an integer
+    slice or a flat list of integers.
+    """
+
+    def __getitem__(self, key):
+        """Return the positions selected by ``key``.
+
+        A scalar row key yields a scalar (single column) or a ``pd.Series``
+        with one value per column; any other row key yields a new container
+        created by ``copy_empty()``, or the sliced column if a single column
+        was requested.
+        """
+        rkey, cols, lowdim = self._unpack_key(key)
+        if is_scalar(rkey[0]):
+            return self._series(rkey, cols, lowdim)
+        elif lowdim:
+            return self._scalar(rkey[0], cols[0])
+        else:
+            new = self._dios.copy_empty()
+            for c, r in zip(cols, rkey):
+                new[c] = self._data[c].iloc[r]
+            return new
+
+    def _series(self, rkey, cols, lowdim):
+        """Collect one value per column into a new ``pd.Series``.
+
+        ``rkey`` holds one row position per entry of ``cols``. A column that
+        is too short for its position contributes ``np.nan``. With ``lowdim``
+        set, only the scalar at ``(rkey[0], cols[0])`` is returned.
+        """
+        if lowdim:
+            return self._scalar(rkey[0], cols[0])
+        new = pd.Series()
+        # look up each column with its own position instead of the whole list
+        for c, r in zip(cols, rkey):
+            try:
+                new[c] = self._data[c].iloc[r]
+            except (IndexError, KeyError):
+                # .iloc signals an out-of-bounds position with IndexError
+                new[c] = np.nan
+        # the result used to be built and then dropped (implicit ``None``)
+        return new
+
+    def _scalar(self, r, c):
+        """Return the value at row position ``r`` of column ``c``."""
+        return self._data[c].iloc[r]
+
+    def __setitem__(self, key, value):
+        """Positional assignment; still flagged as not implemented."""
+        ixs, keys, _ = self._unpack_key(key)
+        # NOTE(review): `_unpack_value` is not bound by `_Indexer.__init__`
+        # in this file (the assignment is commented out) - confirm where it
+        # is supposed to come from.
+        gen = self._unpack_value(keys, ixs, value)
+        for tup in gen:
+            self._set_item_positional(*tup)
+        # `NotImplemented` is the comparison sentinel, not an exception class;
+        # raising it produces an unrelated TypeError
+        raise NotImplementedError
+
+    def _set_item_positional(self, key, ix, val):
+        """Write ``val`` into column ``key`` at the positions ``ix``.
+
+        A ``pd.Series`` value is aligned by label: only labels present both
+        in the positional selection and in ``val`` are written. Any other
+        value is assigned positionally.
+        """
+        ser = self._data[key]
+        if isinstance(val, pd.Series):
+            index = ser.iloc[ix].index
+            index = index.intersection(val.index)
+            if not index.empty:
+                ser.loc[index] = val.loc[index].copy()
+        else:
+            ser.iloc[ix] = val
+
+    def _unpack_key(self, key):
+        """Split ``key`` into per-column row keys and a column list.
+
+        Returns ``(rkey, cols, lowdim)`` where ``rkey`` repeats the row
+        indexer once per selected column and ``lowdim`` is true if a single
+        integer column was requested.
+        """
+        # if we have a tuple, we have a rows- and a column-indexer
+        # if not, we only have a row-indexer and work on all columns
+        lowdim = False
+        if isinstance(key, tuple):
+            rkey, ckey, *fail = key
+            if fail:
+                raise KeyError("To many indexers")
+
+            # prepare ckey
+            ckey = list(ckey) if is_iterator(ckey) else ckey
+
+            # determine columns
+            if is_integer(ckey):
+                self._check_keys([ckey])
+                cols = self._integers_to_col_list([ckey])
+                lowdim = True
+            elif isinstance(ckey, slice):
+                cols = self._col_slice_to_col_list(ckey)
+            elif is_list_like(ckey) and not is_nested_list_like(ckey):
+                arr = np.array(ckey)
+                if is_bool_array(arr):
+                    raise NotImplementedError
+                self._check_keys(ckey)
+                cols = self._integers_to_col_list(ckey)
+            # NOTE(review): a pd.Series is presumably list-like and caught by
+            # the branch above, so the next branch may be unreachable.
+            elif isinstance(ckey, pd.Series):
+                raise NotImplementedError
+            elif is_bool_indexer(ckey):
+                raise NotImplementedError
+            else:
+                raise KeyError(f"{ckey} of type {type(ckey)}")
+        else:
+            cols = list(self._data.index)
+            rkey = key
+
+        # blowup
+        rkey = [rkey] * len(cols)
+        return rkey, cols, lowdim
+
+    def _check_keys(self, keys):
+        """Validate that every key is an in-bounds integer column position.
+
+        Raises ``ValueError`` for a non-integer and ``KeyError`` for a
+        position outside ``[-len(columns), len(columns))``.
+        """
+        bound = len(self._data)
+        for k in keys:
+            if not is_integer(k):
+                raise ValueError(f"{type(k)} is not integer")
+            if k not in range(-bound, bound):
+                raise KeyError("positional indexer(s) are out-of-bounds in columns")
+
+    def _integers_to_col_list(self, ints):
+        """Map column positions to column names, without duplicates."""
+        klist = list(self._data.index)
+        # dict.fromkeys drops duplicates like the former set did, but keeps
+        # the columns in the order they were requested
+        return list(dict.fromkeys(klist[i] for i in ints))
+
+    def _col_slice_to_col_list(self, sl):
+        """Apply a positional slice to the list of column names.
+
+        Omitted (``None``) slice fields are accepted, so ``[:]`` or ``[0:2]``
+        work; any other non-integer field raises ``TypeError``.
+        """
+        for s in [sl.start, sl.stop, sl.step]:
+            if s is not None and not is_integer(s):
+                raise TypeError(f"positional indexing with slice must be integers, passed type was {type(s)}")
+        return list(self._data.index)[sl]
--- a/test/run_dios.py
+++ b/test/run_dios.py
@@ -5,10 +5,9 @@ import numpy as np
 if __name__ == '__main__':
    # dios_options[OptsFields.mixed_itype_policy] = 'error'

-    df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1y', start='2000-01-01'))
+    # df = pd.DataFrame([1,24,5,456,45], index=pd.date_range(periods=5, freq='1y', start='2000-01-01'))
    # df[[True, False]]

-
    df1 = pd.DataFrame(dict(a=range(5), b=range(0,50,10)))
    df2 = pd.DataFrame(dict(b=[99], a=[888732727]), index=range(3,8))
    d1 = DictOfSeries(df1)
@@ -17,12 +16,12 @@ if __name__ == '__main__':
    df1.to_string()

    d = DictOfSeries(dict(a=[1,2], b=[12,38,32,32,323], ss=[2,23,3,2,3,], z=pd.Series([1,2,3], index=list("abc"))))
-    d['ss'].index = df.index
-    # d=DictOfSeries(df)
-
-    d1[:] = d2
-    print(d)
-    print(d1)
-    print(df1)
+    print(d, type(d))
+    d = d.loc[:,:]
+    print(d, type(d))
+    a = d.loc[:,'a']
+    print(a, type(a))
+    x = d.loc[1,['a', 'ss', 'z']]
+    print(x, type(x))