Skip to content
Snippets Groups Projects
Commit 5f9f19aa authored by Bert Palm's avatar Bert Palm 🎇
Browse files

introduced base-class, cleanup imports

parent d77f0c9b
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
from . import operators as ops
from . import lib
from .lib import (
_CAST_POLICIES,
_throw_MixedItype_err_or_warn,
_find_least_common_itype,
)
from abc import abstractmethod
import pandas as pd
import operator as op
import functools as ftools
from pandas.core.common import is_bool_indexer as _is_bool_indexer
import pandas.core.dtypes.common as pdcom
"""
Unlike the example says, return lists False, not True
>>is_iterator([1, 2, 3])
>>False
"""
from pandas.core.dtypes.common import is_iterator as _is_iterator
__author__ = "Bert Palm"
__email__ = "bert.palm@ufz.de"
__copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
class _DiosBase:
    """Abstract base class holding the core container logic of a
    DictOfSeries: construction, itype handling and casting, indexing
    (``[]``-get/set/del), copying and the arithmetic/comparison operators.

    Data is stored in ``self._data``, a ``pd.Series`` of ``pd.Series``
    objects, keyed by column label. Concrete subclasses only need to
    provide ``_constructor``, the class used to build new instances for
    indexing and operator results.
    """

    @property
    @abstractmethod
    def _constructor(self):
        # Subclasses return the class used to construct new instances.
        pass

    def __init__(self, data=None, columns=None, index=None, itype=None, cast_policy='save', fastpath=False):
        """Build the internal storage and determine the itype.

        Parameters
        ----------
        data : optional
            Initial data; anything ``_init_insert_data`` understands
            (dios-like, dict, pd.Series, nested or flat list-like).
        columns : array-like, optional
            Column labels; must be unique.
        index : pd.Index or array-like, optional
            Index used for every series built from raw (non-Series) data.
        itype : optional
            Index type every column must have; if None it is inferred
            from the data (lazily, if no data is present yet).
        cast_policy : str
            Policy used when casting a series index to the itype; must be
            one of ``_CAST_POLICIES``.
        fastpath : bool
            Internal shortcut: trust ``data``/``itype`` as-is, skip checks.
        """
        self.cast_policy = cast_policy
        # we are called internally
        if fastpath:
            self._itype = itype or lib.ObjItype
            if data is not None:
                self._data = data
            else:
                # it is significantly faster, to provide an index and fill it,
                # than to successively build the index by adding data
                self._data = pd.Series(dtype='O', index=columns)
        else:
            if index is not None and not isinstance(index, pd.Index):
                index = pd.Index(index)
            # itype=None means infer the itype by the data, so we first set to the highest
            # possible itype, then insert data, then infer the best-fitting itype.
            if itype is None and index is None:
                self._itype = lib.ObjItype
            else:
                if index is not None:
                    self._itype = lib.get_itype(index)
                if itype is not None:
                    # an explicit itype wins over one derived from `index`
                    self._itype = lib.get_itype(itype)
            cols = pd.Index([] if columns is None else columns)
            if not cols.is_unique:
                raise ValueError("columns must be unique")
            self._data = pd.Series(dtype='O', index=cols)
            if data is not None:
                self._init_insert_data(data, columns, index)
        # self._data still contain nans at all positions, where
        # no data was present, but a column-name was given
        if self._data.hasnans:
            e = pd.Series(dtype='O', index=index)
            for c in self.columns[self._data.isna()]:
                self._insert(c, e.copy())
        self._data.index.name = 'columns'
        # we try to infer the itype, but if we still have
        # no data, we will set the itype lazy, i.e. with
        # the first non-empty _insert()
        if itype is None:
            if self.empty:
                self._itype = 'INFER'
            else:
                self._itype = _find_least_common_itype(self._data)
                if not self._itype.unique:
                    _throw_MixedItype_err_or_warn(self.itype)

    def _init_insert_data(self, data, columns, index):
        """Insert items of an iterable into self.

        Normalises ``data`` to a dict-like mapping of column label to
        values, then inserts column by column via ``_insert``.
        """
        if _is_iterator(data):
            data = list(data)
        if _is_dios_like(data) or isinstance(data, dict):
            if columns is None:
                pass  # data is dict-like
            else:
                # keep only the requested columns
                data = {k: data[k] for k in data if k in columns}
        elif isinstance(data, pd.Series):
            name = data.name or 0
            if columns is not None and len(columns) > 0:
                name = self.columns[0]
            data = {name: data}
        elif pdcom.is_nested_list_like(data):
            if columns is None:
                data = {i: d for i, d in enumerate(data)}
            elif len(data) == len(columns):
                data = dict(zip(self.columns, data))
            else:
                raise ValueError(f"{len(columns)} columns passed, data implies {len(data)} columns")
        elif pdcom.is_list_like(data):
            # a single flat list-like becomes a single column
            name = 0 if columns is None or len(columns) < 1 else self.columns[0]
            data = {name: data}
        else:
            raise TypeError("data type not understood")
        for k in data:
            self._insert(k, pd.Series(data[k], index=index))

    # ----------------------------------------------------------------------
    # Indexing Methods

    def _insert(self, col, val):
        """Insert a fresh new value as pd.Series into self.

        Raises
        ------
        ValueError
            If a frame-like with more than one column is passed.
        TypeError
            If ``val`` is neither series-, frame-like nor None.
        """
        val = list(val) if _is_iterator(val) else val
        if _is_dios_like(val):
            # a one-column frame-like squeezes to a plain series
            val = val.squeeze()
            if not isinstance(val, pd.Series):
                raise ValueError(f"Cannot insert frame-like with more than one column")
        elif val is None:
            val = pd.Series()
        elif not isinstance(val, pd.Series):
            raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")
        # set the itype lazy, i.e. when first non-empty
        # column is inserted
        if self._itype == 'INFER':
            if not val.empty:
                self._itype = lib.get_itype(val.index)
                # cast all pre-inserted empty series
                self._cast_all(self._itype, self._policy)
                if not self._itype.unique:
                    _throw_MixedItype_err_or_warn(self._itype)
        else:
            val = lib.cast_to_itype(val, self.itype, policy=self._policy)
        val.name = col
        # deep copy so later mutation of `val` cannot affect us
        self._data.at[col] = val.copy(deep=True)

    def __getitem__(self, key):
        """ dios[key] -> dios/series """
        key = list(key) if _is_iterator(key) else key
        if isinstance(key, tuple):
            raise KeyError("tuples are not allowed")
        if pdcom.is_hashable(key):
            # NOTE: we use copy here to prevent index
            # changes, that could result in an invalid
            # itype. A shallow copy is not sufficient.
            # work on columns, return series
            return self._data.at[key].copy()
        if _is_dios_like(key):
            # work on rows and columns
            new = self._getitem_bool_dios(key)
        elif isinstance(key, slice):
            # work on rows
            new = self._slice(key)
        elif _is_bool_indexer(key):
            # work on rows
            new = self._getitem_bool_listlike(key)
        else:
            # work on columns
            data = self._data.loc[key]
            new = self._constructor(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
        return new

    def _slice(self, key):
        """slices self, return copy"""
        if key == slice(None):
            return self.copy()
        new = self.copy_empty(columns=True)
        for k in self.columns:
            new._data.at[k] = self._data.at[k][key]
        return new

    def _getitem_bool_dios(self, key):
        """ Select items by a boolean dios-like drop un-selected indices. """
        if not _is_bool_dios_like(key):
            raise ValueError("Must pass DictOfSeries with boolean values only")
        new = self.copy_empty(columns=True)
        for k in self.columns.intersection(key.columns):
            dat = self._data.at[k]
            val = key[k]
            # align rows
            idx = val[val].index.intersection(dat.index)
            new._data.at[k] = dat[idx]
        return new

    def _getitem_bool_listlike(self, key):
        """Select rows by a boolean list-like, applied to every column."""
        new = self.copy_empty(columns=True)
        for k in self.columns:
            new._data.at[k] = self._data.at[k].loc[key]
        return new

    def __setitem__(self, key, value):
        """ dios[key] = value """
        key = list(key) if _is_iterator(key) else key
        if isinstance(key, tuple):
            raise KeyError(f"{key}. tuples are not allowed")
        elif pdcom.is_hashable(key):
            if isinstance(value, pd.Series) or key not in self.columns:
                self._insert(key, value)
            elif _is_dios_like(value) or pdcom.is_nested_list_like(value):
                raise ValueError("Incompatible indexer with multi-dimensional value")
            else:
                self._data.at[key][:] = value
        else:
            data = self.__getitem__(key)
            assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"
            # special cases
            if _is_dios_like(value):
                self._setitem_dios(data, value)
            # NOTE: pd.Series also considered list-like
            elif pdcom.is_list_like(value):
                self._setitem_listlike(data, value)
            # default case
            else:
                for k in data.columns:
                    s = data._data.at[k]
                    s[:] = value
                    self._data.at[k][s.index] = s

    def _setitem_listlike(self, data, value):
        """Broadcast one scalar per column from an array-like ``value``."""
        value = value.values if isinstance(value, pd.Series) else value
        if len(value) != len(data.columns):
            raise ValueError(f"array-like value of length {len(value)} could "
                             f"not be broadcast to indexing result of shape "
                             f"(.., {len(data.columns)})")
        for i, k in enumerate(data.columns):
            s = data._data.at[k]
            s[:] = value[i]
            self._data.at[k][s.index] = s

    def _setitem_dios(self, data, value):
        """ Write values from a dios-like to self.

        No justification or alignment of columns, but of indices.
        If value has missing indices, nan's are inserted at that
        locations, just like `series.loc[:]=val` or `df[:]=val` do.

        Eg.
        di[::2] = di[::3]      ->      di[::2]

            x |       x |           x |
        ===== |    ==== |      ====== |
        0   x |    0  z |      0    z |
        2   x |  = 3  z |  ->  2  NaN |
        4   x |    6  z |      4  NaN |
        6   x |                6    z |

        Parameter
        ----------
        data : dios
            A maybe trimmed version of self
        value : dios, pd.Dataframe
            The value to set with the same column dimension like data
        """
        if len(data) != len(value.columns):
            raise ValueError(f"shape mismatch: values array of shape "
                             f"(.., {len(value.columns)}) could not "
                             f"be broadcast to indexing result of "
                             f"shape (.., {len(data.columns)})")
        for i, k in enumerate(data):
            dat = data._data.at[k]
            # .loc cannot handle empty series,
            # like `emptySeries.loc[:] = [1,2]`
            if dat.empty:
                continue
            val = value[value.columns[i]]
            dat.loc[:] = val
            self._data.at[k].loc[dat.index] = dat

    def __delitem__(self, key):
        del self._data[key]

    # ------------------------------------------------------------------------------
    # Base properties and basic dunder magic

    @property
    def columns(self):
        # The column labels are the index of the outer series.
        return self._data.index

    @columns.setter
    def columns(self, cols):
        index = pd.Index(cols)
        if not index.is_unique:
            raise ValueError("columns index must have unique values")
        self._data.index = index

    @property
    def itype(self):
        # 'INFER' is an internal sentinel; expose it as None.
        if self._itype == 'INFER':
            return None
        return self._itype

    @itype.setter
    def itype(self, itype):
        itype = lib.get_itype(itype)
        self._cast_all(itype, policy=self._policy)
        self._itype = itype

    @property
    def cast_policy(self):
        return self._policy

    @cast_policy.setter
    def cast_policy(self, policy):
        if policy not in _CAST_POLICIES:
            raise ValueError(f"policy must be one of {_CAST_POLICIES}")
        self._policy = policy

    def _cast_all(self, itype, policy):
        """Cast every column to ``itype``, prefixing errors with the column.

        NOTE(review): the casted series are written into a throwaway
        ``copy_empty()`` result (``data``) and never assigned back to
        ``self._data`` — as written this only validates that the cast
        succeeds; confirm whether ``self._data = data._data`` is missing.
        """
        c = '?'
        data = self.copy_empty()
        try:
            for c in self.columns:
                data._data.at[c] = lib.cast_to_itype(self._data.at[c], itype, policy=policy)
        except Exception as e:
            # re-raise with the offending column name prepended
            raise type(e)(f"Column {c}: " + str(e)) from e

    def __len__(self):
        return len(self.columns)

    @property
    def empty(self):
        # True if there are no columns, or every column is an empty series.
        return len(self) == 0 or all(s.empty for s in self._data)

    def __iter__(self):
        yield from self.columns

    def __reversed__(self):
        yield from reversed(self.columns)

    def __contains__(self, item):
        return item in self.columns

    # ----------------------------------------------------------------------
    # if copy.copy() is copy.copy(): return copy.copy().copy()

    def __copy__(self):
        return self.copy(deep=True)

    def __deepcopy__(self, memo=None):
        return self.copy(deep=True)

    def copy(self, deep=True):
        """Return a copy of self; deep copies every column series."""
        if deep:
            data = pd.Series(dtype='O', index=self.columns)
            for c in self.columns:
                data.at[c] = self._data.at[c].copy(deep=True)
        else:
            # shallow: the new instance shares the outer series
            data = self._data
        kws = dict(itype=self._itype, cast_policy=self._policy)
        return self._constructor(data=data, fastpath=True, **kws)

    def copy_empty(self, columns=True):
        """Return an empty instance; keeps column labels and dtypes if
        ``columns`` is True."""
        data = None
        if columns is True:  # is correct
            data = pd.Series(dtype='O', index=self.columns)
            for c in self.columns:
                data.at[c] = pd.Series(dtype=self._data.at[c].dtype)
        kws = dict(itype=self._itype, cast_policy=self._policy)
        return self._constructor(data=data, fastpath=True, **kws)

    # ------------------------------------------------------------------------------
    # Operators

    def _op1(self, op):
        """Apply unary operator ``op`` column-wise; return a new instance."""
        new = self.copy_empty(columns=True)
        try:
            for k in self.columns:
                new[k] = op(self[k])
        except Exception as e:
            raise type(e)(f"'{ops.OP_MAP[op]} dios' failed: " + str(e)) from e
        return new

    def _op2(self, op, other, align=True, inplace=False):
        """Apply binary operator ``op`` column-wise between self and ``other``.

        ``other`` may be dios-like, a pd.Series, dict-like, nested
        list-like, flat list-like or a scalar. With ``align=True``,
        series operands are inner-aligned on their indices first.
        """
        def raiseif(kself, kother, s):
            if kself != kother:
                raise ValueError(f"{s} does not match, {s} left: {kself}, {s} right: {kother}")
        def doalign(left, right):
            return left.align(right, join='inner') if align else (left, right)
        def gen():
            if _is_dios_like(other):
                raiseif(list(self), list(other), 'keys')
                for k in self.columns:
                    left, right = self[k], other[k]
                    yield k, op(*doalign(left, right))
            elif isinstance(other, pd.Series):
                # the single series is combined with every column
                for k in self.columns:
                    left, right = self[k], other
                    yield k, op(*doalign(left, right))
            elif pdcom.is_dict_like(other):
                raiseif(sorted(self), sorted(other), 'keys')
                for k in self.columns:
                    yield k, op(self[k], other[k])
            elif pdcom.is_nested_list_like(other):
                raiseif(len(self), len(other), 'length')
                for i, k in enumerate(self.columns):
                    yield k, op(self[k], other[i])
            elif pdcom.is_scalar(other) or pdcom.is_list_like(other):
                for k in self.columns:
                    yield k, op(self[k], other)
            else:
                raise NotImplementedError
        new = self if inplace else self.copy_empty(columns=True)
        try:
            for k, val in gen():
                new[k] = val
        except Exception as e:
            raise type(e)(f"'dios {ops.OP_MAP[op]} other' failed: " + str(e)) from e
        return new

    __neg__ = ftools.partialmethod(_op1, op.neg)
    __abs__ = ftools.partialmethod(_op1, op.abs)
    __invert__ = ftools.partialmethod(_op1, op.inv)
    __eq__ = ftools.partialmethod(_op2, op.eq, align=False)
    __ne__ = ftools.partialmethod(_op2, op.ne, align=False)
    __le__ = ftools.partialmethod(_op2, op.le, align=False)
    __ge__ = ftools.partialmethod(_op2, op.ge, align=False)
    __lt__ = ftools.partialmethod(_op2, op.lt, align=False)
    __gt__ = ftools.partialmethod(_op2, op.gt, align=False)
    __add__ = ftools.partialmethod(_op2, op.add)
    __sub__ = ftools.partialmethod(_op2, op.sub)
    __mul__ = ftools.partialmethod(_op2, op.mul)
    __mod__ = ftools.partialmethod(_op2, op.mod)
    __truediv__ = ftools.partialmethod(_op2, op.truediv)
    __floordiv__ = ftools.partialmethod(_op2, op.floordiv)
    __pow__ = ftools.partialmethod(_op2, op.pow)
    __and__ = ftools.partialmethod(_op2, op.and_)
    __or__ = ftools.partialmethod(_op2, op.or_)
    __xor__ = ftools.partialmethod(_op2, op.xor)

    # ------------------------------------------------------------------------------
    # Indexer

    @property
    def loc(self):
        return _LocIndexer(self)

    @property
    def iloc(self):
        return _iLocIndexer(self)

    @property
    def aloc(self):
        return _aLocIndexer(self)

    @property
    def at(self):
        return _AtIndexer(self)

    @property
    def iat(self):
        return _iAtIndexer(self)
def _is_dios_like(obj) -> bool:
    """Return True if `obj` behaves like a DictOfSeries.

    Anything that passes is expected to:
    - have `columns` (some kind of pd.Index),
    - iterate over its columns, with `in` checking column membership,
    - yield a pd.Series from `obj[key]`,
    - yield a pd.Series from `obj.squeeze()` when len(obj) == 1.
    """
    return isinstance(obj, (_DiosBase, pd.DataFrame))
def _is_bool_series(obj) -> bool:
return isinstance(obj, pd.Series) and obj.dtype == bool
def _is_bool_dios_like(obj) -> bool:
    """Return True if `obj` is dios-like and holds boolean data only."""
    if not _is_dios_like(obj):
        return False
    dtypes = obj.dtypes
    if (dtypes == bool).all():
        # every column already has boolean dtype
        return True
    # object-dtype columns may still carry purely boolean values
    has_object_cols = (dtypes == 'O').any()
    if not has_object_cols:
        return False
    return obj.apply(_is_bool_indexer).all()
# keep this here to prevent cyclic import
from .indexer import _aLocIndexer, _iLocIndexer, _LocIndexer, _iAtIndexer, _AtIndexer
from .operators import OP_MAP as _OP_MAP from .base import _DiosBase, _is_dios_like
from .lib import Opts, OptsFields, dios_options
from .lib import * from .lib import _find_least_common_itype
from .lib import (
_CAST_POLICIES,
_itype_le, _itype_lt,
_throw_MixedItype_err_or_warn,
_find_least_common_itype,
)
import functools as ftools
import pandas as pd import pandas as pd
import pandas.core.dtypes.common as pdcom
import numpy as np import numpy as np
import operator as op
import functools as ftools
import pandas.core.dtypes.common as dcom
_is_list_like = dcom.is_list_like
_is_nested_list_like = dcom.is_nested_list_like
_is_scalar = dcom.is_scalar
_is_integer = dcom.is_integer
_is_dict_like = dcom.is_dict_like
_is_number = dcom.is_number
_is_hashable = dcom.is_hashable
from pandas.core.common import is_bool_indexer as _is_bool_indexer
"""
Unlike the example says, return lists False, not True
>>is_iterator([1, 2, 3])
>>False
"""
from pandas.core.dtypes.common import is_iterator as _is_iterator
from typing import Union, Any
class DictOfSeries(_DiosBase):
class DictOfSeries:
""" A data frame where every column has its own index. """ A data frame where every column has its own index.
DictOfSeries is a collection of pd.Series's which aim to be as close as possible similar to DictOfSeries is a collection of pd.Series's which aim to be as close as possible similar to
...@@ -44,355 +16,40 @@ class DictOfSeries: ...@@ -44,355 +16,40 @@ class DictOfSeries:
unlike the former, which provide a single row-index for all columns. This solves problems with unlike the former, which provide a single row-index for all columns. This solves problems with
unaligned data and data which varies widely in length. unaligned data and data which varies widely in length.
Indexing with ``di[]``, ``di.loc[]`` and ``di.iloc[]`` should work analogous to these methods Indexing with ``di[]``, ``di.loc[]`` and ``di.iloc[]`` should work analogous to these methods
from pd.DataFrame. The indexer can be a single label, a slice, a list-like, a boolean list-like, from pd.DataFrame. The indexer can be a single label, a slice, a list-like, a boolean list-like,
or a boolean dios/pd.DataFrame and can be used to selectively get or set data. or a boolean DictOfSeries/pd.DataFrame and can be used to selectively get or set data.
Parameters Parameters
---------- ----------
data : array-like, Iterable, dict, or scalar value data : array-like, Iterable, dict, or scalar value
Contains data stored in Series. Contains data stored in Series.
columns : array-like columns : array-like
Column labels to use for resulting frame. Will default to Column labels to use for resulting frame. Will default to
RangeIndex (0, 1, 2, ..., n) if no column labels are provided. RangeIndex(0, 1, 2, ..., n) if no column labels are provided.
itype : Itype, pd.Index, Itype-string-repr, type
Index type that every series in this dios should have.
if None, the index-type is inferred each time a series is inserted
or deleted.
cast_policy : str
Policy to use for down-casting an itype.
"""
# ------------------------------------------------------------------------------
# Constructors
def __init__(self, data=None, columns=None, index=None, itype=None, cast_policy='save', fastpath=False):
self.cast_policy = cast_policy
# we are called internally
if fastpath:
self._itype = itype or ObjItype
if data is not None:
self._data = data
else:
# it is significantly faster, to provide an index and fill it,
# than to successively build the index by adding data
self._data = pd.Series(dtype='O', index=columns)
else:
if index is not None and not isinstance(index, pd.Index):
index = pd.Index(index)
# itype=None means infer the itype by the data, so we first set to the highest
# possible itype, then insert data, then infer the best-fitting itype.
if itype is None and index is None:
self._itype = ObjItype
else:
if index is not None:
self._itype = get_itype(index)
if itype is not None:
self._itype = get_itype(itype)
cols = pd.Index([] if columns is None else columns)
if not cols.is_unique:
raise ValueError("columns must be unique")
self._data = pd.Series(dtype='O', index=cols)
if data is not None:
self._init_insert_data(data, columns, index)
# self._data still contain nans at all positions, where
# no data was present, but a column-name was given
if self._data.hasnans:
e = pd.Series(dtype='O', index=index)
for c in self.columns[self._data.isna()]:
self._insert(c, e.copy())
self._data.index.name = 'columns'
# we try to infer the itype, but if we still have
# no data, we will set the itype lazy, i.e. with
# the first non-empty _insert()
if itype is None:
if self.empty:
self._itype = 'INFER'
else:
self._itype = _find_least_common_itype(self._data)
if not self._itype.unique:
_throw_MixedItype_err_or_warn(self.itype)
def _init_insert_data(self, data, columns, index):
""" Insert items of a iterable in self"""
if _is_iterator(data):
data = list(data)
if _is_dios_like(data) or isinstance(data, dict):
if columns is None:
pass # data is dict-like
else:
data = {k: data[k] for k in data if k in columns}
elif isinstance(data, pd.Series):
name = data.name or 0
if columns is not None and len(columns) > 0:
name = self.columns[0]
data = {name: data}
elif _is_nested_list_like(data):
if columns is None:
data = {i: d for i, d in enumerate(data)}
elif len(data) == len(columns):
data = dict(zip(self.columns, data))
else:
raise ValueError(f"{len(columns)} columns passed, data implies {len(data)} columns")
elif _is_list_like(data):
name = 0 if columns is None or len(columns) < 1 else self.columns[0]
data = {name: data}
else:
raise TypeError("data type not understood")
for k in data: index : Index or array-like
self._insert(k, pd.Series(data[k], index=index)) Index to use to reindex every given series during init. Ignored if omitted.
# ---------------------------------------------------------------------- itype : Itype, pd.Index, Itype-string-repr or type
# Indexing Methods Every series that is inserted, must have an index of this type or any
of this types subtypes.
def _insert(self, col, val): If None, the itype is inferred as soon as the first non-empty series is inserted.
"""Insert a fresh new value as pd.Series into self"""
val = list(val) if _is_iterator(val) else val
if _is_dios_like(val):
val = val.squeeze()
if not isinstance(val, pd.Series):
raise ValueError(f"Cannot insert frame-like with more than one column")
elif val is None:
val = pd.Series()
elif not isinstance(val, pd.Series):
raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")
# set the itype lazy, i.e. when first non-empty
# column is inserted
if self._itype == 'INFER':
if not val.empty:
self._itype = get_itype(val.index)
# cast all pre-inserted empty series
self._cast_all(self._itype, self._policy)
if not self._itype.unique:
_throw_MixedItype_err_or_warn(self._itype)
else:
val = cast_to_itype(val, self.itype, policy=self._policy)
val.name = col
self._data.at[col] = val.copy(deep=True)
def __getitem__(self, key):
""" dios[key] -> dios/series """
key = list(key) if _is_iterator(key) else key
if isinstance(key, tuple):
raise KeyError("tuples are not allowed")
if _is_hashable(key):
# NOTE: we use copy here to prevent index
# changes, that could result in an invalid
# itype. A shallow copy is not sufficient.
# work on columns, return series
return self._data.at[key].copy()
if _is_dios_like(key):
# work on rows and columns
new = self._getitem_bool_dios(key)
elif isinstance(key, slice):
# work on rows
new = self._slice(key)
elif _is_bool_indexer(key):
# work on rows
new = self._getitem_bool_listlike(key)
else:
# work on columns
data = self._data.loc[key]
new = DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
return new
def _slice(self, key):
"""slices self, return copy"""
if key == slice(None):
return self.copy()
new = self.copy_empty(columns=True)
for k in self.columns:
new._data.at[k] = self._data.at[k][key]
return new
def _getitem_bool_dios(self, key):
""" Select items by a boolean dios-like drop un-selected indices. """
if not _is_bool_dios_like(key):
raise ValueError("Must pass DictOfSeries with boolean values only")
new = self.copy_empty(columns=True)
for k in self.columns.intersection(key.columns):
dat = self._data.at[k]
val = key[k]
# align rows
idx = val[val].index.intersection(dat.index)
new._data.at[k] = dat[idx]
return new
def _getitem_bool_listlike(self, key):
new = self.copy_empty(columns=True)
for k in self.columns:
new._data.at[k] = self._data.at[k].loc[key]
return new
def __setitem__(self, key, value):
""" dios[key] = value """
key = list(key) if _is_iterator(key) else key
if isinstance(key, tuple):
raise KeyError(f"{key}. tuples are not allowed")
elif _is_hashable(key):
if isinstance(value, pd.Series) or key not in self.columns:
self._insert(key, value)
elif _is_dios_like(value) or _is_nested_list_like(value):
raise ValueError("Incompatible indexer with multi-dimensional value")
else:
self._data.at[key][:] = value
else:
data = self.__getitem__(key)
assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"
# special cases
if _is_dios_like(value):
self._setitem_dios(data, value)
# NOTE: pd.Series also considered list-like
elif _is_list_like(value):
self._setitem_listlike(data, value)
# default case
else:
for k in data.columns:
s = data._data.at[k]
s[:] = value
self._data.at[k][s.index] = s
def _setitem_listlike(self, data, value):
value = value.values if isinstance(value, pd.Series) else value
if len(value) != len(data.columns):
raise ValueError(f"array-like value of length {len(value)} could "
f"not be broadcast to indexing result of shape "
f"(.., {len(data.columns)})")
for i, k in enumerate(data.columns):
s = data._data.at[k]
s[:] = value[i]
self._data.at[k][s.index] = s
def _setitem_dios(self, data, value):
""" Write values from a dios-like to self.
No justification or alignment of columns, but of indices.
If value has missing indices, nan's are inserted at that
locations, just like `series.loc[:]=val` or `df[:]=val` do.
Eg.
di[::2] = di[::3] -> di[::2]
x | x | x | cast_policy : {'save', 'force', 'never'}, default 'save'
===== | ==== | ====== | Policy used for (down-)casting the index of a series if its type does not match
0 x | 0 z | 0 z | the ``itype``.
2 x | = 3 z | -> 2 NaN | """
4 x | 6 z | 4 NaN |
6 x | 6 z |
Parameter
----------
data : dios
A maybe trimmed version of self
value : dios, pd.Dataframe
The value to set with the same column dimension like data
"""
if len(data) != len(value.columns):
raise ValueError(f"shape mismatch: values array of shape "
f"(.., {len(value.columns)}) could not "
f"be broadcast to indexing result of "
f"shape (.., {len(data.columns)})")
for i, k in enumerate(data):
dat = data._data.at[k]
# .loc cannot handle empty series,
# like `emptySeries.loc[:] = [1,2]`
if dat.empty:
continue
val = value[value.columns[i]]
dat.loc[:] = val
self._data.at[k].loc[dat.index] = dat
def __delitem__(self, key):
del self._data[key]
# ------------------------------------------------------------------------------
# Base properties and basic dunder magic
@property
def columns(self):
return self._data.index
@columns.setter
def columns(self, cols):
index = pd.Index(cols)
if not index.is_unique:
raise ValueError("columns index must have unique values")
self._data.index = index
@property
def itype(self):
if self._itype == 'INFER':
return None
return self._itype
@itype.setter def __init__(self, data=None, columns=None, index=None, itype=None, cast_policy='save', fastpath=False):
def itype(self, itype): super().__init__(
itype = get_itype(itype) data=data, columns=columns, index=index, itype=itype, cast_policy=cast_policy, fastpath=fastpath
self._cast_all(itype, policy=self._policy) )
self._itype = itype
@property @property
def cast_policy(self): def _constructor(self):
return self._policy return DictOfSeries
@cast_policy.setter
def cast_policy(self, policy):
if policy not in _CAST_POLICIES:
raise ValueError(f"policy must be one of {_CAST_POLICIES}")
self._policy = policy
def _cast_all(self, itype, policy):
c = '?'
data = self.copy_empty()
try:
for c in self.columns:
data._data.at[c] = cast_to_itype(self._data.at[c], itype, policy=policy)
except Exception as e:
raise type(e)(f"Column {c}: " + str(e)) from e
def __len__(self):
return len(self.columns)
@property @property
def indexes(self): def indexes(self):
...@@ -412,23 +69,10 @@ class DictOfSeries: ...@@ -412,23 +69,10 @@ class DictOfSeries:
def lengths(self): def lengths(self):
return self._data.apply(len) return self._data.apply(len)
@property
def empty(self):
return len(self) == 0 or all(s.empty for s in self._data)
@property @property
def size(self): def size(self):
return self.lengths.sum() return self.lengths.sum()
def __iter__(self):
yield from self.columns
def __reversed__(self):
yield from reversed(self.columns)
def __contains__(self, item):
return item in self.columns
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Dict-like methods # Dict-like methods
...@@ -480,7 +124,7 @@ class DictOfSeries: ...@@ -480,7 +124,7 @@ class DictOfSeries:
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Broadcasting methods and helper # Broadcasting methods and helper
def for_each(self, attr_or_callable, **kwargs): def for_each(self, attr_or_callable, **kwds):
""" """
Apply a callable or a pandas.Series method or property on each column. Apply a callable or a pandas.Series method or property on each column.
...@@ -492,7 +136,7 @@ class DictOfSeries: ...@@ -492,7 +136,7 @@ class DictOfSeries:
could be specified as string. If a callable is given it must take could be specified as string. If a callable is given it must take
pandas.Series as the only positional argument. pandas.Series as the only positional argument.
**kwargs: any **kwds: any
kwargs to passed to callable kwargs to passed to callable
Returns Returns
...@@ -554,7 +198,7 @@ class DictOfSeries: ...@@ -554,7 +198,7 @@ class DictOfSeries:
for c in self.columns: for c in self.columns:
dat = self._data.at[c] dat = self._data.at[c]
if call: if call:
data.at[c] = attr_or_callable(dat, **kwargs) data.at[c] = attr_or_callable(dat, **kwds)
else: else:
data.at[c] = attr_or_callable.fget(dat) data.at[c] = attr_or_callable.fget(dat)
return data return data
...@@ -618,7 +262,7 @@ class DictOfSeries: ...@@ -618,7 +262,7 @@ class DictOfSeries:
dat = self._data.at[c].values if raw else self._data.at[c] dat = self._data.at[c].values if raw else self._data.at[c]
s = func(dat, *args, **kwds) s = func(dat, *args, **kwds)
result.at[c] = s result.at[c] = s
if _is_scalar(s): if pdcom.is_scalar(s):
need_convert = True need_convert = True
else: else:
need_dios = True need_dios = True
...@@ -628,7 +272,7 @@ class DictOfSeries: ...@@ -628,7 +272,7 @@ class DictOfSeries:
if need_convert: if need_convert:
for c in result.index: for c in result.index:
result.at[c] = pd.Series(result[c]) result.at[c] = pd.Series(result[c])
itype = _find_least_common_itype(result) itype = _find_least_common_itype(result)
result = DictOfSeries(data=result, itype=itype, fastpath=True) result = DictOfSeries(data=result, itype=itype, fastpath=True)
else: else:
raise ValueError(axis) raise ValueError(axis)
...@@ -739,6 +383,17 @@ class DictOfSeries: ...@@ -739,6 +383,17 @@ class DictOfSeries:
data = self.for_each('astype', dtype=dtype, copy=copy, errors=errors) data = self.for_each('astype', dtype=dtype, copy=copy, errors=errors)
return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
def memory_usage(self, index=True, deep=False):
return self.for_each(pd.Series.memory_usage, index=index, deep=deep).sum()
def to_df(self):
df_or_ser = self._data.apply(lambda s: s).transpose()
return pd.DataFrame() if isinstance(df_or_ser, pd.Series) else df_or_ser
@property
def debugDf(self):
return self.to_df()
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
# Boolean stuff # Boolean stuff
...@@ -781,34 +436,6 @@ class DictOfSeries: ...@@ -781,34 +436,6 @@ class DictOfSeries:
data = self.for_each('notna') data = self.for_each('notna')
return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True) return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
# ----------------------------------------------------------------------
# if copy.copy() is copy.copy(): return copy.copy().copy()
def __copy__(self):
return self.copy(deep=True)
def __deepcopy__(self, memo=None):
return self.copy(deep=True)
def copy(self, deep=True):
if deep:
data = pd.Series(dtype='O', index=self.columns)
for c in self.columns:
data.at[c] = self._data.at[c].copy(deep=True)
else:
data = self._data
kws = dict(itype=self._itype, cast_policy=self._policy)
return DictOfSeries(data=data, fastpath=True, **kws)
def copy_empty(self, columns=True):
data = None
if columns is True: # is correct
data = pd.Series(dtype='O', index=self.columns)
for c in self.columns:
data.at[c] = pd.Series(dtype=self._data.at[c].dtype)
kws = dict(itype=self._itype, cast_policy=self._policy)
return DictOfSeries(data=data, fastpath=True, **kws)
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
# Rendering Methods # Rendering Methods
...@@ -875,113 +502,6 @@ class DictOfSeries: ...@@ -875,113 +502,6 @@ class DictOfSeries:
return pprint_dios(self, **kwargs) return pprint_dios(self, **kwargs)
def memory_usage(self, index=True, deep=False):
    """Sum of the memory usage (bytes) of all columns; args are passed to Series.memory_usage."""
    return self.for_each(pd.Series.memory_usage, index=index, deep=deep).sum()
def to_df(self):
    """Convert to a pandas DataFrame; an empty frame is returned if the expansion yields a Series."""
    # apply(identity) over the series-of-series expands to a frame, then transpose
    df_or_ser = self._data.apply(lambda s: s).transpose()
    return pd.DataFrame() if isinstance(df_or_ser, pd.Series) else df_or_ser
@property
def debugDf(self):
    """DataFrame representation, exposed as a property for debugger views."""
    return self.to_df()
# ------------------------------------------------------------------------------
# Operators
def _op1(self, op):
    """Apply the unary operator `op` column-wise and return a new container.

    Any error is re-raised as the same exception type, with a message
    naming the failed dios operation.
    """
    result = self.copy_empty(columns=True)
    try:
        for col in self.columns:
            result[col] = op(self[col])
    except Exception as e:
        raise type(e)(f"'{_OP_MAP[op]} dios' failed: " + str(e)) from e
    return result
def _op2(self, op, other, align=True, inplace=False):
    """Apply the binary operator `op` between self and `other`, column-wise.

    `other` may be dios/frame-like, a Series, a dict-like, a nested
    list-like (one sequence per column), or a scalar/flat list-like.
    NOTE: the dispatch order below matters — a pd.Series would
    presumably also pass the dict-like/list-like probes, so it must be
    handled before them (verify against the aliased pandas helpers).
    """
    def raiseif(kself, kother, s):
        # guard: both operands must agree on `s` (keys or length)
        if kself != kother:
            raise ValueError(f"{s} does not match, {s} left: {kself}, {s} right: {kother}")

    def doalign(left, right):
        # inner-align the two series unless alignment was disabled
        return left.align(right, join='inner') if align else (left, right)

    def gen():
        # yields (column, result) pairs; dispatch on the type of `other`
        if _is_dios_like(other):
            raiseif(list(self), list(other), 'keys')
            for k in self.columns:
                left, right = self[k], other[k]
                yield k, op(*doalign(left, right))
        elif isinstance(other, pd.Series):
            # one Series combined against every column
            for k in self.columns:
                left, right = self[k], other
                yield k, op(*doalign(left, right))
        elif _is_dict_like(other):
            raiseif(sorted(self), sorted(other), 'keys')
            for k in self.columns:
                yield k, op(self[k], other[k])
        elif _is_nested_list_like(other):
            # positional match: i-th inner sequence to i-th column
            raiseif(len(self), len(other), 'length')
            for i, k in enumerate(self.columns):
                yield k, op(self[k], other[i])
        elif _is_scalar(other) or _is_list_like(other):
            # broadcast the same value against every column
            for k in self.columns:
                yield k, op(self[k], other)
        else:
            raise NotImplementedError

    new = self if inplace else self.copy_empty(columns=True)
    try:
        for k, val in gen():
            new[k] = val
    except Exception as e:
        # re-raise with the same type, naming the failed dios operation
        raise type(e)(f"'dios {_OP_MAP[op]} other' failed: " + str(e)) from e
    return new
# Unary operators — routed through _op1
__neg__ = ftools.partialmethod(_op1, op.neg)
__abs__ = ftools.partialmethod(_op1, op.abs)
__invert__ = ftools.partialmethod(_op1, op.inv)
# Comparison operators — routed through _op2 without index alignment
__eq__ = ftools.partialmethod(_op2, op.eq, align=False)
__ne__ = ftools.partialmethod(_op2, op.ne, align=False)
__le__ = ftools.partialmethod(_op2, op.le, align=False)
__ge__ = ftools.partialmethod(_op2, op.ge, align=False)
__lt__ = ftools.partialmethod(_op2, op.lt, align=False)
__gt__ = ftools.partialmethod(_op2, op.gt, align=False)
# Arithmetic operators — routed through _op2 with (default) inner alignment
__add__ = ftools.partialmethod(_op2, op.add)
__sub__ = ftools.partialmethod(_op2, op.sub)
__mul__ = ftools.partialmethod(_op2, op.mul)
__mod__ = ftools.partialmethod(_op2, op.mod)
__truediv__ = ftools.partialmethod(_op2, op.truediv)
__floordiv__ = ftools.partialmethod(_op2, op.floordiv)
__pow__ = ftools.partialmethod(_op2, op.pow)
# Bitwise/logical operators — also aligned via _op2
__and__ = ftools.partialmethod(_op2, op.and_)
__or__ = ftools.partialmethod(_op2, op.or_)
__xor__ = ftools.partialmethod(_op2, op.xor)
# ------------------------------------------------------------------------------
# Indexer
@property
def loc(self):
    """Return the label-based indexer for this object."""
    return _LocIndexer(self)

@property
def iloc(self):
    """Return the integer-position-based indexer for this object."""
    return _iLocIndexer(self)

@property
def aloc(self):
    """Return the dios-specific indexer (see _aLocIndexer for semantics)."""
    return _aLocIndexer(self)

@property
def at(self):
    """Return the single-value, label-based accessor."""
    return _AtIndexer(self)

@property
def iat(self):
    """Return the single-value, integer-position-based accessor."""
    return _iAtIndexer(self)
def _empty_repr(di): def _empty_repr(di):
return f"Empty DictOfSeries\n" \ return f"Empty DictOfSeries\n" \
...@@ -1119,35 +639,6 @@ def _to_aligned_df(dios, no_value=' '): ...@@ -1119,35 +639,6 @@ def _to_aligned_df(dios, no_value=' '):
return df return df
def _is_list_like_not_nested(obj):
    """True for flat list-likes: list-like, but without list-like elements."""
    if not _is_list_like(obj):
        return False
    return not _is_nested_list_like(obj)
def _is_dios_like(obj) -> bool:
    """Return True for objects that can be treated like a DictOfSeries.

    Such objects:
      - have `columns` (some kind of pd.Index)
      - iterate over their columns
      - support ``x in obj`` as a column-membership test
      - give a pd.Series for ``obj[key]``
      - ``obj.squeeze()`` gives a pd.Series if ``len(obj) == 1``
    """
    # idiomatic single isinstance with a tuple of accepted types
    return isinstance(obj, (DictOfSeries, pd.DataFrame))
def _is_bool_series(obj) -> bool:
    """True iff `obj` is a pandas Series with plain bool dtype."""
    if not isinstance(obj, pd.Series):
        return False
    return obj.dtype == bool
def _is_bool_dios_like(obj) -> bool:
    """True iff `obj` is dios/frame-like and every column is boolean.

    Object-dtyped columns may still hold plain bools, so those are
    checked element-wise via the pandas bool-indexer predicate.
    """
    if not _is_dios_like(obj):
        return False
    dtypes = obj.dtypes
    all_bool = (dtypes == bool).all()
    if all_bool:
        return True
    has_object_cols = (dtypes == 'O').any()
    if has_object_cols:
        return obj.apply(_is_bool_indexer).all()
    return False
def to_dios(obj) -> DictOfSeries: def to_dios(obj) -> DictOfSeries:
if isinstance(obj, DictOfSeries): if isinstance(obj, DictOfSeries):
return obj return obj
...@@ -1163,6 +654,3 @@ def __monkey_patch_pandas(): ...@@ -1163,6 +654,3 @@ def __monkey_patch_pandas():
__monkey_patch_pandas() __monkey_patch_pandas()
# keep this here to prevent cyclic import
from .indexer import _aLocIndexer, _iLocIndexer, _LocIndexer, _iAtIndexer, _AtIndexer
from .dios import ( from .base import (
DictOfSeries, _DiosBase,
_is_dios_like, _is_dios_like,
_is_bool_series,
_is_list_like_not_nested,
_is_bool_dios_like, _is_bool_dios_like,
_is_iterator) _is_iterator)
...@@ -23,7 +21,7 @@ _is_null_slice = ccom.is_null_slice ...@@ -23,7 +21,7 @@ _is_null_slice = ccom.is_null_slice
class _Indexer: class _Indexer:
def __init__(self, obj: DictOfSeries): def __init__(self, obj: _DiosBase):
self.obj = obj self.obj = obj
self._data = obj._data self._data = obj._data
...@@ -271,7 +269,7 @@ class _aLocIndexer(_Indexer): ...@@ -271,7 +269,7 @@ class _aLocIndexer(_Indexer):
if lowdim: if lowdim:
return data.squeeze() return data.squeeze()
else: else:
return DictOfSeries(data=data, fastpath=True, **kws) return self.obj._constructor(data=data, fastpath=True, **kws)
def __setitem__(self, key, value): def __setitem__(self, key, value):
rowkeys, colkeys, _ = self._unpack_key_aloc(key) rowkeys, colkeys, _ = self._unpack_key_aloc(key)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment