options, lib, keep track of index type

28c06f47 · Bert Palm · 9e154aa1 · 28c06f47 · 28c06f47 · 28c06f47
Commit 28c06f47 authored 5 years ago by Bert Palm 🎇
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -14,69 +14,6 @@ from pandas.core.dtypes.common import (
 from pandas.core.indexing import need_slice


-def item_from_zerodim(key):
-    # if isinstance(key, DictOfSeries) and len(key) == 1:
-    #     todo what if squeeze return a 1-value-series? squeeze again?
-    # return key.squeeze()
-    return pdlib.item_from_zerodim(key)
-
-
-class _LocIndexer:
-
-    def __init__(self, _dios):
-        self._dios = _dios
-        # short handles
-        self._data = _dios._data
-        self._check_keys = _dios._check_keys
-
-    def __getitem__(self, key):
-        # if we have a tuple, we have rows and columns
-        # if not we have only rows and work on all columns
-        if isinstance(key, tuple):
-            rkey, ckey, *fail = key
-            if fail:
-                raise KeyError("To many indexers")
-
-            # prepare ckey
-            if is_iterator(ckey):
-                ckey = list(ckey)
-
-            # determine columns
-            if isinstance(ckey, str):
-                self._check_keys([ckey])
-                cols = [ckey]
-            elif isinstance(ckey, slice):
-                cols = self._col_slice_to_col_list(ckey)
-            elif is_list_like(ckey):
-                self._check_keys(ckey)
-                cols = ckey
-            else:
-                raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
-        else:
-            cols = self._data.keys()
-            rkey = key
-
-        # pass the row-key directly to pd.Series.loc[row-key]
-        new = DictOfSeries()
-        for c in cols:
-            new[c] = self._data[c].loc[rkey]
-        return new
-
-    def _col_slice_to_col_list(self, rslice):
-        """ see here:
-        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
-        """
-        keys = list(self._data.keys)
-        try:
-            start = keys.index(rslice.start) if rslice.start is not None else None
-            stop = keys.index(rslice.stop) if rslice.stop is not None else None
-        except ValueError:
-            raise KeyError("The slice start label or the slice stop label is not present in the columns.")
-        if not is_integer(rslice) and rslice > 0:
-            raise TypeError("The step parameter of the slice must be positive integer.")
-        return keys[slice(start, stop + 1, rslice.step)]
-
-
 class DictOfSeries:
    """
    DictionaryOfSeries is a collection of pd.Series's which aim to be as close as possible similar to
@@ -112,10 +49,16 @@ class DictOfSeries:
    def __init__(self, indextype=None, **kwargs):
        self._data = OrderedDict()

-        # We need to keep track if the index type of every Series is the
-        # same, because if we have different types, it would make slicing
-        # impossible.
-        self._indextype = None
+        # We need to keep track of the type of the index of every new Series.
+        # If the types differ, slicing will almost always fail, because a datetime-like
+        # slice cannot work on a numeric index and vice versa.
+        if indextype is not None:
+            indextype = get_indextype(indextype)
+            check_mixed_indextype_option(indextype)
+            check_allowed_indextypes(indextype)
+        self._indextype = indextype
+
+        # fill initial given values in the dios
        for kw in kwargs:
            self[kw] = kwargs[kw]

@@ -146,17 +89,15 @@ class DictOfSeries:
        return self._indextype

    def _set_indextype(self, idx):
-        itype = 'other'
-        if is_dtIndex_like(idx):
-            itype = 'datetime'
-        elif is_numIndex_like(idx):
-            itype = 'numeric'
+        """ Set indextype of dios.
+
+        Note: If the index type of ``idx`` equals ``self._indextype``, the latter
+        stays unchanged; if the two differ, it is set to ``IdxTypes.mixed``.
+        """
        if self._indextype is None:
-            self._indextype = itype
-            return
-        if self._indextype == itype:
-            return
-        self._indextype = 'mixed'
+            self._indextype = get_indextype(idx)
+        elif self._indextype != get_indextype(idx):
+            self._indextype = IdxTypes.mixed

    def _check_keys(self, keys):
        missing = [k for k in keys if k not in self.columns]
@@ -535,3 +476,60 @@ class DictOfSeries:
            return None
        return news.squeeze()

+
+class _LocIndexer:
+
+    def __init__(self, _dios):
+        self._dios = _dios
+        # short handles
+        self._data = _dios._data
+        self._check_keys = _dios._check_keys
+
+    def __getitem__(self, key):
+        # if we have a tuple, we have rows and columns
+        # if not we have only rows and work on all columns
+        if isinstance(key, tuple):
+            rkey, ckey, *fail = key
+            if fail:
+                raise KeyError("To many indexers")
+
+            # prepare ckey
+            if is_iterator(ckey):
+                ckey = list(ckey)
+
+            # determine columns
+            if isinstance(ckey, str):
+                self._check_keys([ckey])
+                cols = [ckey]
+            elif isinstance(ckey, slice):
+                cols = self._col_slice_to_col_list(ckey)
+            elif is_list_like(ckey):
+                self._check_keys(ckey)
+                cols = ckey
+            else:
+                raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
+        else:
+            cols = self._data.keys()
+            rkey = key
+
+        # pass the row-key directly to pd.Series.loc[row-key]
+        new = DictOfSeries()
+        for c in cols:
+            new[c] = self._data[c].loc[rkey]
+        return new
+
+    def _col_slice_to_col_list(self, rslice):
+        """ see here:
+        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
+        """
+        keys = list(self._data.keys)
+        try:
+            start = keys.index(rslice.start) if rslice.start is not None else None
+            stop = keys.index(rslice.stop) if rslice.stop is not None else None
+        except ValueError:
+            raise KeyError("The slice start label or the slice stop label is not present in the columns.")
+        if not is_integer(rslice) and rslice > 0:
+            raise TypeError("The step parameter of the slice must be positive integer.")
+        return keys[slice(start, stop + 1, rslice.step)]
+
+
--- a/dios/lib.py
+++ b/dios/lib.py
-
 import pandas as pd
-import pandas._libs.lib as pdlib
+import warnings
+from dios.options import *
+
+
+def _get_storage_class_values(cls):
+    return [getattr(cls, c) for c in cls.__dict__ if not c.startswith("_")]

-class IdxType:
+
+class IdxTypes:
    nunmeric = 'numeric'
    datetime = 'datetime'
    mixed = 'mixed'
    other = 'other'

-def is_dtIndex_like(i):
-    return isinstance(i, pd.DatetimeIndex)

+idxtypes = _get_storage_class_values(IdxTypes)
+
+
+def check_mixed_indextype_option(idxtype):
+    if dios_options[Options.mixed_indextyes]:
+        warnings.warn(f"Using dios_option[{Options.mixed_indextyes}]=True is highly experimental, "
+                      f"please do not report any bugs!", DiosOptionsWarning)
+        return
+
+
+def check_allowed_indextypes(idxtype):
+    if idxtype not in [IdxTypes.nunmeric, IdxTypes.datetime]:
+        raise ValueError("The index of the given object is not of supported type")
+
+
+def get_indextype(obj):
+    if _is_dtIndex_like(obj):
+        return IdxTypes.datetime
+
+    if _is_numIndex_like(obj):
+        return IdxTypes.nunmeric
+
+    if _is_pdIndex_like(obj):
+        return IdxTypes.other
+
+    for itype in idxtypes:
+        if obj == itype:
+            return itype
+
+    raise ValueError(f"{type(obj)} is not a indextype nor any known subtype of pd.Index")
+
+
+def _is_dtIndex_like(i):
+    if isinstance(i, pd.DatetimeIndex):
+        return True
+    try:
+        if i == pd.DatetimeIndex:
+            return True
+    except TypeError:
+        return False
+
+
+def _is_numIndex_like(i):
+    tup = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
+    if isinstance(i, tup):
+        return True
+    # otherwise check whether a pd.xxxIndex class itself (not an instance) was given
+    for it in tup:
+        try:
+            if it == i:
+                return True
+        except TypeError:
+            pass
+    return False

-def is_numIndex_like(i):
-    return isinstance(i, (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index))

-dios_options = dict(
-    disp_max_rows=10,
-    disp_max_vars=4,
+def _is_pdIndex_like(i):
+    """See here:
+    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.html#pandas.Index
+    """
+    if isinstance(i, pd.Index):
+        return True
+    tup = (pd.RangeIndex, pd.CategoricalIndex, pd.MultiIndex, pd.IntervalIndex,
+           pd.DatetimeIndex, pd.TimedeltaIndex,
+           pd.PeriodIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
+    # otherwise check whether a pd.xxxIndex class itself (not an instance) was given
+    for it in tup:
+        try:
+            if it == i:
+                return True
+        except TypeError:
+            pass
+    return False

-    # 0: accept all
-    # 1: accept if at least one keys is is in both DioS
-    # 2: accept if all keys of the src-DioS in the dest-DioS
-    # 3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
-    #   otherwise its the same than creating a new dios)
-    dios_to_dios_method=3
-)

--- a/dios/options.py
+++ b/dios/options.py
+from dios.lib import IdxTypes
+
+
+class DiosOptionsWarning(UserWarning):
+    pass
+
+
+class Options:
+    """storage class for dios options dict keys"""
+
+    """Set the number of rows and variables to display in a call that uses
+    ``__repr__`` or ``__str__``, e.g. ``print(dios)``."""
+    disp_max_rows = "disp_max_rows "
+    disp_max_vars = "disp_max_vars"
+
+    """
+    0: accept all
+    1: accept if at least one key is in both DioS
+    2: accept if all keys of the src-DioS are in the dest-DioS
+    3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
+      otherwise it is the same as creating a new dios)"""
+    dios_to_dios_method = "dios_to_dios_method"
+
+    """
+    If we have different types of indexes in the dios, slicing will almost always fail.
+    This is because, e.g., a numeric slice cannot work on a pd.DatetimeIndex and vice versa.
+    Setting this to True is highly experimental; any arising issues or errors should be
+    handled by the user."""
+    mixed_indextyes = "mixed_indextyes"
+
+    allowed_indextypes = "allowed_indextypes"
+
+
+dios_options = {
+    Options.disp_max_rows : 10,
+    Options.disp_max_vars: 4,
+    Options.dios_to_dios_method: 3,
+    Options.mixed_indextyes: False,
+    Options.allowed_indextypes: [IdxTypes.datetime, IdxTypes.nunmeric]
+}
+