From 28c06f478065fa6f6bd4393728d7bf30f04135fd Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Tue, 11 Feb 2020 19:02:42 +0100
Subject: [PATCH] options, lib, keep track of index type

---
 dios/dios.py    | 152 ++++++++++++++++++++++++------------------------
 dios/lib.py     |  96 ++++++++++++++++++++++++------
 dios/options.py |  41 +++++++++++++
 3 files changed, 195 insertions(+), 94 deletions(-)
 create mode 100644 dios/options.py

diff --git a/dios/dios.py b/dios/dios.py
index a748a56..7482402 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -14,69 +14,6 @@ from pandas.core.dtypes.common import (
 from pandas.core.indexing import need_slice
 
 
-def item_from_zerodim(key):
-    # if isinstance(key, DictOfSeries) and len(key) == 1:
-    #     todo what if squeeze return a 1-value-series? squeeze again?
-    # return key.squeeze()
-    return pdlib.item_from_zerodim(key)
-
-
-class _LocIndexer:
-
-    def __init__(self, _dios):
-        self._dios = _dios
-        # short handles
-        self._data = _dios._data
-        self._check_keys = _dios._check_keys
-
-    def __getitem__(self, key):
-        # if we have a tuple, we have rows and columns
-        # if not we have only rows and work on all columns
-        if isinstance(key, tuple):
-            rkey, ckey, *fail = key
-            if fail:
-                raise KeyError("To many indexers")
-
-            # prepare ckey
-            if is_iterator(ckey):
-                ckey = list(ckey)
-
-            # determine columns
-            if isinstance(ckey, str):
-                self._check_keys([ckey])
-                cols = [ckey]
-            elif isinstance(ckey, slice):
-                cols = self._col_slice_to_col_list(ckey)
-            elif is_list_like(ckey):
-                self._check_keys(ckey)
-                cols = ckey
-            else:
-                raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
-        else:
-            cols = self._data.keys()
-            rkey = key
-
-        # pass the row-key directly to pd.Series.loc[row-key]
-        new = DictOfSeries()
-        for c in cols:
-            new[c] = self._data[c].loc[rkey]
-        return new
-
-    def _col_slice_to_col_list(self, rslice):
-        """ see here:
-        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
-        """
-        keys = list(self._data.keys)
-        try:
-            start = keys.index(rslice.start) if rslice.start is not None else None
-            stop = keys.index(rslice.stop) if rslice.stop is not None else None
-        except ValueError:
-            raise KeyError("The slice start label or the slice stop label is not present in the columns.")
-        if not is_integer(rslice) and rslice > 0:
-            raise TypeError("The step parameter of the slice must be positive integer.")
-        return keys[slice(start, stop + 1, rslice.step)]
-
-
 class DictOfSeries:
     """
     DictionaryOfSeries is a collection of pd.Series's which aim to be as close as possible similar to
@@ -112,10 +49,16 @@ class DictOfSeries:
     def __init__(self, indextype=None, **kwargs):
         self._data = OrderedDict()
 
-        # We need to keep track if the index type of every Series is the
-        # same, because if we have different types, it would make slicing
-        # impossible.
-        self._indextype = None
+        # We need to keep track of the type of the index of every new Series.
+        # If the types differ, slicing will almost always fail, because a datetime-like
+        # slice cannot work on a numeric index and vice versa.
+        if indextype is not None:
+            indextype = get_indextype(indextype)
+            check_mixed_indextype_option(indextype)
+            check_allowed_indextypes(indextype)
+        self._indextype = indextype
+
+        # fill initial given values in the dios
         for kw in kwargs:
             self[kw] = kwargs[kw]
 
@@ -146,17 +89,15 @@ class DictOfSeries:
         return self._indextype
 
     def _set_indextype(self, idx):
-        itype = 'other'
-        if is_dtIndex_like(idx):
-            itype = 'datetime'
-        elif is_numIndex_like(idx):
-            itype = 'numeric'
+        """Update the tracked index type of the dios with the type of ``idx``.
+
+        If no type is tracked yet, the type of ``idx`` (see ``get_indextype``) is taken.
+        If the type of ``idx`` differs from the tracked one, it becomes ``IdxTypes.mixed``.
+        """
         if self._indextype is None:
-            self._indextype = itype
-            return
-        if self._indextype == itype:
-            return
-        self._indextype = 'mixed'
+            self._indextype = get_indextype(idx)
+        elif self._indextype != get_indextype(idx):
+            self._indextype = IdxTypes.mixed
 
     def _check_keys(self, keys):
         missing = [k for k in keys if k not in self.columns]
@@ -535,3 +476,60 @@ class DictOfSeries:
             return None
         return news.squeeze()
 
+
+class _LocIndexer:
+    """Indexer that selects rows, and optionally columns, of a DictOfSeries by label."""
+
+    def __init__(self, _dios):
+        self._dios = _dios
+        # short handles
+        self._data = _dios._data
+        self._check_keys = _dios._check_keys
+
+    def __getitem__(self, key):
+        """Return a new DictOfSeries with ``series.loc[rowkey]`` of every selected column."""
+        # a tuple holds (rowkey, colkey), any other key selects rows from all columns
+        if isinstance(key, tuple):
+            rkey, ckey, *fail = key
+            if fail:
+                raise KeyError("To many indexers")
+            # prepare ckey
+            if is_iterator(ckey):
+                ckey = list(ckey)
+            # determine columns
+            if isinstance(ckey, str):
+                self._check_keys([ckey])
+                cols = [ckey]
+            elif isinstance(ckey, slice):
+                cols = self._col_slice_to_col_list(ckey)
+            elif is_list_like(ckey):
+                self._check_keys(ckey)
+                cols = ckey
+            else:
+                raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
+        else:
+            cols = self._data.keys()
+            rkey = key
+
+        # pass the row-key directly to pd.Series.loc[row-key]
+        new = DictOfSeries()
+        for c in cols:
+            new[c] = self._data[c].loc[rkey]
+        return new
+
+    def _col_slice_to_col_list(self, rslice):
+        """Resolve a label slice to the list of column names it covers, see here:
+        https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
+        """
+        keys = list(self._data.keys())
+        try:
+            start = None if rslice.start is None else keys.index(rslice.start)
+            # +1 because a label slice includes its stop label; an open end (None) stays open
+            stop = None if rslice.stop is None else keys.index(rslice.stop) + 1
+        except ValueError:
+            raise KeyError("The slice start label or the slice stop label is not present in the columns.")
+        if rslice.step is not None and not (is_integer(rslice.step) and rslice.step > 0):
+            raise TypeError("The step parameter of the slice must be positive integer.")
+        return keys[start:stop:rslice.step]
+
+
diff --git a/dios/lib.py b/dios/lib.py
index a50950f..9febd21 100644
--- a/dios/lib.py
+++ b/dios/lib.py
@@ -1,29 +1,91 @@
-
 import pandas as pd
-import pandas._libs.lib as pdlib
+import warnings
+from dios.options import *
+
+
+def _get_storage_class_values(cls):
+    return [getattr(cls, name) for name in vars(cls) if not name.startswith("_")]  # public attrs only
 
-class IdxType:
+
+class IdxTypes:
     nunmeric = 'numeric'
     datetime = 'datetime'
     mixed = 'mixed'
     other = 'other'
 
-def is_dtIndex_like(i):
-    return isinstance(i, pd.DatetimeIndex)
 
+idxtypes = _get_storage_class_values(IdxTypes)
+
+
+def check_mixed_indextype_option(idxtype):
+    """Warn if the mixed-indextype option is enabled; ``idxtype`` itself is not inspected."""
+    if dios_options[Options.mixed_indextyes]:
+        warnings.warn(f"Using dios_option[{Options.mixed_indextyes}]=True is highly experimental, "
+                      f"please do not report any bugs!", DiosOptionsWarning)
+
+
+def check_allowed_indextypes(idxtype):
+    if idxtype not in dios_options[Options.allowed_indextypes]:  # honour the option, not a copy of it
+        raise ValueError("The index of the given object is not of supported type")
+
+
+def get_indextype(obj):
+    """Return the ``IdxTypes`` value for ``obj``.
+
+    ``obj`` can be a pandas index, a pandas index class or an ``IdxTypes`` value.
+    Only strings are matched against ``idxtypes``, as ``==`` is elementwise on array-likes.
+    """
+    if _is_dtIndex_like(obj):
+        return IdxTypes.datetime
+    if _is_numIndex_like(obj):
+        return IdxTypes.nunmeric
+    if _is_pdIndex_like(obj):
+        return IdxTypes.other
+    if isinstance(obj, str) and obj in idxtypes:
+        return obj
+    raise ValueError(f"{type(obj)} is not a indextype nor any known subtype of pd.Index")
+
+
+def _is_dtIndex_like(i):
+    """Return True if ``i`` is a pd.DatetimeIndex instance or the pd.DatetimeIndex class."""
+    if isinstance(i, pd.DatetimeIndex):
+        return True
+    # Identity check instead of ``==``: on a pd.Index instance ``==`` compares
+    # elementwise, so the truth value of the result is ambiguous (ValueError)
+    # and any longer non-datetime index would crash here.
+    return i is pd.DatetimeIndex
+
+
+def _is_numIndex_like(i):
+    """Return True if ``i`` is a numeric pandas index or one of the numeric index classes.
+
+    Numeric means pd.RangeIndex, pd.Int64Index, pd.UInt64Index or pd.Float64Index.
+    """
+    tup = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
+    if isinstance(i, tup):
+        return True
+    # was a pd.xxxIndex class given? Compare by identity: ``==`` against a
+    # pd.Index instance is elementwise and its truth value is ambiguous
+    # (ValueError), which the former ``except TypeError`` did not catch.
+    return any(it is i for it in tup)
 
-def is_numIndex_like(i):
-    return isinstance(i, (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index))
 
-dios_options = dict(
-    disp_max_rows=10,
-    disp_max_vars=4,
+def _is_pdIndex_like(i):
+    """Return True if ``i`` is a pd.Index instance or one of the pandas index classes.
+
+    See here:
+    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.html#pandas.Index
+    """
+    if isinstance(i, pd.Index):
+        return True
+    tup = (pd.RangeIndex, pd.CategoricalIndex, pd.MultiIndex, pd.IntervalIndex,
+           pd.DatetimeIndex, pd.TimedeltaIndex,
+           pd.PeriodIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
+    # was a pd.xxxIndex class given? Identity instead of ``==``, because ``==``
+    # is elementwise for array-likes (pd.Series, np.ndarray) and the truth value
+    # of its result is ambiguous (ValueError).
+    # NOTE(review): ``pd.Index`` itself is not in ``tup``, so the base class is
+    # reported as not index-like - confirm whether this is intended.
+    return any(it is i for it in tup)
 
-    # 0: accept all
-    # 1: accept if at least one keys is is in both DioS
-    # 2: accept if all keys of the src-DioS in the dest-DioS
-    # 3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
-    #   otherwise its the same than creating a new dios)
-    dios_to_dios_method=3
-)
 
diff --git a/dios/options.py b/dios/options.py
new file mode 100644
index 0000000..8104558
--- /dev/null
+++ b/dios/options.py
@@ -0,0 +1,41 @@
+# Do not import from dios.lib here: dios.lib star-imports this module (circular import).
+
+
+class DiosOptionsWarning(UserWarning):
+    pass
+
+
+class Options:
+    """storage class for dios options dict keys"""
+
+    # Set the number of rows and variables to display in a call that uses
+    # ``__repr__`` or ``__str__``, like e.g. ``print(dios)`` does.
+    disp_max_rows = "disp_max_rows"
+    disp_max_vars = "disp_max_vars"
+
+    # Meaning of the values:
+    # 0: accept all
+    # 1: accept if at least one key is in both DioS
+    # 2: accept if all keys of the src-DioS are in the dest-DioS
+    # 3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
+    #    otherwise it is the same as creating a new dios)
+    dios_to_dios_method = "dios_to_dios_method"
+
+    # Mixed index types:
+    # If we have different types of indexes in the dios, slicing will almost always fail.
+    # It is because, e.g. a numeric slice cannot work on a pd.DatetimeIndex and vice versa.
+    # To set this to True is highly experimental, any arising issues or errors should be
+    # handled by the user.
+    mixed_indextyes = "mixed_indextyes"
+
+    allowed_indextypes = "allowed_indextypes"
+
+
+dios_options = {
+    Options.disp_max_rows: 10,
+    Options.disp_max_vars: 4,
+    Options.dios_to_dios_method: 3,
+    Options.mixed_indextyes: False,
+    Options.allowed_indextypes: ["datetime", "numeric"],  # values of IdxTypes.datetime / .nunmeric
+}
+
-- 
GitLab