From c863d8a16220375c1a85c7a22b39c913148e2a40 Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Fri, 14 Feb 2020 18:37:38 +0100
Subject: [PATCH] Move exceptions to dios.errors; rework itype casting and dios-to-dios key options

---
 dios/__init__.py |   5 ++
 dios/dios.py     | 191 ++++++++++++++++++++---------------------------
 dios/errors.py   |  24 ++++++
 dios/itypes.py   |  73 +++++++++++-------
 dios/lib.py      |  52 +------------
 dios/options.py  |  45 +++++++++--
 6 files changed, 196 insertions(+), 194 deletions(-)
 create mode 100644 dios/errors.py

diff --git a/dios/__init__.py b/dios/__init__.py
index 34cb18b..4b13497 100644
--- a/dios/__init__.py
+++ b/dios/__init__.py
@@ -1,5 +1,10 @@
 
+# low level
+from dios.errors import *
 from dios.lib import *
 from dios.options import *
+
+# high level
+from dios.itypes import *
 from dios.dios import *
 
diff --git a/dios/dios.py b/dios/dios.py
index 2e3701a..b453add 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -1,5 +1,7 @@
 from dios.lib import *
 from dios.options import *
+from dios.itypes import *
+from dios.errors import *
 import pandas as pd
 import numpy as np
 import operator as op
@@ -57,24 +59,21 @@ class DictOfSeries:
 
     """
 
-    def __init__(self, data=None, columns=None, itype=None, downcast_policy='lossless'):
+    def __init__(self, data=None, columns=None, itype=MixedItype, downcast_policy='lossless'):
 
         self._data = OrderedDict()
 
         # We need to keep track of the index-type (itype) of every new Series.
         # If the itypes differ between different series, slicing will almost always fail
         # (eg. a datetime-like slice cannot work on a numeric index and vice versa).
-        #
-        # May data was given, so we firstly set itype to MixedItype, then insert all data,
-        # and check/cast the itype afterwards, otherwise __setitem_new() will set the itype,
-        # which may prevent inserting series with other (higher) itypes.
+        self._itype = None
+
         with reraise("param itype: "):
-            self._itype = get_itype(itype)
+            self.itype = get_itype(itype)
 
-        policies = ['force', 'lossless', 'never']
-        if downcast_policy not in policies:
-            raise ValueError(f"downcast_policy must be one of {policies}")
-        self._downcast_policy = downcast_policy
+        if downcast_policy not in CAST_POLICIES:
+            raise ValueError(f"downcast_policy must be one of {CAST_POLICIES}")
+        self._policy = downcast_policy
 
         if data is not None:
             self.__init_insert_data__(data)
@@ -83,76 +82,23 @@ class DictOfSeries:
         if columns is not None:
             self.columns = columns
 
-        # infer the itype by the data
-        inferred_itype = self.__find_least_common_itype()
-        itype = inferred_itype if itype is None else get_itype(itype)
-
-        # We use the itype.setter to make all checks. If the given itype was of a lower type
-        # than the inferred itype, a cast is tried on every series.
-        if itype is not None:
-            self.itype = itype
-
-        # user created a empty dios: data=None(->inferred=None), itype=None
-        else:
-            self._itype = None
-
     def __init_insert_data__(self, data):
         if isinstance(data, DictOfSeries):
             for k in data:
                 self[k] = data[k]
-
-        if is_iterator(data):
-            data = list(data)
-
-        if is_dict_like(data):
-            for k in data:
-                self[k] = data[k]
-            return
-
-        # take care: dict's also list-like
-        if is_nested_list_like(data):
-            for i, d in enumerate(data):
-                self[str(i)] = d
-            return
-
-        if is_list_like(data):
-            self['0'] = data
-            return
-
-    def __find_least_common_itype(self):
-
-        def all_itypes_le(itypes, super_itype):
-            for itype in itypes:
-                if itype_le(itype, super_itype):
-                    continue
-                return False
-            return True
-
-        itypes = []
-        for k in self.columns:
-            itypes.append(get_itype(self._data[k].index))
-
-        if not itypes:
-            return None
-
-        found = None
-
-        # check supertypes
-        super_itypes = [MixedItype, NumericItype]
-        for super_itype in super_itypes:
-            if all_itypes_le(itypes, super_itype):
-                found = super_itype
-                continue
-            break
-        assert found, "At least this should be MixedItype"
-
-        # check base types
-        single_itypes = [DatetimeItype, IntegerItype, FloatItype]
-        for single_itype in single_itypes:
-            if all_itypes_le(itypes, single_itype):
-                found = single_itype
-                break
-        return found
+        else:
+            if is_iterator(data):
+                data = list(data)
+
+            if is_dict_like(data):
+                for k in data:
+                    self[k] = data[k]
+            elif is_nested_list_like(data):
+                for i, d in enumerate(data):
+                    self[str(i)] = d
+            elif is_list_like(data):
+                self['0'] = data
+        return
 
     @property
     def columns(self):
@@ -178,6 +124,7 @@ class DictOfSeries:
 
     @property
     def values(self):
+        # will make all series the same length, inserting NaNs
         return to_object_array(self._data.values()).transpose()
 
     @property
@@ -192,22 +139,20 @@ class DictOfSeries:
     def itype(self, itype_like):
         itype = get_itype(itype_like)
 
-        if not is_itype_subtype(self._itype, itype):
-            # try to cast all series to the new itype
+        if not itype_le(self._itype, itype):
             self.__cast_all(itype)
 
         self._itype = itype
 
         if not itype.unique:
-            throwMixedItypeErrWarn(f"Using a {itype} as dios.itype is experimental. As soon as series with "
-                                   f"different index types are inserted, slicing will almost always fail. "
-                                   f"You are hereby warned!")
+            throw_MixedItype_err_or_warn(f"Using a {itype} as dios.itype is experimental. As soon as series with "
+                                         f"different index types are inserted, slicing will almost always fail. "
+                                         f"You are hereby warned!")
 
     def __cast_all(self, itype):
         for k in self.columns:
-            casted = cast_to_fit_itype(self._data[k].copy(), itype, policy=self._downcast_policy)
-            if casted is None:
-                raise ItypeCastError(f"Cast series indicees to the given itype failed for series in column {k}.")
+            with reraise(f"Column {k}: "):
+                casted = cast_to_itype(self._data[k], itype, policy=self._policy)
             self._data[k] = casted
 
     def _check_keys(self, keys):
@@ -237,7 +182,7 @@ class DictOfSeries:
         if isinstance(key, slice):
             return self._slice(self.columns, key)
 
-        if is_list_like(key):
+        if is_list_like(key) and not is_nested_list_like(key):
             self._check_keys(key)
             return self._getitem_listlike(key)
 
@@ -250,14 +195,14 @@ class DictOfSeries:
     def _getitem_listlike(self, keys):
         new = self.copy_empty()
         for k in keys:
-            new[k] = self._get_item(k)
+            new._data[k] = self._get_item(k)
         return new
 
     def _slice(self, keys, slicer):
         """ Return a slice of self"""
         new = self.copy_empty()
         for k in keys:
-            new[k] = self._get_item(k)[slicer]
+            new._data[k] = self._get_item(k)[slicer]
         return new
 
     def __setitem__(self, key, value):
@@ -310,30 +255,15 @@ class DictOfSeries:
 
     def _setitem_new(self, key, value, bypass_checks=False):
         v = value
+        if isinstance(v, DictOfSeries):
+            v = v.squeeze()
+        elif is_list_like(v) and not is_nested_list_like(v):
+            v = pd.Series(v)
 
-        # if the checks was already done, we skip them here,
-        # also the Errormessage wouldn't fully apply.
-        if not bypass_checks:
-            if isinstance(v, DictOfSeries):
-                v = v.squeeze()
-
-            elif is_list_like(v):
-                v = pd.Series(v)  # upcast
-
-            if not isinstance(v, pd.Series):
-                raise ValueError(f"Only pd.Series and DictOfSeries (of length 1) can be assigned new")
-
-        itype = get_itype(v.index)
-
-        if self._itype is None:
-            # if the user created a empty dios
-            self._itype = itype
-
-        v = cast_to_fit_itype(v, self._itype, policy=self._downcast_policy)
-        if v is None:
-            raise ValueError(f"Itype mismach. Policy `{self._downcast_policy}` forbid to down-cast"
-                             f"itype `{itype}` to itype `{self.itype}`. key: {key}")
+        if not isinstance(v, pd.Series):
+            raise ValueError(f"Only pd.Series can be inserted directly")
 
+        v = cast_to_itype(v, self._itype, policy=self._policy)
         self._data[key] = v.copy(deep=True)
 
     def _setitem(self, key, val, sl=None):
@@ -341,7 +271,8 @@ class DictOfSeries:
 
         # series, dios['a'] = series
         if isinstance(val, pd.Series) and sl is None:
-            self._setitem_new(key, val, bypass_checks=True)
+            val = cast_to_itype(val, self._itype, policy=self._policy)
+            self._data[key] = val.copy(deep=True)
             return
 
         sl = sl or slice(None)
@@ -365,7 +296,8 @@ class DictOfSeries:
         return
 
     def _setitem_dios(self, keys, slicer, other):
-        keys = get_dios_to_dios_keys(keys, other)
+        method = dios_options[Options.dios_to_dios_method]
+        keys = get_dios_to_dios_keys(keys, other, method)
         for k in keys:
             self._setitem(k, other[k], slicer)
 
@@ -471,7 +403,8 @@ class DictOfSeries:
     def __op2__(self, other, op):
         new = self.copy_empty()
         if isinstance(other, DictOfSeries):
-            keys = get_dios_to_dios_keys(self.columns, other)
+            method = dios_options[Options.dios_to_dios_method]
+            keys = get_dios_to_dios_keys(self.columns, other, method)
             for k in keys:
                 new[k] = op(self[k], other[k])
         else:
@@ -609,6 +542,40 @@ class DictOfSeries:
             return None
         return news.squeeze()
 
+    # def __find_least_common_itype(self):
+    #     def all_itypes_le(itypes, super_itype):
+    #         for itype in itypes:
+    #             if itype_le(itype, super_itype):
+    #                 continue
+    #             return False
+    #         return True
+    #
+    #     itypes = []
+    #     for k in self.columns:
+    #         itypes.append(get_itype(self._data[k].index))
+    #
+    #     if not itypes:
+    #         return None
+    #
+    #     found = None
+    #
+    #     # check supertypes
+    #     super_itypes = [MixedItype, NumericItype]
+    #     for super_itype in super_itypes:
+    #         if all_itypes_le(itypes, super_itype):
+    #             found = super_itype
+    #             continue
+    #         break
+    #     assert found, "At least this should be MixedItype"
+    #
+    #     # check base types
+    #     single_itypes = [DatetimeItype, IntegerItype, FloatItype]
+    #     for single_itype in single_itypes:
+    #         if all_itypes_le(itypes, single_itype):
+    #             found = single_itype
+    #             break
+    #     return found
+    #
 
 class _Indexer:
     def __init__(self, _dios):
diff --git a/dios/errors.py b/dios/errors.py
new file mode 100644
index 0000000..9df116f
--- /dev/null
+++ b/dios/errors.py
@@ -0,0 +1,24 @@
+import warnings
+# do not import dios-stuff here
+
+
+class ItypeWarning(RuntimeWarning):
+    """Warning category used when a non-unique (mixed) itype is set on a dios."""
+
+
+class ItypeCastWarning(ItypeWarning):
+    """ItypeWarning subtype; presumably for cast-related warnings (TODO confirm usage)."""
+
+
+class ItypeCastError(RuntimeError):
+    """Raised when a series index cannot be cast to the requested itype."""
+
+
+class OptionsWarning(UserWarning):
+    """Warning category for dios options (not raised in the code visible here)."""
+
+
+class OptionsError(RuntimeError):
+    """Error category for invalid dios option values (TODO confirm current usage)."""
+
+
diff --git a/dios/itypes.py b/dios/itypes.py
index 5ce9710..e74d19a 100644
--- a/dios/itypes.py
+++ b/dios/itypes.py
@@ -1,16 +1,16 @@
 import pandas as pd
+from dios.options import *
+from dios.lib import *
+from dios.errors import *
 
 
-class ItypeWarning(RuntimeWarning):
-    pass
+class CastPolicy:
+    force = 'force'
+    lossless = 'lossless'
+    never = 'never'
 
 
-class ItypeCastWarning(ItypeWarning):
-    pass
-
-
-class ItypeCastError(RuntimeError):
-    pass
+CAST_POLICIES = get_storage_class_values(CastPolicy)
 
 
 class __Itype:
@@ -133,7 +133,7 @@ def itype_le(a, b):
     return is_itype_like(a, b)
 
 
-def cast_to_fit_itype(series, itype, policy='force'):
+def cast_to_itype(series, itype, policy='lossless', err='raise', inplace=False):
     """ Cast a series (more explicit the type of the index) to fit the itype of a dios.
 
     Return the casted series if successful, None otherwise.
@@ -142,6 +142,13 @@ def cast_to_fit_itype(series, itype, policy='force'):
         This is very basic number-casting, so in most cases, information from
         the old index will be lost after the cast.
     """
+
+    if policy not in CAST_POLICIES:
+        raise ValueError(f"policy={policy}")
+    if err not in ['raise', 'ignore']:
+        raise ValueError(f"err={err}")
+    if not inplace:
+        series = series.copy()
     series.itype = get_itype(series.index)
 
     # up-cast issn't necessary because a dios with a higher
@@ -154,37 +161,53 @@ def cast_to_fit_itype(series, itype, policy='force'):
     if itype_le(series.itype, itype):  # a <= b
         return series
 
-    if policy in ['forbid', 'no-downcast', 'no-cast', 'never']:
-        return None
+    e = f"A series index of type `{type(series.index)}` cannot be casted to Itype {itype.name}"
 
-    elif policy == 'force':
-        # any (dt/float/mixed)      -> int,      always OK
-        # any (dt/float/mixed)      -> num(int), always OK
-        # any (dt/int/mixed)        -> float,    always OK
-        # any (int/float/mixed)     -> dt,       always FAIL
+    # cast any -> dt always fail.
+    if is_itype(itype, DatetimeItype):
+        pass
+    else:
+        e += f", as forbidden by the cast-policy `{policy}`."
+
+    if policy == CastPolicy.never:  # CAST_POLICIES is a list: compare with the constants, don't index it
+        pass
+
+    elif policy == CastPolicy.force:
+        # cast any (dt/float/mixed) -> int
+        # cast any (dt/float/mixed) -> num
         if is_itype(itype, IntegerItype) or is_itype(itype, NumericItype):  # a == b or a == c
             series.index = pd.RangeIndex(len(series))
             return series
+        # cast any (dt/int/mixed) -> float
         if is_itype(itype, FloatItype):  # a == b
             series.index = pd.Float64Index(range(len(series)))
             return series
-        if is_itype(itype, DatetimeItype):  # a == b
-            return None
-        return None
 
-    elif policy == 'lossless':
-        # int   -> float, always OK
-        # float -> int,   maybe if unique
-        # mixed -> any,   always FAIL
-        # dt    -> any,   always FAIL
+    elif policy == CastPolicy.lossless:
+        # cast int   -> float  (NOTE(review): this condition and the next one look swapped w.r.t. astype() - confirm)
         if is_itype(itype, IntegerItype) and is_itype(series.itype, FloatItype):  # a == b and c == d
             series.index = series.index.astype(float)
             return series
+        # cast float -> int, maybe if unique
         if is_itype(itype, FloatItype) and is_itype(series.itype, IntegerItype):  # a == b and c == d
             series.index = series.index.astype(int)
             if series.index.is_unique:
                 return series
+            e = f"The cast with policy {policy} from series index type `{type(series.index)}` to " \
+                f"itype {itype.name} resulted in a non-unique index."
+        # cast mixed -> int/float always fail
+
+    if err == 'raise':
+        raise ItypeCastError(e)
+    else:
         return None
 
-    raise ValueError(f"Unknown policy `{policy}`.")
 
+def throw_MixedItype_err_or_warn(msg):
+    """Stay silent, raise ItypeCastError or warn with `msg`, per the `mixed_itype_policy` option."""
+    policy = dios_options[Options.mixed_itype_policy]
+    if policy in ['error', 'err']:
+        raise ItypeCastError(msg)
+    if policy not in ['ignore', 'silent']:
+        warnings.warn(msg, ItypeWarning)
+    return
diff --git a/dios/lib.py b/dios/lib.py
index 27df4f4..e52ffc3 100644
--- a/dios/lib.py
+++ b/dios/lib.py
@@ -1,9 +1,7 @@
-from dios.itypes import *
-from dios.options import *
-
 import pandas as pd
-import warnings
+import numpy as np
 import contextlib
+# do not import dios-stuff here
 
 
 @contextlib.contextmanager
@@ -14,51 +12,7 @@ def reraise(prefix="", postfix=""):
         raise type(e)(prefix + str(e) + postfix) from e
 
 
-def _get_storage_class_values(cls):
+def get_storage_class_values(cls):
     return [getattr(cls, c) for c in cls.__dict__ if not c.startswith("_")]
 
 
-def throwMixedItypeErrWarn(msg):
-    if dios_options[Options.mixed_itype_policy] in ['ignore', 'silent']:
-        pass
-    elif dios_options[Options.mixed_itype_policy] in ['error', 'err']:
-        raise ItypeCastError(msg)
-    else:
-        warnings.warn(msg, ItypeWarning)
-    return
-
-
-# todo: make method an kwarg and remove dios_options access
-def get_dios_to_dios_keys(keys, other):
-    # we can assume that all keys are exist in self._data
-    method = dios_options[Options.dios_to_dios_method]
-    err_append = "consider changing dios.option['dios_to_dios_method']"
-
-    # assign where possible, otherwise ignore
-    if method == 0:
-        keys = [k for k in keys if k in other.columns]
-
-    # at least one key must be in self
-    elif method == 1:
-        keys = [k for k in keys if k in other.columns]
-        if not keys:
-            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)
-
-    # all keys must be in self, but more keys could exist in other,
-    # eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
-    # eg. ``dios[['a','b']] = dios['a']`` will fail
-    elif method == 2:
-        fail = [k for k in keys if k not in other.columns]
-        if fail:
-            raise KeyError(f"{fail} are missing in the destiny-dios, " + err_append)
-
-    # keys in both dios's must be equal
-    elif method == 3:
-        fail = set(keys).symmetric_difference(set(other.columns))
-        if fail:
-            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)
-
-    else:
-        raise OptionsError(f"{method} is an invalid value for dios.option[dios_to_dios]")
-
-    return keys
diff --git a/dios/options.py b/dios/options.py
index 1fc8eb4..9350c8e 100644
--- a/dios/options.py
+++ b/dios/options.py
@@ -1,10 +1,4 @@
-
-class OptionsWarning(UserWarning):
-    pass
-
-
-class OptionsError(RuntimeError):
-    pass
+# do not import dios-stuff here
 
 
 class Options:
@@ -18,13 +12,19 @@ class Options:
     """
     0: accept all
     1: accept if at least one keys is is in both DioS
-    2: accept if all keys of the src-DioS in the dest-DioS
     3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
       otherwise its the same than creating a new dios)"""
     dios_to_dios_method = "dios_to_dios_method"
 
     mixed_itype_policy = "mixed_itype_policy"
 
+
+class OptionsDiosToDios:
+    all_must_match = 'all'
+    at_least_one = 'one'
+    any_matching = 'any'
+
+
 # set default values
 dios_options = {
     Options.disp_max_rows: 10,
@@ -32,3 +32,32 @@ dios_options = {
     Options.dios_to_dios_method: 3,
     Options.mixed_itype_policy: 'warn',
 }
+
+
+def get_dios_to_dios_keys(keys, other, method):
+    """Select/validate `keys` against `other.columns` according to `method`.
+
+    `method` is an OptionsDiosToDios value; the legacy integer codes 0, 1 and 3
+    (3 is still the default in `dios_options`) mean 'any', 'one' and 'all'."""
+    legacy = {0: OptionsDiosToDios.any_matching, 1: OptionsDiosToDios.at_least_one,
+              3: OptionsDiosToDios.all_must_match}
+    method = legacy.get(method, method) if isinstance(method, int) else method
+    err_append = "consider changing dios.option['dios_to_dios_method']"
+    if method == OptionsDiosToDios.any_matching:
+        keys = [k for k in keys if k in other.columns]
+
+    elif method == OptionsDiosToDios.at_least_one:
+        keys = [k for k in keys if k in other.columns]
+        if not keys:
+            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)
+
+    # keys in both dios's must be equal
+    elif method == OptionsDiosToDios.all_must_match:
+        fail = set(keys).symmetric_difference(set(other.columns))
+        if fail:
+            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)
+
+    else:
+        raise ValueError(method)
+
+    return keys
-- 
GitLab