Skip to content
Snippets Groups Projects
Commit 680ea4a8 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

itype done

parent 4f62bda2
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
from dios.lib import *
from dios.options import *
import pandas as pd
import numpy as np
import operator as op
import datetime as dt
from collections import OrderedDict
from pandas.core.dtypes.common import (
......@@ -13,7 +11,6 @@ from pandas.core.dtypes.common import (
is_dict_like,
)
from pandas.core.dtypes.common import is_iterator as _is_iterator
from pandas.core.indexing import need_slice
def is_iterator(obj):
......@@ -51,7 +48,7 @@ class DictOfSeries:
Todos:
-----
todo: allow any hashable obj as column identifier
todo: to_discuss!! allow any hashable obj as column identifier
Currently we only allow strings as identifiers. To be more df-like, we should allow any
hashable object (unlike df, we should perhaps exclude values like ``None`` or ``np.nan``?).
......@@ -64,20 +61,24 @@ class DictOfSeries:
# We need to keep track of the index-type (itype) of every new Series.
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
#
# Data may have been given, so we first set the itype to MixedItype, then insert all data,
# and check/cast the itype afterwards. Otherwise __setitem_new() would set the itype,
# which may prevent inserting series with other (higher) itypes.
self._itype = MixedItype
self.__init_insert_data__(data)
# use property.setter to make necessary checks
# we use the columns.setter to make all necessary checks
self.columns = columns
# 1. infer itype
# check with given -> fine
# check with given -> cast -> fine
# check with given -> cast -> err out
# given None:
# is unique -> fine
# not unique -> err out
# infer the itype by the data
inferred_itype = self.__find_least_common_itype()
itype = inferred_itype if itype is None else get_itype(itype)
# We use the itype.setter to make all checks. If the given itype was of a lower type
# than the inferred itype, a cast is tried on every series.
self.itype = itype
def __init_insert_data__(self, data):
if data is None:
......@@ -98,6 +99,38 @@ class DictOfSeries:
if is_list_like(data):
self['0'] = data
def __find_least_common_itype(self):
    """Infer one itype that covers the index of every stored series.

    The itype of each column's index is collected first. The super-itypes
    ``MixedItype`` and ``NumericItype`` are then tried in that order, and the
    last one that still covers all series is remembered. Finally the base
    itypes are tried; the first one that covers all series replaces the
    remembered super-itype.

    Returns
    -------
    The itype class that was found (never a list of candidates).
    """
    itypes = [get_itype(self._data[k].index) for k in self.columns]

    def covers_all(candidate):
        # True if every collected itype is `candidate` itself or one of its subtypes.
        return all(itype_le(itype, candidate) for itype in itypes)

    found = None

    # check supertypes: stop at the first one that no longer covers every series
    for super_itype in (MixedItype, NumericItype):
        if not covers_all(super_itype):
            break
        found = super_itype
    assert found, "At least this should be MixedItype"

    # check base types: the first match wins
    for single_itype in (DatetimeItype, IntegerItype, FloatItype):
        if covers_all(single_itype):
            # Assign the matching itype itself. The former code assigned the
            # whole candidate list here, so callers received a list.
            found = single_itype
            break
    return found
@property
def columns(self):
    """Return the keys of the underlying storage as a new list."""
    names = self._data.keys()
    return list(names)
......@@ -126,9 +159,24 @@ class DictOfSeries:
@itype.setter
def itype(self, itype_like):
# Setter for the dios-wide index type (itype).
# NOTE(review): this span comes from a rendered diff in which removed and added
# lines appear without markers. The next three statements look like the previous
# implementation (they end in an unconditional raise) -- confirm against the
# repository which lines are current.
if is_itype_subtype(self._itype, itype_like):
self._itype = itype_like
raise NotImplementedError("futur throw `mixed` warning")
# Normalize the user input (whatever get_itype() accepts) to an itype.
itype = get_itype(itype_like)
# Presumably: the current itype is not a subtype of the requested one, so the
# stored series do not fit yet and have to be cast.
if not is_itype_subtype(self._itype, itype):
# try to cast all series to the new itype
self.__cast_all(itype)
self._itype = itype
# Itypes whose `unique` flag is falsy trigger an ItypeWarning.
if not itype.unique:
throw(f"Using a {itype} as dios.itype is experimental. As soon as series with different index types "
f"are inserted, slicing will almost always fail. You are hereby warned!", ItypeWarning)
def __cast_all(self, itype):
    """Cast the index of every stored series to fit ``itype``.

    Every cast is done on a copy of the series. The results are only written
    back after all columns were cast successfully, so a failing cast leaves
    the stored data untouched instead of half-converted.

    Raises
    ------
    ItypeCastError
        If ``cast_to_fit_itype`` returns ``None`` for any column.
    """
    casted_by_key = {}
    for k in self.columns:
        casted = cast_to_fit_itype(self._data[k].copy(), itype)
        if casted is None:
            raise ItypeCastError(f"Cast series indicees to the given itype failed for series in column {k}.")
        casted_by_key[k] = casted

    # commit only after every single cast succeeded
    for k, casted in casted_by_key.items():
        self._data[k] = casted
def _check_keys(self, keys):
missing = [k for k in keys if k not in self.columns]
......@@ -348,43 +396,6 @@ class DictOfSeries:
def __delitem__(self, key):
# Remove the series stored under `key` from the underlying storage.
del self._data[key]
# Re-validate the dios-wide itype after the removal.
# NOTE(review): this span comes from a rendered diff; this call may be a line
# the commit removed -- confirm against the repository.
self.__set_mixed_itype_from_all_keys()
def __set_mixed_itype_from_all_keys(self):
    """Re-validate the dios-wide itype after a stored series changed.

    An empty dios gets no itype and a dios holding a single series takes over
    the itype of that series. In every other case the itype is only replaced
    if mixed itypes are allowed by the options, the current itype is
    ``mixed`` and all stored series share one single itype.
    """
    count = len(self)
    if count == 0:
        self._itype = None
        return
    if count == 1:
        self._itype = get_itype(self.squeeze().index)
        return

    # nothing to narrow if ``mixed`` is not allowed at all, or not in use
    if not dios_options[Options.allow_mixed_itypes] or self._itype != IdxTypes.mixed:
        return

    # collect the distinct itypes of all stored series
    seen = {get_itype(self._data[k].index) for k in self._data.keys()}

    # with two or more different itypes ``mixed`` still applies,
    # otherwise the index is of a single new type
    if len(seen) <= 1:
        self._itype = seen.pop()
def __copy__(self):
    """Hook for ``copy.copy()``; delegates to ``self.copy(deep=True)``."""
    duplicate = self.copy(deep=True)
    return duplicate
......@@ -565,6 +576,8 @@ class _LocIndexer(_Indexer):
# list_like -> check length
for c in cols:
self._data[c].loc[rkey] = value
# todo loc.__setitem__(self, key, value):
raise NotImplementedError
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
......@@ -618,6 +631,10 @@ class _iLocIndexer(_Indexer):
new[c] = self._data[c].iloc[rkey]
return new
def __setitem__(self, key, value):
# Stub: assignment via this indexer is not implemented yet and always raises
# NotImplementedError, whatever `key` and `value` are.
# todo iloc.__setitem__(self, key, value):
raise NotImplementedError
def _unpack_key(self, key):
# if we have a tuple, we have a rows- and a column-indexer
# if not, we only have a row-indexer and work on all columns
......
import pandas as pd
class ItypeWarning(RuntimeWarning):
    """Base category for warnings related to itypes."""


class ItypeCastWarning(ItypeWarning):
    """Warning category for itype casts."""


class ItypeCastError(RuntimeError):
    """Raised when casting to an itype fails."""
class __Itype:
def __init__(self):
    """Always raise: itypes are used as classes and must not be instantiated.

    Raises
    ------
    RuntimeError
        On every call. The message names the class that was instantiated.
    """
    # Use the concrete class name. The former hard-coded "DatetimeItype" was
    # wrong for this shared base class and for every other itype inheriting it.
    raise RuntimeError(f"{type(self).__name__} does not allow instances of itself.")
......@@ -10,21 +22,18 @@ class DatetimeItype(__Itype):
name = 'datetime'
unique = True
subtypes = (pd.DatetimeIndex,)
cast_to = ...
class IntegerItype(__Itype):
# Itype describing integer indexes; used as a class, see the attributes below.
name = 'integer'
unique = True
# NOTE(review): `subtypes` is assigned twice in this span, which comes from a
# rendered diff with removed and added lines interleaved. As written, the later
# assignment (which additionally lists the builtin `int`) is the one that
# survives. `cast_to` may belong to the removed lines -- confirm against the
# repository.
subtypes = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index,)
cast_to = int
subtypes = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, int)
class FloatItype(__Itype):
# Itype describing float indexes; used as a class, see the attributes below.
name = 'float'
# NOTE(review): `subtypes` is assigned twice in this span, which comes from a
# rendered diff with removed and added lines interleaved. As written, the later
# assignment (which additionally lists the builtin `float`) is the one that
# survives. `cast_to` may belong to the removed lines -- confirm against the
# repository.
subtypes = (pd.Float64Index,)
subtypes = (pd.Float64Index, float)
unique = True
cast_to = float
# class MultiItype(__Itype):
......@@ -76,11 +85,6 @@ def is_itype_like(obj, itype):
return is_itype(obj, itype) or is_itype_subtype(obj, itype)
def get_minimal_itype(obj):
    """Alias of get_itype(); forwards ``obj`` and returns its result unchanged."""
    resolved = get_itype(obj)
    return resolved
def get_itype(obj):
"""
Return the according Itype, by any of any possible user input, like
......@@ -95,7 +99,7 @@ def get_itype(obj):
return obj
# check if it is the actual type, not a subtype
types = [DatetimeItype, IntegerItype, FloatItype, OtherItype, NumericItype, MixedItype]
types = [DatetimeItype, IntegerItype, FloatItype, NumericItype, MixedItype]
for t in types:
if is_itype(obj, t):
return t
......@@ -111,6 +115,18 @@ def get_itype(obj):
raise ValueError(f"{obj} is not a itype, nor any known subtype of a itype, nor a itype string alias")
def itype_eq(a, b):
    """Equality comparison of itypes; returns the result of ``is_itype(a, b)``."""
    same = is_itype(a, b)
    return same
def itype_lt(a, b):
    """Less-than comparison of itypes; returns the result of ``is_itype_subtype(a, b)``."""
    strictly_below = is_itype_subtype(a, b)
    return strictly_below
def itype_le(a, b):
    """Less-or-equal comparison of itypes; returns the result of ``is_itype_like(a, b)``."""
    below_or_same = is_itype_like(a, b)
    return below_or_same
def cast_to_fit_itype(series, itype):
""" Cast a series (more explicit the type of the index) to fit the itype of a dios.
......@@ -147,4 +163,3 @@ def cast_to_fit_itype(series, itype):
return None
return None
import pandas as pd
from dios.itypes import *
from dios.options import *
import pandas as pd
import warnings
......@@ -7,5 +8,41 @@ def _get_storage_class_values(cls):
return [getattr(cls, c) for c in cls.__dict__ if not c.startswith("_")]
class CastWarning(RuntimeWarning):
    """Warning category for casts."""
def throw(msg, wtype):
    """Emit the warning ``msg`` with the warning category ``wtype``."""
    warnings.warn(msg, category=wtype)
# todo: make method an kwarg and remove dios_options access
def get_dios_to_dios_keys(keys, other, method=None):
    """Select or validate ``keys`` against the columns of ``other``.

    Parameters
    ----------
    keys : list-like
        The keys to check. The caller guarantees that they exist in its own data.
    other : object with a ``columns`` attribute
        The dios whose columns the keys are compared with.
    method : int, optional
        How strictly the keys have to match. If ``None`` (the default), the
        value of ``dios_options[Options.dios_to_dios_method]`` is used:

        - 0: keep the keys that are in ``other.columns``, ignore the rest
        - 1: like 0, but at least one key must remain
        - 2: every key must be in ``other.columns``; ``other`` may hold more
        - 3: the keys and ``other.columns`` must be equal as sets

    Returns
    -------
    list
        The filtered keys for method 0 and 1, the given ``keys`` unchanged
        for method 2 and 3.

    Raises
    ------
    KeyError
        If the keys do not satisfy the chosen method.
    OptionsError
        If ``method`` is none of the values listed above.
    """
    # we can assume that all keys exist in self._data
    if method is None:
        # only fall back to the global option if the caller did not decide
        method = dios_options[Options.dios_to_dios_method]
    err_append = "consider changing dios.option['dios_to_dios_method']"

    # assign where possible, otherwise ignore
    if method == 0:
        keys = [k for k in keys if k in other.columns]

    # at least one key must be shared
    elif method == 1:
        keys = [k for k in keys if k in other.columns]
        if not keys:
            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)

    # all keys must be in other, but more keys could exist in other,
    # eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
    # eg. ``dios[['a','b']] = dios['a']`` will fail
    elif method == 2:
        fail = [k for k in keys if k not in other.columns]
        if fail:
            raise KeyError(f"{fail} are missing in the destiny-dios, " + err_append)

    # keys in both dios's must be equal
    elif method == 3:
        fail = set(keys).symmetric_difference(set(other.columns))
        if fail:
            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)

    else:
        raise OptionsError(f"{method} is an invalid value for dios.option[dios_to_dios]")

    return keys
import warnings
class OptionsWarning(UserWarning):
    """Warning category for dios options."""
......@@ -25,71 +23,10 @@ class Options:
otherwise its the same than creating a new dios)"""
dios_to_dios_method = "dios_to_dios_method"
"""
If we have different types of indexes in the dios, slicing will almost always fail.
It is because, eg. a numeric slice cannot work on a pd.DatetimeIndex and vice versa.
To set this to True is highly experimental, any arising issues or errors should be
handled by the user."""
allow_mixed_itypes = "allow_mixed_itypes"
allowed_indextypes = "allowed_indextypes"
class __OptionsDict(dict):
    """Plain dict that warns when a truthy value is stored under ``Options.allow_mixed_itypes``."""

    def __setitem__(self, key, value):
        # warn whenever the user switches the mixed-itypes option on
        enables_mixed = key == Options.allow_mixed_itypes and value
        if enables_mixed:
            warnings.warn(f"Using ``dios_option[{Options.allow_mixed_itypes}]=True`` is highly experimental, "
                          f"please do not report any bugs!", OptionsWarning)
        return super().__setitem__(key, value)
# set default values
# Every assignment below goes through __OptionsDict.__setitem__. The
# ``allow_mixed_itypes`` default is falsy, so storing it emits no warning.
dios_options = __OptionsDict()
dios_options[Options.disp_max_rows] = 10
dios_options[Options.disp_max_vars] = 4
dios_options[Options.dios_to_dios_method] = 3
dios_options[Options.allow_mixed_itypes] = False
# NOTE(review): `IdxTypes` is neither defined nor imported in the visible part of
# this module, and `nunmeric` looks like a misspelling of `numeric` -- confirm
# against the definition of `IdxTypes`.
dios_options[Options.allowed_indextypes] = [IdxTypes.datetime, IdxTypes.nunmeric]
def check_allowed_itypes(idxtype):
    """Raise a RuntimeError if ``idxtype`` is not listed in the ``allowed_indextypes`` option."""
    permitted = dios_options[Options.allowed_indextypes]
    if idxtype in permitted:
        return
    raise RuntimeError(f"The index type `{idxtype}` is not allowed by the "
                       f"`dios_option[{Options.allowed_indextypes}] = {dios_options[Options.allowed_indextypes]}`")
def get_dios_to_dios_keys(keys, other):
    """Select or validate ``keys`` against ``other.columns``.

    The strictness is read from ``dios_options[Options.dios_to_dios_method]``:

    - 0: keep the keys that are in ``other.columns``, ignore the rest
    - 1: like 0, but raise a KeyError if no key remains
    - 2: raise a KeyError unless every key is in ``other.columns``
    - 3: raise a KeyError unless keys and ``other.columns`` are equal as sets

    Any other value raises an OptionsError. Method 0 and 1 return the filtered
    keys, method 2 and 3 return the given ``keys`` unchanged. The caller
    guarantees that all keys exist in its own data.
    """
    method = dios_options[Options.dios_to_dios_method]
    err_append = "consider changing dios.option['dios_to_dios_method']"

    if method == 0:
        # assign where possible, otherwise ignore
        return [key for key in keys if key in other.columns]

    if method == 1:
        # at least one key must be shared
        shared = [key for key in keys if key in other.columns]
        if not shared:
            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)
        return shared

    if method == 2:
        # all keys must be known to other, but other may hold more keys,
        # eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
        # eg. ``dios[['a','b']] = dios['a']`` will fail
        fail = [key for key in keys if key not in other.columns]
        if fail:
            raise KeyError(f"{fail} are missing in the destiny-dios, " + err_append)
        return keys

    if method == 3:
        # keys in both dios's must be equal
        fail = set(keys) ^ set(other.columns)
        if fail:
            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)
        return keys

    raise OptionsError(f"{method} is an invalid value for dios.option[dios_to_dios]")
# Default values of the dios options, keyed by the option names defined in ``Options``.
dios_options = dict([
    (Options.disp_max_rows, 10),
    (Options.disp_max_vars, 4),
    (Options.dios_to_dios_method, 3),
])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment