Skip to content
Snippets Groups Projects
Commit 4f62bda2 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

added cast itype

parent 6138cd39
No related branches found
No related tags found
2 merge requests: !2 Develop, !1 complete rework
......@@ -10,6 +10,7 @@ from pandas.core.dtypes.common import (
is_list_like,
is_scalar,
is_integer,
is_dict_like,
)
from pandas.core.dtypes.common import is_iterator as _is_iterator
from pandas.core.indexing import need_slice
......@@ -56,20 +57,46 @@ class DictOfSeries:
"""
def __init__(self, itype=None, **kwargs):
def __init__(self, data=None, itype=None, columns=None):
self._data = OrderedDict()
# We need to keep track of the index-type (itype) of every new Series.
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
if itype is not None:
itype = get_itype(itype)
check_allowed_itypes(itype)
self._itype = itype
self._itype = MixedItype
self.__init_insert_data__(data)
# use property.setter to make necessary checks
self.columns = columns
# 1. infer itype
# check with given -> fine
# check with given -> cast -> fine
# check with given -> cast -> err out
# given None:
# is unique -> fine
# not unique -> err out
def __init_insert_data__(self, data):
if data is None:
return
if isinstance(data, DictOfSeries):
for k in data:
self[k] = data[k]
if is_iterator(data):
data = list(data)
# fill initial given values in the dios
for kw in kwargs:
self[kw] = kwargs[kw]
if is_dict_like(data):
for k in data:
self[k] = data[k]
# take care: dict's also list-like
if is_list_like(data):
self['0'] = data
@property
def columns(self):
......@@ -78,10 +105,10 @@ class DictOfSeries:
@columns.setter
def columns(self, new):
if not isinstance(new, list):
raise NotImplementedError("Only lists supported so far")
raise TypeError("column names must be given as a list")
if len(set(new)) != len(new):
raise ValueError("Names must be unique")
raise ValueError("column names must be unique")
if len(new) != len(self.columns):
raise ValueError(f"Length mismatch: Columns has {len(self.columns)} elements, "
......@@ -97,22 +124,11 @@ class DictOfSeries:
def itype(self):
return self._itype
def _set_itype(self, idx):
""" Set itype of dios.
Note: If ``self._itype`` and ``idx`` are of the same type,
``self._itype`` stays unchanged.
"""
idx = get_itype(idx)
check_allowed_itypes(idx)
if self._itype is None:
self._itype = idx
elif self._itype != idx:
if dios_options[Options.allow_mixed_itypes]:
self._itype = IdxTypes.mixed
else:
raise ValueError(f"Only objects which have a index of type `{self._itype}` can be inserted.")
@itype.setter
def itype(self, itype_like):
if is_itype_subtype(self._itype, itype_like):
self._itype = itype_like
raise NotImplementedError("futur throw `mixed` warning")
def _check_keys(self, keys):
missing = [k for k in keys if k not in self.columns]
......@@ -227,7 +243,16 @@ class DictOfSeries:
if not isinstance(v, pd.Series):
raise ValueError(f"Only pd.Series and DictOfSeries (of length 1) can be assigned new")
self._set_itype(v.index)
if self._itype is None:
# if the user created an empty dios or
# the last element was deleted
self._itype = get_itype(v.index)
v = cast_to_fit_itype(v, self._itype)
if v is None:
itype = get_itype(v.index)
raise ValueError(f"Itype mismach. Data of key `{key}`, with (infered) itype `{itype}` "
f"cannot be inserted in a dios with itype `{self.itype}`.")
self._data[key] = v.copy(deep=True)
def _setitem(self, key, val, sl=None):
......@@ -370,8 +395,8 @@ class DictOfSeries:
def copy(self, deep=True):
new = DictOfSeries()
new._itype = self.itype
# We use `_data` here because all checks have already been done.
# So this should be much faster, especially because we use the underlying dict for
# We use `_data` here, because all checks are already done.
# So this should be much faster, especially, because we use the underlying dict for
# getting and setting the values, instead of ``__setitem__`` and ``__getitem__``.
# Note: don't use same approach elsewhere, unless you're very sure what you do.
for k in self._data:
......
......@@ -10,24 +10,21 @@ class DatetimeItype(__Itype):
name = 'datetime'
unique = True
subtypes = (pd.DatetimeIndex,)
cast_to = ...
class IntegerItype(__Itype):
name = 'integer'
unique = True
subtypes = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index,)
cast_to = int
class FloatItype(__Itype):
name = 'float'
subtypes = (pd.Float64Index,)
unique = True
class OtherItype(__Itype):
name = "other"
subtypes = (pd.CategoricalIndex, pd.IntervalIndex, pd.PeriodIndex,)
unique = True
cast_to = float
# class MultiItype(__Itype):
......@@ -38,18 +35,17 @@ class OtherItype(__Itype):
class NumericItype(__Itype):
name = "numeric"
subtypes = (IntegerItype.subtypes + FloatItype.subtypes)
_subitypes = (IntegerItype, FloatItype)
subtypes = (_subitypes + IntegerItype.subtypes + FloatItype.subtypes)
unique = False
class MixedItype (__Itype):
class MixedItype(__Itype):
name = "mixed"
unique = False
subtypes = (DatetimeItype.subtypes +
NumericItype.subtypes +
OtherItype.subtypes +
# pd.MultiIndex, not supported
())
_subitypes = (DatetimeItype, IntegerItype, FloatItype, NumericItype)
_otheritypes = (pd.CategoricalIndex, pd.IntervalIndex, pd.PeriodIndex, pd.TimedeltaIndex)
subtypes = (_subitypes + _otheritypes + DatetimeItype.subtypes + NumericItype.subtypes)
def is_itype(obj, itype):
......@@ -57,6 +53,7 @@ def is_itype(obj, itype):
# user gave a Itype, like ``DatetimeItype``
if issubclass(obj, itype):
return True
# todo: iter through itype as it could be a tuple, if called like ``is_itype(o, (t1,t2))``
# user gave a string, like 'datetime'
if isinstance(obj, str) and obj == itype.name:
return True
......@@ -79,6 +76,11 @@ def is_itype_like(obj, itype):
return is_itype(obj, itype) or is_itype_subtype(obj, itype)
def get_minimal_itype(obj):
    """Alias of ``get_itype()``.

    Forwards ``obj`` unchanged to ``get_itype()`` and hands back whatever
    that call returns; see ``get_itype()`` for the accepted inputs.
    """
    itype = get_itype(obj)
    return itype
def get_itype(obj):
"""
Return the according Itype, by any of any possible user input, like
......@@ -93,17 +95,56 @@ def get_itype(obj):
return obj
# check if it is the actual type, not a subtype
types = [DatetimeItype, NumericItype, IntegerItype, FloatItype, OtherItype, MixedItype]
types = [DatetimeItype, IntegerItype, FloatItype, OtherItype, NumericItype, MixedItype]
for t in types:
if is_itype(obj, t):
return t
# If the above failed, we try to infer the itype by its subtypes.
# We just check the unique types, because the non-unique are just
# collections of unique subtypes.
# collections of unique subtypes, and would have been detected by any
# of the if-statements above
for t in types:
if is_itype_subtype(obj, t) and t.unique:
return t
raise ValueError(f"{obj} is not a itype, nor any known subtype of a itype, nor a itype string alias")
def cast_to_fit_itype(series, itype):
    """Cast the index of a series so that it fits the itype of a dios.

    Parameters
    ----------
    series : pd.Series
        The series whose index should fit ``itype``. It is never modified;
        if a cast is necessary, a shallow copy with a new index is returned.
    itype : Itype
        The target itype (the itype of the receiving dios).

    Returns
    -------
    pd.Series or None
        ``series`` itself if its index already fits ``itype``, a re-indexed
        shallow copy if the index could be cast, ``None`` if no cast is
        possible.

    Note
    ----
    This is very basic number-casting: the old index is replaced by a plain
    ``0..len(series)-1`` enumeration, so in most cases the information of
    the old index is lost after the cast.
    """
    # Keep the inferred itype in a local variable instead of attaching it
    # as an attribute to the caller's series.
    series_itype = get_itype(series.index)

    # An up-cast isn't necessary, because a dios with a higher itype
    # can always take lower itypes:
    #   dt    -> dt    -> mixed
    #   int   -> int   -> num -> mixed
    #   float -> float -> num -> mixed
    #   num   -> num   -> mixed
    #   mixed -> mixed
    # ``is_itype_like`` accepts the exact same itype as well as any subtype,
    # so an exact match is never re-indexed needlessly.
    if is_itype_like(series_itype, itype):
        return series

    # any (dt/float/num/mixed) -> int/num OK
    if is_itype(itype, IntegerItype) or is_itype(itype, NumericItype):
        # shallow copy: shares the data, but the caller's index stays intact
        casted = series.copy(deep=False)
        casted.index = pd.RangeIndex(len(series))
        return casted

    # any (dt/int/num/mixed) -> float OK
    if is_itype(itype, FloatItype):
        casted = series.copy(deep=False)
        casted.index = pd.Float64Index(range(len(series)))
        return casted

    # any (int/float/num/mixed) -> dt FAIL
    # (and every other target itype): no cast available
    return None
import pandas as pd
from dios.itypes import *
import warnings
def _get_storage_class_values(cls):
    """Collect the values of the public attributes defined directly on ``cls``.

    Every name in the class' own ``__dict__`` that does not start with an
    underscore is looked up with ``getattr`` on the class; the results are
    returned as a list in definition order. Inherited attributes are not
    considered, because only the class' own namespace is scanned.
    """
    values = []
    for attr_name in vars(cls):
        if attr_name.startswith("_"):
            continue
        values.append(getattr(cls, attr_name))
    return values
class CastWarning(RuntimeWarning):
    """Warning category derived from ``RuntimeWarning``.

    NOTE(review): presumably issued when an index had to be cast to fit
    the itype of a dios -- confirm against the places that raise it.
    """
from dios.itypes import IdxTypes
import warnings
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment