Skip to content
Snippets Groups Projects
Commit c863d8a1 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

working

parent 9b8414df
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
# low level
from dios.errors import *
from dios.lib import *
from dios.options import *
# high level
from dios.itypes import *
from dios.dios import *
from dios.lib import *
from dios.options import *
from dios.itypes import *
from dios.errors import *
import pandas as pd
import numpy as np
import operator as op
......@@ -57,24 +59,21 @@ class DictOfSeries:
"""
def __init__(self, data=None, columns=None, itype=None, downcast_policy='lossless'):
def __init__(self, data=None, columns=None, itype=MixedItype, downcast_policy='lossless'):
self._data = OrderedDict()
# We need to keep track of the index-type (itype) of every new Series.
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
#
# May data was given, so we firstly set itype to MixedItype, then insert all data,
# and check/cast the itype afterwards, otherwise __setitem_new() will set the itype,
# which may prevent inserting series with other (higher) itypes.
self._itype = None
with reraise("param itype: "):
self._itype = get_itype(itype)
self.itype = get_itype(itype)
policies = ['force', 'lossless', 'never']
if downcast_policy not in policies:
raise ValueError(f"downcast_policy must be one of {policies}")
self._downcast_policy = downcast_policy
if downcast_policy not in CAST_POLICIES:
raise ValueError(f"downcast_policy must be one of {CAST_POLICIES}")
self._policy = downcast_policy
if data is not None:
self.__init_insert_data__(data)
......@@ -83,76 +82,23 @@ class DictOfSeries:
if columns is not None:
self.columns = columns
# infer the itype by the data
inferred_itype = self.__find_least_common_itype()
itype = inferred_itype if itype is None else get_itype(itype)
# We use the itype.setter to make all checks. If the given itype was of a lower type
# than the inferred itype, a cast is tried on every series.
if itype is not None:
self.itype = itype
# user created a empty dios: data=None(->inferred=None), itype=None
else:
self._itype = None
def __init_insert_data__(self, data):
if isinstance(data, DictOfSeries):
for k in data:
self[k] = data[k]
if is_iterator(data):
data = list(data)
if is_dict_like(data):
for k in data:
self[k] = data[k]
return
# take care: dict's also list-like
if is_nested_list_like(data):
for i, d in enumerate(data):
self[str(i)] = d
return
if is_list_like(data):
self['0'] = data
return
def __find_least_common_itype(self):
def all_itypes_le(itypes, super_itype):
for itype in itypes:
if itype_le(itype, super_itype):
continue
return False
return True
itypes = []
for k in self.columns:
itypes.append(get_itype(self._data[k].index))
if not itypes:
return None
found = None
# check supertypes
super_itypes = [MixedItype, NumericItype]
for super_itype in super_itypes:
if all_itypes_le(itypes, super_itype):
found = super_itype
continue
break
assert found, "At least this should be MixedItype"
# check base types
single_itypes = [DatetimeItype, IntegerItype, FloatItype]
for single_itype in single_itypes:
if all_itypes_le(itypes, single_itype):
found = single_itype
break
return found
else:
if is_iterator(data):
data = list(data)
if is_dict_like(data):
for k in data:
self[k] = data[k]
elif is_nested_list_like(data):
for i, d in enumerate(data):
self[str(i)] = d
elif is_list_like(data):
self['0'] = data
return
@property
def columns(self):
......@@ -178,6 +124,7 @@ class DictOfSeries:
@property
def values(self):
    """Return the stored series combined into a single, transposed array.

    The series held in ``self._data`` are handed to ``to_object_array`` and
    the result is transposed before it is returned.

    NOTE(review): per the original inline comment, the series are brought to
    the same length by inserting NaNs -- presumably inside
    ``to_object_array``; confirm against that helper.
    """
    stacked = to_object_array(self._data.values())
    return stacked.transpose()
@property
......@@ -192,22 +139,20 @@ class DictOfSeries:
def itype(self, itype_like):
itype = get_itype(itype_like)
if not is_itype_subtype(self._itype, itype):
# try to cast all series to the new itype
if not itype_le(self._itype, itype):
self.__cast_all(itype)
self._itype = itype
if not itype.unique:
throwMixedItypeErrWarn(f"Using a {itype} as dios.itype is experimental. As soon as series with "
f"different index types are inserted, slicing will almost always fail. "
f"You are hereby warned!")
throw_MixedItype_err_or_warn(f"Using a {itype} as dios.itype is experimental. As soon as series with "
f"different index types are inserted, slicing will almost always fail. "
f"You are hereby warned!")
def __cast_all(self, itype):
for k in self.columns:
casted = cast_to_fit_itype(self._data[k].copy(), itype, policy=self._downcast_policy)
if casted is None:
raise ItypeCastError(f"Cast series indicees to the given itype failed for series in column {k}.")
with reraise(f"Column {k}: "):
casted = cast_to_itype(self._data[k], itype, policy=self._policy)
self._data[k] = casted
def _check_keys(self, keys):
......@@ -237,7 +182,7 @@ class DictOfSeries:
if isinstance(key, slice):
return self._slice(self.columns, key)
if is_list_like(key):
if is_list_like(key) and not is_nested_list_like(key):
self._check_keys(key)
return self._getitem_listlike(key)
......@@ -250,14 +195,14 @@ class DictOfSeries:
def _getitem_listlike(self, keys):
new = self.copy_empty()
for k in keys:
new[k] = self._get_item(k)
new._data[k] = self._get_item(k)
return new
def _slice(self, keys, slicer):
""" Return a slice of self"""
new = self.copy_empty()
for k in keys:
new[k] = self._get_item(k)[slicer]
new._data[k] = self._get_item(k)[slicer]
return new
def __setitem__(self, key, value):
......@@ -310,30 +255,15 @@ class DictOfSeries:
def _setitem_new(self, key, value, bypass_checks=False):
v = value
if isinstance(v, DictOfSeries):
v = v.squeeze()
elif is_list_like(v) and not is_nested_list_like(v):
v = pd.Series(v)
# if the checks was already done, we skip them here,
# also the Errormessage wouldn't fully apply.
if not bypass_checks:
if isinstance(v, DictOfSeries):
v = v.squeeze()
elif is_list_like(v):
v = pd.Series(v) # upcast
if not isinstance(v, pd.Series):
raise ValueError(f"Only pd.Series and DictOfSeries (of length 1) can be assigned new")
itype = get_itype(v.index)
if self._itype is None:
# if the user created a empty dios
self._itype = itype
v = cast_to_fit_itype(v, self._itype, policy=self._downcast_policy)
if v is None:
raise ValueError(f"Itype mismach. Policy `{self._downcast_policy}` forbid to down-cast"
f"itype `{itype}` to itype `{self.itype}`. key: {key}")
if not isinstance(v, pd.Series):
raise ValueError(f"Only pd.Series can be inserted directly")
v = cast_to_itype(v, self._itype, policy=self._policy)
self._data[key] = v.copy(deep=True)
def _setitem(self, key, val, sl=None):
......@@ -341,7 +271,8 @@ class DictOfSeries:
# series, dios['a'] = series
if isinstance(val, pd.Series) and sl is None:
self._setitem_new(key, val, bypass_checks=True)
val = cast_to_itype(val, self._itype, policy=self._policy)
self._data[key] = val.copy(deep=True)
return
sl = sl or slice(None)
......@@ -365,7 +296,8 @@ class DictOfSeries:
return
def _setitem_dios(self, keys, slicer, other):
keys = get_dios_to_dios_keys(keys, other)
method = dios_options[Options.dios_to_dios_method]
keys = get_dios_to_dios_keys(keys, other, method)
for k in keys:
self._setitem(k, other[k], slicer)
......@@ -471,7 +403,8 @@ class DictOfSeries:
def __op2__(self, other, op):
new = self.copy_empty()
if isinstance(other, DictOfSeries):
keys = get_dios_to_dios_keys(self.columns, other)
method = dios_options[Options.dios_to_dios_method]
keys = get_dios_to_dios_keys(self.columns, other, method)
for k in keys:
new[k] = op(self[k], other[k])
else:
......@@ -609,6 +542,40 @@ class DictOfSeries:
return None
return news.squeeze()
# def __find_least_common_itype(self):
# def all_itypes_le(itypes, super_itype):
# for itype in itypes:
# if itype_le(itype, super_itype):
# continue
# return False
# return True
#
# itypes = []
# for k in self.columns:
# itypes.append(get_itype(self._data[k].index))
#
# if not itypes:
# return None
#
# found = None
#
# # check supertypes
# super_itypes = [MixedItype, NumericItype]
# for super_itype in super_itypes:
# if all_itypes_le(itypes, super_itype):
# found = super_itype
# continue
# break
# assert found, "At least this should be MixedItype"
#
# # check base types
# single_itypes = [DatetimeItype, IntegerItype, FloatItype]
# for single_itype in single_itypes:
# if all_itypes_le(itypes, single_itype):
# found = single_itype
# break
# return found
#
class _Indexer:
def __init__(self, _dios):
......
import warnings
# do not import dios-stuff here
class ItypeWarning(RuntimeWarning):
    """Base warning category for everything related to index types (itypes)."""


class ItypeCastWarning(ItypeWarning):
    """Warning category for itype casts; a specialisation of ItypeWarning."""


class ItypeCastError(RuntimeError):
    """Error raised when a series index cannot be cast to a requested itype."""


class OptionsWarning(UserWarning):
    """Warning category for issues with the dios options."""


class OptionsError(RuntimeError):
    """Error raised for an invalid dios option value."""
import pandas as pd
from dios.options import *
from dios.lib import *
from dios.errors import *
class ItypeWarning(RuntimeWarning):
pass
class CastPolicy:
    """Storage class naming the policies accepted when casting a series index.

    The public attribute values are collected (in definition order) into
    ``CAST_POLICIES`` via ``get_storage_class_values`` and are used to
    validate the ``policy`` / ``downcast_policy`` arguments.
    """
    # Cast even if the old index is discarded (a fresh positional index is
    # generated) -- NOTE(review): derived from cast_to_itype; confirm there.
    force = 'force'
    # Only convert the index when the conversion keeps its information.
    lossless = 'lossless'
    # Never attempt a down-cast.
    never = 'never'
class ItypeCastWarning(ItypeWarning):
pass
class ItypeCastError(RuntimeError):
pass
CAST_POLICIES = get_storage_class_values(CastPolicy)
class __Itype:
......@@ -133,7 +133,7 @@ def itype_le(a, b):
return is_itype_like(a, b)
def cast_to_fit_itype(series, itype, policy='force'):
def cast_to_itype(series, itype, policy='lossless', err='raise', inplace=False):
""" Cast a series (more explicit the type of the index) to fit the itype of a dios.
Return the casted series if successful, None otherwise.
......@@ -142,6 +142,13 @@ def cast_to_fit_itype(series, itype, policy='force'):
This is very basic number-casting, so in most cases, information from
the old index will be lost after the cast.
"""
if policy not in CAST_POLICIES:
raise ValueError(f"policy={policy}")
if err not in ['raise', 'ignore']:
raise ValueError(f"err={err}")
if not inplace:
series = series.copy()
series.itype = get_itype(series.index)
# up-cast issn't necessary because a dios with a higher
......@@ -154,37 +161,53 @@ def cast_to_fit_itype(series, itype, policy='force'):
if itype_le(series.itype, itype): # a <= b
return series
if policy in ['forbid', 'no-downcast', 'no-cast', 'never']:
return None
e = f"A series index of type `{type(series.index)}` cannot be casted to Itype {itype.name}"
elif policy == 'force':
# any (dt/float/mixed) -> int, always OK
# any (dt/float/mixed) -> num(int), always OK
# any (dt/int/mixed) -> float, always OK
# any (int/float/mixed) -> dt, always FAIL
# cast any -> dt always fail.
if is_itype(itype, DatetimeItype):
pass
else:
e += f", as forbidden by the cast-policy `{policy}`."
if policy == CAST_POLICIES[CastPolicy.never]:
pass
elif policy == CAST_POLICIES[CastPolicy.force]:
# cast any (dt/float/mixed) -> int
# cast any (dt/float/mixed) -> num
if is_itype(itype, IntegerItype) or is_itype(itype, NumericItype): # a == b or a == c
series.index = pd.RangeIndex(len(series))
return series
# cast any (dt/int/mixed) -> float
if is_itype(itype, FloatItype): # a == b
series.index = pd.Float64Index(range(len(series)))
return series
if is_itype(itype, DatetimeItype): # a == b
return None
return None
elif policy == 'lossless':
# int -> float, always OK
# float -> int, maybe if unique
# mixed -> any, always FAIL
# dt -> any, always FAIL
elif policy == CAST_POLICIES[CastPolicy.lossless]:
# cast int -> float
if is_itype(itype, IntegerItype) and is_itype(series.itype, FloatItype): # a == b and c == d
series.index = series.index.astype(float)
return series
# cast float -> int, maybe if unique
if is_itype(itype, FloatItype) and is_itype(series.itype, IntegerItype): # a == b and c == d
series.index = series.index.astype(int)
if series.index.is_unique:
return series
e = f"The cast with policy {policy} from series index type `{type(series.index)}` to " \
f"itype {itype.name} resulted in a non-unique index."
# cast mixed -> int/float always fail
if err == 'raise':
raise ItypeCastError(e)
else:
return None
raise ValueError(f"Unknown policy `{policy}`.")
def throw_MixedItype_err_or_warn(msg):
    """Report ``msg`` according to the ``mixed_itype_policy`` option.

    The policy is read from ``dios_options[Options.mixed_itype_policy]``:

    * ``'ignore'`` / ``'silent'`` -- do nothing,
    * ``'error'`` / ``'err'`` -- raise ``ItypeCastError(msg)``,
    * anything else -- emit ``msg`` as an ``ItypeWarning``.

    Returns ``None``.
    """
    policy = dios_options[Options.mixed_itype_policy]

    if policy in ('ignore', 'silent'):
        return

    if policy in ('error', 'err'):
        raise ItypeCastError(msg)

    warnings.warn(msg, ItypeWarning)
from dios.itypes import *
from dios.options import *
import pandas as pd
import warnings
import numpy as np
import contextlib
# do not import dios-stuff here
@contextlib.contextmanager
......@@ -14,51 +12,7 @@ def reraise(prefix="", postfix=""):
raise type(e)(prefix + str(e) + postfix) from e
def _get_storage_class_values(cls):
def get_storage_class_values(cls):
    """Return the values of all public attributes defined directly on ``cls``.

    Only names found in ``cls.__dict__`` (i.e. not inherited ones) that do not
    start with an underscore are considered; the values are returned as a list
    in definition order.
    """
    values = []
    for name in cls.__dict__:
        if name.startswith("_"):
            continue
        values.append(getattr(cls, name))
    return values
def throwMixedItypeErrWarn(msg):
    """Ignore, raise or warn with ``msg``, depending on the configured policy.

    ``dios_options[Options.mixed_itype_policy]`` decides what happens:
    ``'ignore'``/``'silent'`` does nothing, ``'error'``/``'err'`` raises
    ``ItypeCastError(msg)`` and every other value issues an ``ItypeWarning``.
    """
    mode = dios_options[Options.mixed_itype_policy]
    silent = mode in ['ignore', 'silent']
    if not silent:
        if mode in ['error', 'err']:
            raise ItypeCastError(msg)
        warnings.warn(msg, ItypeWarning)
    return None
# todo: make `method` a kwarg and remove the dios_options access
def get_dios_to_dios_keys(keys, other):
    """Filter or validate ``keys`` against the columns of ``other``.

    The mode is read from ``dios_options[Options.dios_to_dios_method]``:

    * ``0`` -- keep only the keys that also exist in ``other.columns``.
    * ``1`` -- like ``0``, but raise ``KeyError`` if no key is left.
    * ``2`` -- every key must exist in ``other.columns`` (``other`` may hold
      more); otherwise ``KeyError``. ``keys`` is returned unchanged.
    * ``3`` -- ``keys`` and ``other.columns`` must be equal as sets; otherwise
      ``KeyError``. ``keys`` is returned unchanged.

    Any other mode raises ``OptionsError``.

    The caller is expected to pass only keys that exist in its own data.
    """
    method = dios_options[Options.dios_to_dios_method]
    err_append = "consider changing dios.option['dios_to_dios_method']"

    # 0: assign where possible, silently drop the rest
    if method == 0:
        return [k for k in keys if k in other.columns]

    # 1: as above, but at least one key has to be shared
    if method == 1:
        shared = [k for k in keys if k in other.columns]
        if not shared:
            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)
        return shared

    # 2: all keys must be present in other, which may hold additional ones,
    # eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
    # eg. ``dios[['a','b']] = dios['a']`` will fail
    if method == 2:
        missing = [k for k in keys if k not in other.columns]
        if missing:
            raise KeyError(f"{missing} are missing in the destiny-dios, " + err_append)
        return keys

    # 3: the key sets of both dios's must be identical
    if method == 3:
        mismatch = set(keys).symmetric_difference(set(other.columns))
        if mismatch:
            raise KeyError(f"{mismatch} is not in both of src- and dest-dios, " + err_append)
        return keys

    raise OptionsError(f"{method} is an invalid value for dios.option[dios_to_dios]")
class OptionsWarning(UserWarning):
pass
class OptionsError(RuntimeError):
pass
# do not import dios-stuff here
class Options:
......@@ -18,13 +12,19 @@ class Options:
"""
0: accept all
1: accept if at least one keys is is in both DioS
2: accept if all keys of the src-DioS in the dest-DioS
3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
otherwise its the same than creating a new dios)"""
dios_to_dios_method = "dios_to_dios_method"
mixed_itype_policy = "mixed_itype_policy"
class OptionsDiosToDios:
    """Storage class naming the key-matching modes for dios-to-dios operations.

    The values are compared against the ``method`` argument of
    ``get_dios_to_dios_keys``.
    """
    # keys of both dios must be exactly the same set
    all_must_match = 'all'
    # at least one key has to be present in both dios
    at_least_one = 'one'
    # use whatever keys are present in both dios; no shared key is fine, too
    any_matching = 'any'
# set default values
dios_options = {
Options.disp_max_rows: 10,
......@@ -32,3 +32,32 @@ dios_options = {
Options.dios_to_dios_method: 3,
Options.mixed_itype_policy: 'warn',
}
def get_dios_to_dios_keys(keys, other, method):
    """Filter or validate ``keys`` against the columns of ``other``.

    Parameters
    ----------
    keys : iterable
        Candidate column keys (of the destination / left-hand dios).
    other : DictOfSeries
        The dios whose ``columns`` the keys are matched against.
    method
        One of the ``OptionsDiosToDios`` values. The numeric codes ``0``
        (any matching), ``1`` (at least one) and ``3`` (all must match) are
        accepted as aliases, because the default stored in ``dios_options``
        is still the number ``3``.

    Returns
    -------
    The keys to operate on: the shared keys for the "any"/"one" modes, the
    unchanged ``keys`` for the "all" mode.

    Raises
    ------
    KeyError
        If the keys do not satisfy the requested mode.
    ValueError
        If ``method`` is not a known mode. (The former subset mode ``2`` is
        disabled and therefore rejected as well.)
    """
    err_append = "consider changing dios.option['dios_to_dios_method']"

    # assign where possible, silently ignore everything else
    if method in (OptionsDiosToDios.any_matching, 0):
        return [k for k in keys if k in other.columns]

    # as above, but at least one key must be shared
    if method in (OptionsDiosToDios.at_least_one, 1):
        shared = [k for k in keys if k in other.columns]
        if not shared:
            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)
        return shared

    # keys in both dios's must be equal.
    # The original tested the constant itself (``elif OptionsDiosToDios.all_must_match:``),
    # which is always true, so every unknown method silently ended up here.
    if method in (OptionsDiosToDios.all_must_match, 3):
        fail = set(keys).symmetric_difference(set(other.columns))
        if fail:
            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)
        return keys

    raise ValueError(f"{method!r} is an invalid value for dios.option['dios_to_dios_method']")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment