Skip to content
Snippets Groups Projects
Commit 28c06f47 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

options, lib, keep track of index type

parent 9e154aa1
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
......@@ -14,69 +14,6 @@ from pandas.core.dtypes.common import (
from pandas.core.indexing import need_slice
def item_from_zerodim(key):
    """Forward ``key`` to pandas' ``lib.item_from_zerodim`` and return the result.

    The pandas helper unwraps a zero-dimensional array into the item it
    holds and leaves other values as they are.
    """
    # TODO: a single-entry DictOfSeries could be squeezed here first
    # (``key.squeeze()``). Open question: what if the squeeze still yields
    # a one-value Series - squeeze again?
    unwrapped = pdlib.item_from_zerodim(key)
    return unwrapped
class _LocIndexer:
def __init__(self, _dios):
self._dios = _dios
# short handles
self._data = _dios._data
self._check_keys = _dios._check_keys
def __getitem__(self, key):
# if we have a tuple, we have rows and columns
# if not we have only rows and work on all columns
if isinstance(key, tuple):
rkey, ckey, *fail = key
if fail:
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
# determine columns
if isinstance(ckey, str):
self._check_keys([ckey])
cols = [ckey]
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
self._check_keys(ckey)
cols = ckey
else:
raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
else:
cols = self._data.keys()
rkey = key
# pass the row-key directly to pd.Series.loc[row-key]
new = DictOfSeries()
for c in cols:
new[c] = self._data[c].loc[rkey]
return new
def _col_slice_to_col_list(self, rslice):
""" see here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
"""
keys = list(self._data.keys)
try:
start = keys.index(rslice.start) if rslice.start is not None else None
stop = keys.index(rslice.stop) if rslice.stop is not None else None
except ValueError:
raise KeyError("The slice start label or the slice stop label is not present in the columns.")
if not is_integer(rslice) and rslice > 0:
raise TypeError("The step parameter of the slice must be positive integer.")
return keys[slice(start, stop + 1, rslice.step)]
class DictOfSeries:
"""
DictionaryOfSeries is a collection of pd.Series's which aim to be as close as possible similar to
......@@ -112,10 +49,16 @@ class DictOfSeries:
def __init__(self, indextype=None, **kwargs):
# Create the (initially empty) storage, optionally pin the index type from
# ``indextype`` and then insert every keyword argument via ``self[name] = value``.
#
# NOTE(review): this span appears to carry two generations of the same
# explanatory comment (diff residue) - confirm against the real file which
# lines are live.
self._data = OrderedDict()
# We need to keep track if the index type of every Series is the
# same, because if we have different types, it would make slicing
# impossible.
self._indextype = None
# We need to keep track of the type of the index of every new Series.
# If the types differ, slicing will almost always fail, because a datetime-like
# slice cannot work on a numeric index and vice versa.
if indextype is not None:
# translate the argument with get_indextype(), then run both checks on it
indextype = get_indextype(indextype)
check_mixed_indextype_option(indextype)
check_allowed_indextypes(indextype)
self._indextype = indextype
# fill initial given values in the dios
for kw in kwargs:
self[kw] = kwargs[kw]
......@@ -146,17 +89,15 @@ class DictOfSeries:
return self._indextype
def _set_indextype(self, idx):
# Fold the index type of ``idx`` into ``self._indextype``.
#
# NOTE(review): this span appears to interleave two versions of the method
# body (diff residue): one that derives a local ``itype`` through
# ``is_dtIndex_like`` / ``is_numIndex_like`` and one that calls
# ``get_indextype``. The statement order below looks inconsistent as a
# result - confirm against the real file which lines are live.
itype = 'other'
if is_dtIndex_like(idx):
itype = 'datetime'
elif is_numIndex_like(idx):
itype = 'numeric'
""" Set indextype of dios.
Note: If ``self._indextype`` and ``idx`` are of the same type,
``self._indextype`` stays unchanged.
"""
# first index seen: adopt its type
if self._indextype is None:
self._indextype = itype
return
# same type as before: nothing to do
if self._indextype == itype:
return
# differing types are recorded as 'mixed'
self._indextype = 'mixed'
self._indextype = get_indextype(idx)
elif self._indextype != get_indextype(idx):
self._indextype = IdxTypes.mixed
def _check_keys(self, keys):
missing = [k for k in keys if k not in self.columns]
......@@ -535,3 +476,60 @@ class DictOfSeries:
return None
return news.squeeze()
class _LocIndexer:
def __init__(self, _dios):
self._dios = _dios
# short handles
self._data = _dios._data
self._check_keys = _dios._check_keys
def __getitem__(self, key):
# if we have a tuple, we have rows and columns
# if not we have only rows and work on all columns
if isinstance(key, tuple):
rkey, ckey, *fail = key
if fail:
raise KeyError("To many indexers")
# prepare ckey
if is_iterator(ckey):
ckey = list(ckey)
# determine columns
if isinstance(ckey, str):
self._check_keys([ckey])
cols = [ckey]
elif isinstance(ckey, slice):
cols = self._col_slice_to_col_list(ckey)
elif is_list_like(ckey):
self._check_keys(ckey)
cols = ckey
else:
raise KeyError(f"Type {type(ckey)} is not supported to select columns.")
else:
cols = self._data.keys()
rkey = key
# pass the row-key directly to pd.Series.loc[row-key]
new = DictOfSeries()
for c in cols:
new[c] = self._data[c].loc[rkey]
return new
def _col_slice_to_col_list(self, rslice):
""" see here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
"""
keys = list(self._data.keys)
try:
start = keys.index(rslice.start) if rslice.start is not None else None
stop = keys.index(rslice.stop) if rslice.stop is not None else None
except ValueError:
raise KeyError("The slice start label or the slice stop label is not present in the columns.")
if not is_integer(rslice) and rslice > 0:
raise TypeError("The step parameter of the slice must be positive integer.")
return keys[slice(start, stop + 1, rslice.step)]
import pandas as pd
import pandas._libs.lib as pdlib
import warnings
from dios.options import *
def _get_storage_class_values(cls):
return [getattr(cls, c) for c in cls.__dict__ if not c.startswith("_")]
class IdxType:
class IdxTypes:
    """Storage class for the index-type names."""

    # NOTE: the attribute name ``nunmeric`` is referenced under exactly this
    # spelling by other code, so it is kept as is.
    nunmeric = "numeric"
    datetime = "datetime"
    mixed = "mixed"
    other = "other"
def is_dtIndex_like(i):
    """Tell whether ``i`` is an instance of ``pd.DatetimeIndex``."""
    datetime_index_cls = pd.DatetimeIndex
    return isinstance(i, datetime_index_cls)
# All public attribute values of IdxTypes, collected once when the module is loaded.
idxtypes = _get_storage_class_values(IdxTypes)
def check_mixed_indextype_option(idxtype):
    """Emit a ``DiosOptionsWarning`` if the mixed-indextypes option is switched on.

    The ``idxtype`` argument is accepted but not inspected. Nothing is
    returned.
    """
    if not dios_options[Options.mixed_indextyes]:
        return
    message = (f"Using dios_option[{Options.mixed_indextyes}]=True is highly experimental, "
               f"please do not report any bugs!")
    warnings.warn(message, DiosOptionsWarning)
def check_allowed_indextypes(idxtype):
    """Raise ``ValueError`` unless ``idxtype`` is the numeric or the datetime index type."""
    supported = (IdxTypes.nunmeric, IdxTypes.datetime)
    if idxtype in supported:
        return
    raise ValueError("The index of the given object is not of supported type")
def get_indextype(obj):
    """Map ``obj`` to one of the index-type names declared on ``IdxTypes``.

    The datetime, numeric and generic pandas-index probes are tried in that
    order; the first one that accepts ``obj`` decides the result. If none
    does, ``obj`` is compared against the known index-type names and the
    matching name is returned.

    Raises ``ValueError`` if ``obj`` matches nothing.
    """
    probes = (
        (_is_dtIndex_like, IdxTypes.datetime),
        (_is_numIndex_like, IdxTypes.nunmeric),
        (_is_pdIndex_like, IdxTypes.other),
    )
    for accepts, name in probes:
        if accepts(obj):
            return name

    # maybe an index-type name itself was passed in
    for known in idxtypes:
        if obj == known:
            return known

    raise ValueError(f"{type(obj)} is not a indextype nor any known subtype of pd.Index")
def _is_dtIndex_like(i):
if isinstance(i, pd.DatetimeIndex):
return True
try:
if i == pd.DatetimeIndex:
return True
except TypeError:
return False
def _is_numIndex_like(i):
tup = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
if isinstance(i, tup):
return True
# was a pd.xxxIndex was given
for it in tup:
try:
if it == i:
return True
except TypeError:
pass
return False
def is_numIndex_like(i):
    """Return True if ``i`` is an instance of a numeric pandas index.

    Accepted are instances of ``RangeIndex`` / ``Int64Index`` /
    ``UInt64Index`` / ``Float64Index`` and a plain ``pd.Index`` holding a
    signed, unsigned or floating point dtype.
    """
    # Int64Index, UInt64Index and Float64Index are gone in pandas >= 2.0, so
    # only use the classes the installed pandas actually provides.
    names = ("RangeIndex", "Int64Index", "UInt64Index", "Float64Index")
    found = (getattr(pd, name, None) for name in names)
    numeric_classes = tuple(cls for cls in found if cls is not None)

    if isinstance(i, numeric_classes):
        return True
    # pandas >= 2.0 stores integer/float labels in a plain pd.Index
    return type(i) is pd.Index and i.dtype.kind in "iuf"
dios_options = dict(
disp_max_rows=10,
disp_max_vars=4,
def _is_pdIndex_like(i):
"""See here:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.html#pandas.Index
"""
if isinstance(i, pd.Index):
return True
tup = (pd.RangeIndex, pd.CategoricalIndex, pd.MultiIndex, pd.IntervalIndex,
pd.DatetimeIndex, pd.TimedeltaIndex,
pd.PeriodIndex, pd.Int64Index, pd.UInt64Index, pd.Float64Index)
# was a pd.xxxIndex was given
for it in tup:
try:
if it == i:
return True
except TypeError:
pass
return False
# 0: accept all
# 1: accept if at least one key is in both DioS
# 2: accept if all keys of the src-DioS are in the dest-DioS
# 3: accept if both dios have the exact same keys (makes only sense for assignments with slicer,
# otherwise it's the same as creating a new dios)
dios_to_dios_method=3
)
from dios.lib import IdxTypes
class DiosOptionsWarning(UserWarning):
    """Warning category for notices about dios option settings."""
class Options:
    """storage class for dios options dict keys"""

    # Set the number of rows and variables to display in a call that uses
    # ``__repr__`` or ``__str__``, e.g. ``print(dios)``.
    # (The key for ``disp_max_rows`` used to carry a stray trailing blank.)
    disp_max_rows = "disp_max_rows"
    disp_max_vars = "disp_max_vars"

    # 0: accept all
    # 1: accept if at least one key is in both DioS
    # 2: accept if all keys of the src-DioS are in the dest-DioS
    # 3: accept if both dios have the exact same keys (makes only sense for
    #    assignments with slicer, otherwise it's the same as creating a new dios)
    dios_to_dios_method = "dios_to_dios_method"

    # If we have different types of indexes in the dios, slicing will almost
    # always fail, because e.g. a numeric slice cannot work on a
    # pd.DatetimeIndex and vice versa. Setting this to True is highly
    # experimental; any arising issues or errors should be handled by the user.
    # NOTE: name and key are referenced under exactly this spelling by other
    # code, so they are kept as is.
    mixed_indextyes = "mixed_indextyes"

    allowed_indextypes = "allowed_indextypes"
# Default value for every dios option, keyed by the names declared on ``Options``.
# NOTE(review): ``IdxTypes`` is imported from ``dios.lib`` while other code
# star-imports ``dios.options`` - presumably these are two modules importing
# each other; verify the import cycle resolves.
dios_options = {
# display limits
Options.disp_max_rows : 10,
Options.disp_max_vars: 4,
# key-matching mode; the modes are described on ``Options``
Options.dios_to_dios_method: 3,
# mixed index types are off by default
Options.mixed_indextyes: False,
Options.allowed_indextypes: [IdxTypes.datetime, IdxTypes.nunmeric]
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment