Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
4f62bda2
Commit
4f62bda2
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
added cast itype
parent
6138cd39
No related branches found
No related tags found
2 merge requests
!2
Develop
,
!1
complete rework
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
dios/dios.py
+54
-29
54 additions, 29 deletions
dios/dios.py
dios/itypes.py
+56
-15
56 additions, 15 deletions
dios/itypes.py
dios/lib.py
+6
-0
6 additions, 0 deletions
dios/lib.py
dios/options.py
+0
-1
0 additions, 1 deletion
dios/options.py
with
116 additions
and
45 deletions
dios/dios.py
+
54
−
29
View file @
4f62bda2
...
...
@@ -10,6 +10,7 @@ from pandas.core.dtypes.common import (
is_list_like
,
is_scalar
,
is_integer
,
is_dict_like
,
)
from
pandas.core.dtypes.common
import
is_iterator
as
_is_iterator
from
pandas.core.indexing
import
need_slice
...
...
@@ -56,20 +57,46 @@ class DictOfSeries:
"""
def
__init__
(
self
,
itype
=
None
,
**
kwargs
):
def
__init__
(
self
,
data
=
None
,
itype
=
None
,
columns
=
None
):
self
.
_data
=
OrderedDict
()
# We need to keep track of the index-type (itype) of every new Series.
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
if
itype
is
not
None
:
itype
=
get_itype
(
itype
)
check_allowed_itypes
(
itype
)
self
.
_itype
=
itype
self
.
_itype
=
MixedItype
self
.
__init_insert_data__
(
data
)
# use property.setter to make necessary checks
self
.
columns
=
columns
# 1. infer itype
# check with given -> fine
# check with given -> cast -> fine
# check with given -> cast -> err out
# given None:
# is unique -> fine
# not unique -> err out
def
__init_insert_data__
(
self
,
data
):
if
data
is
None
:
return
if
isinstance
(
data
,
DictOfSeries
):
for
k
in
data
:
self
[
k
]
=
data
[
k
]
if
is_iterator
(
data
):
data
=
list
(
data
)
# fill initial given values in the dios
for
kw
in
kwargs
:
self
[
kw
]
=
kwargs
[
kw
]
if
is_dict_like
(
data
):
for
k
in
data
:
self
[
k
]
=
data
[
k
]
# take care: dicts are also list-like
if
is_list_like
(
data
):
self
[
'
0
'
]
=
data
@property
def
columns
(
self
):
...
...
@@ -78,10 +105,10 @@ class DictOfSeries:
@columns.setter
def
columns
(
self
,
new
):
if
not
isinstance
(
new
,
list
):
raise
NotImplementedError
(
"
Only lists supported so far
"
)
raise
TypeError
(
"
column names must be given as a list
"
)
if
len
(
set
(
new
))
!=
len
(
new
):
raise
ValueError
(
"
N
ames must be unique
"
)
raise
ValueError
(
"
column n
ames must be unique
"
)
if
len
(
new
)
!=
len
(
self
.
columns
):
raise
ValueError
(
f
"
Length mismatch: Columns has
{
len
(
self
.
columns
)
}
elements,
"
...
...
@@ -97,22 +124,11 @@ class DictOfSeries:
def
itype
(
self
):
return
self
.
_itype
def
_set_itype
(
self
,
idx
):
"""
Set itype of dios.
Note: If ``self._itype`` and ``idx`` are of the same type,
``self._itype`` stays unchanged.
"""
idx
=
get_itype
(
idx
)
check_allowed_itypes
(
idx
)
if
self
.
_itype
is
None
:
self
.
_itype
=
idx
elif
self
.
_itype
!=
idx
:
if
dios_options
[
Options
.
allow_mixed_itypes
]:
self
.
_itype
=
IdxTypes
.
mixed
else
:
raise
ValueError
(
f
"
Only objects which have a index of type `
{
self
.
_itype
}
` can be inserted.
"
)
@itype.setter
def
itype
(
self
,
itype_like
):
if
is_itype_subtype
(
self
.
_itype
,
itype_like
):
self
.
_itype
=
itype_like
raise
NotImplementedError
(
"
futur throw `mixed` warning
"
)
def
_check_keys
(
self
,
keys
):
missing
=
[
k
for
k
in
keys
if
k
not
in
self
.
columns
]
...
...
@@ -227,7 +243,16 @@ class DictOfSeries:
if
not
isinstance
(
v
,
pd
.
Series
):
raise
ValueError
(
f
"
Only pd.Series and DictOfSeries (of length 1) can be assigned new
"
)
self
.
_set_itype
(
v
.
index
)
if
self
.
_itype
is
None
:
# if the user created an empty dios or
# the last element was deleted
self
.
_itype
=
get_itype
(
v
.
index
)
v
=
cast_to_fit_itype
(
v
,
self
.
_itype
)
if
v
is
None
:
itype
=
get_itype
(
v
.
index
)
raise
ValueError
(
f
"
Itype mismach. Data of key `
{
key
}
`, with (infered) itype `
{
itype
}
`
"
f
"
cannot be inserted in a dios with itype `
{
self
.
itype
}
`.
"
)
self
.
_data
[
key
]
=
v
.
copy
(
deep
=
True
)
def
_setitem
(
self
,
key
,
val
,
sl
=
None
):
...
...
@@ -370,8 +395,8 @@ class DictOfSeries:
def
copy
(
self
,
deep
=
True
):
new
=
DictOfSeries
()
new
.
_itype
=
self
.
itype
# We use `_data` here because all checks
hav
e already
been
done.
# So this should be much faster, especially because we use the underlying dict for
# We use `_data` here
,
because all checks
ar
e already done.
# So this should be much faster, especially
,
because we use the underlying dict for
# getting and setting the values, instead of ``__setitem__`` and ``__getitem__``.
# Note: don't use the same approach elsewhere, unless you're very sure what you are doing.
for
k
in
self
.
_data
:
...
...
This diff is collapsed.
Click to expand it.
dios/itypes.py
+
56
−
15
View file @
4f62bda2
...
...
@@ -10,24 +10,21 @@ class DatetimeItype(__Itype):
name
=
'
datetime
'
unique
=
True
subtypes
=
(
pd
.
DatetimeIndex
,)
cast_to
=
...
class
IntegerItype
(
__Itype
):
name
=
'
integer
'
unique
=
True
subtypes
=
(
pd
.
RangeIndex
,
pd
.
Int64Index
,
pd
.
UInt64Index
,)
cast_to
=
int
class
FloatItype
(
__Itype
):
name
=
'
float
'
subtypes
=
(
pd
.
Float64Index
,)
unique
=
True
class
OtherItype
(
__Itype
):
name
=
"
other
"
subtypes
=
(
pd
.
CategoricalIndex
,
pd
.
IntervalIndex
,
pd
.
PeriodIndex
,)
unique
=
True
cast_to
=
float
# class MultiItype(__Itype):
...
...
@@ -38,18 +35,17 @@ class OtherItype(__Itype):
class
NumericItype
(
__Itype
):
name
=
"
numeric
"
subtypes
=
(
IntegerItype
.
subtypes
+
FloatItype
.
subtypes
)
_subitypes
=
(
IntegerItype
,
FloatItype
)
subtypes
=
(
_subitypes
+
IntegerItype
.
subtypes
+
FloatItype
.
subtypes
)
unique
=
False
class
MixedItype
(
__Itype
):
class
MixedItype
(
__Itype
):
name
=
"
mixed
"
unique
=
False
subtypes
=
(
DatetimeItype
.
subtypes
+
NumericItype
.
subtypes
+
OtherItype
.
subtypes
+
# pd.MultiIndex, not supported
())
_subitypes
=
(
DatetimeItype
,
IntegerItype
,
FloatItype
,
NumericItype
)
_otheritypes
=
(
pd
.
CategoricalIndex
,
pd
.
IntervalIndex
,
pd
.
PeriodIndex
,
pd
.
TimedeltaIndex
)
subtypes
=
(
_subitypes
+
_otheritypes
+
DatetimeItype
.
subtypes
+
NumericItype
.
subtypes
)
def
is_itype
(
obj
,
itype
):
...
...
@@ -57,6 +53,7 @@ def is_itype(obj, itype):
# user gave a Itype, like ``DatetimeItype``
if
issubclass
(
obj
,
itype
):
return
True
# todo: iterate over itype, as it could be a tuple if called like ``is_itype(o, (t1,t2))``
# user gave a string, like 'datetime'
if
isinstance
(
obj
,
str
)
and
obj
==
itype
.
name
:
return
True
...
...
@@ -79,6 +76,11 @@ def is_itype_like(obj, itype):
return
is_itype
(
obj
,
itype
)
or
is_itype_subtype
(
obj
,
itype
)
def
get_minimal_itype
(
obj
):
"""
alias for get_itype(), see there for more info
"""
return
get_itype
(
obj
)
def
get_itype
(
obj
):
"""
Return the corresponding Itype for any supported user input, like
...
...
@@ -93,17 +95,56 @@ def get_itype(obj):
return
obj
# check if it is the actual type, not a subtype
types
=
[
DatetimeItype
,
NumericItype
,
IntegerItype
,
FloatItype
,
OtherItype
,
MixedItype
]
types
=
[
DatetimeItype
,
IntegerItype
,
FloatItype
,
OtherItype
,
NumericItype
,
MixedItype
]
for
t
in
types
:
if
is_itype
(
obj
,
t
):
return
t
# If the above failed, we try to infer the itype by its subtypes.
# We just check the unique types, because the non-unique are just
# collections of unique subtypes.
# collections of unique subtypes, and would have been detected by one
# of the if-statements above
for
t
in
types
:
if
is_itype_subtype
(
obj
,
t
)
and
t
.
unique
:
return
t
raise
ValueError
(
f
"
{
obj
}
is not a itype, nor any known subtype of a itype, nor a itype string alias
"
)
def
cast_to_fit_itype
(
series
,
itype
):
"""
Cast a series (more precisely, the type of its index) to fit the itype of a dios.
Return the cast series if successful, None otherwise.
Note:
This is very basic number-casting, so in most cases, information from
the old index will be lost after the cast.
"""
series
.
itype
=
get_itype
(
series
.
index
)
# up-cast isn't necessary because a dios with a higher
# itype can always take lower itypes
# dt -> dt -> mixed
# int -> int -> num -> mixed
# float -> float -> num -> mixed
# num -> num -> mixed
# mixed -> mixed
if
is_itype_subtype
(
series
.
itype
,
itype
):
return
series
# any (dt/float/num/mixed) -> int/num OK
if
is_itype
(
itype
,
IntegerItype
)
or
is_itype
(
itype
,
NumericItype
):
series
.
index
=
pd
.
RangeIndex
(
len
(
series
))
return
series
# any (dt/int/num/mixed) -> float OK
if
is_itype
(
itype
,
FloatItype
):
series
.
index
=
pd
.
Float64Index
(
range
(
len
(
series
)))
return
series
# any (int/float/num/mixed) -> dt FAIL
if
is_itype
(
itype
,
DatetimeItype
):
return
None
return
None
This diff is collapsed.
Click to expand it.
dios/lib.py
+
6
−
0
View file @
4f62bda2
import
pandas
as
pd
from
dios.itypes
import
*
import
warnings
def
_get_storage_class_values
(
cls
):
return
[
getattr
(
cls
,
c
)
for
c
in
cls
.
__dict__
if
not
c
.
startswith
(
"
_
"
)]
class
CastWarning
(
RuntimeWarning
):
pass
This diff is collapsed.
Click to expand it.
dios/options.py
+
0
−
1
View file @
4f62bda2
from
dios.itypes
import
IdxTypes
import
warnings
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment