Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
680ea4a8
Commit
680ea4a8
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
itype done
parent
4f62bda2
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!2
Develop
,
!1
complete rework
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
dios/dios.py
+69
-52
69 additions, 52 deletions
dios/dios.py
dios/itypes.py
+27
-12
27 additions, 12 deletions
dios/itypes.py
dios/lib.py
+40
-3
40 additions, 3 deletions
dios/lib.py
dios/options.py
+5
-68
5 additions, 68 deletions
dios/options.py
with
141 additions
and
135 deletions
dios/dios.py
+
69
−
52
View file @
680ea4a8
from
dios.lib
import
*
from
dios.options
import
*
import
pandas
as
pd
import
numpy
as
np
import
operator
as
op
import
datetime
as
dt
from
collections
import
OrderedDict
from
pandas.core.dtypes.common
import
(
...
...
@@ -13,7 +11,6 @@ from pandas.core.dtypes.common import (
is_dict_like
,
)
from
pandas.core.dtypes.common
import
is_iterator
as
_is_iterator
from
pandas.core.indexing
import
need_slice
def
is_iterator
(
obj
):
...
...
@@ -51,7 +48,7 @@ class DictOfSeries:
Todos:
-----
todo: allow any hashable obj as column identifier
todo:
to_discuss!!
allow any hashable obj as column identifier
Currently we only allow strings as identifier, to be more df-like we should allow any
hashable object (unlike df we may should exclude stuff like: ``None`` or ``np.nan`` ??)
...
...
@@ -64,20 +61,24 @@ class DictOfSeries:
# We need to keep track of the index-type (itype) of every new Series.
# If the itypes differ between different series, slicing will almost always fail
# (eg. a datetime-like slice cannot work on a numeric index and vice versa).
#
# Maybe data was given, so we first set the itype to MixedItype, then insert all data,
# and check/cast the itype afterwards, otherwise __setitem_new() will set the itype,
# which may prevent inserting series with other (higher) itypes.
self
.
_itype
=
MixedItype
self
.
__init_insert_data__
(
data
)
# use
property
.setter to make necessary checks
#
we
use
the columns
.setter to make
all
necessary checks
self
.
columns
=
columns
#
1.
infer itype
# check with given -> fine
# check with given -> cast -> fine
# check with given -> cast -> err out
#
given None:
#
is unique -> fine
# not unique -> err out
# infer
the
itype
by the data
inferred_itype
=
self
.
__find_least_common_itype
()
itype
=
inferred_itype
if
itype
is
None
else
get_itype
(
itype
)
#
We use the itype.setter to make all checks. If the given itype was of a lower type
#
than the inferred itype, a cast is tried on every series.
self
.
itype
=
itype
def
__init_insert_data__
(
self
,
data
):
if
data
is
None
:
...
...
@@ -98,6 +99,38 @@ class DictOfSeries:
if
is_list_like
(
data
):
self
[
'
0
'
]
=
data
def __find_least_common_itype(self):
    """Infer the most specific itype that covers the index of every column.

    The itype of each stored series' index is determined with ``get_itype``.
    The super types are then tried from wide to narrow (``MixedItype``,
    ``NumericItype``), followed by the base types (``DatetimeItype``,
    ``IntegerItype``, ``FloatItype``). The first base type that fits all
    columns wins, otherwise the narrowest fitting super type is returned.

    Returns
    -------
    itype
        A single itype class for which ``itype_le(column_itype, itype)``
        holds for all columns.
    """

    def all_itypes_le(itypes, super_itype):
        # True if ``itype_le`` holds for every collected itype.
        return all(itype_le(itype, super_itype) for itype in itypes)

    itypes = [get_itype(self._data[k].index) for k in self.columns]

    found = None

    # check supertypes: ordered from wide to narrow, so stop at the
    # first one that does not cover all columns anymore
    for super_itype in (MixedItype, NumericItype):
        if not all_itypes_le(itypes, super_itype):
            break
        found = super_itype
    assert found, "At least this should be MixedItype"

    # check base types
    for single_itype in (DatetimeItype, IntegerItype, FloatItype):
        if all_itypes_le(itypes, single_itype):
            # store the matching type itself, not the list of candidates
            found = single_itype
            break
    return found
@property
def columns(self):
    """Return the keys of the underlying ``_data`` mapping as a new list."""
    keys = self._data.keys()
    return list(keys)
...
...
@@ -126,9 +159,24 @@ class DictOfSeries:
@itype.setter
def itype(self, itype_like):
    """Set the dios-wide itype, casting the stored series where necessary.

    Parameters
    ----------
    itype_like
        Anything ``get_itype`` can resolve to an itype.

    Raises
    ------
    ValueError
        Raised by ``get_itype`` if ``itype_like`` cannot be resolved.
    ItypeCastError
        Raised by ``__cast_all`` if a stored series cannot be cast.

    Warns
    -----
    ItypeWarning
        If the resulting itype is not ``unique``.
    """
    # Always normalise first, so ``_itype`` never holds the raw user input.
    itype = get_itype(itype_like)

    if not is_itype_subtype(self._itype, itype):
        # try to cast all series to the new itype
        self.__cast_all(itype)

    self._itype = itype

    if not itype.unique:
        throw(
            f"Using a {itype} as dios.itype is experimental. As soon as series with different index types "
            f"are inserted, slicing will almost always fail. You are hereby warned!",
            ItypeWarning,
        )
def __cast_all(self, itype):
    """Cast every stored series so that its index fits ``itype``.

    All casts are performed before any column is replaced, so a failing
    cast leaves the stored data untouched instead of half-cast.

    Parameters
    ----------
    itype
        Target itype, passed on to ``cast_to_fit_itype``.

    Raises
    ------
    ItypeCastError
        If ``cast_to_fit_itype`` returns ``None`` for any column.
    """
    casted_columns = {}
    for k in self.columns:
        # the cast operates on a copy of the stored series
        casted = cast_to_fit_itype(self._data[k].copy(), itype)
        if casted is None:
            raise ItypeCastError(f"Cast series indicees to the given itype failed for series in column {k}.")
        casted_columns[k] = casted

    # commit only after every single cast succeeded
    for k, casted in casted_columns.items():
        self._data[k] = casted
def
_check_keys
(
self
,
keys
):
missing
=
[
k
for
k
in
keys
if
k
not
in
self
.
columns
]
...
...
@@ -348,43 +396,6 @@ class DictOfSeries:
def
__delitem__
(
self
,
key
):
del
self
.
_data
[
key
]
self
.
__set_mixed_itype_from_all_keys
()
def
__set_mixed_itype_from_all_keys
(
self
):
"""
If the itype of dios is ``mixed`` and the itype of any stored
Series change, we need to check the itype of all other Series, to
validate the dios-wide itype.
"""
if
len
(
self
)
==
0
:
self
.
_itype
=
None
return
if
len
(
self
)
==
1
:
self
.
_itype
=
get_itype
(
self
.
squeeze
().
index
)
return
# ``mixed`` isn't allowed in general, so we're done
if
not
dios_options
[
Options
.
allow_mixed_itypes
]:
return
# itype wasn't ``mixed``, so we're done
if
self
.
_itype
!=
IdxTypes
.
mixed
:
return
# check all types
types
=
set
()
for
k
in
self
.
_data
.
keys
():
idx
=
self
.
_data
[
k
].
index
types
.
add
(
get_itype
(
idx
))
# If we have at least two different
# itypes, ``mixed`` still apply.
if
len
(
types
)
>
1
:
return
# index is of a single new type
self
.
_itype
=
types
.
pop
()
return
def
__copy__
(
self
):
return
self
.
copy
(
deep
=
True
)
...
...
@@ -565,6 +576,8 @@ class _LocIndexer(_Indexer):
# list_like -> check length
for
c
in
cols
:
self
.
_data
[
c
].
loc
[
rkey
]
=
value
# todo loc.__setitem__(self, key, value):
raise
NotImplementedError
def
_unpack_key
(
self
,
key
):
# if we have a tuple, we have a rows- and a column-indexer
...
...
@@ -618,6 +631,10 @@ class _iLocIndexer(_Indexer):
new
[
c
]
=
self
.
_data
[
c
].
iloc
[
rkey
]
return
new
def
__setitem__
(
self
,
key
,
value
):
# todo iloc.__setitem__(self, key, value):
raise
NotImplementedError
def
_unpack_key
(
self
,
key
):
# if we have a tuple, we have a rows- and a column-indexer
# if not, we only have a row-indexer and work on all columns
...
...
This diff is collapsed.
Click to expand it.
dios/itypes.py
+
27
−
12
View file @
680ea4a8
import
pandas
as
pd
class ItypeWarning(RuntimeWarning):
    """Warning category (a ``RuntimeWarning``) for itype-related warnings."""
class ItypeCastWarning(ItypeWarning):
    """Specialisation of ``ItypeWarning``; presumably for itype casts (usage not visible here)."""
class ItypeCastError(RuntimeError):
    """Error (a ``RuntimeError``) raised when casting a series index to an itype fails."""
class
__Itype
:
def
__init__
(
self
):
raise
RuntimeError
(
"
DatetimeItype does not allow instances of itself.
"
)
...
...
@@ -10,21 +22,18 @@ class DatetimeItype(__Itype):
name
=
'
datetime
'
unique
=
True
subtypes
=
(
pd
.
DatetimeIndex
,)
cast_to
=
...
class
IntegerItype
(
__Itype
):
name
=
'
integer
'
unique
=
True
subtypes
=
(
pd
.
RangeIndex
,
pd
.
Int64Index
,
pd
.
UInt64Index
,)
cast_to
=
int
subtypes
=
(
pd
.
RangeIndex
,
pd
.
Int64Index
,
pd
.
UInt64Index
,
int
)
class
FloatItype
(
__Itype
):
name
=
'
float
'
subtypes
=
(
pd
.
Float64Index
,)
subtypes
=
(
pd
.
Float64Index
,
float
)
unique
=
True
cast_to
=
float
# class MultiItype(__Itype):
...
...
@@ -76,11 +85,6 @@ def is_itype_like(obj, itype):
return
is_itype
(
obj
,
itype
)
or
is_itype_subtype
(
obj
,
itype
)
def
get_minimal_itype
(
obj
):
"""
alias for get_itype(), see there for more info
"""
return
get_itype
(
obj
)
def
get_itype
(
obj
):
"""
Return the corresponding Itype for any possible user input, like
...
...
@@ -95,7 +99,7 @@ def get_itype(obj):
return
obj
# check if it is the actual type, not a subtype
types
=
[
DatetimeItype
,
IntegerItype
,
FloatItype
,
OtherItype
,
NumericItype
,
MixedItype
]
types
=
[
DatetimeItype
,
IntegerItype
,
FloatItype
,
NumericItype
,
MixedItype
]
for
t
in
types
:
if
is_itype
(
obj
,
t
):
return
t
...
...
@@ -111,6 +115,18 @@ def get_itype(obj):
raise
ValueError
(
f
"
{
obj
}
is not a itype, nor any known subtype of a itype, nor a itype string alias
"
)
def itype_eq(a, b):
    """Equality comparison of itypes: the result of ``is_itype(a, b)``."""
    same = is_itype(a, b)
    return same
def itype_lt(a, b):
    """Strict 'less than' comparison of itypes: the result of ``is_itype_subtype(a, b)``."""
    strictly_below = is_itype_subtype(a, b)
    return strictly_below
def itype_le(a, b):
    """'Less than or equal' comparison of itypes: the result of ``is_itype_like(a, b)``."""
    below_or_same = is_itype_like(a, b)
    return below_or_same
def
cast_to_fit_itype
(
series
,
itype
):
"""
Cast a series (more precisely, the type of its index) to fit the itype of a dios.
...
...
@@ -147,4 +163,3 @@ def cast_to_fit_itype(series, itype):
return
None
return
None
This diff is collapsed.
Click to expand it.
dios/lib.py
+
40
−
3
View file @
680ea4a8
import
pandas
as
pd
from
dios.itypes
import
*
from
dios.options
import
*
import
pandas
as
pd
import
warnings
...
...
@@ -7,5 +8,41 @@ def _get_storage_class_values(cls):
return
[
getattr
(
cls
,
c
)
for
c
in
cls
.
__dict__
if
not
c
.
startswith
(
"
_
"
)]
class CastWarning(RuntimeWarning):
    """Warning category (a ``RuntimeWarning``); presumably for casts (usage not visible here)."""
def throw(msg, wtype):
    """Issue ``msg`` as a warning of category ``wtype`` via ``warnings.warn``."""
    warnings.warn(message=msg, category=wtype)
# ``method`` may be passed explicitly; ``dios_options`` is only the fallback.
def get_dios_to_dios_keys(keys, other, method=None):
    """Select and validate the keys used for a dios-to-dios assignment.

    Parameters
    ----------
    keys : list-like
        Keys to match against ``other.columns``.
    other : object with a ``columns`` attribute
        The other dios.
    method : int, optional
        Matching policy. If ``None`` (default), the value of
        ``dios_options[Options.dios_to_dios_method]`` is used.

        * 0 - keep the keys that are in ``other.columns``, ignore the rest
        * 1 - like 0, but at least one key must remain
        * 2 - every key must be in ``other.columns``
        * 3 - ``keys`` and ``other.columns`` must hold the same keys

    Returns
    -------
    list-like
        The filtered keys (method 0 and 1) or ``keys`` unchanged (method 2 and 3).

    Raises
    ------
    KeyError
        If the keys violate the chosen policy.
    OptionsError
        If ``method`` is not one of 0, 1, 2, 3.
    """
    # we can assume that all keys exist in self._data
    if method is None:
        method = dios_options[Options.dios_to_dios_method]
    err_append = "consider changing dios.option['dios_to_dios_method']"

    # read the attribute once, instead of once per key
    other_columns = other.columns

    # assign where possible, otherwise ignore
    if method == 0:
        keys = [k for k in keys if k in other_columns]

    # at least one key must be in self
    elif method == 1:
        keys = [k for k in keys if k in other_columns]
        if not keys:
            raise KeyError("src-DioS and dest-DioS need to share at least one key, " + err_append)

    # all keys must be in self, but more keys could exist in other,
    # eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
    # eg. ``dios[['a','b']] = dios['a']`` will fail
    elif method == 2:
        fail = [k for k in keys if k not in other_columns]
        if fail:
            raise KeyError(f"{fail} are missing in the destiny-dios, " + err_append)

    # keys in both dios's must be equal
    elif method == 3:
        fail = set(keys).symmetric_difference(set(other_columns))
        if fail:
            raise KeyError(f"{fail} is not in both of src- and dest-dios, " + err_append)

    else:
        raise OptionsError(f"{method} is an invalid value for dios.option[dios_to_dios]")

    return keys
This diff is collapsed.
Click to expand it.
dios/options.py
+
5
−
68
View file @
680ea4a8
import
warnings
class OptionsWarning(UserWarning):
    """Warning category (a ``UserWarning``) for warnings about dios options."""
...
...
@@ -25,71 +23,10 @@ class Options:
otherwise its the same than creating a new dios)
"""
dios_to_dios_method
=
"
dios_to_dios_method
"
"""
If we have different types of indexes in the dios, slicing will almost always fail.
It is because, eg. a numeric slice cannot work on a pd.DatetimeIndex and vice versa.
To set this to True is highly experimental, any arising issues or errors should be
handled by the user.
"""
allow_mixed_itypes
=
"
allow_mixed_itypes
"
allowed_indextypes
=
"
allowed_indextypes
"
class
__OptionsDict
(
dict
):
"""
Simple dict that throw a warning, if a special value is inserted at a special key
"""
def
__setitem__
(
self
,
key
,
value
):
# throw a warning when user set ``mixed_indextyes = True``
if
key
==
Options
.
allow_mixed_itypes
and
value
:
warnings
.
warn
(
f
"
Using ``dios_option[
{
Options
.
allow_mixed_itypes
}
]=True`` is highly experimental,
"
f
"
please do not report any bugs!
"
,
OptionsWarning
)
return
super
().
__setitem__
(
key
,
value
)
# set default values
dios_options
=
__OptionsDict
()
dios_options
[
Options
.
disp_max_rows
]
=
10
dios_options
[
Options
.
disp_max_vars
]
=
4
dios_options
[
Options
.
dios_to_dios_method
]
=
3
dios_options
[
Options
.
allow_mixed_itypes
]
=
False
dios_options
[
Options
.
allowed_indextypes
]
=
[
IdxTypes
.
datetime
,
IdxTypes
.
nunmeric
]
def
check_allowed_itypes
(
idxtype
):
if
idxtype
not
in
dios_options
[
Options
.
allowed_indextypes
]:
raise
RuntimeError
(
f
"
The index type `
{
idxtype
}
` is not allowed by the
"
f
"
`dios_option[
{
Options
.
allowed_indextypes
}
] =
{
dios_options
[
Options
.
allowed_indextypes
]
}
`
"
)
def
get_dios_to_dios_keys
(
keys
,
other
):
# we can assume that all keys are exist in self._data
method
=
dios_options
[
Options
.
dios_to_dios_method
]
err_append
=
"
consider changing dios.option[
'
dios_to_dios_method
'
]
"
# assign where possible, otherwise ignore
if
method
==
0
:
keys
=
[
k
for
k
in
keys
if
k
in
other
.
columns
]
# at least one key must be in self
elif
method
==
1
:
keys
=
[
k
for
k
in
keys
if
k
in
other
.
columns
]
if
not
keys
:
raise
KeyError
(
"
src-DioS and dest-DioS need to share at least one key,
"
+
err_append
)
# all keys must be in self, but more keys could exist in other,
# eg. ``dios['a'] = dios[['a','b']]`` will update column-a but not column-b
# eg. ``dios[['a','b']] = dios['a']`` will fail
elif
method
==
2
:
fail
=
[
k
for
k
in
keys
if
k
not
in
other
.
columns
]
if
fail
:
raise
KeyError
(
f
"
{
fail
}
are missing in the destiny-dios,
"
+
err_append
)
# keys in both dios's must be equal
elif
method
==
3
:
fail
=
set
(
keys
).
symmetric_difference
(
set
(
other
.
columns
))
if
fail
:
raise
KeyError
(
f
"
{
fail
}
is not in both of src- and dest-dios,
"
+
err_append
)
else
:
raise
OptionsError
(
f
"
{
method
}
is an invalid value for dios.option[dios_to_dios]
"
)
return
keys
dios_options
=
{
Options
.
disp_max_rows
:
10
,
Options
.
disp_max_vars
:
4
,
Options
.
dios_to_dios_method
:
3
,
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment