Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
28c06f47
Commit
28c06f47
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
options, lib, keep track of index type
parent
9e154aa1
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!2
Develop
,
!1
complete rework
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
dios/dios.py
+75
-77
75 additions, 77 deletions
dios/dios.py
dios/lib.py
+79
-17
79 additions, 17 deletions
dios/lib.py
dios/options.py
+41
-0
41 additions, 0 deletions
dios/options.py
with
195 additions
and
94 deletions
dios/dios.py
+
75
−
77
View file @
28c06f47
...
...
@@ -14,69 +14,6 @@ from pandas.core.dtypes.common import (
from
pandas.core.indexing
import
need_slice
def item_from_zerodim(key):
    """Pass ``key`` through pandas' ``lib.item_from_zerodim`` and return the result.

    This is a thin wrapper; the actual unboxing behaviour is whatever the
    pandas helper implements.
    """
    # TODO: a DictOfSeries of length 1 could be squeezed here as well. Open
    # question: if the squeeze yields a 1-value Series, squeeze again?
    unboxed = pdlib.item_from_zerodim(key)
    return unboxed
class
_LocIndexer
:
def
__init__
(
self
,
_dios
):
self
.
_dios
=
_dios
# short handles
self
.
_data
=
_dios
.
_data
self
.
_check_keys
=
_dios
.
_check_keys
def
__getitem__
(
self
,
key
):
# if we have a tuple, we have rows and columns
# if not we have only rows and work on all columns
if
isinstance
(
key
,
tuple
):
rkey
,
ckey
,
*
fail
=
key
if
fail
:
raise
KeyError
(
"
To many indexers
"
)
# prepare ckey
if
is_iterator
(
ckey
):
ckey
=
list
(
ckey
)
# determine columns
if
isinstance
(
ckey
,
str
):
self
.
_check_keys
([
ckey
])
cols
=
[
ckey
]
elif
isinstance
(
ckey
,
slice
):
cols
=
self
.
_col_slice_to_col_list
(
ckey
)
elif
is_list_like
(
ckey
):
self
.
_check_keys
(
ckey
)
cols
=
ckey
else
:
raise
KeyError
(
f
"
Type
{
type
(
ckey
)
}
is not supported to select columns.
"
)
else
:
cols
=
self
.
_data
.
keys
()
rkey
=
key
# pass the row-key directly to pd.Series.loc[row-key]
new
=
DictOfSeries
()
for
c
in
cols
:
new
[
c
]
=
self
.
_data
[
c
].
loc
[
rkey
]
return
new
def
_col_slice_to_col_list
(
self
,
rslice
):
"""
see here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
"""
keys
=
list
(
self
.
_data
.
keys
)
try
:
start
=
keys
.
index
(
rslice
.
start
)
if
rslice
.
start
is
not
None
else
None
stop
=
keys
.
index
(
rslice
.
stop
)
if
rslice
.
stop
is
not
None
else
None
except
ValueError
:
raise
KeyError
(
"
The slice start label or the slice stop label is not present in the columns.
"
)
if
not
is_integer
(
rslice
)
and
rslice
>
0
:
raise
TypeError
(
"
The step parameter of the slice must be positive integer.
"
)
return
keys
[
slice
(
start
,
stop
+
1
,
rslice
.
step
)]
class
DictOfSeries
:
"""
DictionaryOfSeries is a collection of pd.Series
'
s which aim to be as close as possible similar to
...
...
@@ -112,10 +49,16 @@ class DictOfSeries:
def
__init__
(
self
,
indextype
=
None
,
**
kwargs
):
self
.
_data
=
OrderedDict
()
# We need to keep track if the index type of every Series is the
# same, because if we have different types, it would make slicing
# impossible.
self
.
_indextype
=
None
# We need to keep track of the type of the index of every new Series.
# If the types differ slicing will almost always fail, because a datetime-like
# slice cannot work on a numeric index and vice versa.
if
indextype
is
not
None
:
indextype
=
get_indextype
(
indextype
)
check_mixed_indextype_option
(
indextype
)
check_allowed_indextypes
(
indextype
)
self
.
_indextype
=
indextype
# fill initial given values in the dios
for
kw
in
kwargs
:
self
[
kw
]
=
kwargs
[
kw
]
...
...
@@ -146,17 +89,15 @@ class DictOfSeries:
return
self
.
_indextype
def _set_indextype(self, idx):
    """Fold the index type of ``idx`` into the index type tracked for this dios.

    The type of ``idx`` is resolved with ``get_indextype``. If no type has
    been recorded yet (``self._indextype is None``), the resolved type is
    stored. If it equals the recorded type, nothing changes. Otherwise the
    recorded type becomes ``IdxTypes.mixed``.
    """
    # resolve once; both branches below need the same value
    itype = get_indextype(idx)
    if self._indextype is None:
        self._indextype = itype
    elif self._indextype != itype:
        self._indextype = IdxTypes.mixed
def
_check_keys
(
self
,
keys
):
missing
=
[
k
for
k
in
keys
if
k
not
in
self
.
columns
]
...
...
@@ -535,3 +476,60 @@ class DictOfSeries:
return
None
return
news
.
squeeze
()
class
_LocIndexer
:
def
__init__
(
self
,
_dios
):
self
.
_dios
=
_dios
# short handles
self
.
_data
=
_dios
.
_data
self
.
_check_keys
=
_dios
.
_check_keys
def
__getitem__
(
self
,
key
):
# if we have a tuple, we have rows and columns
# if not we have only rows and work on all columns
if
isinstance
(
key
,
tuple
):
rkey
,
ckey
,
*
fail
=
key
if
fail
:
raise
KeyError
(
"
To many indexers
"
)
# prepare ckey
if
is_iterator
(
ckey
):
ckey
=
list
(
ckey
)
# determine columns
if
isinstance
(
ckey
,
str
):
self
.
_check_keys
([
ckey
])
cols
=
[
ckey
]
elif
isinstance
(
ckey
,
slice
):
cols
=
self
.
_col_slice_to_col_list
(
ckey
)
elif
is_list_like
(
ckey
):
self
.
_check_keys
(
ckey
)
cols
=
ckey
else
:
raise
KeyError
(
f
"
Type
{
type
(
ckey
)
}
is not supported to select columns.
"
)
else
:
cols
=
self
.
_data
.
keys
()
rkey
=
key
# pass the row-key directly to pd.Series.loc[row-key]
new
=
DictOfSeries
()
for
c
in
cols
:
new
[
c
]
=
self
.
_data
[
c
].
loc
[
rkey
]
return
new
def
_col_slice_to_col_list
(
self
,
rslice
):
"""
see here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-slicing-with-labels
"""
keys
=
list
(
self
.
_data
.
keys
)
try
:
start
=
keys
.
index
(
rslice
.
start
)
if
rslice
.
start
is
not
None
else
None
stop
=
keys
.
index
(
rslice
.
stop
)
if
rslice
.
stop
is
not
None
else
None
except
ValueError
:
raise
KeyError
(
"
The slice start label or the slice stop label is not present in the columns.
"
)
if
not
is_integer
(
rslice
)
and
rslice
>
0
:
raise
TypeError
(
"
The step parameter of the slice must be positive integer.
"
)
return
keys
[
slice
(
start
,
stop
+
1
,
rslice
.
step
)]
This diff is collapsed.
Click to expand it.
dios/lib.py
+
79
−
17
View file @
28c06f47
import
pandas
as
pd
import
pandas._libs.lib
as
pdlib
import
warnings
from
dios.options
import
*
def
_get_storage_class_values
(
cls
):
return
[
getattr
(
cls
,
c
)
for
c
in
cls
.
__dict__
if
not
c
.
startswith
(
"
_
"
)]
class IdxTypes:
    """Storage class for the names of the index types a dios distinguishes."""

    # Misspelled name, kept because other code refers to ``IdxTypes.nunmeric``.
    nunmeric = 'numeric'
    # Correctly spelled alias of the same value. Note that collecting all
    # public values of this class therefore yields 'numeric' twice.
    numeric = nunmeric
    datetime = 'datetime'
    mixed = 'mixed'
    other = 'other'
def is_dtIndex_like(i):
    """Tell whether ``i`` is an instance of ``pd.DatetimeIndex``."""
    datetime_index_cls = pd.DatetimeIndex
    return isinstance(i, datetime_index_cls)
# Values of all non-underscore attributes declared on IdxTypes, collected at module level.
idxtypes
=
_get_storage_class_values
(
IdxTypes
)
def check_mixed_indextype_option(idxtype):
    """Emit a ``DiosOptionsWarning`` if the mixed-indextype option is switched on.

    ``idxtype`` is accepted but not inspected. Only the entry of the global
    ``dios_options`` dict decides whether the warning is issued.
    """
    if not dios_options[Options.mixed_indextyes]:
        return
    message = (
        f"Using dios_option[{Options.mixed_indextyes}]=True is highly experimental, "
        f"please do not report any bugs!"
    )
    warnings.warn(message, DiosOptionsWarning)
def check_allowed_indextypes(idxtype):
    """Raise ``ValueError`` unless ``idxtype`` is the numeric or the datetime index type."""
    supported = [IdxTypes.nunmeric, IdxTypes.datetime]
    if idxtype in supported:
        return
    raise ValueError("The index of the given object is not of supported type")
def get_indextype(obj):
    """Map ``obj`` to one of the index-type names of ``IdxTypes``.

    The datetime, numeric and generic pandas-index checks are tried in that
    order. If none matches, ``obj`` is compared against the known index-type
    names and the first equal one is returned.

    Raises
    ------
    ValueError
        If ``obj`` matches none of the checks and none of the names.
    """
    probes = (
        (_is_dtIndex_like, IdxTypes.datetime),
        (_is_numIndex_like, IdxTypes.nunmeric),
        (_is_pdIndex_like, IdxTypes.other),
    )
    for probe, matched_type in probes:
        if probe(obj):
            return matched_type

    # obj may already be one of the index-type names
    for known in idxtypes:
        if obj == known:
            return known

    raise ValueError(f"{type(obj)} is not a indextype nor any known subtype of pd.Index")
def
_is_dtIndex_like
(
i
):
if
isinstance
(
i
,
pd
.
DatetimeIndex
):
return
True
try
:
if
i
==
pd
.
DatetimeIndex
:
return
True
except
TypeError
:
return
False
def
_is_numIndex_like
(
i
):
tup
=
(
pd
.
RangeIndex
,
pd
.
Int64Index
,
pd
.
UInt64Index
,
pd
.
Float64Index
)
if
isinstance
(
i
,
tup
):
return
True
# was a pd.xxxIndex was given
for
it
in
tup
:
try
:
if
it
==
i
:
return
True
except
TypeError
:
pass
return
False
def is_numIndex_like(i):
    """Tell whether ``i`` is an instance of a numeric pandas index.

    Always returns a ``bool``.
    """
    # Int64Index, UInt64Index and Float64Index no longer exist in
    # pandas >= 2.0, so look the classes up defensively.
    names = ("RangeIndex", "Int64Index", "UInt64Index", "Float64Index")
    candidates = (getattr(pd, name, None) for name in names)
    classes = tuple(cls for cls in candidates if cls is not None)

    if isinstance(i, classes):
        return True

    # pandas >= 2.0 represents numeric indexes as a plain pd.Index with a
    # numeric dtype.
    if type(i) is pd.Index:
        dtype = i.dtype
        is_numeric = pd.api.types.is_numeric_dtype(dtype)
        is_bool = pd.api.types.is_bool_dtype(dtype)
        return bool(is_numeric and not is_bool)

    return False
dios_options
=
dict
(
disp_max_rows
=
10
,
disp_max_vars
=
4
,
def
_is_pdIndex_like
(
i
):
"""
See here:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.html#pandas.Index
"""
if
isinstance
(
i
,
pd
.
Index
):
return
True
tup
=
(
pd
.
RangeIndex
,
pd
.
CategoricalIndex
,
pd
.
MultiIndex
,
pd
.
IntervalIndex
,
pd
.
DatetimeIndex
,
pd
.
TimedeltaIndex
,
pd
.
PeriodIndex
,
pd
.
Int64Index
,
pd
.
UInt64Index
,
pd
.
Float64Index
)
# was a pd.xxxIndex was given
for
it
in
tup
:
try
:
if
it
==
i
:
return
True
except
TypeError
:
pass
return
False
# 0: accept all
# 1: accept if at least one key is in both DioS
# 2: accept if all keys of the src-DioS are in the dest-DioS
# 3: accept if both dios have the exact same keys (makes sense only for assignments with a slicer,
# otherwise it is the same as creating a new dios)
dios_to_dios_method
=
3
)
This diff is collapsed.
Click to expand it.
dios/options.py
0 → 100644
+
41
−
0
View file @
28c06f47
from
dios.lib
import
IdxTypes
class DiosOptionsWarning(UserWarning):
    """Warning category for notices about dios option settings."""
class Options:
    """Storage class for the keys of the dios options dict."""

    # Number of rows and variables to display in a call that uses
    # ``__repr__`` or ``__str__``, e.g. ``print(dios)``.
    disp_max_rows = "disp_max_rows"
    disp_max_vars = "disp_max_vars"

    # 0: accept all
    # 1: accept if at least one key is in both DioS
    # 2: accept if all keys of the src-DioS are in the dest-DioS
    # 3: accept if both dios have the exact same keys (makes sense only for
    #    assignments with a slicer, otherwise it is the same as creating a
    #    new dios)
    dios_to_dios_method = "dios_to_dios_method"

    # With different types of indexes in the dios, slicing will almost always
    # fail, because e.g. a numeric slice cannot work on a pd.DatetimeIndex
    # and vice versa. Setting this to True is highly experimental; any
    # arising issues or errors should be handled by the user.
    mixed_indextyes = "mixed_indextyes"

    allowed_indextypes = "allowed_indextypes"
# Option values in effect, keyed by the names declared on ``Options``.
dios_options = {
    Options.disp_max_rows: 10,
    Options.disp_max_vars: 4,
    Options.dios_to_dios_method: 3,
    Options.mixed_indextyes: False,
    Options.allowed_indextypes: [IdxTypes.datetime, IdxTypes.nunmeric],
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment