Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
38a9b89f
Commit
38a9b89f
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
hardcore reduce
parent
1bc35760
No related branches found
No related tags found
1 merge request
!2
Develop
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
dios/dios.py
+19
-26
19 additions, 26 deletions
dios/dios.py
dios/locator.py
+53
-168
53 additions, 168 deletions
dios/locator.py
test/__init__.py
+1
-0
1 addition, 0 deletions
test/__init__.py
test/test_dflike__setget__.py
+49
-33
49 additions, 33 deletions
test/test_dflike__setget__.py
with
122 additions
and
227 deletions
dios/dios.py
+
19
−
26
View file @
38a9b89f
...
...
@@ -190,7 +190,7 @@ class DictOfSeries:
else
:
# work on columns
new
=
self
.
copy_empty
()
new
.
_data
=
self
.
_data
.
loc
[
key
]
new
.
_data
=
self
.
_data
[
key
]
return
new
def
_slice
(
self
,
key
):
...
...
@@ -200,10 +200,7 @@ class DictOfSeries:
new
=
self
.
copy_empty
()
for
k
in
self
.
columns
:
# we cannot use loc here, because s.loc[:4]
# is inclusive, whereas s[:4] isn't :(
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
][
key
]
return
new
def
_getitem_bool_dios
(
self
,
key
):
...
...
@@ -225,8 +222,7 @@ class DictOfSeries:
def
_getitem_bool_listlike
(
self
,
key
):
new
=
self
.
copy_empty
()
for
k
in
self
.
columns
:
ser
=
self
.
_data
.
at
[
k
]
new
.
_data
.
at
[
k
]
=
ser
.
loc
[
key
]
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
].
loc
[
key
]
return
new
def
__setitem__
(
self
,
key
,
value
):
...
...
@@ -235,25 +231,20 @@ class DictOfSeries:
if
isinstance
(
key
,
tuple
):
raise
KeyError
(
f
"
{
key
}
. tuples are not allowed
"
)
elif
is_hashable
(
key
)
and
key
not
in
self
.
columns
:
self
.
_insert
(
key
,
value
)
return
data
=
self
.
__getitem__
(
key
)
elif
is_hashable
(
key
):
if
isinstance
(
value
,
pd
.
Series
):
self
.
_insert
(
key
,
value
)
else
:
self
.
_data
.
at
[
key
].
loc
[:]
=
value
if
isinstance
(
data
,
pd
.
Series
):
# key must be a scalar
assert
is_hashable
(
key
)
data
.
loc
[:]
=
value
self
.
_data
.
at
[
key
]
=
data
else
:
data
=
self
.
__getitem__
(
key
)
assert
isinstance
(
data
,
self
.
__class__
),
f
"
getitem returned data of type
{
type
(
data
)
}
"
elif
isinstance
(
data
,
self
.
__class__
):
for
k
in
data
.
columns
:
s
=
data
.
_data
.
at
[
k
]
s
.
loc
[:]
=
value
self
.
_data
.
at
[
k
]
=
s
else
:
raise
AssertionError
(
f
"
getitem returned data of type
{
type
(
data
)
}
"
)
s
[:]
=
value
self
.
_data
.
at
[
k
].
loc
[
s
.
index
]
=
s
@property
def
loc
(
self
):
...
...
@@ -337,7 +328,6 @@ class DictOfSeries:
return
item
in
self
.
columns
.
copy
()
def
__delitem__
(
self
,
key
):
# is 'indexing bug' save see hacking.md
del
self
.
_data
[
key
]
def
__copy__
(
self
):
...
...
@@ -477,14 +467,17 @@ class DictOfSeries:
def
pprint
(
dios
,
max_rows
=
10
,
max_cols
=
2
,
delim
=
'
'
):
sstr
=
[]
cols
=
list
(
dios
.
columns
)
if
dios
.
empty
:
return
"
Empty DictionaryOfSeries
"
sstr
=
[]
cols
=
list
(
dios
.
columns
)
for
c
in
dios
.
columns
:
sstr
.
append
(
dios
[
c
].
to_string
(
max_rows
=
max_rows
).
split
(
'
\n
'
))
if
dios
[
c
].
empty
:
sstr
.
append
([
'
no data
'
])
else
:
sstr
.
append
(
dios
[
c
].
to_string
(
max_rows
=
max_rows
).
split
(
'
\n
'
))
maxlen
=
max
([
len
(
x
)
for
x
in
sstr
])
...
...
This diff is collapsed.
Click to expand it.
dios/locator.py
+
53
−
168
View file @
38a9b89f
...
...
@@ -7,16 +7,8 @@ class _Indexer:
self
.
_dios
=
_dios
self
.
_data
=
_dios
.
_data
def
_unpack_key
(
self
,
key
):
# #############################################################################
class
_LocIndexer
(
_Indexer
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
def
__getitem__
(
self
,
key
):
if
isinstance
(
key
,
tuple
):
if
len
(
key
)
>
2
:
raise
KeyError
(
"
To many indexers
"
)
...
...
@@ -29,96 +21,54 @@ class _LocIndexer(_Indexer):
if
is_dios_like
(
rowkey
)
or
is_dios_like
(
colkey
):
raise
ValueError
(
"
Cannot index with multidimensional key
"
)
return
rowkey
,
colkey
# #############################################################################
class
_LocIndexer
(
_Indexer
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
def
__getitem__
(
self
,
key
):
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
data
=
self
.
_data
.
loc
[
colkey
]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if
isinstance
(
data
,
pd
.
Series
):
# .loc[any, scalar] - got a single row indexed series
if
is_hashable
(
colkey
):
new
=
data
.
loc
[
rowkey
]
# .loc[scalar, any]
elif
is_hashable
(
rowkey
):
# we do not override data directly to may get
# a better fitting series dtype
new
=
pd
.
Series
(
index
=
type
(
data
.
index
)([]))
for
k
in
data
.
index
:
s
=
data
.
at
[
k
]
new
.
at
[
k
]
=
s
.
loc
[
rowkey
]
# .iloc[:, any] - simple low-cost optimization
elif
isinstance
(
rowkey
,
slice
)
and
rowkey
==
slice
(
None
):
new
=
self
.
_dios
.
copy_empty
()
new
.
_data
=
data
.
copy
()
# .loc[any, scalar]
if
is_hashable
(
colkey
):
new
=
data
.
loc
[
rowkey
]
# .loc[non-scalar, non-scalar]
# .loc[any, non-scalar]
else
:
for
k
in
data
.
index
:
data
.
at
[
k
]
=
data
.
at
[
k
].
loc
[
rowkey
]
if
is_hashable
(
rowkey
):
new
=
data
else
:
new
=
self
.
_dios
.
copy_empty
()
for
k
in
data
.
index
:
new
.
_data
.
at
[
k
]
=
data
.
at
[
k
].
loc
[
rowkey
]
else
:
raise
AssertionError
(
f
"
getitem returned data of type
{
type
(
data
)
}
"
)
new
.
_data
=
data
return
new
def
__setitem__
(
self
,
key
,
value
):
if
isinstance
(
key
,
tuple
):
if
len
(
key
)
>
2
:
raise
KeyError
(
"
To many indexers
"
)
rowkey
,
colkey
=
key
if
isinstance
(
rowkey
,
tuple
):
raise
KeyError
(
f
"
{
key
}
. tuples are not allowed.
"
)
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
# .loc[any, scalar]
if
is_hashable
(
colkey
):
# .loc[dont-care, new-scalar] = val
# if a unknown colkey was given, we insert it and ignore rowkey
if
is_hashable
(
colkey
)
and
colkey
not
in
self
.
_dios
.
columns
:
if
colkey
not
in
self
.
_dios
.
columns
:
self
.
_dios
.
_insert
(
colkey
,
value
)
return
else
:
rowkey
,
colkey
=
key
,
slice
(
None
)
# get .loc[any,any] - we use key(!) here
data
=
self
.
__getitem__
(
key
)
if
is_dios_like
(
value
)
or
is_nested_list_like
(
value
):
raise
TypeError
(
"
.loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.
"
)
# .loc[scalar, any]
if
is_hashable
(
rowkey
):
# .loc[scalar, scalar]
if
is_hashable
(
colkey
):
s
=
self
.
_data
.
at
[
colkey
]
s
.
at
[
rowkey
]
=
value
self
.
_data
.
at
[
colkey
]
=
s
# .loc[scalar, non-scalar] - column-labeled series
else
:
data
.
loc
[:]
=
value
for
k
in
data
.
index
:
s
=
self
.
_data
.
at
[
k
]
s
.
at
[
rowkey
]
=
data
.
at
[
k
]
self
.
_data
.
at
[
k
]
=
s
# .loc[non-scalar, scalar] - single row-labeled series
elif
is_hashable
(
colkey
):
data
.
loc
[
rowkey
]
=
value
self
.
_data
.
at
[
colkey
]
=
data
# .loc[non-scalar, non-scalar]
elif
isinstance
(
data
,
self
.
_dios
.
__class__
):
for
k
in
data
.
columns
:
s
=
data
.
_data
.
at
[
k
]
s
.
loc
[
rowkey
]
=
value
self
.
_data
.
at
[
k
]
=
s
self
.
_data
.
at
[
colkey
].
loc
[
rowkey
]
=
value
# .loc[any, non-scalar]
else
:
raise
AssertionError
(
f
"
getitem returned data of type
{
type
(
data
)
}
"
)
for
s
in
self
.
_data
.
loc
[
colkey
]:
s
.
loc
[
rowkey
]
=
value
# #############################################################################
...
...
@@ -130,97 +80,36 @@ class _iLocIndexer(_Indexer):
super
().
__init__
(
*
args
,
**
kwargs
)
def
__getitem__
(
self
,
key
):
if
isinstance
(
key
,
tuple
):
if
len
(
key
)
>
2
:
raise
KeyError
(
"
To many indexers
"
)
rowkey
,
colkey
=
key
else
:
rowkey
,
colkey
=
key
,
slice
(
None
)
if
isinstance
(
rowkey
,
tuple
):
raise
KeyError
(
f
"
{
key
}
. tuples are not allowed.
"
)
if
is_dios_like
(
rowkey
)
or
is_dios_like
(
colkey
):
raise
ValueError
(
"
Cannot index with multidimensional key
"
)
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
data
=
self
.
_data
.
iloc
[
colkey
]
# in any case data is a series now,
# either a column-indexed series of series,
# or a simple single row-indexed series (of values)
if
isinstance
(
data
,
pd
.
Series
):
# .iloc[any, int] - got a single row indexed series
if
is_integer
(
colkey
):
new
=
data
.
iloc
[
rowkey
]
# .loc[int, any]
elif
is_integer
(
rowkey
):
# we do not override data directly to may get
# a better fitting series dtype
new
=
pd
.
Series
(
index
=
type
(
data
.
index
)([]))
for
k
in
data
.
index
:
s
=
data
.
at
[
k
]
new
.
at
[
k
]
=
s
.
iloc
[
rowkey
]
# .iloc[:, any] - simple low-cost optimization
elif
isinstance
(
rowkey
,
slice
)
and
rowkey
==
slice
(
None
):
new
=
self
.
_dios
.
copy_empty
()
new
.
_data
=
data
.
copy
()
# .iloc[any, scalar]
if
is_integer
(
colkey
):
new
=
data
.
iloc
[
rowkey
]
# .loc[non-int, non-int]
# .iloc[any, non-scalar]
else
:
for
k
in
data
.
index
:
data
.
at
[
k
]
=
data
.
at
[
k
].
iloc
[
rowkey
]
if
is_integer
(
rowkey
):
new
=
data
else
:
new
=
self
.
_dios
.
copy_empty
()
for
k
in
data
.
index
:
new
.
_data
.
at
[
k
]
=
data
.
at
[
k
].
iloc
[
rowkey
]
else
:
raise
AssertionError
(
f
"
getitem returned data of type
{
type
(
data
)
}
"
)
new
.
_data
=
data
return
new
def
__setitem__
(
self
,
key
,
value
):
if
isinstance
(
key
,
tuple
):
rowkey
,
colkey
=
key
else
:
rowkey
,
colkey
=
key
,
slice
(
None
)
# get .iloc[any,any] - we use key(!) here
data
=
self
.
__getitem__
(
key
)
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
if
is_dios_like
(
value
)
or
is_nested_list_like
(
value
):
raise
TypeError
(
"
.loc[] cannot be used to set multi-dimensional values, use .aloc[] instead.
"
)
# .iloc[scalar, any]
if
is_integer
(
rowkey
):
# .iloc[scalar, scalar]
if
is_integer
(
colkey
):
s
=
self
.
_data
.
iat
[
colkey
]
s
.
iat
[
rowkey
]
=
value
self
.
_data
.
iat
[
colkey
]
=
s
# .iloc[scalar, non-scalar] - column-labeled series
else
:
data
.
iloc
[:]
=
value
for
k
in
data
.
index
:
s
=
self
.
_data
.
at
[
k
]
s
.
iat
[
rowkey
]
=
data
.
at
[
k
]
self
.
_data
.
at
[
k
]
=
s
# .iloc[non-scalar, scalar] - single row-labeled series
elif
is_integer
(
colkey
):
data
.
iloc
[
rowkey
]
=
value
self
.
_data
.
iat
[
colkey
]
=
data
# .iloc[non-scalar, non-scalar]
elif
isinstance
(
data
,
self
.
_dios
.
__class__
):
for
k
in
data
.
columns
:
s
=
data
.
_data
.
at
[
k
]
s
.
iloc
[
rowkey
]
=
value
self
.
_data
.
at
[
k
]
=
s
# .iloc[any, scalar]
if
is_integer
(
colkey
):
self
.
_data
.
iat
[
colkey
].
iloc
[
rowkey
]
=
value
# .iloc[any, non-scalar]
else
:
raise
AssertionError
(
f
"
getitem returned data of type
{
type
(
data
)
}
"
)
for
s
in
self
.
_data
.
iloc
[
colkey
]:
s
.
iloc
[
rowkey
]
=
value
# #############################################################################
...
...
@@ -262,9 +151,7 @@ class _AtIndexer(_Indexer):
self
.
_check_key
(
key
)
if
is_dios_like
(
value
)
or
is_nested_list_like
(
value
):
raise
TypeError
(
"
.at[] cannot be used to set multi-dimensional values, use .aloc[] instead.
"
)
s
=
self
.
_data
.
at
[
key
[
1
]]
s
.
at
[
key
[
0
]]
=
value
self
.
_data
.
at
[
key
[
1
]]
=
s
self
.
_data
.
at
[
key
[
1
]].
at
[
key
[
0
]]
=
value
# #############################################################################
...
...
@@ -289,9 +176,7 @@ class _iAtIndexer(_Indexer):
self
.
_check_key
(
key
)
if
is_dios_like
(
value
)
or
is_nested_list_like
(
value
):
raise
TypeError
(
"
.iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.
"
)
s
=
self
.
_data
.
iat
[
key
[
1
]]
s
.
iat
[
key
[
0
]]
=
value
self
.
_data
.
iat
[
key
[
1
]]
=
s
self
.
_data
.
iat
[
key
[
1
]].
iat
[
key
[
0
]]
=
value
# #############################################################################
...
...
This diff is collapsed.
Click to expand it.
test/__init__.py
+
1
−
0
View file @
38a9b89f
from
.test_setup
import
*
This diff is collapsed.
Click to expand it.
test/test_dflike__setget__.py
+
49
−
33
View file @
38a9b89f
...
...
@@ -3,37 +3,37 @@ from test.test_setup import *
import
pytest
def
_test
(
val
,
exp
):
def
_test
(
res
,
exp
):
if
isinstance
(
exp
,
pd
.
DataFrame
):
assert
isinstance
(
val
,
DictOfSeries
)
assert
isinstance
(
res
,
DictOfSeries
)
if
val
.
empty
:
if
res
.
empty
:
for
c
in
exp
:
assert
exp
[
c
].
dropna
().
empty
return
assert
(
val
.
columns
==
exp
.
columns
).
all
()
assert
(
res
.
columns
==
exp
.
columns
).
all
()
for
c
in
exp
:
l
=
val
[
c
]
l
=
res
[
c
]
r
=
exp
[
c
].
dropna
()
assert
isinstance
(
l
,
pd
.
Series
)
assert
isinstance
(
r
,
pd
.
Series
)
assert
(
l
==
r
).
all
()
else
:
assert
type
(
exp
)
==
type
(
val
)
assert
type
(
exp
)
==
type
(
res
)
if
isinstance
(
exp
,
pd
.
Series
):
assert
(
val
==
exp
.
dropna
()).
all
()
assert
(
res
==
exp
.
dropna
()).
all
()
else
:
assert
val
==
exp
assert
res
==
exp
@pytest.mark.parametrize
(
'
idxer
'
,
INDEXERS
)
def
test_dflike__getitem__
(
df_
,
dios_
,
idxer
):
print
(
idxer
)
exp
=
df_
[
idxer
]
val
=
dios_
[
idxer
]
_test
(
val
,
exp
)
res
=
dios_
[
idxer
]
_test
(
res
,
exp
)
@pytest.mark.parametrize
(
'
locL
'
,
LOC_L
)
...
...
@@ -42,8 +42,8 @@ def test_dflike__get_loc__(df_, dios_, locL, locR):
print
(
locL
)
print
(
locR
)
exp
=
df_
.
loc
[
locL
,
locR
]
val
=
dios_
.
loc
[
locL
,
locR
]
_test
(
val
,
exp
)
res
=
dios_
.
loc
[
locL
,
locR
]
_test
(
res
,
exp
)
@pytest.mark.parametrize
(
'
ilocL
'
,
ILOC_L
)
...
...
@@ -52,29 +52,45 @@ def test_dflike__get_iloc__(df_, dios_, ilocL, ilocR):
print
(
ilocL
)
print
(
ilocR
)
exp
=
df_
.
iloc
[
ilocL
,
ilocR
]
val
=
dios_
.
iloc
[
ilocL
,
ilocR
]
#
_test(
val
, exp)
res
=
dios_
.
iloc
[
ilocL
,
ilocR
]
_test
(
res
,
exp
)
if
isinstance
(
exp
,
pd
.
DataFrame
):
assert
isinstance
(
val
,
DictOfSeries
)
if
val
.
empty
:
for
c
in
exp
:
assert
exp
[
c
].
dropna
().
empty
return
VALS
=
[
99
,
]
assert
(
val
.
columns
==
exp
.
columns
).
all
(
)
for
c
in
exp
:
l
=
val
[
c
]
r
=
exp
[
c
].
dropna
(
)
assert
isinstance
(
l
,
pd
.
Series
)
assert
isinstance
(
r
,
pd
.
Series
)
assert
(
l
==
r
).
all
()
else
:
assert
type
(
exp
)
==
type
(
val
)
@pytest.mark.parametrize
(
'
idxer
'
,
INDEXERS
)
@pytest.mark.parametrize
(
'
val
'
,
VALS
)
def
test_dflike__setitem__
(
df_
,
dios_
,
idxer
,
val
):
print
(
idxer
)
exp
=
df_
res
=
dios_
exp
[
idxer
]
=
val
res
[
idxer
]
=
val
_test
(
res
,
exp
)
if
isinstance
(
exp
,
pd
.
Series
):
assert
(
val
==
exp
.
dropna
()).
all
()
else
:
assert
val
==
exp
@pytest.mark.parametrize
(
'
locL
'
,
LOC_L
)
@pytest.mark.parametrize
(
'
locR
'
,
LOC_R
)
@pytest.mark.parametrize
(
'
val
'
,
VALS
)
def
test_dflike__set_loc__
(
df_
,
dios_
,
locL
,
locR
,
val
):
print
(
locL
)
print
(
locR
)
exp
=
df_
res
=
dios_
exp
.
loc
[
locL
,
locR
]
=
val
res
.
loc
[
locL
,
locR
]
=
val
_test
(
res
,
exp
)
@pytest.mark.parametrize
(
'
ilocL
'
,
ILOC_L
)
@pytest.mark.parametrize
(
'
ilocR
'
,
ILOC_R
)
@pytest.mark.parametrize
(
'
val
'
,
VALS
)
def
test_dflike__set_iloc__
(
df_
,
dios_
,
ilocL
,
ilocR
,
val
):
print
(
ilocL
)
print
(
ilocR
)
exp
=
df_
res
=
dios_
exp
.
iloc
[
ilocL
,
ilocR
]
=
val
res
.
iloc
[
ilocL
,
ilocR
]
=
val
_test
(
res
,
exp
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment