Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
8912d28a
Commit
8912d28a
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
aloc finished
parent
d9b288ab
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
dios/dios.py
+53
-33
53 additions, 33 deletions
dios/dios.py
dios/indexer.py
+106
-81
106 additions, 81 deletions
dios/indexer.py
with
159 additions
and
114 deletions
dios/dios.py
+
53
−
33
View file @
8912d28a
...
...
@@ -33,34 +33,7 @@ Unlike the example says, return lists False, not True
"""
from
pandas.core.dtypes.common
import
is_iterator
as
_is_iterator
def
_is_list_like_not_nested
(
obj
):
return
_is_list_like
(
obj
)
and
not
_is_nested_list_like
(
obj
)
def
_is_dios_like
(
obj
):
# must have columns
# columns is some kind of pd.Index
# iter will iter through columns
# a `in` obj check if obj is in columns
# obj[key] will give a pd.Series
# obj.squeeze() give pd.Series if len(obj) == 1
return
isinstance
(
obj
,
DictOfSeries
)
or
isinstance
(
obj
,
pd
.
DataFrame
)
def
_is_bool_series
(
obj
):
return
isinstance
(
obj
,
pd
.
Series
)
and
obj
.
dtype
==
bool
def
__monkey_patch_pandas
():
def
to_dios
(
self
):
return
DictOfSeries
(
data
=
self
)
pd
.
Series
.
to_dios
=
to_dios
pd
.
DataFrame
.
to_dios
=
to_dios
__monkey_patch_pandas
()
from
typing
import
Union
,
Any
class
DictOfSeries
:
...
...
@@ -153,7 +126,10 @@ class DictOfSeries:
if
columns
is
None
or
k
in
self
.
columns
:
self
.
_insert
(
k
,
data
[
k
])
elif
_is_list_like
(
data
):
# also Series !
elif
isinstance
(
data
,
pd
.
Series
):
self
.
_insert
(
data
.
name
or
0
,
data
)
elif
_is_list_like
(
data
):
data
=
data
if
_is_nested_list_like
(
data
)
else
[
data
]
if
self
.
columns
.
empty
:
...
...
@@ -301,17 +277,16 @@ class DictOfSeries:
def _getitem_bool_dios(self, key):
    """
    Select items by a boolean dios-like; drop un-selected indices.

    `key` is validated once up front with `_is_bool_dios_like`. The result
    starts as `self.copy_empty(columns=True)`; for every column that is
    present in both `self.columns` and `key.columns`, the rows kept are
    those whose label is marked True in `key[k]` and also exists in the
    column's own index.

    Raises
    ------
    ValueError
        If `key` is not accepted by `_is_bool_dios_like`.
    """
    if not _is_bool_dios_like(key):
        raise ValueError("Must pass DictOfSeries with boolean values only")

    # allocate the result only after the key was accepted
    new = self.copy_empty(columns=True)
    for k in self.columns.intersection(key.columns):
        dat = self._data.at[k]
        val = key[k]
        # align rows: labels selected by the mask that also exist in our column
        idx = val[val].index.intersection(dat.index)
        new._data.at[k] = dat[idx]
    return new
def
_getitem_bool_listlike
(
self
,
key
):
...
...
@@ -874,3 +849,48 @@ def _to_aligned_df(dios, no_value=' '):
df
.
loc
[
nandict
[
c
],
c
]
=
np
.
nan
return
df
def _is_list_like_not_nested(obj):
    """Return True if `obj` is list-like, but not a nested list-like."""
    if not _is_list_like(obj):
        return False
    return not _is_nested_list_like(obj)
def _is_dios_like(obj) -> bool:
    """Return True if `obj` is a DictOfSeries or a pd.DataFrame."""
    # Author's notes on what a "dios-like" object is expected to offer:
    #  - it has `columns`, which is some kind of pd.Index
    #  - iterating it iterates through the columns
    #  - `a in obj` checks whether `a` is in the columns
    #  - `obj[key]` gives a pd.Series
    #  - `obj.squeeze()` gives a pd.Series if len(obj) == 1
    # The check itself is a plain type test against the two known types.
    return isinstance(obj, (DictOfSeries, pd.DataFrame))
def _is_bool_series(obj) -> bool:
    """Return True if `obj` is a pd.Series whose dtype equals bool."""
    if not isinstance(obj, pd.Series):
        return False
    return obj.dtype == bool
def _is_bool_dios_like(obj) -> bool:
    """
    Check whether `obj` is a dios-like that holds boolean values only.

    Anything rejected by `_is_dios_like` gives False. Otherwise the result
    is True if every entry of `obj.dtypes` equals bool. If at least one
    dtype is object ('O'), the decision is delegated to
    `obj.apply(_is_bool_indexer).all()`. In every other case the result
    is False.
    """
    if _is_dios_like(obj):
        col_dtypes = obj.dtypes
        if (col_dtypes == bool).all():
            return True
        # object columns may still hold only booleans, check them by value
        if (col_dtypes == 'O').any():
            return obj.apply(_is_bool_indexer).all()
    return False
def to_dios(obj) -> DictOfSeries:
    """Construct a DictOfSeries by passing `obj` as its `data` argument."""
    dios = DictOfSeries(data=obj)
    return dios
def __monkey_patch_pandas():
    """Attach a `to_dios` method to both pd.Series and pd.DataFrame."""

    def to_dios(self):
        # the pandas object itself becomes the `data` of the new DictOfSeries
        return DictOfSeries(data=self)

    for pandas_cls in (pd.Series, pd.DataFrame):
        pandas_cls.to_dios = to_dios


# patch once, at import time
__monkey_patch_pandas()
This diff is collapsed.
Click to expand it.
dios/indexer.py
+
106
−
81
View file @
8912d28a
...
...
@@ -3,11 +3,13 @@ from .dios import (
_is_dios_like
,
_is_bool_series
,
_is_list_like_not_nested
,
_is_bool_dios_like
,
_is_iterator
)
import
pandas
as
pd
import
pandas.core.common
as
ccom
import
pandas.core.dtypes.common
as
dcom
_is_list_like
=
dcom
.
is_list_like
_is_nested_list_like
=
dcom
.
is_nested_list_like
_is_scalar
=
dcom
.
is_scalar
...
...
@@ -19,7 +21,7 @@ _is_bool_indexer = ccom.is_bool_indexer
class
_Indexer
:
def
__init__
(
self
,
obj
):
def
__init__
(
self
,
obj
:
DictOfSeries
):
self
.
obj
=
obj
self
.
_data
=
obj
.
_data
...
...
@@ -132,7 +134,7 @@ class _LocIndexer(_Indexer):
except
Exception
as
e
:
c
=
data
.
index
[
i
]
if
i
is
not
None
else
'
?
'
raise
type
(
e
)(
f
"
failed for column
{
c
}
:
"
+
str
(
e
)
)
from
e
raise
type
(
e
)(
f
"
failed for column
{
c
}
:
"
+
str
(
e
))
from
e
# #############################################################################
...
...
@@ -259,96 +261,121 @@ class _aLocIndexer(_Indexer):
def __setitem__(self, key, value):
    """
    Set `value` on the rows and columns selected by `key`.

    `key` is resolved by `_unpack_key_aloc` into a list of column labels
    and a list holding one row indexer per column. How `value` is applied
    depends on its type:

    - dios-like (DictOfSeries / pd.DataFrame): only columns present in
      both the selection and `value.columns` are touched; `value[c]` is
      assigned via `Series.loc[rowkey]`.
    - pd.Series: assigned to every selected column via `Series.loc[rowkey]`.
    - nested list-like: needs exactly one item per selected column; the
      i-th item is assigned to the i-th selected column.
    - anything else: assigned as-is to every selected column.

    Columns whose row selection is empty are skipped.

    Raises
    ------
    ValueError
        If `value` is nested list-like and its length differs from the
        number of selected columns.
    """
    rowkeys, colkeys, _ = self._unpack_key_aloc(key)

    def iter_self(colkeys, rowkeys, pos=True):
        # Yield (series, row indexer, x) for every selected column with a
        # non-empty row selection; x is the position within `colkeys` if
        # `pos` is True, the column label otherwise.
        c = '?'
        try:
            for i, c in enumerate(colkeys):
                dat = self._data.at[c]
                rk = rowkeys[i]
                if len(dat.loc[rk]) == 0:
                    continue
                yield dat, rk, i if pos else c
        except Exception as e:
            raise type(e)(f"failed for column {c}: " + str(e)) from e

    # align columns, for rows use series.loc to align
    if _is_dios_like(value):
        # The intersection may drop (or reorder) columns. Row indexers are
        # stored by position, so re-pair each remaining column with its own
        # row indexer before iterating; otherwise rowkeys[i] would belong
        # to a different column.
        rowkey_of = dict(zip(colkeys, rowkeys))
        colkeys = value.columns.intersection(colkeys)
        rowkeys = [rowkey_of[c] for c in colkeys]
        for dat, rk, c in iter_self(colkeys, rowkeys, pos=False):
            dat.loc[rk] = value[c]

    # align rows by using series.loc
    elif isinstance(value, pd.Series):
        for dat, rk, _ in iter_self(colkeys, rowkeys):
            dat.loc[rk] = value

    # no align, no merci
    elif _is_nested_list_like(value):
        if len(colkeys) != len(value):
            raise ValueError(f"shape mismatch: values array of shape "
                             f"(.., {len(value)}) could not "
                             f"be broadcast to indexing result of "
                             f"shape (.., {len(colkeys)})")
        for dat, rk, i in iter_self(colkeys, rowkeys):
            dat.loc[rk] = value[i]

    # no align, no merci
    else:
        for dat, rk, _ in iter_self(colkeys, rowkeys):
            dat.loc[rk] = value
def
_unpack_key_aloc
(
self
,
key
):
"""
Return a list of row indexer and a list of existing(!) column labels.
Both list always have the same length and also could be empty together.
"""
# if a single column-key is given, we will
# return a single Series, instead of a dios
Note:
The items of the row indexer list should be passed to pd.Series.loc[]
"""
# if a single column-key is given, the caller may
# want to return a single Series, instead of a dios
lowdim
=
False
# multi-dim (var I) depend on the set method
if
_is_dios_like
(
key
):
def keys_from_bool_dios_like(key):
    # Build (rowkey, colkey, lowdim) from a boolean dios-like key:
    # columns are those shared by our object and the key; per column the
    # row indexer holds the labels marked True in the key that also exist
    # in our own column index.
    if not _is_bool_dios_like(key):
        raise ValueError("Must pass dios-like key with boolean "
                         "values only if passed as single indexer")
    colkey = self.obj.columns.intersection(key.columns)
    rowkey = []
    for col in colkey:
        mask = key[col]
        selected = mask[mask].index
        rowkey.append(self._data.at[col].index.intersection(selected))
    return rowkey, colkey, lowdim
# bool dios / df
if
self
.
_use_bool_dios
:
# todo: use a _is_bool_dioslike() helper function,
# that check for dtype==bool for each series or
# dtype of pd.Dataframe
colkey
=
self
.
obj
.
columns
.
intersection
(
key
.
columns
)
rowkey
=
[]
for
c
in
colkey
:
b
=
key
[
c
]
if
not
_is_bool_indexer
(
b
):
raise
ValueError
(
"
Must pass dios-like key with boolean
"
"
values only if passed as single indexer
"
)
rowkey
+=
[
self
.
_data
.
at
[
c
].
index
.
intersection
(
b
[
b
].
index
)]
# align any dios-like
else
:
colkey
=
self
.
obj
.
columns
.
intersection
(
key
.
columns
)
rowkey
=
[
self
.
_data
.
at
[
c
].
index
.
intersection
(
key
[
c
].
index
)
for
c
in
colkey
]
def keys_from_dios_like(key):
    # Build (rowkey, colkey, lowdim) by aligning on a dios-like key:
    # columns are those shared by our object and the key; per column the
    # row indexer is the intersection of our index with the key's index.
    colkey = self.obj.columns.intersection(key.columns)
    rowkey = []
    for col in colkey:
        own_index = self._data.at[col].index
        rowkey.append(own_index.intersection(key[col].index))
    return rowkey, colkey, lowdim
def keys_from_nested_list(key):
    # Build (rowkey, colkey, lowdim) from a nested list-like key that
    # carries one inner list of row labels per column of our object.
    # Raises ValueError if the outer length differs from the number of
    # columns.
    key = key.values if isinstance(key, pd.Series) else key
    if len(key) != len(self.obj.columns):
        raise ValueError("nested arrays outer length must have same length than columns.")
    colkey = self.obj.columns
    rowkey = []
    # enumerate is required: iterating the columns alone yields labels,
    # not (position, label) pairs
    for i, k in enumerate(colkey):
        rowkey.append(self._data.at[k].index.intersection(key[i]))
    return rowkey, colkey, lowdim
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
# handle multi-dim keys
if
isinstance
(
key
,
tuple
):
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
# .aloc[any, ...]
# The ellipsis is meant for dios only to indicate
# that alignment of dios is requested, instead of
# using (and checking) it as boolean dios
if
colkey
is
Ellipsis
:
if
_is_dios_like
(
rowkey
):
return
keys_from_dios_like
(
rowkey
)
if
_is_nested_list_like
(
rowkey
):
return
keys_from_nested_list
(
rowkey
)
colkey
=
slice
(
None
)
#
multi-dim (var
II)
if
colkey
is
Ellipsis
:
if
_is_dios_like
(
rowkey
):
colkey
=
self
.
obj
.
columns
.
intersection
(
rowkey
.
columns
)
rowkey
=
[
self
.
_data
.
at
[
c
].
index
.
intersection
(
rowkey
[
c
].
index
)
for
c
in
colkey
]
return
rowkey
,
colkey
,
lowdim
#
(I) .aloc[dios] -> defaults to (I
II)
# (II) .aloc(booldios=False)[dios] or
# (III) .aloc(booldios=True)[dios]
elif
_is_dios_like
(
key
):
if
self
.
_use_bool_dios
:
return
keys_from_bool_dios_like
(
key
)
else
:
colkey
=
slice
(
None
)
return
keys_from_dios_like
(
key
)
elif
_is_nested_list_like
(
key
):
return
keys_from_nested_list
(
key
)
# a single row indexer (not multi-dim)
# or just some random crap was given
else
:
rowkey
,
colkey
=
self
.
_unpack_key
(
key
)
# if we come here no more multi-dim keys are allowed
elif
_is_dios_like
(
rowkey
):
raise
ValueError
(
"
Could not index with multi-dimensional
"
"
row key, if column key is not Ellipsis.
"
)
elif
_is_dios_like
(
colkey
):
raise
ValueError
(
"
Could not index with multi-dimensional
"
"
column key.
"
)
# all multi-dim indexer was already handled
if
_is_dios_like
(
rowkey
)
or
_is_nested_list_like
(
rowkey
):
raise
ValueError
(
"
Could not index with multi-dimensional row key
"
"
, if column key is given and is not Ellipsis.
"
)
elif
_is_dios_like
(
colkey
)
or
_is_nested_list_like
(
colkey
):
raise
ValueError
(
"
Could not index with multi-dimensional column key.
"
)
# handle gracefully: scalar
if
_is_hashable
(
colkey
):
...
...
@@ -356,12 +383,11 @@ class _aLocIndexer(_Indexer):
lowdim
=
True
# column-alignable: list-like, filter only existing columns
elif
_is_list_like
_not_nested
(
colkey
)
and
not
_is_bool_indexer
(
colkey
):
elif
_is_list_like
(
colkey
)
and
not
_is_bool_indexer
(
colkey
):
colkey
=
colkey
.
values
if
isinstance
(
colkey
,
pd
.
Series
)
else
colkey
colkey
=
self
.
obj
.
columns
.
intersection
(
colkey
)
# not alignable
# fall back to .loc (boolean list/series, slice(..), ...
# not alignable, fall back to .loc (boolean list/series, slice(..), etc.
else
:
colkey
=
self
.
_data
.
loc
[
colkey
].
index
...
...
@@ -381,11 +407,12 @@ class _aLocIndexer(_Indexer):
# handle gracefully: list-like, filter only existing rows
# NOTE: dios.aloc[series.index] is processed here
elif
_is_list_like
_not_nested
(
rowkey
)
and
not
_is_bool_indexer
(
rowkey
):
elif
_is_list_like
(
rowkey
)
and
not
_is_bool_indexer
(
rowkey
):
rowkey
=
[
self
.
_data
.
at
[
c
].
index
.
intersection
(
rowkey
)
for
c
in
colkey
]
# not alignable
# fallback to .loc (processed by caller) - (eg. slice(..), boolean list-like, ...)
# the rowkey is processed by .loc someway in
# the calling function - (eg. slice(..), boolean list-like, etc.)
else
:
rowkey
=
[
rowkey
]
*
len
(
colkey
)
...
...
@@ -439,5 +466,3 @@ class _iAtIndexer(_Indexer):
if
_is_dios_like
(
value
)
or
_is_nested_list_like
(
value
):
raise
TypeError
(
"
.iat[] cannot be used to set multi-dimensional values, use .aloc[] instead.
"
)
self
.
_data
.
iat
[
key
[
1
]].
iat
[
key
[
0
]]
=
value
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment