Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
3b226443
Commit
3b226443
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
finest setitem
parent
13f833a9
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!2
Develop
,
!1
complete rework
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
dios/dios.py
+84
-71
84 additions, 71 deletions
dios/dios.py
dios/options.py
+11
-20
11 additions, 20 deletions
dios/options.py
with
95 additions
and
91 deletions
dios/dios.py
+
84
−
71
View file @
3b226443
...
...
@@ -24,16 +24,20 @@ from pandas.core.dtypes.common import is_iterator as _is_iterator
def
is_dios_like
(
obj
):
return
isinstance
(
obj
,
DictOfSeries
)
def
is_pandas_like
(
obj
):
"""
We consider ourselfs (dios) as pandas-like
"""
return
is_series_like
(
obj
)
or
is_dataframe_like
(
obj
)
or
is_dios_like
(
obj
)
def
is_series_like
(
obj
):
return
isinstance
(
obj
,
pd
.
Series
)
def
is_dataframe_like
(
obj
):
return
isinstance
(
obj
,
pd
.
DataFrame
)
def
is_iterator
(
obj
):
"""
This is only a dummy wrapper, to warn that the docu of this isnt
'
t right.
Unlike the example says,
...
...
@@ -235,29 +239,39 @@ class DictOfSeries:
in the ``options`` dictionary.
- [3] If ``iterable`` contains any(!) label that does not exist, a KeyError is raised.
"""
# special case: insert a fresh new key
if
isinstance
(
key
,
str
)
and
key
not
in
self
.
columns
:
self
.
_insert
(
key
,
value
)
return
# prepare
if
is_iterator
(
key
):
key
=
list
(
key
)
k
,
i
=
self
.
_get_keys_and_indexer
(
key
)
gen
=
self
.
_setitem_stage2
(
k
,
i
,
value
)
for
tup
in
gen
:
self
.
_set_item
(
*
tup
)
def
_get_keys_and_indexer
(
self
,
key
):
"""
Determine keys and indexer
Notes:
Which keys we get, depends on the policy in dios_options
"""
err_bool
=
"
only boolen values are allowed
"
keys
=
None
indexers
=
None
blowup
=
False
ki
=
dict
()
# determine action by keys
# prevent consuming of a generator
if
is_iterator
(
key
):
key
=
list
(
key
)
if
isinstance
(
key
,
str
):
# special case: insert a fresh new key
if
key
not
in
self
.
columns
:
self
.
_setitem_new
(
key
,
value
)
return
else
:
ki
[
key
]
=
slice
(
None
)
keys
=
[
key
]
raise
KeyError
(
key
)
keys
=
[
key
]
elif
isinstance
(
key
,
slice
):
keys
=
self
.
columns
indexers
=
[
key
]
indexers
,
blowup
=
[
key
]
,
True
# list, np.arrays, ... of list, np.arrays..
elif
is_nested_list_like
(
key
):
...
...
@@ -267,59 +281,81 @@ class DictOfSeries:
for
i
in
range
(
len
(
key
)):
arr
=
np
.
array
(
i
)
if
not
is_bool_array
(
arr
):
raise
ValueError
(
"
Must pass
nested
-
list
-like with boolean values only
"
)
raise
ValueError
(
"
nested
list
:
"
+
err_bool
)
indexers
.
append
(
arr
)
# ser, df, dios
elif
is_pandas_like
(
key
):
if
is_series_like
(
key
):
keys
=
key
.
to_list
()
elif
is_dataframe_like
(
key
):
keys
=
key
.
columns
.
to_list
()
indexers
=
key
.
values
testbool
=
True
if
not
is_bool_array
(
indexers
):
raise
ValueError
(
"
df:
"
+
err_bool
)
elif
is_dios_like
(
key
):
keys
=
key
.
columns
indexers
=
list
(
key
.
values
)
if
not
is_bool_array
(
indexers
):
raise
ValueError
(
"
dios:
"
+
err_bool
)
# list, np.array, np.ndarray, ...
# Note: series considered list-like,
# so we handle lists last
elif
is_list_like
(
key
):
arr
=
np
.
array
(
key
)
if
is_bool_array
(
arr
):
keys
=
self
.
columns
indexers
=
[
arr
]
if
len
(
arr
)
!=
len
(
keys
):
keys
=
np
.
array
(
keys
)[
arr
]
else
:
keys
=
key
else
:
raise
KeyError
(
f
"
{
key
}
"
)
if
length
!=
len
(
keys
):
raise
ValueError
(
f
"
Length mismatch for nested list: expected
{
len
(
keys
)
}
, got
{
length
}
"
)
if
not
indexers
:
indexers
=
[
slice
(
None
)]
if
len
(
indexers
)
==
1
:
indexers
=
indexers
*
len
(
keys
)
assert
len
(
indexers
)
==
len
(
keys
)
# now we have a indexer for every series
# determine action by value
if
isinstance
(
value
,
DictOfSeries
):
method
=
dios_options
[
Options
.
dios_to_dios_method
]
keys
=
get_dios_to_dios_keys
(
keys
,
value
,
method
)
for
k
in
keys
:
self
.
_setitem
(
k
,
value
[
k
],
sl
=
kslicer
)
else
:
if
is_iterator
(
value
):
value
=
list
(
value
)
# check keys
method
=
dios_options
[
Options
.
dios_to_dios_method
]
keys
=
check_keys_by_policy
(
keys
,
self
.
columns
,
method
)
for
k
in
keys
:
self
.
_setitem
(
k
,
value
,
sl
=
kslicer
)
# check indexer
if
indexers
is
None
:
indexers
,
blowup
=
[
slice
(
None
)],
True
if
blowup
:
indexers
=
indexers
*
len
(
keys
)
if
len
(
indexers
)
!=
len
(
keys
):
raise
ValueError
# now we have a valid indexer (a slice or a bool array) for every series
return
keys
,
indexers
def
_setitem_stage2
(
self
,
keys
,
ixs
,
val
):
"
determine looping and ..
"
if
is_iterator
(
val
):
val
=
list
(
val
)
diosl
,
dfl
,
nlistl
=
is_dios_like
(
val
),
is_dataframe_like
(
val
),
is_nested_list_like
(
val
)
if
diosl
or
dfl
or
nlistl
and
len
(
val
)
!=
len
(
keys
):
raise
ValueError
(
f
"
could not broadcast input array with length
{
len
(
val
)
}
"
f
"
into dios of length
{
len
(
keys
)
}
"
)
# now we have everything we need: key, indexer, value
# so we just pack it nice and cosy and let setitem
# do the dirty work.
for
i
,
_
in
enumerate
(
keys
):
key
,
ix
=
keys
[
i
],
ixs
[
i
]
if
dfl
or
diosl
:
yield
key
,
ix
,
val
[
val
.
columns
[
i
]]
elif
nlistl
:
yield
key
,
ix
,
val
[
i
]
else
:
yield
key
,
ix
,
val
def
_setitem_new
(
self
,
key
,
val
):
def
_insert
(
self
,
key
,
val
):
""""""
if
isinstance
(
val
,
DictOfSeries
):
val
=
val
.
squeeze
()
elif
is_list_like
(
val
)
and
not
is_nested_list_like
(
val
):
...
...
@@ -331,39 +367,16 @@ class DictOfSeries:
val
=
cast_to_itype
(
val
,
self
.
_itype
,
policy
=
self
.
_policy
)
self
.
_data
[
key
]
=
val
.
copy
(
deep
=
True
)
def
_setitem
(
self
,
key
,
val
,
sl
=
None
):
"""
Set a value or a set of values to a single(!) key in self k
"""
sl
=
sl
or
slice
(
None
)
# series, dios['a'] = series, 'a' exist !
# diosA[slice] = diosB --> dios[slice][k] = diosB[k] for all k
if
isinstance
(
val
,
pd
.
Series
):
val
=
cast_to_itype
(
val
,
self
.
_itype
,
policy
=
self
.
_policy
)
left
=
self
.
_data
[
key
][
sl
]
idx
=
left
.
index
.
intersection
(
val
.
index
)
# l, r = left.align(val, join='inner')
if
not
idx
.
empty
:
left
.
loc
[
idx
]
=
val
.
loc
[
idx
].
copy
()
return
item
=
self
.
_data
[
key
]
# label <- scalar: dios['a'] = 3.9 or
# slice <- scalar: dios[0:3] = 4.0
if
is_scalar
(
val
):
item
[
sl
]
=
val
# label <- list: dios['a'] = [0.0, 0.3, 0.0]
# sclice <- list: dios[0:3] = [0.0, 0.3, 0.0]
elif
is_list_like
(
val
)
and
not
is_nested_list_like
(
val
):
# ensure same size # fixme: is this neccessary, wouldnt pd.Series raise a Valuerror ?
if
len
(
item
[
sl
])
==
len
(
val
):
item
[
sl
]
=
val
else
:
raise
ValueError
(
f
'
Length of values does not match length of sliced for the key
{
key
}
'
)
def
_set_item
(
self
,
key
,
ix
,
val
):
"
Set a value (scalar or list or series)
"
ser
=
self
.
_data
[
key
]
if
is_series_like
(
val
):
left
=
ser
[
ix
]
index
=
left
.
index
.
intersection
(
val
.
index
)
if
not
index
.
empty
:
left
.
loc
[
index
]
=
val
.
loc
[
index
].
copy
()
else
:
raise
ValueError
(
f
"
assignments with a values of type
{
type
(
val
)
}
are not supported
"
)
return
ser
[
ix
]
=
val
@property
def
loc
(
self
):
...
...
This diff is collapsed.
Click to expand it.
dios/options.py
+
11
−
20
View file @
3b226443
...
...
@@ -34,30 +34,21 @@ dios_options = {
}
def
get_dios_to_dios_keys
(
keys
,
other
,
method
):
def
check_keys_by_policy
(
check
,
keys
,
policy
):
err_append
=
"
consider changing dios.option[
'
dios_to_dios_method
'
]
"
if
policy
==
OptionsDiosToDios
.
any_matching
:
check
=
[
k
for
k
in
check
if
k
in
keys
]
if
method
==
OptionsDiosToDios
.
any_matching
:
keys
=
[
k
for
k
in
keys
if
k
in
other
.
columns
]
elif
policy
==
OptionsDiosToDios
.
at_least_one
:
check
=
[
k
for
k
in
check
if
k
in
keys
]
if
not
check
:
raise
KeyError
(
"
policy says: at least one key must be shared.
"
)
elif
method
==
OptionsDiosToDios
.
at_least_one
:
keys
=
[
k
for
k
in
keys
if
k
in
other
.
columns
]
if
not
keys
:
raise
KeyError
(
"
src-DioS and dest-DioS need to share at least one key,
"
+
err_append
)
# elif method == 2:
# fail = [k for k in keys if k not in other.columns]
# if fail:
# raise KeyError(f"{fail} are missing in the destiny-dios, " + err_append)
# keys in both dios's must be equal
elif
OptionsDiosToDios
.
all_must_match
:
fail
=
set
(
keys
).
symmetric_difference
(
set
(
other
.
column
s
))
fail
=
set
(
check
).
symmetric_difference
(
set
(
key
s
))
if
fail
:
raise
KeyError
(
f
"
{
fail
}
is not in both of src- and dest-dios,
"
+
err_append
)
raise
KeyError
(
f
"
{
fail
}
. policy says: all keys must be present.
"
)
else
:
raise
ValueError
(
method
)
raise
ValueError
(
policy
)
return
keys
return
check
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment