Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
ca31885e
Commit
ca31885e
authored
5 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
create like df
parent
6140bd50
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
dios/dios.py
+30
-41
30 additions, 41 deletions
dios/dios.py
dios/lib.py
+15
-0
15 additions, 0 deletions
dios/lib.py
test/test_dflike.py
+13
-19
13 additions, 19 deletions
test/test_dflike.py
test/test_methods.py
+14
-5
14 additions, 5 deletions
test/test_methods.py
with
72 additions
and
65 deletions
dios/dios.py
+
30
−
41
View file @
ca31885e
from
.operators
import
OP_MAP
as
_OP_MAP
from
.lib
import
*
from
.lib
import
_CAST_POLICIES
,
_itype_le
,
_itype_lt
,
_throw_MixedItype_err_or_warn
from
.lib
import
(
_CAST_POLICIES
,
_itype_le
,
_itype_lt
,
_throw_MixedItype_err_or_warn
,
_find_least_common_itype
,
)
import
pandas
as
pd
import
numpy
as
np
...
...
@@ -105,10 +110,8 @@ class DictOfSeries:
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
columns
)
else
:
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
pd
.
Index
([]))
# itype=None means infer the itype by the data, so we first set to the highest
# possible itype, then insert data, then find the best-fitting.
# possible itype, then insert data, then find the best-fitting
itype
.
if
itype
is
None
:
self
.
_itype
=
MixedItype
else
:
...
...
@@ -118,53 +121,53 @@ class DictOfSeries:
raise
ValueError
(
f
"
downcast_policy must be one of
{
_CAST_POLICIES
}
"
)
self
.
_policy
=
cast_policy
if
columns
is
not
None
and
not
_is_list_like_not_nested
(
columns
)
:
raise
TypeError
(
"'
columns
'
must be some kind of list-like collection.
"
)
index
=
pd
.
Index
([]
if
columns
is
None
else
columns
)
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
index
.
unique
()
)
if
data
is
not
None
:
self
.
_init_insert_data
(
data
,
columns
)
# NOTE: self._data contain nans at locations
# where no data was present, but a column-name
# was given
if
itype
is
None
:
self
.
_itype
=
self
.
_
_find_least_common_itype
()
self
.
_itype
=
_find_least_common_itype
(
self
.
_data
.
dropna
()
)
if
not
self
.
_itype
.
unique
:
_throw_MixedItype_err_or_warn
(
self
.
itype
)
# insert empty
series for requested
columns
if
columns
is
not
None
:
# insert empty columns
if
self
.
_data
.
hasnans
:
e
=
pd
.
Series
(
dtype
=
'
O
'
)
for
c
in
columns
:
if
fastpath
or
c
not
in
self
.
columns
:
self
.
_insert
(
c
,
e
.
copy
())
for
c
in
self
.
columns
[
self
.
_data
.
isna
()]:
self
.
_insert
(
c
,
e
.
copy
())
def
_init_insert_data
(
self
,
data
,
columns
):
def
incols
(
c
):
return
c
in
columns
if
columns
is
not
None
else
True
"""
Insert items of a iterable in self
"""
data
=
list
(
data
)
if
_is_iterator
(
data
)
else
data
if
isinstance
(
data
,
dict
)
or
_is_dios_like
(
data
):
for
k
in
data
:
if
incols
(
k
)
:
if
columns
is
None
or
k
in
self
.
columns
:
self
.
_insert
(
k
,
data
[
k
])
elif
_is_list_like
(
data
):
# also Series !
data
=
data
if
_is_nested_list_like
(
data
)
else
[
data
]
if
columns
is
None
:
for
i
,
d
in
enumerate
(
data
):
self
.
_insert
(
i
,
d
)
else
:
if
len
(
data
)
!=
len
(
columns
):
raise
ValueError
(
f
"
length of passed values is
{
len
(
data
)
}
, columns imply
{
len
(
columns
)
}
"
)
if
self
.
columns
.
empty
:
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
pd
.
RangeIndex
(
len
(
data
)))
elif
len
(
data
)
!=
len
(
self
.
columns
):
raise
ValueError
(
f
"
length of passed values is
{
len
(
data
)
}
, columns imply
{
len
(
self
.
columns
)
}
"
)
for
i
,
c
in
enumerate
(
columns
):
self
.
_insert
(
c
,
data
[
i
])
for
i
,
c
in
enumerate
(
self
.
columns
):
self
.
_insert
(
c
,
data
[
i
])
else
:
raise
ValueError
(
f
"
data must be some kind of iterable, type
{
type
(
data
)
}
was given
"
)
def
_insert
(
self
,
col
,
val
):
"""
Insert a fresh new value into self
"""
"""
Insert a fresh new value
as pd.Series
into self
"""
val
=
list
(
val
)
if
_is_iterator
(
val
)
else
val
if
_is_dios_like
(
val
):
...
...
@@ -183,8 +186,8 @@ class DictOfSeries:
return
self
.
_data
.
index
@columns.setter
def
columns
(
self
,
newindex
):
self
.
_data
.
index
=
newindex
def
columns
(
self
,
cols
):
self
.
_data
.
index
=
cols
@property
def
itype
(
self
):
...
...
@@ -205,20 +208,6 @@ class DictOfSeries:
except
Exception
as
e
:
raise
type
(
e
)(
f
"
Column
{
c
}
:
"
+
str
(
e
))
from
e
def __find_least_common_itype(self):
    """
    Pick one itype that fits the index of every series held in ``self._data``.

    The candidates are tried in the fixed order ``NumItype``, ``FloatItype``,
    ``IntItype``, ``DtItype``. A candidate fits if ``_itype_le(t, candidate)``
    is truthy for the itype ``t`` of every series index. The last candidate
    that fits is returned.

    Returns ``MixedItype`` if ``self._data`` holds no series or if no
    candidate fits all of them.
    """
    candidates = [NumItype, FloatItype, IntItype, DtItype]
    index_itypes = [get_itype(series.index) for series in self._data]

    best = MixedItype
    # Keep the emptiness guard: `all()` over nothing is True, which would
    # otherwise select the last candidate for an empty container.
    if index_itypes:
        for candidate in candidates:
            # No early exit on purpose: a later candidate overrides an earlier one.
            if all(_itype_le(t, candidate) for t in index_itypes):
                best = candidate
    return best
def
__getitem__
(
self
,
key
):
"""
dios[key] -> dios/series
"""
key
=
list
(
key
)
if
_is_iterator
(
key
)
else
key
...
...
This diff is collapsed.
Click to expand it.
dios/lib.py
+
15
−
0
View file @
ca31885e
...
...
@@ -135,6 +135,21 @@ def _itype_le(a, b):
return
is_itype_like
(
a
,
b
)
def _find_least_common_itype(iterable_of_series):
    """
    Pick one itype that fits the index of every series in `iterable_of_series`.

    The candidates are tried in the fixed order ``NumItype``, ``FloatItype``,
    ``IntItype``, ``DtItype``. A candidate fits if ``_itype_le(t, candidate)``
    is truthy for the itype ``t`` of every series index. The last candidate
    that fits is returned.

    Parameters
    ----------
    iterable_of_series
        Iterable whose items expose an ``.index`` attribute that
        ``get_itype`` accepts.

    Returns
    -------
    The chosen itype, or ``MixedItype`` if the iterable yields nothing or
    no candidate fits all items.
    """
    candidates = [NumItype, FloatItype, IntItype, DtItype]
    index_itypes = [get_itype(series.index) for series in iterable_of_series]

    best = MixedItype
    # Keep the emptiness guard: `all()` over nothing is True, which would
    # otherwise select the last candidate for an empty input.
    if index_itypes:
        for candidate in candidates:
            # No early exit on purpose: a later candidate overrides an earlier one.
            if all(_itype_le(t, candidate) for t in index_itypes):
                best = candidate
    return best
################################################################################
# Casting
...
...
This diff is collapsed.
Click to expand it.
test/test_dflike.py
+
13
−
19
View file @
ca31885e
...
...
@@ -7,8 +7,6 @@ from pandas.core.dtypes.common import is_dict_like, is_nested_list_like
import
numpy
as
np
from
copy
import
deepcopy
pytestmark
=
pytest
.
mark
.
skip
__author__
=
"
Bert Palm
"
__email__
=
"
bert.palm@ufz.de
"
__copyright__
=
"
Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
"
...
...
@@ -16,10 +14,13 @@ __copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - U
arr
=
np
.
random
.
rand
(
8
)
TESTDATA
=
[
None
,
# empty
arr
.
copy
(),
# list
# np.array([arr.copy(), arr.copy(), arr.copy()]), # nested list
dict
(
a
=
arr
.
copy
(),
b
=
arr
.
copy
()),
# dict
None
,
# empty # 0
[
1
],
# 1
arr
.
copy
(),
# 2
np
.
array
([
arr
.
copy
(),
arr
.
copy
(),
arr
.
copy
()]),
# 3 - nested list
range
(
4
),
# 4
dict
(
a
=
arr
.
copy
(),
b
=
arr
.
copy
()),
# 5 dict
pd
.
DataFrame
(
dict
(
a
=
arr
.
copy
(),
b
=
arr
.
copy
()))
# 6 df
]
...
...
@@ -27,12 +28,10 @@ TESTDATA = [
@pytest.mark.parametrize
(
"
with_column_param
"
,
[
False
,
True
])
def
test_dios_create
(
data
,
with_column_param
):
if
is_dict_like
(
data
)
and
with_column_param
:
# giving column names in dict-keys and in columns-parameter is special in df
pytest
.
skip
()
data_copy0
=
deepcopy
(
data
)
data_copy1
=
deepcopy
(
data
)
# create columns list
if
with_column_param
:
df
=
pd
.
DataFrame
(
data
=
data_copy0
)
col
=
[
f
"
new_
{
c
}
"
for
c
in
df
]
...
...
@@ -43,16 +42,11 @@ def test_dios_create(data, with_column_param):
# giving nested lists, work different between df and dios
data_copy1
=
data_copy1
.
transpose
()
df
=
pd
.
DataFrame
(
data
=
data_copy1
,
columns
=
col
)
dios
=
DictOfSeries
(
data
=
data_copy0
,
columns
=
col
)
assert
len
(
dios
.
columns
)
==
len
(
df
.
columns
)
assert
np
.
all
(
dios
.
values
==
df
.
values
)
df
=
pd
.
DataFrame
(
data
=
data_copy0
,
columns
=
col
)
dios
=
DictOfSeries
(
data
=
data_copy1
,
columns
=
col
)
# df columns may not be strings, but dios'es are always
columns
=
[
str
(
c
)
for
c
in
df
.
columns
]
assert
list
(
dios
.
columns
)
==
columns
assert
dios
.
columns
.
equals
(
df
.
columns
)
for
c
in
df
.
columns
:
assert
np
.
all
(
dios
[
str
(
c
)
]
==
df
[
c
])
assert
np
.
all
(
dios
[
c
]
==
df
[
c
]
.
dropna
()
)
This diff is collapsed.
Click to expand it.
test/test_methods.py
+
14
−
5
View file @
ca31885e
...
...
@@ -5,15 +5,24 @@ def test_copy_copy_empty(getDtDiosAligned):
dios
=
getDtDiosAligned
.
copy
()
shallow
=
dios
.
copy
(
deep
=
False
)
deep
=
dios
.
copy
(
deep
=
True
)
empty
=
dios
.
copy_empty
()
empty_w_cols
=
dios
.
copy_empty
(
columns
=
True
)
empty_no_cols
=
dios
.
copy_empty
(
columns
=
False
)
assert
dios
is
not
shallow
assert
dios
is
not
deep
assert
dios
is
not
empty
assert
dios
is
not
empty_w_cols
assert
dios
is
not
empty_no_cols
assert
dios
.
itype
==
shallow
.
itype
assert
dios
.
itype
==
deep
.
itype
assert
dios
.
itype
==
empty
.
itype
for
attr
in
[
'
itype
'
,
'
_itype
'
,
'
_policy
'
,
]:
dios_attr
=
getattr
(
dios
,
attr
)
for
cop
in
[
shallow
,
deep
,
empty_w_cols
,
empty_no_cols
]:
copy_attr
=
getattr
(
cop
,
attr
)
assert
dios_attr
==
copy_attr
assert
dios
.
columns
.
equals
(
shallow
.
columns
)
assert
dios
.
columns
.
equals
(
deep
.
columns
)
assert
dios
.
columns
.
equals
(
empty_w_cols
.
columns
)
assert
not
dios
.
columns
.
equals
(
empty_no_cols
.
columns
)
for
i
in
dios
:
assert
dios
[
i
].
index
is
shallow
[
i
].
index
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment