Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
dios
Manage
Activity
Members
Labels
Plan
Issues
11
Issue boards
Milestones
Wiki
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RDM
dios
Commits
5f9f19aa
Commit
5f9f19aa
authored
4 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
introduced base-class, cleanup imports
parent
d77f0c9b
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
dios/base.py
+527
-0
527 additions, 0 deletions
dios/base.py
dios/dios.py
+44
-556
44 additions, 556 deletions
dios/dios.py
dios/indexer.py
+4
-6
4 additions, 6 deletions
dios/indexer.py
with
575 additions
and
562 deletions
dios/base.py
0 → 100644
+
527
−
0
View file @
5f9f19aa
#!/usr/bin/env python
from
.
import
operators
as
ops
from
.
import
lib
from
.lib
import
(
_CAST_POLICIES
,
_throw_MixedItype_err_or_warn
,
_find_least_common_itype
,
)
from
abc
import
abstractmethod
import
pandas
as
pd
import
operator
as
op
import
functools
as
ftools
from
pandas.core.common
import
is_bool_indexer
as
_is_bool_indexer
import
pandas.core.dtypes.common
as
pdcom
"""
Unlike the example says, return lists False, not True
>>is_iterator([1, 2, 3])
>>False
"""
from
pandas.core.dtypes.common
import
is_iterator
as
_is_iterator
__author__
=
"
Bert Palm
"
__email__
=
"
bert.palm@ufz.de
"
__copyright__
=
"
Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
"
class _DiosBase:
    # Shared engine for "dict of series" containers: columns are stored as the
    # values of an object-dtype pd.Series (self._data), keyed by column label.
    # Handles construction, item get/set, itype management/casting and the
    # arithmetic/comparison operators. Concrete subclasses (e.g. DictOfSeries)
    # only provide `_constructor`.

    @property
    @abstractmethod
    def _constructor(self):
        # Subclass hook: the concrete class used to build result objects.
        pass
    def __init__(self, data=None, columns=None, index=None, itype=None,
                 cast_policy='save', fastpath=False):
        """Build the container.

        Parameters
        ----------
        data : array-like, Iterable, dict, dios-like, pd.Series or None
            Data to insert column-wise.
        columns : array-like or None
            Column labels; must be unique.
        index : array-like, pd.Index or None
            Index used for every series built from raw (non-Series) data.
        itype : Itype, pd.Index, Itype-string-repr, type or None
            Index type every column must have. If None it is inferred from
            the data (lazily, on first non-empty insert, if data is empty).
        cast_policy : str
            Policy for (down-)casting series indices; validated against
            _CAST_POLICIES by the `cast_policy` property setter.
        fastpath : bool
            Internal shortcut: trust `data` as the ready-made backing series.
        """
        # goes through the property setter -> validates and sets self._policy
        self.cast_policy = cast_policy

        # we are called internally
        if fastpath:
            self._itype = itype or lib.ObjItype
            if data is not None:
                self._data = data
            else:
                # it is significantly faster, to provide an index and fill it,
                # than to successively build the index by adding data
                self._data = pd.Series(dtype='O', index=columns)
        else:
            if index is not None and not isinstance(index, pd.Index):
                index = pd.Index(index)

            # itype=None means infer the itype by the data, so we first set to
            # the highest possible itype, then insert data, then infer the
            # best-fitting itype.
            if itype is None and index is None:
                self._itype = lib.ObjItype
            else:
                if index is not None:
                    self._itype = lib.get_itype(index)
                if itype is not None:
                    # explicit itype wins over the one derived from `index`
                    self._itype = lib.get_itype(itype)

            cols = pd.Index([] if columns is None else columns)
            if not cols.is_unique:
                raise ValueError("columns must be unique")
            self._data = pd.Series(dtype='O', index=cols)

            if data is not None:
                self._init_insert_data(data, columns, index)

        # self._data still contain nans at all positions, where
        # no data was present, but a column-name was given
        if self._data.hasnans:
            e = pd.Series(dtype='O', index=index)
            for c in self.columns[self._data.isna()]:
                self._insert(c, e.copy())

        self._data.index.name = 'columns'

        # we try to infer the itype, but if we still have
        # no data, we will set the itype lazy, i.e. with
        # the first non-empty _insert()
        if itype is None:
            if self.empty:
                self._itype = 'INFER'
            else:
                self._itype = _find_least_common_itype(self._data)
                if not self._itype.unique:
                    _throw_MixedItype_err_or_warn(self.itype)
    def _init_insert_data(self, data, columns, index):
        """Insert items of a iterable in self.

        Normalizes `data` (dios-like, dict, Series, nested/flat list-like)
        into a mapping of column-label -> values and inserts each via
        `_insert`, building the series with the given `index`.
        """
        if _is_iterator(data):
            data = list(data)

        if _is_dios_like(data) or isinstance(data, dict):
            if columns is None:
                pass  # data is dict-like
            else:
                # keep only the requested columns
                data = {k: data[k] for k in data if k in columns}

        elif isinstance(data, pd.Series):
            # a single series becomes a single column; an explicit column
            # label (first of `columns`) overrides the series' own name
            name = data.name or 0
            if columns is not None and len(columns) > 0:
                name = self.columns[0]
            data = {name: data}

        elif pdcom.is_nested_list_like(data):
            # list of columns
            if columns is None:
                data = {i: d for i, d in enumerate(data)}
            elif len(data) == len(columns):
                data = dict(zip(self.columns, data))
            else:
                raise ValueError(f"{len(columns)} columns passed, data implies {len(data)} columns")

        elif pdcom.is_list_like(data):
            # flat list -> one single column
            name = 0 if columns is None or len(columns) < 1 else self.columns[0]
            data = {name: data}

        else:
            raise TypeError("data type not understood")

        for k in data:
            self._insert(k, pd.Series(data[k], index=index))
    # ----------------------------------------------------------------------
    # Indexing Methods

    def _insert(self, col, val):
        """Insert a fresh new value as pd.Series into self.

        `val` may be an iterator, a 1-column dios-like, None (empty series)
        or a pd.Series; anything else raises. The series index is cast to
        the container itype (or fixes a lazily-inferred itype).
        """
        val = list(val) if _is_iterator(val) else val

        if _is_dios_like(val):
            # only a single-column frame-like can collapse into one series
            val = val.squeeze()
            if not isinstance(val, pd.Series):
                raise ValueError(f"Cannot insert frame-like with more than one column")
        elif val is None:
            val = pd.Series()
        elif not isinstance(val, pd.Series):
            raise TypeError(f"Only data of type pandas.Series can be inserted, passed was {type(val)}")

        # set the itype lazy, i.e. when first non-empty
        # column is inserted
        if self._itype == 'INFER':
            if not val.empty:
                self._itype = lib.get_itype(val.index)
                # cast all pre-inserted empty series
                self._cast_all(self._itype, self._policy)
                if not self._itype.unique:
                    _throw_MixedItype_err_or_warn(self._itype)
        else:
            val = lib.cast_to_itype(val, self.itype, policy=self._policy)

        val.name = col
        # deep copy decouples the stored column from the caller's object
        self._data.at[col] = val.copy(deep=True)
    def __getitem__(self, key):
        """dios[key] -> dios/series

        Hashable key -> single column (a copied pd.Series); boolean dios-like,
        slice or boolean list-like -> row selection; any other list-like ->
        column selection. All non-scalar results are new dios objects.
        """
        key = list(key) if _is_iterator(key) else key

        if isinstance(key, tuple):
            raise KeyError("tuples are not allowed")

        if pdcom.is_hashable(key):
            # NOTE: we use copy here to prevent index
            # changes, that could result in an invalid
            # itype. A shallow copy is not sufficient.

            # work on columns, return series
            return self._data.at[key].copy()

        if _is_dios_like(key):
            # work on rows and columns
            new = self._getitem_bool_dios(key)
        elif isinstance(key, slice):
            # work on rows
            new = self._slice(key)
        elif _is_bool_indexer(key):
            # work on rows
            new = self._getitem_bool_listlike(key)
        else:
            # work on columns
            data = self._data.loc[key]
            new = self._constructor(data=data, itype=self.itype,
                                    cast_policy=self._policy, fastpath=True)
        return new
def
_slice
(
self
,
key
):
"""
slices self, return copy
"""
if
key
==
slice
(
None
):
return
self
.
copy
()
new
=
self
.
copy_empty
(
columns
=
True
)
for
k
in
self
.
columns
:
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
][
key
]
return
new
    def _getitem_bool_dios(self, key):
        """Select items by a boolean dios-like drop un-selected indices.

        Only columns present in both self and `key` survive; within each
        column only rows where the mask is True (and present in the column)
        are kept.
        """
        if not _is_bool_dios_like(key):
            raise ValueError("Must pass DictOfSeries with boolean values only")

        new = self.copy_empty(columns=True)
        for k in self.columns.intersection(key.columns):
            dat = self._data.at[k]
            val = key[k]
            # align rows: `val[val]` keeps only the True-labelled index
            idx = val[val].index.intersection(dat.index)
            new._data.at[k] = dat[idx]
        return new
def
_getitem_bool_listlike
(
self
,
key
):
new
=
self
.
copy_empty
(
columns
=
True
)
for
k
in
self
.
columns
:
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
].
loc
[
key
]
return
new
    def __setitem__(self, key, value):
        """dios[key] = value

        Hashable key: insert/replace a column (a pd.Series or a new label
        inserts; a scalar/list-like overwrites an existing column's values).
        Non-hashable key: select the target sub-dios via __getitem__ and
        write `value` into the selected positions of self.
        """
        key = list(key) if _is_iterator(key) else key

        if isinstance(key, tuple):
            raise KeyError(f"{key}. tuples are not allowed")

        elif pdcom.is_hashable(key):
            if isinstance(value, pd.Series) or key not in self.columns:
                self._insert(key, value)
            elif _is_dios_like(value) or pdcom.is_nested_list_like(value):
                raise ValueError("Incompatible indexer with multi-dimensional value")
            else:
                # in-place overwrite keeps the existing index untouched
                self._data.at[key][:] = value

        else:
            data = self.__getitem__(key)
            assert isinstance(data, self.__class__), f"getitem returned data of type {type(data)}"

            # special cases
            if _is_dios_like(value):
                self._setitem_dios(data, value)
            # NOTE: pd.Series also considered list-like
            elif pdcom.is_list_like(value):
                self._setitem_listlike(data, value)

            # default case
            else:
                for k in data.columns:
                    s = data._data.at[k]
                    s[:] = value
                    # write back only the selected rows
                    self._data.at[k][s.index] = s
    def _setitem_listlike(self, data, value):
        """Broadcast one value per column of `data` into self.

        `value` must have exactly one entry per selected column; entry i is
        written into every selected row of column i.
        """
        value = value.values if isinstance(value, pd.Series) else value

        if len(value) != len(data.columns):
            raise ValueError(f"array-like value of length {len(value)} could "
                             f"not be broadcast to indexing result of shape "
                             f"(.., {len(data.columns)})")

        for i, k in enumerate(data.columns):
            s = data._data.at[k]
            s[:] = value[i]
            # write back only the rows that were selected
            self._data.at[k][s.index] = s
    def _setitem_dios(self, data, value):
        """Write values from a dios-like to self.

        No justification or alignment of columns, but of indices.
        If value has missing indices, nan's are inserted at that
        locations, just like `series.loc[:]=val` or `df[:]=val` do.

        Eg.
        di[::2] = di[::3]   ->   di[::2]

            x    |      x    |        x
        =====    |   ====    |   ======
        0   x    |   0  z    |   0    z
        2   x    | = 3  z    | ->2  NaN
        4   x    |   6  z    |   4  NaN
        6   x    |           |   6    z

        Parameter
        ----------
        data : dios
            A maybe trimmed version of self
        value : dios, pd.Dataframe
            The value to set with the same column dimension like data
        """
        if len(data) != len(value.columns):
            raise ValueError(f"shape mismatch: values array of shape "
                             f"(.., {len(value.columns)}) could not "
                             f"be broadcast to indexing result of "
                             f"shape (.., {len(data.columns)})")

        for i, k in enumerate(data):
            dat = data._data.at[k]
            # .loc cannot handle empty series,
            # like `emptySeries.loc[:] = [1,2]`
            if dat.empty:
                continue
            # columns are matched by position, not by label
            val = value[value.columns[i]]
            dat.loc[:] = val
            self._data.at[k].loc[dat.index] = dat
    def __delitem__(self, key):
        # drop the whole column; pd.Series.__delitem__ does the work
        del self._data[key]
    # ------------------------------------------------------------------------------
    # Base properties and basic dunder magic

    @property
    def columns(self):
        """The column labels (the index of the backing series)."""
        return self._data.index

    @columns.setter
    def columns(self, cols):
        index = pd.Index(cols)
        if not index.is_unique:
            raise ValueError("columns index must have unique values")
        self._data.index = index

    @property
    def itype(self):
        """The index type of the columns, or None while inference is pending."""
        if self._itype == 'INFER':
            return None
        return self._itype

    @itype.setter
    def itype(self, itype):
        itype = lib.get_itype(itype)
        # cast existing columns first, so a failing cast leaves itype unchanged
        self._cast_all(itype, policy=self._policy)
        self._itype = itype

    @property
    def cast_policy(self):
        """Policy used when casting a series index to the container itype."""
        return self._policy

    @cast_policy.setter
    def cast_policy(self, policy):
        if policy not in _CAST_POLICIES:
            raise ValueError(f"policy must be one of {_CAST_POLICIES}")
        self._policy = policy
def
_cast_all
(
self
,
itype
,
policy
):
c
=
'
?
'
data
=
self
.
copy_empty
()
try
:
for
c
in
self
.
columns
:
data
.
_data
.
at
[
c
]
=
lib
.
cast_to_itype
(
self
.
_data
.
at
[
c
],
itype
,
policy
=
policy
)
except
Exception
as
e
:
raise
type
(
e
)(
f
"
Column
{
c
}
:
"
+
str
(
e
))
from
e
    def __len__(self):
        # number of columns, not number of rows
        return len(self.columns)

    @property
    def empty(self):
        # empty means: no columns at all, or every column is an empty series
        return len(self) == 0 or all(s.empty for s in self._data)

    def __iter__(self):
        # iterate column labels, like pd.DataFrame does
        yield from self.columns

    def __reversed__(self):
        yield from reversed(self.columns)

    def __contains__(self, item):
        # membership tests against column labels
        return item in self.columns
    # ----------------------------------------------------------------------
    # if copy.copy() is copy.copy(): return copy.copy().copy()

    def __copy__(self):
        # stdlib copy.copy() also gets a deep copy (columns are mutable)
        return self.copy(deep=True)

    def __deepcopy__(self, memo=None):
        # memo is accepted for the copy.deepcopy protocol but not used
        return self.copy(deep=True)

    def copy(self, deep=True):
        """Return a copy; deep=True copies every column series as well."""
        if deep:
            data = pd.Series(dtype='O', index=self.columns)
            for c in self.columns:
                data.at[c] = self._data.at[c].copy(deep=True)
        else:
            # shallow: the new dios shares the backing series with self
            data = self._data
        kws = dict(itype=self._itype, cast_policy=self._policy)
        return self._constructor(data=data, fastpath=True, **kws)

    def copy_empty(self, columns=True):
        """Return an empty dios; with columns=True the column labels (and
        per-column dtypes) are kept, otherwise no columns at all."""
        data = None
        if columns is True:  # is correct
            data = pd.Series(dtype='O', index=self.columns)
            for c in self.columns:
                data.at[c] = pd.Series(dtype=self._data.at[c].dtype)
        kws = dict(itype=self._itype, cast_policy=self._policy)
        return self._constructor(data=data, fastpath=True, **kws)
    # ------------------------------------------------------------------------------
    # Operators

    def _op1(self, op):
        """Apply a unary operator column-wise; returns a new dios."""
        new = self.copy_empty(columns=True)
        try:
            for k in self.columns:
                new[k] = op(self[k])
        except Exception as e:
            # prepend the operation for a more helpful error message
            raise type(e)(f"'{ops.OP_MAP[op]} dios' failed: " + str(e)) from e
        return new
    def _op2(self, op, other, align=True, inplace=False):
        """Apply a binary operator between self and `other` column-wise.

        `other` may be dios-like (matched by key), a pd.Series (applied to
        every column), dict-like (matched by key), a nested list-like
        (matched by position) or a scalar/flat list-like (applied to every
        column). With align=True, series operands are inner-aligned first.
        """
        def raiseif(kself, kother, s):
            # guard: both operands must agree on `s` (keys or length)
            if kself != kother:
                raise ValueError(f"{s} does not match, {s} left: {kself}, {s} right: {kother}")

        def doalign(left, right):
            return left.align(right, join='inner') if align else (left, right)

        def gen():
            # yield (column, op-result) pairs, dispatched on the type of `other`
            if _is_dios_like(other):
                raiseif(list(self), list(other), 'keys')
                for k in self.columns:
                    left, right = self[k], other[k]
                    yield k, op(*doalign(left, right))
            elif isinstance(other, pd.Series):
                for k in self.columns:
                    left, right = self[k], other
                    yield k, op(*doalign(left, right))
            elif pdcom.is_dict_like(other):
                raiseif(sorted(self), sorted(other), 'keys')
                for k in self.columns:
                    yield k, op(self[k], other[k])
            elif pdcom.is_nested_list_like(other):
                raiseif(len(self), len(other), 'length')
                for i, k in enumerate(self.columns):
                    yield k, op(self[k], other[i])
            elif pdcom.is_scalar(other) or pdcom.is_list_like(other):
                for k in self.columns:
                    yield k, op(self[k], other)
            else:
                raise NotImplementedError

        new = self if inplace else self.copy_empty(columns=True)
        try:
            for k, val in gen():
                new[k] = val
        except Exception as e:
            raise type(e)(f"'dios {ops.OP_MAP[op]} other' failed: " + str(e)) from e
        return new
    # unary operators
    __neg__ = ftools.partialmethod(_op1, op.neg)
    __abs__ = ftools.partialmethod(_op1, op.abs)
    __invert__ = ftools.partialmethod(_op1, op.inv)

    # comparisons: element-wise, no index alignment
    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable by Python's default rules — presumably intended for a
    # mutable container, but worth confirming.
    __eq__ = ftools.partialmethod(_op2, op.eq, align=False)
    __ne__ = ftools.partialmethod(_op2, op.ne, align=False)
    __le__ = ftools.partialmethod(_op2, op.le, align=False)
    __ge__ = ftools.partialmethod(_op2, op.ge, align=False)
    __lt__ = ftools.partialmethod(_op2, op.lt, align=False)
    __gt__ = ftools.partialmethod(_op2, op.gt, align=False)

    # arithmetic: inner-aligned (the _op2 default)
    __add__ = ftools.partialmethod(_op2, op.add)
    __sub__ = ftools.partialmethod(_op2, op.sub)
    __mul__ = ftools.partialmethod(_op2, op.mul)
    __mod__ = ftools.partialmethod(_op2, op.mod)
    __truediv__ = ftools.partialmethod(_op2, op.truediv)
    __floordiv__ = ftools.partialmethod(_op2, op.floordiv)
    __pow__ = ftools.partialmethod(_op2, op.pow)

    # bitwise / boolean
    __and__ = ftools.partialmethod(_op2, op.and_)
    __or__ = ftools.partialmethod(_op2, op.or_)
    __xor__ = ftools.partialmethod(_op2, op.xor)
    # ------------------------------------------------------------------------------
    # Indexer

    @property
    def loc(self):
        """Label-based row/column indexer (analogous to pandas .loc)."""
        return _LocIndexer(self)

    @property
    def iloc(self):
        """Position-based indexer (analogous to pandas .iloc)."""
        return _iLocIndexer(self)

    @property
    def aloc(self):
        """dios-specific alignment-aware indexer."""
        return _aLocIndexer(self)

    @property
    def at(self):
        """Single-value label-based accessor."""
        return _AtIndexer(self)

    @property
    def iat(self):
        """Single-value position-based accessor."""
        return _iAtIndexer(self)
def _is_dios_like(obj) -> bool:
    """True iff `obj` behaves like a dios/DataFrame, i.e.:

    - must have columns
    - columns is some kind of pd.Index
    - iter will iter through columns
    - a `in` obj check if obj is in columns
    - obj[key] will give a pd.Series
    - obj.squeeze() give pd.Series if len(obj) == 1
    """
    # idiom: one isinstance with a tuple instead of two or-ed calls
    return isinstance(obj, (_DiosBase, pd.DataFrame))
def
_is_bool_series
(
obj
)
->
bool
:
return
isinstance
(
obj
,
pd
.
Series
)
and
obj
.
dtype
==
bool
def _is_bool_dios_like(obj) -> bool:
    """True iff `obj` is dios-like and every column holds boolean values."""
    if not _is_dios_like(obj):
        return False
    dtypes = obj.dtypes
    if (dtypes == bool).all():
        return True
    # object-dtype columns may still hold only booleans; check each column
    if (dtypes == 'O').any():
        return obj.apply(_is_bool_indexer).all()
    return False
# keep this here to prevent cyclic import
from
.indexer
import
_aLocIndexer
,
_iLocIndexer
,
_LocIndexer
,
_iAtIndexer
,
_AtIndexer
This diff is collapsed.
Click to expand it.
dios/dios.py
+
44
−
556
View file @
5f9f19aa
from
.operators
import
OP_MAP
as
_OP_MAP
from
.base
import
_DiosBase
,
_is_dios_like
from
.lib
import
Opts
,
OptsFields
,
dios_options
from
.lib
import
*
from
.lib
import
_find_least_common_itype
from
.lib
import
(
_CAST_POLICIES
,
_itype_le
,
_itype_lt
,
_throw_MixedItype_err_or_warn
,
_find_least_common_itype
,
)
import
functools
as
ftools
import
pandas
as
pd
import
pandas
as
pd
import
pandas.core.dtypes.common
as
pdcom
import
numpy
as
np
import
numpy
as
np
import
operator
as
op
import
functools
as
ftools
import
pandas.core.dtypes.common
as
dcom
_is_list_like
=
dcom
.
is_list_like
_is_nested_list_like
=
dcom
.
is_nested_list_like
_is_scalar
=
dcom
.
is_scalar
_is_integer
=
dcom
.
is_integer
_is_dict_like
=
dcom
.
is_dict_like
_is_number
=
dcom
.
is_number
_is_hashable
=
dcom
.
is_hashable
from
pandas.core.common
import
is_bool_indexer
as
_is_bool_indexer
"""
Unlike the example says, return lists False, not True
>>is_iterator([1, 2, 3])
>>False
"""
from
pandas.core.dtypes.common
import
is_iterator
as
_is_iterator
from
typing
import
Union
,
Any
class
DictOfSeries
(
_DiosBase
):
class
DictOfSeries
:
"""
A data frame where every column has its own index.
"""
A data frame where every column has its own index.
DictOfSeries is a collection of pd.Series
'
s which aim to be as close as possible similar to
DictOfSeries is a collection of pd.Series
'
s which aim to be as close as possible similar to
...
@@ -44,355 +16,40 @@ class DictOfSeries:
...
@@ -44,355 +16,40 @@ class DictOfSeries:
unlike the former, which provide a single row-index for all columns. This solves problems with
unlike the former, which provide a single row-index for all columns. This solves problems with
unaligned data and data which varies widely in length.
unaligned data and data which varies widely in length.
Indexing with ``di[]``, ``di.loc[]`` and ``di.iloc[]`` should work analogous to these methods
Indexing with ``di[]``, ``di.loc[]`` and ``di.iloc[]`` should work analogous to these methods
from pd.DataFrame. The indexer can be a single label, a slice, a list-like, a boolean list-like,
from pd.DataFrame. The indexer can be a single label, a slice, a list-like, a boolean list-like,
or a boolean
dio
s/pd.DataFrame and can be used to selectively get or set data.
or a boolean
DictOfSerie
s/pd.DataFrame and can be used to selectively get or set data.
Parameters
Parameters
----------
----------
data : array-like, Iterable, dict, or scalar value
data : array-like, Iterable, dict, or scalar value
Contains data stored in Series.
Contains data stored in Series.
columns : array-like
columns : array-like
Column labels to use for resulting frame. Will default to
Column labels to use for resulting frame. Will default to
RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
RangeIndex(0, 1, 2, ..., n) if no column labels are provided.
itype : Itype, pd.Index, Itype-string-repr, type
Index type that every series in this dios should have.
if None, the index-type is inferred each time a series is inserted
or deleted.
cast_policy : str
Policy to use for down-casting an itype.
"""
# ------------------------------------------------------------------------------
# Constructors
def
__init__
(
self
,
data
=
None
,
columns
=
None
,
index
=
None
,
itype
=
None
,
cast_policy
=
'
save
'
,
fastpath
=
False
):
self
.
cast_policy
=
cast_policy
# we are called internally
if
fastpath
:
self
.
_itype
=
itype
or
ObjItype
if
data
is
not
None
:
self
.
_data
=
data
else
:
# it is significantly faster, to provide an index and fill it,
# than to successively build the index by adding data
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
columns
)
else
:
if
index
is
not
None
and
not
isinstance
(
index
,
pd
.
Index
):
index
=
pd
.
Index
(
index
)
# itype=None means infer the itype by the data, so we first set to the highest
# possible itype, then insert data, then infer the best-fitting itype.
if
itype
is
None
and
index
is
None
:
self
.
_itype
=
ObjItype
else
:
if
index
is
not
None
:
self
.
_itype
=
get_itype
(
index
)
if
itype
is
not
None
:
self
.
_itype
=
get_itype
(
itype
)
cols
=
pd
.
Index
([]
if
columns
is
None
else
columns
)
if
not
cols
.
is_unique
:
raise
ValueError
(
"
columns must be unique
"
)
self
.
_data
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
cols
)
if
data
is
not
None
:
self
.
_init_insert_data
(
data
,
columns
,
index
)
# self._data still contain nans at all positions, where
# no data was present, but a column-name was given
if
self
.
_data
.
hasnans
:
e
=
pd
.
Series
(
dtype
=
'
O
'
,
index
=
index
)
for
c
in
self
.
columns
[
self
.
_data
.
isna
()]:
self
.
_insert
(
c
,
e
.
copy
())
self
.
_data
.
index
.
name
=
'
columns
'
# we try to infer the itype, but if we still have
# no data, we will set the itype lazy, i.e. with
# the first non-empty _insert()
if
itype
is
None
:
if
self
.
empty
:
self
.
_itype
=
'
INFER
'
else
:
self
.
_itype
=
_find_least_common_itype
(
self
.
_data
)
if
not
self
.
_itype
.
unique
:
_throw_MixedItype_err_or_warn
(
self
.
itype
)
def
_init_insert_data
(
self
,
data
,
columns
,
index
):
"""
Insert items of a iterable in self
"""
if
_is_iterator
(
data
):
data
=
list
(
data
)
if
_is_dios_like
(
data
)
or
isinstance
(
data
,
dict
):
if
columns
is
None
:
pass
# data is dict-like
else
:
data
=
{
k
:
data
[
k
]
for
k
in
data
if
k
in
columns
}
elif
isinstance
(
data
,
pd
.
Series
):
name
=
data
.
name
or
0
if
columns
is
not
None
and
len
(
columns
)
>
0
:
name
=
self
.
columns
[
0
]
data
=
{
name
:
data
}
elif
_is_nested_list_like
(
data
):
if
columns
is
None
:
data
=
{
i
:
d
for
i
,
d
in
enumerate
(
data
)}
elif
len
(
data
)
==
len
(
columns
):
data
=
dict
(
zip
(
self
.
columns
,
data
))
else
:
raise
ValueError
(
f
"
{
len
(
columns
)
}
columns passed, data implies
{
len
(
data
)
}
columns
"
)
elif
_is_list_like
(
data
):
name
=
0
if
columns
is
None
or
len
(
columns
)
<
1
else
self
.
columns
[
0
]
data
=
{
name
:
data
}
else
:
raise
TypeError
(
"
data type not understood
"
)
for
k
in
data
:
index : Index or array-like
self
.
_insert
(
k
,
pd
.
Series
(
data
[
k
],
index
=
index
))
Index to use to reindex every given series during init. Ignored if omitted.
# ----------------------------------------------------------------------
itype : Itype, pd.Index, Itype-string-repr or type
# Indexing Methods
Every series that is inserted, must have an index of this type or any
of this types subtypes.
def
_insert
(
self
,
col
,
val
):
If None, the itype is inferred as soon as the first non-empty series is inserted.
"""
Insert a fresh new value as pd.Series into self
"""
val
=
list
(
val
)
if
_is_iterator
(
val
)
else
val
if
_is_dios_like
(
val
):
val
=
val
.
squeeze
()
if
not
isinstance
(
val
,
pd
.
Series
):
raise
ValueError
(
f
"
Cannot insert frame-like with more than one column
"
)
elif
val
is
None
:
val
=
pd
.
Series
()
elif
not
isinstance
(
val
,
pd
.
Series
):
raise
TypeError
(
f
"
Only data of type pandas.Series can be inserted, passed was
{
type
(
val
)
}
"
)
# set the itype lazy, i.e. when first non-empty
# column is inserted
if
self
.
_itype
==
'
INFER
'
:
if
not
val
.
empty
:
self
.
_itype
=
get_itype
(
val
.
index
)
# cast all pre-inserted empty series
self
.
_cast_all
(
self
.
_itype
,
self
.
_policy
)
if
not
self
.
_itype
.
unique
:
_throw_MixedItype_err_or_warn
(
self
.
_itype
)
else
:
val
=
cast_to_itype
(
val
,
self
.
itype
,
policy
=
self
.
_policy
)
val
.
name
=
col
self
.
_data
.
at
[
col
]
=
val
.
copy
(
deep
=
True
)
def
__getitem__
(
self
,
key
):
"""
dios[key] -> dios/series
"""
key
=
list
(
key
)
if
_is_iterator
(
key
)
else
key
if
isinstance
(
key
,
tuple
):
raise
KeyError
(
"
tuples are not allowed
"
)
if
_is_hashable
(
key
):
# NOTE: we use copy here to prevent index
# changes, that could result in an invalid
# itype. A shallow copy is not sufficient.
# work on columns, return series
return
self
.
_data
.
at
[
key
].
copy
()
if
_is_dios_like
(
key
):
# work on rows and columns
new
=
self
.
_getitem_bool_dios
(
key
)
elif
isinstance
(
key
,
slice
):
# work on rows
new
=
self
.
_slice
(
key
)
elif
_is_bool_indexer
(
key
):
# work on rows
new
=
self
.
_getitem_bool_listlike
(
key
)
else
:
# work on columns
data
=
self
.
_data
.
loc
[
key
]
new
=
DictOfSeries
(
data
=
data
,
itype
=
self
.
itype
,
cast_policy
=
self
.
_policy
,
fastpath
=
True
)
return
new
def
_slice
(
self
,
key
):
"""
slices self, return copy
"""
if
key
==
slice
(
None
):
return
self
.
copy
()
new
=
self
.
copy_empty
(
columns
=
True
)
for
k
in
self
.
columns
:
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
][
key
]
return
new
def
_getitem_bool_dios
(
self
,
key
):
"""
Select items by a boolean dios-like drop un-selected indices.
"""
if
not
_is_bool_dios_like
(
key
):
raise
ValueError
(
"
Must pass DictOfSeries with boolean values only
"
)
new
=
self
.
copy_empty
(
columns
=
True
)
for
k
in
self
.
columns
.
intersection
(
key
.
columns
):
dat
=
self
.
_data
.
at
[
k
]
val
=
key
[
k
]
# align rows
idx
=
val
[
val
].
index
.
intersection
(
dat
.
index
)
new
.
_data
.
at
[
k
]
=
dat
[
idx
]
return
new
def
_getitem_bool_listlike
(
self
,
key
):
new
=
self
.
copy_empty
(
columns
=
True
)
for
k
in
self
.
columns
:
new
.
_data
.
at
[
k
]
=
self
.
_data
.
at
[
k
].
loc
[
key
]
return
new
def
__setitem__
(
self
,
key
,
value
):
"""
dios[key] = value
"""
key
=
list
(
key
)
if
_is_iterator
(
key
)
else
key
if
isinstance
(
key
,
tuple
):
raise
KeyError
(
f
"
{
key
}
. tuples are not allowed
"
)
elif
_is_hashable
(
key
):
if
isinstance
(
value
,
pd
.
Series
)
or
key
not
in
self
.
columns
:
self
.
_insert
(
key
,
value
)
elif
_is_dios_like
(
value
)
or
_is_nested_list_like
(
value
):
raise
ValueError
(
"
Incompatible indexer with multi-dimensional value
"
)
else
:
self
.
_data
.
at
[
key
][:]
=
value
else
:
data
=
self
.
__getitem__
(
key
)
assert
isinstance
(
data
,
self
.
__class__
),
f
"
getitem returned data of type
{
type
(
data
)
}
"
# special cases
if
_is_dios_like
(
value
):
self
.
_setitem_dios
(
data
,
value
)
# NOTE: pd.Series also considered list-like
elif
_is_list_like
(
value
):
self
.
_setitem_listlike
(
data
,
value
)
# default case
else
:
for
k
in
data
.
columns
:
s
=
data
.
_data
.
at
[
k
]
s
[:]
=
value
self
.
_data
.
at
[
k
][
s
.
index
]
=
s
def
_setitem_listlike
(
self
,
data
,
value
):
value
=
value
.
values
if
isinstance
(
value
,
pd
.
Series
)
else
value
if
len
(
value
)
!=
len
(
data
.
columns
):
raise
ValueError
(
f
"
array-like value of length
{
len
(
value
)
}
could
"
f
"
not be broadcast to indexing result of shape
"
f
"
(..,
{
len
(
data
.
columns
)
}
)
"
)
for
i
,
k
in
enumerate
(
data
.
columns
):
s
=
data
.
_data
.
at
[
k
]
s
[:]
=
value
[
i
]
self
.
_data
.
at
[
k
][
s
.
index
]
=
s
def
_setitem_dios
(
self
,
data
,
value
):
"""
Write values from a dios-like to self.
No justification or alignment of columns, but of indices.
If value has missing indices, nan
'
s are inserted at that
locations, just like `series.loc[:]=val` or `df[:]=val` do.
Eg.
di[::2] = di[::3] -> di[::2]
x | x | x |
cast_policy : {
'
save
'
,
'
force
'
,
'
never
'
}, default
'
save
'
===== | ==== | ====== |
Policy used for (down-)casting the index of a series if its type does not match
0 x | 0 z | 0 z |
the ``itype``.
2 x | = 3 z | -> 2 NaN |
"""
4 x | 6 z | 4 NaN |
6 x | 6 z |
Parameter
----------
data : dios
A maybe trimmed version of self
value : dios, pd.Dataframe
The value to set with the same column dimension like data
"""
if
len
(
data
)
!=
len
(
value
.
columns
):
raise
ValueError
(
f
"
shape mismatch: values array of shape
"
f
"
(..,
{
len
(
value
.
columns
)
}
) could not
"
f
"
be broadcast to indexing result of
"
f
"
shape (..,
{
len
(
data
.
columns
)
}
)
"
)
for
i
,
k
in
enumerate
(
data
):
dat
=
data
.
_data
.
at
[
k
]
# .loc cannot handle empty series,
# like `emptySeries.loc[:] = [1,2]`
if
dat
.
empty
:
continue
val
=
value
[
value
.
columns
[
i
]]
dat
.
loc
[:]
=
val
self
.
_data
.
at
[
k
].
loc
[
dat
.
index
]
=
dat
def
__delitem__
(
self
,
key
):
del
self
.
_data
[
key
]
# ------------------------------------------------------------------------------
# Base properties and basic dunder magic
@property
def
columns
(
self
):
return
self
.
_data
.
index
@columns.setter
def
columns
(
self
,
cols
):
index
=
pd
.
Index
(
cols
)
if
not
index
.
is_unique
:
raise
ValueError
(
"
columns index must have unique values
"
)
self
.
_data
.
index
=
index
@property
def
itype
(
self
):
if
self
.
_itype
==
'
INFER
'
:
return
None
return
self
.
_itype
@itype.setter
def
__init__
(
self
,
data
=
None
,
columns
=
None
,
index
=
None
,
itype
=
None
,
cast_policy
=
'
save
'
,
fastpath
=
False
):
def
itype
(
self
,
itype
):
super
().
__init__
(
itype
=
get_itype
(
itype
)
data
=
data
,
columns
=
columns
,
index
=
index
,
itype
=
itype
,
cast_policy
=
cast_policy
,
fastpath
=
fastpath
self
.
_cast_all
(
itype
,
policy
=
self
.
_policy
)
)
self
.
_itype
=
itype
@property
@property
def
cast_policy
(
self
):
def
_constructor
(
self
):
return
self
.
_policy
return
DictOfSeries
@cast_policy.setter
def
cast_policy
(
self
,
policy
):
if
policy
not
in
_CAST_POLICIES
:
raise
ValueError
(
f
"
policy must be one of
{
_CAST_POLICIES
}
"
)
self
.
_policy
=
policy
def
_cast_all
(
self
,
itype
,
policy
):
c
=
'
?
'
data
=
self
.
copy_empty
()
try
:
for
c
in
self
.
columns
:
data
.
_data
.
at
[
c
]
=
cast_to_itype
(
self
.
_data
.
at
[
c
],
itype
,
policy
=
policy
)
except
Exception
as
e
:
raise
type
(
e
)(
f
"
Column
{
c
}
:
"
+
str
(
e
))
from
e
def
__len__
(
self
):
return
len
(
self
.
columns
)
@property
@property
def
indexes
(
self
):
def
indexes
(
self
):
...
@@ -412,23 +69,10 @@ class DictOfSeries:
...
@@ -412,23 +69,10 @@ class DictOfSeries:
def
lengths
(
self
):
def
lengths
(
self
):
return
self
.
_data
.
apply
(
len
)
return
self
.
_data
.
apply
(
len
)
@property
def
empty
(
self
):
return
len
(
self
)
==
0
or
all
(
s
.
empty
for
s
in
self
.
_data
)
@property
@property
def
size
(
self
):
def
size
(
self
):
return
self
.
lengths
.
sum
()
return
self
.
lengths
.
sum
()
def
__iter__
(
self
):
yield
from
self
.
columns
def
__reversed__
(
self
):
yield
from
reversed
(
self
.
columns
)
def
__contains__
(
self
,
item
):
return
item
in
self
.
columns
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Dict-like methods
# Dict-like methods
...
@@ -480,7 +124,7 @@ class DictOfSeries:
...
@@ -480,7 +124,7 @@ class DictOfSeries:
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Broadcasting methods and helper
# Broadcasting methods and helper
def
for_each
(
self
,
attr_or_callable
,
**
kw
arg
s
):
def
for_each
(
self
,
attr_or_callable
,
**
kw
d
s
):
"""
"""
Apply a callable or a pandas.Series method or property on each column.
Apply a callable or a pandas.Series method or property on each column.
...
@@ -492,7 +136,7 @@ class DictOfSeries:
...
@@ -492,7 +136,7 @@ class DictOfSeries:
could be specified as string. If a callable is given it must take
could be specified as string. If a callable is given it must take
pandas.Series as the only positional argument.
pandas.Series as the only positional argument.
**kw
arg
s: any
**kw
d
s: any
kwargs to passed to callable
kwargs to passed to callable
Returns
Returns
...
@@ -554,7 +198,7 @@ class DictOfSeries:
...
@@ -554,7 +198,7 @@ class DictOfSeries:
for
c
in
self
.
columns
:
for
c
in
self
.
columns
:
dat
=
self
.
_data
.
at
[
c
]
dat
=
self
.
_data
.
at
[
c
]
if
call
:
if
call
:
data
.
at
[
c
]
=
attr_or_callable
(
dat
,
**
kw
arg
s
)
data
.
at
[
c
]
=
attr_or_callable
(
dat
,
**
kw
d
s
)
else
:
else
:
data
.
at
[
c
]
=
attr_or_callable
.
fget
(
dat
)
data
.
at
[
c
]
=
attr_or_callable
.
fget
(
dat
)
return
data
return
data
...
@@ -618,7 +262,7 @@ class DictOfSeries:
...
@@ -618,7 +262,7 @@ class DictOfSeries:
dat
=
self
.
_data
.
at
[
c
].
values
if
raw
else
self
.
_data
.
at
[
c
]
dat
=
self
.
_data
.
at
[
c
].
values
if
raw
else
self
.
_data
.
at
[
c
]
s
=
func
(
dat
,
*
args
,
**
kwds
)
s
=
func
(
dat
,
*
args
,
**
kwds
)
result
.
at
[
c
]
=
s
result
.
at
[
c
]
=
s
if
_
is_scalar
(
s
):
if
pdcom
.
is_scalar
(
s
):
need_convert
=
True
need_convert
=
True
else
:
else
:
need_dios
=
True
need_dios
=
True
...
@@ -628,7 +272,7 @@ class DictOfSeries:
...
@@ -628,7 +272,7 @@ class DictOfSeries:
if
need_convert
:
if
need_convert
:
for
c
in
result
.
index
:
for
c
in
result
.
index
:
result
.
at
[
c
]
=
pd
.
Series
(
result
[
c
])
result
.
at
[
c
]
=
pd
.
Series
(
result
[
c
])
itype
=
_find_least_common_itype
(
result
)
itype
=
_find_least_common_itype
(
result
)
result
=
DictOfSeries
(
data
=
result
,
itype
=
itype
,
fastpath
=
True
)
result
=
DictOfSeries
(
data
=
result
,
itype
=
itype
,
fastpath
=
True
)
else
:
else
:
raise
ValueError
(
axis
)
raise
ValueError
(
axis
)
...
@@ -739,6 +383,17 @@ class DictOfSeries:
...
@@ -739,6 +383,17 @@ class DictOfSeries:
data
=
self
.
for_each
(
'
astype
'
,
dtype
=
dtype
,
copy
=
copy
,
errors
=
errors
)
data
=
self
.
for_each
(
'
astype
'
,
dtype
=
dtype
,
copy
=
copy
,
errors
=
errors
)
return
DictOfSeries
(
data
=
data
,
itype
=
self
.
itype
,
cast_policy
=
self
.
_policy
,
fastpath
=
True
)
return
DictOfSeries
(
data
=
data
,
itype
=
self
.
itype
,
cast_policy
=
self
.
_policy
,
fastpath
=
True
)
def memory_usage(self, index=True, deep=False):
    """Total memory usage in bytes, summed over all columns.

    ``index`` and ``deep`` are forwarded to pandas.Series.memory_usage.
    """
    per_column = self.for_each(pd.Series.memory_usage, index=index, deep=deep)
    return per_column.sum()
def to_df(self):
    """Combine all columns into a single pandas.DataFrame."""
    # apply(lambda s: s) expands the series-of-series into a frame; the
    # transpose turns the column names back into DataFrame columns
    frame = self._data.apply(lambda s: s).transpose()
    if isinstance(frame, pd.Series):
        # apparently the result degrades to a Series (presumably for an
        # empty dios) — normalize that to an empty DataFrame
        return pd.DataFrame()
    return frame
@property
def debugDf(self):
    """DataFrame view of the whole dios — convenient to inspect in a debugger."""
    return self.to_df()
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Boolean stuff
# Boolean stuff
...
@@ -781,34 +436,6 @@ class DictOfSeries:
...
@@ -781,34 +436,6 @@ class DictOfSeries:
data
=
self
.
for_each
(
'
notna
'
)
data
=
self
.
for_each
(
'
notna
'
)
return
DictOfSeries
(
data
=
data
,
itype
=
self
.
itype
,
cast_policy
=
self
.
_policy
,
fastpath
=
True
)
return
DictOfSeries
(
data
=
data
,
itype
=
self
.
itype
,
cast_policy
=
self
.
_policy
,
fastpath
=
True
)
# ----------------------------------------------------------------------
# if copy.copy() is copy.copy(): return copy.copy().copy()

def __copy__(self):
    # NOTE(review): shallow copy.copy() deliberately returns a *deep* copy
    # here (see the joke comment above) — confirm this is intended.
    return self.copy(deep=True)
def __deepcopy__(self, memo=None):
    # `memo` is accepted to satisfy the copy protocol but is ignored.
    return self.copy(deep=True)
def copy(self, deep=True):
    """Return a copy of this DictOfSeries.

    With ``deep=True`` every column's series is deep-copied; otherwise the
    new dios shares the underlying series-holder with this one.  The itype
    and cast-policy are carried over in both cases.
    """
    if deep:
        data = pd.Series(dtype='O', index=self.columns)
        for name in self.columns:
            data.at[name] = self._data.at[name].copy(deep=True)
    else:
        data = self._data
    return DictOfSeries(data=data, itype=self._itype,
                        cast_policy=self._policy, fastpath=True)
def copy_empty(self, columns=True):
    """Return an empty dios with the same itype and cast-policy.

    If ``columns`` is the literal True, the column names and per-column
    dtypes are preserved (each column becomes an empty series); otherwise
    no columns are created at all.
    """
    data = None
    if columns is True:  # the identity check against the literal True is deliberate
        data = pd.Series(dtype='O', index=self.columns)
        for name in self.columns:
            data.at[name] = pd.Series(dtype=self._data.at[name].dtype)
    return DictOfSeries(data=data, itype=self._itype,
                        cast_policy=self._policy, fastpath=True)
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Rendering Methods
# Rendering Methods
...
@@ -875,113 +502,6 @@ class DictOfSeries:
...
@@ -875,113 +502,6 @@ class DictOfSeries:
return
pprint_dios
(
self
,
**
kwargs
)
return
pprint_dios
(
self
,
**
kwargs
)
def memory_usage(self, index=True, deep=False):
    """Total memory usage in bytes, summed over all columns; ``index`` and
    ``deep`` are forwarded to pandas.Series.memory_usage."""
    return self.for_each(pd.Series.memory_usage, index=index, deep=deep).sum()
def to_df(self):
    """Combine all columns into a single pandas.DataFrame."""
    # apply(lambda s: s) expands the series-of-series into a frame; the
    # transpose turns the column names back into DataFrame columns
    df_or_ser = self._data.apply(lambda s: s).transpose()
    # NOTE(review): the result apparently degrades to a Series in some case
    # (presumably an empty dios) — normalized to an empty DataFrame here.
    return pd.DataFrame() if isinstance(df_or_ser, pd.Series) else df_or_ser
@property
def debugDf(self):
    """DataFrame view of the whole dios — convenient to inspect in a debugger."""
    return self.to_df()
# ------------------------------------------------------------------------------
# Operators
def _op1(self, op):
    """Apply the unary operator ``op`` column-wise and return a new dios."""
    result = self.copy_empty(columns=True)
    try:
        for col in self.columns:
            result[col] = op(self[col])
    except Exception as e:
        # keep the exception type, prefix it with the failed operation
        raise type(e)(f"'{_OP_MAP[op]} dios' failed: " + str(e)) from e
    return result
def _op2(self, op, other, align=True, inplace=False):
    """Apply the binary operator ``op`` between every column and ``other``.

    ``other`` may be dios-like (matched column by column), a pandas.Series
    (applied against every column), a dict-like (matched by key), a nested
    list-like (matched by position) or a scalar / flat list-like (applied
    to every column as-is).  With ``align=True`` series operands are
    inner-aligned on their indexes first.  Returns ``self`` when
    ``inplace``, otherwise a new empty-copied dios holding the results.
    """
    def raiseif(kself, kother, s):
        # reject operands whose keys / length do not match ours
        if kself != kother:
            raise ValueError(f"{s} does not match, {s} left: {kself}, {s} right: {kother}")

    def doalign(left, right):
        # inner-align the two series, or pass them through unchanged
        return left.align(right, join='inner') if align else (left, right)

    def gen():
        # yield (column, op-result) pairs, dispatching on the type of `other`
        if _is_dios_like(other):
            # NOTE(review): keys are compared in order here (list), but
            # sorted for plain dict-likes below — confirm this asymmetry.
            raiseif(list(self), list(other), 'keys')
            for k in self.columns:
                left, right = self[k], other[k]
                yield k, op(*doalign(left, right))
        elif isinstance(other, pd.Series):
            # one series against every column
            for k in self.columns:
                left, right = self[k], other
                yield k, op(*doalign(left, right))
        elif _is_dict_like(other):
            raiseif(sorted(self), sorted(other), 'keys')
            for k in self.columns:
                yield k, op(self[k], other[k])
        elif _is_nested_list_like(other):
            # positional match: the i-th inner list meets the i-th column
            raiseif(len(self), len(other), 'length')
            for i, k in enumerate(self.columns):
                yield k, op(self[k], other[i])
        elif _is_scalar(other) or _is_list_like(other):
            for k in self.columns:
                yield k, op(self[k], other)
        else:
            raise NotImplementedError
    new = self if inplace else self.copy_empty(columns=True)
    try:
        for k, val in gen():
            new[k] = val
    except Exception as e:
        # keep the exception type, prefix it with the failed operation
        raise type(e)(f"'dios {_OP_MAP[op]} other' failed: " + str(e)) from e
    return new
# Unary operators — delegated column-wise via _op1.
__neg__ = ftools.partialmethod(_op1, op.neg)
__abs__ = ftools.partialmethod(_op1, op.abs)
__invert__ = ftools.partialmethod(_op1, op.inv)
# Comparison operators — element-wise via _op2, without index alignment.
__eq__ = ftools.partialmethod(_op2, op.eq, align=False)
__ne__ = ftools.partialmethod(_op2, op.ne, align=False)
__le__ = ftools.partialmethod(_op2, op.le, align=False)
__ge__ = ftools.partialmethod(_op2, op.ge, align=False)
__lt__ = ftools.partialmethod(_op2, op.lt, align=False)
__gt__ = ftools.partialmethod(_op2, op.gt, align=False)
# Arithmetic operators — element-wise via _op2, with inner alignment.
__add__ = ftools.partialmethod(_op2, op.add)
__sub__ = ftools.partialmethod(_op2, op.sub)
__mul__ = ftools.partialmethod(_op2, op.mul)
__mod__ = ftools.partialmethod(_op2, op.mod)
__truediv__ = ftools.partialmethod(_op2, op.truediv)
__floordiv__ = ftools.partialmethod(_op2, op.floordiv)
__pow__ = ftools.partialmethod(_op2, op.pow)
# Bitwise / logical operators — element-wise via _op2, with inner alignment.
__and__ = ftools.partialmethod(_op2, op.and_)
__or__ = ftools.partialmethod(_op2, op.or_)
__xor__ = ftools.partialmethod(_op2, op.xor)
# ------------------------------------------------------------------------------
# Indexer
@property
def loc(self):
    """Label-based indexer, analogous to pandas ``.loc``."""
    return _LocIndexer(self)
@property
def iloc(self):
    """Integer-position-based indexer, analogous to pandas ``.iloc``."""
    return _iLocIndexer(self)
@property
def aloc(self):
    """Dios-specific indexer backed by _aLocIndexer.
    NOTE(review): presumably label access with automatic alignment —
    confirm against indexer.py."""
    return _aLocIndexer(self)
@property
def at(self):
    """Scalar label-based accessor, analogous to pandas ``.at``."""
    return _AtIndexer(self)
@property
def iat(self):
    """Scalar position-based accessor, analogous to pandas ``.iat``."""
    return _iAtIndexer(self)
def
_empty_repr
(
di
):
def
_empty_repr
(
di
):
return
f
"
Empty DictOfSeries
\n
"
\
return
f
"
Empty DictOfSeries
\n
"
\
...
@@ -1119,35 +639,6 @@ def _to_aligned_df(dios, no_value=' '):
...
@@ -1119,35 +639,6 @@ def _to_aligned_df(dios, no_value=' '):
return
df
return
df
def _is_list_like_not_nested(obj):
    """True for a flat list-like: list-like, but not a list-like of list-likes."""
    if not _is_list_like(obj):
        return False
    return not _is_nested_list_like(obj)
def _is_dios_like(obj) -> bool:
    """True for objects usable like a DictOfSeries.

    Such an object must have `columns` (some kind of pd.Index), iterate
    through its columns, support ``in`` against column names, yield a
    pd.Series for ``obj[key]``, and ``obj.squeeze()`` must give a
    pd.Series when ``len(obj) == 1``.
    """
    return isinstance(obj, (DictOfSeries, pd.DataFrame))
def _is_bool_series(obj) -> bool:
    """True iff ``obj`` is a pandas.Series of plain bool dtype."""
    if not isinstance(obj, pd.Series):
        return False
    return obj.dtype == bool
def _is_bool_dios_like(obj) -> bool:
    # True if `obj` is dios-like and every column holds boolean values.
    if not _is_dios_like(obj):
        return False
    dtypes = obj.dtypes
    if (dtypes == bool).all():
        return True
    # object-dtype columns may still hold only booleans — check value-wise
    # with pandas' is_bool_indexer
    if (dtypes == 'O').any():
        return obj.apply(_is_bool_indexer).all()
    return False
def
to_dios
(
obj
)
->
DictOfSeries
:
def
to_dios
(
obj
)
->
DictOfSeries
:
if
isinstance
(
obj
,
DictOfSeries
):
if
isinstance
(
obj
,
DictOfSeries
):
return
obj
return
obj
...
@@ -1163,6 +654,3 @@ def __monkey_patch_pandas():
...
@@ -1163,6 +654,3 @@ def __monkey_patch_pandas():
__monkey_patch_pandas
()
__monkey_patch_pandas
()
# keep this here to prevent cyclic import
from
.indexer
import
_aLocIndexer
,
_iLocIndexer
,
_LocIndexer
,
_iAtIndexer
,
_AtIndexer
This diff is collapsed.
Click to expand it.
dios/indexer.py
+
4
−
6
View file @
5f9f19aa
from
.
dios
import
(
from
.
base
import
(
Di
ctOfSeries
,
_
Di
osBase
,
_is_dios_like
,
_is_dios_like
,
_is_bool_series
,
_is_list_like_not_nested
,
_is_bool_dios_like
,
_is_bool_dios_like
,
_is_iterator
)
_is_iterator
)
...
@@ -23,7 +21,7 @@ _is_null_slice = ccom.is_null_slice
...
@@ -23,7 +21,7 @@ _is_null_slice = ccom.is_null_slice
class
_Indexer
:
class
_Indexer
:
def
__init__
(
self
,
obj
:
Di
ctOfSeries
):
def
__init__
(
self
,
obj
:
_
Di
osBase
):
self
.
obj
=
obj
self
.
obj
=
obj
self
.
_data
=
obj
.
_data
self
.
_data
=
obj
.
_data
...
@@ -271,7 +269,7 @@ class _aLocIndexer(_Indexer):
...
@@ -271,7 +269,7 @@ class _aLocIndexer(_Indexer):
if
lowdim
:
if
lowdim
:
return
data
.
squeeze
()
return
data
.
squeeze
()
else
:
else
:
return
DictOfSeries
(
data
=
data
,
fastpath
=
True
,
**
kws
)
return
self
.
obj
.
_constructor
(
data
=
data
,
fastpath
=
True
,
**
kws
)
def
__setitem__
(
self
,
key
,
value
):
def
__setitem__
(
self
,
key
,
value
):
rowkeys
,
colkeys
,
_
=
self
.
_unpack_key_aloc
(
key
)
rowkeys
,
colkeys
,
_
=
self
.
_unpack_key_aloc
(
key
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment