...
 
Commits (2)
......@@ -5,6 +5,7 @@ from .lib import _find_least_common_itype
import functools as ftools
import pandas as pd
import pandas.core.dtypes.common as pdcom
import pandas.core.common as pdcorecom
import numpy as np
......@@ -582,6 +583,98 @@ class DictOfSeries(_DiosBase):
data = self.for_each('astype', dtype=dtype, copy=copy, errors=errors)
return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
def _mask_or_where(self, cond, other=np.nan, inplace=False, mask=True):
    """
    Shared implementation behind ``mask`` and ``where``.

    Parameters
    ----------
    cond : bool DictOfSeries, boolean indexer, or callable
        Selection condition. A callable is invoked with the target data
        and its return value is used as the condition.
    other : scalar, Series, DictOfSeries, or callable, default NaN
        Replacement value(s). A callable is invoked with the target data
        and its return value is written.
    inplace : bool, default False
        If True, write into ``self``; otherwise work on ``self.copy()``.
    mask : bool, default True
        If True, the entries selected by ``cond`` are overwritten.
        If False, the entries selected by ``~cond`` are overwritten.

    Returns
    -------
    DictOfSeries or None
        The modified copy, or None when ``inplace`` is True.

    Raises
    ------
    ValueError
        If a non-callable ``cond`` is neither dios-like nor a boolean indexer.
    """
    target = self if inplace else self.copy()

    # Callables are resolved against the target: `other` first, then `cond`.
    if callable(other):
        other = other(target)

    if callable(cond):
        # NOTE: the result of a callable condition is passed on unchecked.
        cond = cond(target)
    elif not (_is_dios_like(cond) or pdcorecom.is_bool_indexer(cond)):
        # A dios-like condition is not validated here; its boolean-ness
        # is expected to be checked by aloc.
        raise ValueError("Object with boolean entries only expected for the condition")

    # mask -> overwrite where cond holds; where -> overwrite where it does not.
    selector = cond if mask else ~cond
    target.aloc[selector] = other

    return None if inplace else target
def where(self, cond, other=np.nan, inplace=False):
    """
    Keep values where the condition is True and replace all others.

    Parameters
    ----------
    cond : bool DictOfSeries, Series, array-like, or callable
        Entries for which ``cond`` is True are kept; entries for which it
        is False are replaced with the corresponding value from ``other``.
        A callable is computed on the DictOfSeries and should return a
        boolean DictOfSeries or array. It must not modify its input
        (this is not checked).
        If ``cond`` is a bool Series, every column is (row-)aligned
        against it before the boolean values are evaluated. Missing
        indices are treated like False values.
    other : scalar, Series, DictOfSeries, or callable
        Replacement for the entries where ``cond`` is False.
        A callable is computed on the DictOfSeries and should return a
        scalar or a DictOfSeries. It must not modify its input
        (this is not checked).
        If ``other`` is a Series, every column is (row-)aligned against
        it before the values are written. NaNs are written for missing
        indices.
    inplace : bool, default False
        Whether to perform the operation in place on the data.

    Returns
    -------
    DictOfSeries
        The result, or None if ``inplace`` is True.

    See Also
    --------
    mask : Replace values where the condition is True.
    """
    # `where` is the inverse of `mask`, hence mask=False.
    return self._mask_or_where(mask=False, cond=cond, other=other, inplace=inplace)
def mask(self, cond, other=np.nan, inplace=False):
    """
    Replace values where the condition is True.

    Parameters
    ----------
    cond : bool DictOfSeries, Series, array-like, or callable
        Where cond is False, keep the original value. Where True, replace
        with corresponding value from other. If cond is callable, it is computed
        on the DictOfSeries and should return boolean DictOfSeries or array.
        The callable must not change input DictOfSeries (though dios does not check it).
        If cond is a bool Series, every column is (row-)aligned against it, before the
        boolean values are evaluated. Missing indices are treated like False values.
    other : scalar, Series, DictOfSeries, or callable
        Entries where cond is True are replaced with corresponding value from other.
        If other is callable, it is computed on the DictOfSeries and should return scalar
        or DictOfSeries. The callable must not change input DictOfSeries (though dios
        does not check it).
        If other is a Series, every column is (row-)aligned against it, before the values
        are written. NaNs are written for missing indices.
    inplace : bool, default False
        Whether to perform the operation in place on the data.

    Returns
    -------
    DictOfSeries
        The result, or None if ``inplace`` is True.

    See Also
    --------
    where : Replace values where the condition is False.
    """
    return self._mask_or_where(cond=cond, other=other, inplace=inplace, mask=True)
def memory_usage(self, index=True, deep=False):
    """
    Return the summed result of ``pd.Series.memory_usage`` over the data.

    Parameters
    ----------
    index : bool, default True
        Forwarded to ``pd.Series.memory_usage``.
    deep : bool, default False
        Forwarded to ``pd.Series.memory_usage``.

    Returns
    -------
    The ``.sum()`` of whatever ``for_each`` returns for
    ``pd.Series.memory_usage`` (presumably one value per column).
    """
    per_column = self.for_each(pd.Series.memory_usage, index=index, deep=deep)
    return per_column.sum()
......@@ -645,12 +738,34 @@ class DictOfSeries(_DiosBase):
raise ValueError(axis)
# ----------------------------------------------------------------------
# Boolean stuff
# Boolean and empty stuff
def equals(self, other):
    """
    Test whether two DictOfSeries contain the same elements.

    This function allows two DictOfSeries to be compared against each other to see
    if they have the same shape and elements. NaNs in the same location are considered equal.
    The column headers do not need to have the same type, but the elements within the columns
    must be the same dtype.

    Parameters
    ----------
    other : DictOfSeries
        The other DictOfSeries to compare with.

    Returns
    -------
    bool
        True if all elements are the same in both DictOfSeries, False otherwise.
        Also False if ``other`` is not a DictOfSeries or if the comparison
        itself raises.
    """
    if not isinstance(other, DictOfSeries):
        return False
    try:
        # NaN != NaN, so the NaN positions are compared separately from
        # the remaining (non-NaN) values.
        eq_nans = (self.isna() == other.isna()).all(None)
        eq_data = (self.dropna() == other.dropna()).all(None)
        # Equal values with differing dtypes must not count as equal.
        eq_dtypes = (self.dtypes == other.dtypes).all()
        return bool(eq_nans and eq_dtypes and eq_data)
    except Exception:
        # Deliberate best-effort: objects that cannot be compared
        # are reported as not equal.
        return False
......