Skip to content
Snippets Groups Projects
Commit 0a80f8fe authored by Bert Palm's avatar Bert Palm 🎇
Browse files

private _reduce_horizontal() becomes public reduce_columns()

parent 2ce3840a
No related branches found
No related tags found
No related merge requests found
......@@ -195,7 +195,7 @@ class DictOfSeries(_DiosBase):
yield idx, DictOfSeries(data=row.to_dict(), index=[idx])
# ------------------------------------------------------------------------------
# Broadcasting methods and helper
# Broadcasting and Reducing
def for_each(self, attr_or_callable, **kwds):
"""
......@@ -354,42 +354,61 @@ class DictOfSeries(_DiosBase):
raise ValueError(axis)
return result
def reduce_columns(self, func, initial=None, skipna=False):
    """
    Reduce all columns to a single pandas.Series by a given function.

    Apply a function of two pandas.Series as arguments, cumulatively to all
    columns, from left to right, so as to reduce the columns to a single
    pandas.Series. If ``initial`` is present, it is placed before the columns
    in the calculation, and serves as a default when the columns are empty.

    Because the columns do not necessarily share the same index, ``func`` is
    only ever called on the labels that the running result and the current
    column have in common; labels of a column that are not present in the
    start series are ignored.

    Parameters
    ----------
    func : function
        The function must take two identically indexed pandas.Series and
        should return a single pandas.Series with the same index.

    initial : column-label or pd.Series, default None
        The series to start with. A pandas.Series is copied and used as is,
        a column label selects (a copy of) that column. If None, a start
        series is created that is indexed by ``self.index_of('all')`` and
        holds, for every label, the first non-NaN value found when going
        through the columns from left to right.

    skipna : bool, default False
        If True, NaN values are dropped from the start series and from the
        columns (via ``self.dropna()``) before ``func`` is applied. Labels
        at which the start series is NaN keep NaN in the result.

    Returns
    -------
    pandas.Series
        A series with the reducing result and the index of the start series,
        defined by ``initial``.

    Raises
    ------
    ValueError
        If ``initial`` is neither None, a pandas.Series nor a column label.
    """
    if initial is None:
        value = pd.Series(index=self.index_of('all'))
        for d in self._data:
            # combine_first only fills labels that are still NaN, so the
            # first seen value per label wins.
            value = value.combine_first(d)
    elif isinstance(initial, pd.Series):
        value = initial.copy()
    elif initial in self.columns:
        value = self._data.at[initial].copy()
    else:
        raise ValueError("initial must be pd.Series, a column label or None")

    if skipna:
        # Work on a NaN-free copy; the result is written back into
        # ``value`` after the loop.
        val = value.dropna()
        data = self.dropna()._data
    else:
        # No copy: ``val`` is ``value``, updates below are applied in place.
        val = value
        data = self._data

    for d in data:
        # Explicit set intersection of the labels. The ``&`` operator on
        # pandas.Index no longer performs a set operation in current pandas.
        idx = val.index.intersection(d.index)
        if len(idx) > 0:
            left, right = val.loc[idx], d.loc[idx]
            val.loc[idx] = func(left, right)

    if skipna:
        value.loc[val.index] = val
    return value
# ------------------------------------------------------------------------------
# Misc methods
......@@ -478,10 +497,7 @@ class DictOfSeries(_DiosBase):
return self.for_each(pd.Series.min, skipna=skipna)
elif axis in [1, 'columns']:
func = lambda s1, s2: s1.where(s1 < s2, s2)
res = self._reduce_horizontal(func, np.inf)
if not skipna:
res.loc[self.isna().any(axis=1)] = np.nan
return res
return self.reduce_columns(func, skipna=skipna)
raise ValueError(axis)
def max(self, axis=None, skipna=None):
......@@ -489,10 +505,7 @@ class DictOfSeries(_DiosBase):
return self.for_each(pd.Series.min, skipna=skipna)
elif axis in [1, 'columns']:
func = lambda s1, s2: s1.where(s1 > s2, s2)
res = self._reduce_horizontal(func, -np.inf)
if not skipna:
res.loc[self.isna().any(axis=1)] = np.nan
return res
return self.reduce_columns(func, skipna=skipna)
raise ValueError(axis)
# ----------------------------------------------------------------------
......@@ -514,7 +527,8 @@ class DictOfSeries(_DiosBase):
return self._data.apply(all)
elif axis in [1, 'columns']:
func = lambda s1, s2: s1.astype(bool) & s2.astype(bool)
return self._reduce_horizontal(func, True)
init = pd.Series(True, dtype=bool, index=self.index_of('all'))
return self.reduce_columns(func, init)
elif axis is None:
return self._data.apply(all).all()
raise ValueError(axis)
......@@ -524,7 +538,8 @@ class DictOfSeries(_DiosBase):
return self._data.apply(any)
elif axis in [1, 'columns']:
func = lambda s1, s2: s1.astype(bool) | s2.astype(bool)
return self._reduce_horizontal(func, False)
init = pd.Series(False, dtype=bool, index=self.index_of('all'))
return self.reduce_columns(func, init)
elif axis is None:
return self._data.apply(any).any()
raise ValueError(axis)
......@@ -554,7 +569,8 @@ class DictOfSeries(_DiosBase):
return data.for_each('hasnans')
elif axis in [1, 'columns']:
func = lambda s1, s2: s1.isna() | s2.isna()
return data._reduce_horizontal(func, False)
init = pd.Series(False, dtype=bool, index=self.index_of('all'))
return data.reduce_columns(func, init)
elif axis is None:
return self.isna(drop_empty=drop_empty)
raise ValueError(axis)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment