Skip to content
Snippets Groups Projects
Commit 0a80f8fe authored by Bert Palm's avatar Bert Palm 🎇
Browse files

private _reduce_horizontal() become public reduce_columns()

parent 2ce3840a
No related branches found
No related tags found
No related merge requests found
...@@ -195,7 +195,7 @@ class DictOfSeries(_DiosBase): ...@@ -195,7 +195,7 @@ class DictOfSeries(_DiosBase):
yield idx, DictOfSeries(data=row.to_dict(), index=[idx]) yield idx, DictOfSeries(data=row.to_dict(), index=[idx])
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Broadcasting methods and helper # Broadcasting and Reducing
def for_each(self, attr_or_callable, **kwds): def for_each(self, attr_or_callable, **kwds):
""" """
...@@ -354,42 +354,61 @@ class DictOfSeries(_DiosBase): ...@@ -354,42 +354,61 @@ class DictOfSeries(_DiosBase):
raise ValueError(axis) raise ValueError(axis)
return result return result
def _reduce_horizontal(self, func, initializer_value): def reduce_columns(self, func, initial=None, skipna=False):
""" """
Reduce values of all columns to a single pandas.Series by a given function. Reduce all columns to a single pandas.Series by a given function.
A given function is called on pairs of columns, and the result is used Apply a function of two pandas.Series as arguments, cumulatively to all
for next pair-call. Because not all columns necessarily share the same columns, from left to right, so as to reduce the columns to a single
index, some indices (and their corresponding values) may just be seen once. pandas.Series. If initial is present, it is placed before the columns
Therefore, every firstly seen index' values are reduced against a dummy in the calculation, and serves as a default when the columns are empty.
series of the initializer_value.
Parameters Parameters
---------- ----------
func: function func : function
The function must take two series and must return a single series. The function must take two identically indexed pandas.Series and should
Both input series will have the same index and the returned one also return a single pandas.Series with the same index.
should have it.
initial : column-label or pd.Series, default None
initializer_value: Any The series to start with. If None a dummy series is created, with the
A value that is overwritten, by any(!) other value, if the indices of all columns and the first seen values.
``func`` is evaluated. This is mandatory and also must apply,
if the value is not present in any of the columns! skipna : bool, default False
E.g. ``False`` for ``func=lambda s1,s2: s1 | s2`` or If True, skip NaN values.
``0`` for ``func=max`` if all values are positive integers.
Returns Returns
------- -------
pandas.Series pandas.Series
A series that has a unique index with the union of indexes A series with the reducing result and the index of the start series,
of all columns and the function result as values. defined by ``initial``.
""" """
res = pd.Series(data=initializer_value, index=self.index_of('all')) if initial is None:
for d in self._data: value = pd.Series(index=self.index_of('all'))
base = res.loc[d.index] for d in self._data:
if len(base) > 0: value = value.combine_first(d)
res.loc[d.index] = func(base, d) elif isinstance(initial, pd.Series):
return res value = initial.copy()
elif initial in self.columns:
value = self._data.at[initial].copy()
else:
raise ValueError("initial must be pd.Series, a column label or None")
if skipna:
val = value.dropna()
data = self.dropna()._data
else:
val = value
data = self._data
for d in data:
idx = val.index & d.index
if len(idx) > 0:
l, r = val.loc[idx], d.loc[idx]
val.loc[idx] = func(l, r)
if skipna:
value.loc[val.index] = val
return value
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Misc methods # Misc methods
...@@ -478,10 +497,7 @@ class DictOfSeries(_DiosBase): ...@@ -478,10 +497,7 @@ class DictOfSeries(_DiosBase):
return self.for_each(pd.Series.min, skipna=skipna) return self.for_each(pd.Series.min, skipna=skipna)
elif axis in [1, 'columns']: elif axis in [1, 'columns']:
func = lambda s1, s2: s1.where(s1 < s2, s2) func = lambda s1, s2: s1.where(s1 < s2, s2)
res = self._reduce_horizontal(func, np.inf) return self.reduce_columns(func, skipna=skipna)
if not skipna:
res.loc[self.isna().any(axis=1)] = np.nan
return res
raise ValueError(axis) raise ValueError(axis)
def max(self, axis=None, skipna=None): def max(self, axis=None, skipna=None):
...@@ -489,10 +505,7 @@ class DictOfSeries(_DiosBase): ...@@ -489,10 +505,7 @@ class DictOfSeries(_DiosBase):
return self.for_each(pd.Series.min, skipna=skipna) return self.for_each(pd.Series.min, skipna=skipna)
elif axis in [1, 'columns']: elif axis in [1, 'columns']:
func = lambda s1, s2: s1.where(s1 > s2, s2) func = lambda s1, s2: s1.where(s1 > s2, s2)
res = self._reduce_horizontal(func, -np.inf) return self.reduce_columns(func, skipna=skipna)
if not skipna:
res.loc[self.isna().any(axis=1)] = np.nan
return res
raise ValueError(axis) raise ValueError(axis)
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
...@@ -514,7 +527,8 @@ class DictOfSeries(_DiosBase): ...@@ -514,7 +527,8 @@ class DictOfSeries(_DiosBase):
return self._data.apply(all) return self._data.apply(all)
elif axis in [1, 'columns']: elif axis in [1, 'columns']:
func = lambda s1, s2: s1.astype(bool) & s2.astype(bool) func = lambda s1, s2: s1.astype(bool) & s2.astype(bool)
return self._reduce_horizontal(func, True) init = pd.Series(True, dtype=bool, index=self.index_of('all'))
return self.reduce_columns(func, init)
elif axis is None: elif axis is None:
return self._data.apply(all).all() return self._data.apply(all).all()
raise ValueError(axis) raise ValueError(axis)
...@@ -524,7 +538,8 @@ class DictOfSeries(_DiosBase): ...@@ -524,7 +538,8 @@ class DictOfSeries(_DiosBase):
return self._data.apply(any) return self._data.apply(any)
elif axis in [1, 'columns']: elif axis in [1, 'columns']:
func = lambda s1, s2: s1.astype(bool) | s2.astype(bool) func = lambda s1, s2: s1.astype(bool) | s2.astype(bool)
return self._reduce_horizontal(func, False) init = pd.Series(False, dtype=bool, index=self.index_of('all'))
return self.reduce_columns(func, init)
elif axis is None: elif axis is None:
return self._data.apply(any).any() return self._data.apply(any).any()
raise ValueError(axis) raise ValueError(axis)
...@@ -554,7 +569,8 @@ class DictOfSeries(_DiosBase): ...@@ -554,7 +569,8 @@ class DictOfSeries(_DiosBase):
return data.for_each('hasnans') return data.for_each('hasnans')
elif axis in [1, 'columns']: elif axis in [1, 'columns']:
func = lambda s1, s2: s1.isna() | s2.isna() func = lambda s1, s2: s1.isna() | s2.isna()
return data._reduce_horizontal(func, False) init = pd.Series(False, dtype=bool, index=self.index_of('all'))
return data.reduce_columns(func, init)
elif axis is None: elif axis is None:
return self.isna(drop_empty=drop_empty) return self.isna(drop_empty=drop_empty)
raise ValueError(axis) raise ValueError(axis)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment