From b8d571f0fb7a12c7fe006052c6d6ad0b9703d44d Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Tue, 21 Apr 2020 15:52:07 +0200
Subject: [PATCH] docs docs docs

---
 dios/base.py        |  39 +++++++++++
 dios/dios.py        | 157 +++++++++++++++++++++++++++++++++++++++++---
 dox/doc_indexing.md |  52 +++++++--------
 3 files changed, 212 insertions(+), 36 deletions(-)

diff --git a/dios/base.py b/dios/base.py
index f6679cb..6e77bc4 100644
--- a/dios/base.py
+++ b/dios/base.py
@@ -460,6 +460,45 @@ class _DiosBase:
         return self._constructor(data=data, fastpath=True, **kws)
 
     def copy_empty(self, columns=True):
+        """
+        Return a new DictOfSeries object, with the same properties as the original.
+        Parameters
+        ----------
+        columns: bool, default True
+             If ``True``, the copy will have the same columns as the original, but empty.
+
+        Returns
+        -------
+        DictOfSeries: empty copy
+
+        Examples
+        --------
+
+        >>> di = DictOfSeries({'A': range(2), 'B': range(3)})
+        >>> di
+           A |    B |
+        ==== | ==== |
+        0  0 | 0  0 |
+        1  1 | 1  1 |
+             | 2  2 |
+
+        >>> empty = di.copy_empty()
+        >>> empty
+        Empty DictOfSeries
+        Columns: ['A', 'B']
+
+        The properties are the same, e.g.:
+
+        >>> empty.itype == di.itype
+        True
+        >>> empty.cast_policy == di.cast_policy
+        True
+        >>> empty.dtypes == di.dtypes
+        columns
+        A    True
+        B    True
+        dtype: bool
+        """
         data = None
         if columns is True:  # is correct
             data = pd.Series(dtype='O', index=self.columns)
diff --git a/dios/dios.py b/dios/dios.py
index 5742cd2..875b797 100644
--- a/dios/dios.py
+++ b/dios/dios.py
@@ -324,7 +324,69 @@ class DictOfSeries(_DiosBase):
         See Also
         --------
         DictOfSeries.for_each: apply pd.Series methods or properties to each column
-        """
+
+        Examples
+        --------
+
+        We use the example DictOfSeries from :ref:`indexing <doc_indexing:Example dios>`.
+
+        >>> di
+            a |    b |     c |     d |
+        ===== | ==== | ===== | ===== |
+        0   0 | 2  5 | 4   7 | 6   0 |
+        1   7 | 3  6 | 5  17 | 7   1 |
+        2  14 | 4  7 | 6  27 | 8   2 |
+        3  21 | 5  8 | 7  37 | 9   3 |
+        4  28 | 6  9 | 8  47 | 10  4 |
+
+        >>> di.apply(max)
+        columns
+        a    28
+        b     9
+        c    47
+        d     4
+        dtype: int64
+
+        >>> di.apply(pd.Series.count)
+        columns
+        a    5
+        b    5
+        c    5
+        d    5
+        dtype: int64
+
+        One can pass keyword arguments directly:
+
+        >>> di.apply(pd.Series.value_counts, normalize=True)
+              a |      b |       c |      d |
+        ======= | ====== | ======= | ====== |
+        7   0.2 | 7  0.2 | 7   0.2 | 4  0.2 |
+        14  0.2 | 6  0.2 | 37  0.2 | 3  0.2 |
+        21  0.2 | 5  0.2 | 47  0.2 | 2  0.2 |
+        28  0.2 | 9  0.2 | 27  0.2 | 1  0.2 |
+        0   0.2 | 8  0.2 | 17  0.2 | 0  0.2 |
+
+        Or define your own function:
+
+        >>> di.apply(lambda s : 'high' if max(s) > 10 else 'low')
+        columns
+        a    high
+        b     low
+        c    high
+        d     low
+        dtype: object
+
+        More advanced functions that return a list-like can also be given. Note that
+        the returned lists do not necessarily need to have the same length.
+
+        >>> func = lambda s : ('high', max(s), min(s)) if min(s) > (max(s)//2) else ('low',max(s))
+        >>> di.apply(func)
+             a |       b |      c |      d |
+        ====== | ======= | ====== | ====== |
+        0  low | 0  high | 0  low | 0  low |
+        1   28 | 1     9 | 1   47 | 1    4 |
+               | 2     5 |        |        |
+        """
         if axis in [1, 'columns']:
             raise NotImplementedError
 
@@ -485,11 +547,46 @@ class DictOfSeries(_DiosBase):
         return self.for_each(pd.Series.memory_usage, index=index, deep=deep).sum()
 
     def to_df(self):
+        """
+        Transform DictOfSeries to a pandas.DataFrame.
+
+        Because a pandas.DataFrame can not handle Series of different
+        length, but DictOfSeries can, the missing data is filled with
+        NaNs.
+
+        Returns
+        -------
+        pandas.DataFrame: transformed data
+        
+        Examples
+        --------
+
+        Missing data locations are filled with NaNs.
+
+        >>> a = pd.Series(11, index=range(2))
+        >>> b = pd.Series(22, index=range(3))
+        >>> c = pd.Series(33, index=range(1,9,3))
+        >>> di = DictOfSeries(dict(a=a, b=b, c=c))
+        >>> di
+            a |     b |     c |
+        ===== | ===== | ===== |
+        0  11 | 0  22 | 1  33 |
+        1  11 | 1  22 | 4  33 |
+              | 2  22 | 7  33 |
+        >>> di.to_df()
+        columns     a     b     c
+        0        11.0  22.0   NaN
+        1        11.0  22.0  33.0
+        2         NaN  22.0   NaN
+        4         NaN   NaN  33.0
+        7         NaN   NaN  33.0
+        """
         df_or_ser = self._data.apply(lambda s: s).transpose()
         return pd.DataFrame() if isinstance(df_or_ser, pd.Series) else df_or_ser
 
     @property
     def debugDf(self):
+        """ Alias for ``to_df()`` as property, for debugging purposes."""
         return self.to_df()
 
     def min(self, axis=0, skipna=True):
@@ -523,6 +620,29 @@ class DictOfSeries(_DiosBase):
         return DictOfSeries(data=data, itype=self.itype, cast_policy=self._policy, fastpath=True)
 
     def all(self, axis=0):
+        """
+        Return whether all elements are True, potentially over an axis.
+
+        Returns True unless there is at least one element within a series
+        or along a DictOfSeries axis that is False or equivalent (e.g. zero or empty).
+
+        Parameters
+        ----------
+        axis : {0 or 'index', 1 or 'columns', None}, default 0
+            Indicate which axis or axes should be reduced.
+             * 0 / 'index' : reduce the index, return a Series whose index is the original column labels.
+             * 1 / 'columns' : reduce the columns, return a Series whose index is the union of all column indexes.
+             * None : reduce all axes, return a scalar.
+
+        Returns
+        -------
+        pandas.Series
+
+        See Also
+        --------
+        pandas.Series.all: Return True if all elements are True.
+        any: Return True if one (or more) elements are True.
+        """
         if axis in [0, 'index']:
             return self._data.apply(all)
         elif axis in [1, 'columns']:
@@ -534,6 +654,29 @@ class DictOfSeries(_DiosBase):
         raise ValueError(axis)
 
     def any(self, axis=0):
+        """
+        Return whether any element is True, potentially over an axis.
+
+        Returns False unless there is at least one element within a series
+        or along a DictOfSeries axis that is True or equivalent (e.g. non-zero or non-empty).
+
+        Parameters
+        ----------
+        axis : {0 or 'index', 1 or 'columns', None}, default 0
+            Indicate which axis or axes should be reduced.
+             * 0 / 'index' : reduce the index, return a Series whose index is the original column labels.
+             * 1 / 'columns' : reduce the columns, return a Series whose index is the union of all column indexes.
+             * None : reduce all axes, return a scalar.
+
+        Returns
+        -------
+        pandas.Series
+
+        See Also
+        --------
+        pandas.Series.any: Return whether any element is True.
+        all: Return True if all elements are True.
+        """
         if axis in [0, 'index']:
             return self._data.apply(any)
         elif axis in [1, 'columns']:
@@ -584,21 +727,15 @@ class DictOfSeries(_DiosBase):
         return ~ self.isempty()
 
     def isdata(self):
-        """ Alias for ``DictOfSeries.notna(drop_empty=True)``. """
+        """ Alias for ``notna(drop_empty=True)``. """
         return self.notna(drop_empty=True)
 
     def isnull(self, drop_empty=False):
-        """ Alias for `isna()`
-
-        See Also
-        --------
-            isna : some foo
-
-        """
+        """ Alias for ``isna()`` """
         return self.isna(drop_empty=drop_empty)
 
     def notnull(self, drop_empty=False):
-        """ Alias, see ``DictOfSeries.notna``. """
+        """ Alias, see ``notna()``. """
         return self.notna(drop_empty=drop_empty)
 
     # ----------------------------------------------------------------------
diff --git a/dox/doc_indexing.md b/dox/doc_indexing.md
index 9a9f830..fc430ed 100644
--- a/dox/doc_indexing.md
+++ b/dox/doc_indexing.md
@@ -101,15 +101,15 @@ each column separately.
 So maybe a first example gives an rough idea:
 ```
 >>> s = pd.Series([11] * 4 )
->>> d = DictOfSeries(dict(a=s[:2]*6, b=s[2:4]*7, c=s[:2]*8, d=s[1:3]*9))
->>> d
+>>> di = DictOfSeries(dict(a=s[:2]*6, b=s[2:4]*7, c=s[:2]*8, d=s[1:3]*9))
+>>> di
     a |     b |     c |     d | 
 ===== | ===== | ===== | ===== | 
 0  66 | 2  77 | 0  88 | 1  99 | 
 1  66 | 3  77 | 1  88 | 2  99 | 
 
 
->>> d.aloc[[1,2], ['a', 'b', 'd', 'x']]
+>>> di.aloc[[1,2], ['a', 'b', 'd', 'x']]
     a |     b |     d | 
 ===== | ===== | ===== | 
 1  66 | 2  77 | 1  99 | 
@@ -201,18 +201,18 @@ Example dios
 The dios used in the examples, unless stated otherwise:
 
 ``` 
-# generate dict
+# generate example DictOfSeries
 >>> sa = pd.Series(range(0, 70, 7))
 >>> sb = pd.Series(range(5, 15, 1))
 >>> sc = pd.Series(range(7, 107, 10))
 >>> sd = pd.Series(range(0, 10, 1))
 >>> for i, s in enumerate([sa,sb,sc,sd]): s.index += i*2
->>> d = DictOfSeries(dict(a=sa, b=sb, c=sc, d=sd))[:5]
+>>> di = DictOfSeries(dict(a=sa, b=sb, c=sc, d=sd))[:5]
 ```
 
 Looks like so:
 ```
->>> d
+>>> di
     a |    b |     c |     d | 
 ===== | ==== | ===== | ===== | 
 0   0 | 2  5 | 4   7 | 6   0 | 
@@ -230,7 +230,7 @@ The underling pandas.Series is returned, if the key exist.
 Otherwise a empty pandas.Series with `dtype=object` is returned.
 
 ```
->>> d.aloc[:, 'a']
+>>> di.aloc[:, 'a']
 0     0
 1     7
 2    14
@@ -238,7 +238,7 @@ Otherwise a empty pandas.Series with `dtype=object` is returned.
 4    28
 Name: a, dtype: int64
 
->>> d.aloc[:, 'x']
+>>> di.aloc[:, 'x']
 Series([], dtype: object)
 ```
 
@@ -250,7 +250,7 @@ A dios is returned, with a subset of the existing columns.
 If no key is present a empty dios is returned. 
 
 ```
->>> d.aloc[:, ['c', 99, None, 'a', 'x', 'y']]
+>>> di.aloc[:, ['c', 99, None, 'a', 'x', 'y']]
     a |     c | 
 ===== | ===== | 
 0   0 | 4   7 | 
@@ -259,7 +259,7 @@ If no key is present a empty dios is returned.
 3  21 | 7  37 | 
 4  28 | 8  47 | 
 
->>> d.aloc[:, ['x', 'y']]
+>>> di.aloc[:, ['x', 'y']]
 Empty DictOfSeries
 Columns: []
 
@@ -298,16 +298,16 @@ For scalar and array-like indexer with label values, the keys are handled gracef
 array-like column indexers.
 
 ``` 
->>> d.aloc[1]
+>>> di.aloc[1]
    a |       b |       c |       d | 
 ==== | ======= | ======= | ======= | 
 1  7 | no data | no data | no data | 
 
->>> d.aloc[99]
+>>> di.aloc[99]
 Empty DictOfSeries
 Columns: ['a', 'b', 'c', 'd']
 
->>> d.aloc[[3,6,7,18]]
+>>> di.aloc[[3,6,7,18]]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 3  21 | 3  6 | 6  27 | 6  0 | 
@@ -316,7 +316,7 @@ Columns: ['a', 'b', 'c', 'd']
 
 The length of columns can differ:
 ``` 
->>> d.aloc[[3,6,7,18]].aloc[[3,6]]
+>>> di.aloc[[3,6,7,18]].aloc[[3,6]]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 3  21 | 3  6 | 6  27 | 6  0 | 
@@ -329,7 +329,7 @@ Boolean array-likes as row indexer
 For array-like indexer that hold boolean values, the length of the indexer and
 the length of all column(s) to index must match.
 ``` 
->>> d.aloc[[True,False,False,True,False]]
+>>> di.aloc[[True,False,False,True,False]]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 0   0 | 2  5 | 4   7 | 6  0 | 
@@ -337,7 +337,7 @@ the length of all column(s) to index must match.
 ```
 If the length does not match a `IndexError` is raised:
 ```
->>> d.aloc[[True,False,False]]
+>>> di.aloc[[True,False,False]]
 Traceback (most recent call last):
   ...
   IndexError: failed for column a: Boolean index has wrong length: 3 instead of 5
@@ -365,7 +365,7 @@ When using a pandas.Series as row indexer with `aloc`, all its magic comes to li
 The index of the given series align itself with the index of each column separately and is this way used as a filter.
 
 ```
->>> s = d['b'] + 100
+>>> s = di['b'] + 100
 >>> s
 2    105
 3    106
@@ -374,7 +374,7 @@ The index of the given series align itself with the index of each column separat
 6    109
 Name: b, dtype: int64
 
->>> d.aloc[s]
+>>> di.aloc[s]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 2  14 | 2  5 | 4   7 | 6  0 | 
@@ -393,7 +393,7 @@ The series align the same way as explained above, but additional only the `True`
 Thus `False`-values are treated like missing indices. The behavior here is analogous to `s1.loc[s2[s2].index]`.
 
 ``` 
->>> boolseries = d['b'] > 6
+>>> boolseries = di['b'] > 6
 >>> boolseries
 2    False
 3    False
@@ -402,7 +402,7 @@ Thus `False`-values are treated like missing indices. The behavior here is analo
 6     True
 Name: b, dtype: bool
 
->>> d.aloc[boolseries]
+>>> di.aloc[boolseries]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 4  28 | 4  7 | 4   7 | 6  0 | 
@@ -414,14 +414,14 @@ To evaluate boolean values is a very handy feature, as it can easily used with m
 nicely with writing those as one-liner:
 
 ``` 
->>> d.aloc[d['b'] > 6]
+>>> di.aloc[di['b'] > 6]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 4  28 | 4  7 | 4   7 | 6  0 | 
       | 5  8 | 5  17 |      | 
       | 6  9 | 6  27 |      | 
 
->>> d.aloc[(d['a'] > 6) & (d['b'] > 6)]
+>>> di.aloc[(di['a'] > 6) & (di['b'] > 6)]
     a |    b |    c |       d | 
 ===== | ==== | ==== | ======= | 
 4  28 | 4  7 | 4  7 | no data | 
@@ -430,7 +430,7 @@ nicely with writing those as one-liner:
 
 >**Note:**
 >
->Nevertheless, something like `d.aloc[d['a'] > d['b']]` do not work, because the comparison fails, 
+>Nevertheless, something like `di.aloc[di['a'] > di['b']]` does not work, because the comparison fails, 
 >as long as the two series objects not have the same index. But maybe one want to checkout 
 >[DictOfSeries.index_of()](/docs/methods_and_properties.md#diosdictofseriesindex_of).
 
@@ -453,7 +453,7 @@ Every inner list-like item is applied as row indexer to the according column.
 3  21 | 5  8 | 7  37 | 9   3 | 
 4  28 | 6  9 | 8  47 | 10  4 | 
 
->>> d.aloc[ [d['a'], [True,False,True,False,False], [], [7,8,10]] ]
+>>> di.aloc[ [di['a'], [True,False,True,False,False], [], [7,8,10]] ]
     a |    b |       c |     d | 
 ===== | ==== | ======= | ===== | 
 0   0 | 2  5 | no data | 7   1 | 
@@ -463,7 +463,7 @@ Every inner list-like item is applied as row indexer to the according column.
 4  28 |      |         |       | 
 
 >>> ar = np.array([2,3])
->>> d.aloc[[ar, ar+1, ar+2, ar+3]]
+>>> di.aloc[[ar, ar+1, ar+2, ar+3]]
     a |    b |     c |    d | 
 ===== | ==== | ===== | ==== | 
 2  14 | 3  6 | 4   7 | 6  0 | 
@@ -474,7 +474,7 @@ Even this looks like a 2D-indexer, that are explained in the next section, it is
 In contrast to the 2D-indexer, we also can provide a column key, to pre-filter the columns.
 
 ```
->>> d.aloc[[ar, ar+1, ar+3], ['a','b','d']]
+>>> di.aloc[[ar, ar+1, ar+3], ['a','b','d']]
     a |    b |    d | 
 ===== | ==== | ==== | 
 2  14 | 3  6 | 6  0 | 
-- 
GitLab