diff --git a/Readme.md b/Readme.md index 2de189d07446ab7da87597ac8d7e66efb0d78234..b4c1c0ef3c3d4e383abf0196d8c69c6b540705e6 100644 --- a/Readme.md +++ b/Readme.md @@ -1,7 +1,7 @@ -DictOfSeries (may soon renamed) -=================================== +DictOfSeries +============ -Is a pandas.Series of pandas.Series objects which aims to behave as similar as possible to pandas.DataFrame. +DictOfSeries is a pandas.Series of pandas.Series objects which aims to behave as similar as possible to pandas.DataFrame. Nomenclature diff --git a/dios/dios.py b/dios/dios.py index 1484e00008fd11b602259af860d5b54cdb95a2ac..5742cd261de26e7e28d2bf2f641624edac2b1e2c 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -159,7 +159,7 @@ class DictOfSeries(_DiosBase): See Also -------- - DataFrame.iteritems : Iterate over (column name, Series) pairs. + DictOfSeries.iteritems : Iterate over (column name, Series) pairs. """ # todo: 2nd posibility for fill_value=Any, squeeze=False @@ -588,7 +588,13 @@ class DictOfSeries(_DiosBase): return self.notna(drop_empty=True) def isnull(self, drop_empty=False): - """ Alias, see :ref:`DictOfSeries.isna`. 
""" + """ Alias for `isna()` + + See Also + -------- + isna : some foo + + """ return self.isna(drop_empty=drop_empty) def notnull(self, drop_empty=False): diff --git a/dox/.gitignore b/dox/.gitignore index e2297b00cfc1e5f00dc691fa585249ebdca05b76..59b03a42eb191d1bf06f45dd6cfb3be394bbe06e 100644 --- a/dox/.gitignore +++ b/dox/.gitignore @@ -1,11 +1,8 @@ # ignore everything -* +_api +_build +_static +*.automodsumm -# except: -!.gitignore -!conf.py -!index.rst -!Makefile -!requirements_sphinx.txt diff --git a/dox/Makefile b/dox/Makefile index d4bb2cbb9eddb1bb1b4f366623044af8e4830919..90e4cc6d6ce8338a0d7eacf2f93e96e8e3f2e6a2 100644 --- a/dox/Makefile +++ b/dox/Makefile @@ -12,7 +12,12 @@ BUILDDIR = _build help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -.PHONY: help Makefile +.PHONY: help Makefile clean + +clean: + rm -rf _build _static _api + rm -f *.automodsumm + mkdir _static # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). diff --git a/dox/autodoc_diosapi.rst b/dox/autodoc_diosapi.rst new file mode 100644 index 0000000000000000000000000000000000000000..976d52c43c1ef9d02d9a55855808ff08cd2a60cd --- /dev/null +++ b/dox/autodoc_diosapi.rst @@ -0,0 +1,11 @@ + +API +==== + +.. automodapi:: dios + :include-all-objects: + :no-heading: + + + + diff --git a/dox/conf.py b/dox/conf.py index 0aeeaf93dd582e7a6b53981538bf21be3fc8d1e8..11a82cc5cfeb3bd0583931a2ea9ef96ed57d3669 100644 --- a/dox/conf.py +++ b/dox/conf.py @@ -28,24 +28,34 @@ author = 'Bert Palm' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - # "sphinx.ext.autodoc", - # "sphinx.ext.autosummary", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", # "sphinx.ext.doctest", # "sphinx.ext.extlinks", # "sphinx.ext.todo", + # "sphinx.ext.intersphinx", + # "sphinx.ext.coverage", + # "sphinx.ext.mathjax", + # "sphinx.ext.ifconfig", + + # link source code + "sphinx.ext.viewcode", + # add support for NumPy style docstrings "sphinx.ext.napoleon", + + # doc the whole module 'sphinx_automodapi.automodapi', + 'sphinxcontrib.fulltoc', + + # markdown sources support + 'recommonmark', ] numpydoc_show_class_members = False automodsumm_inherited_members = True automodapi_inheritance_diagram = False -# autodoc_default_options = { -# 'undoc-members': True, -# 'exclude-members': '__weakref__', -# 'inherited-members': True, -# 'show-inheritance': True -# } +automodapi_toctreedirnm = '_api' +# automodsumm_writereprocessed = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -55,6 +65,8 @@ templates_path = ['_templates'] # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +source_suffix = ['.rst', '.md'] + # -- Options for HTML output ------------------------------------------------- @@ -63,8 +75,12 @@ pygments_style = "sphinx" # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-# html_theme = "pydata_sphinx_theme" html_theme = "nature" + +# use pandas theme +# html_theme = "pydata_sphinx_theme" + + # html_theme_options = { # } diff --git a/dox/cookbook.md b/dox/cookbook.md new file mode 100644 index 0000000000000000000000000000000000000000..f32a52968935c927fca45591a1004663e19e37b8 --- /dev/null +++ b/dox/cookbook.md @@ -0,0 +1,25 @@ +Cookbook +========= + +Recipes +------- +- select common rows from all columns +- align columns to an other column +- align columns to a given index +- align dios with dios +- get/set values by condition +- apply a value to multiple columns +- [Broadcast array-likes to multiple columns](#broadcast-array-likes-to-multiple-columns) +- apply a array-like value to multiple columns +- nan-policy - mask vs. drop values, when nan's are inserted (mv to Readme ??) +- itype - when to use, pitfalls and best-practise +- changing the index of series' in dios (one, some, all) +- changing the dtype of series' in dios (one, some, all) +- changing properties of series' in dios (one, some, all) + +**T_O_D_O** + + +Broadcast array-likes to multiple columns +----------------------------------------- +**T_O_D_O** diff --git a/dox/genindex.rst b/dox/genindex.rst new file mode 100644 index 0000000000000000000000000000000000000000..1f62f6e9ecdf2abbb93ecc808c98f21d7afa99a6 --- /dev/null +++ b/dox/genindex.rst @@ -0,0 +1,5 @@ + +# dummy file to be able to link to index + +Index +===== \ No newline at end of file diff --git a/dox/index.rst b/dox/index.rst index 0d8649b7fcb98d56a6c2c5f1ce9cf6d355966c89..a08e96ecf960209ee47968b2d103f7a40abb1937 100644 --- a/dox/index.rst +++ b/dox/index.rst @@ -3,14 +3,36 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. +Dios Docs +========= -Indices and tables -================== +.. 
currentmodule:: dios -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +The whole package :mod:`dios` is mainly a container for +the class :class:`dios.DictOfSeries`. See +.. toctree:: + + dios.DictOfSeries <_api/dios.DictOfSeries> + +For some recipes and advanced usage see: + +.. toctree:: + + cookbook + indexing_help + +For full module api documentation see: + +.. toctree:: + :maxdepth: 2 + + sphinx/autodoc_diosapi + +or browse the Index.. + +.. toctree:: + :hidden: + + genindex -.. automodapi:: dios - :include-all-objects: diff --git a/dox/indexing_help.md b/dox/indexing_help.md new file mode 100644 index 0000000000000000000000000000000000000000..cd8f4878b2d66a3427613b126a66f1f392deefe3 --- /dev/null +++ b/dox/indexing_help.md @@ -0,0 +1,411 @@ +Indexing with .aloc +=================== + +Purpose +-------- +- select gracefully, so that rows or columns that are given as indexer but don't exist do not raise an error +- align series/dios-indexer +- setting multiple columns at once with a list-like value + +Overview +-------- +`aloc` is *called* like `loc`, with a single key, that acts as row indexer `aloc[rowkey]` or with a tuple of +row indexer and column indexer `aloc[rowkey, columnkey]`. Also 2D-indexer (like dios or df) can be given, but +only as a single key, like `.aloc[2D-indexer]` or with the special column key `...`, +the ellipsis (`.aloc[2D-indexer, ...]`). The ellipsis may change how the 2D-indexer is +interpreted, but this will be explained [later](#the-power-of-2d-indexer) in detail. + +If a normal (non-2D) row indexer is given, but no column indexer, the latter defaults to `:` aka. +`slice(None)`, so `.aloc[row-indexer]` becomes `.aloc[row-indexer, :]`, which means that all columns are used. +In general, a normal row-indexer is applied to every column that was chosen by the column indexer, but for +each column separately. 
+ +So maybe a first example gives a rough idea: +``` +>> d + a | b | c | d | +===== | ===== | ===== | ===== | +0 66 | 2 77 | 0 88 | 1 99 | +1 66 | 3 77 | 1 88 | 2 99 | + + +>> d.aloc[[1,2], ['a', 'b', 'd', 'x']] + a | b | d | +===== | ===== | ===== | +1 66 | 2 77 | 1 99 | + | | 2 99 | +``` + +The return type +---------------- + +Unlike the other two indexer methods `loc` and `iloc`, it is not possible to get a single item returned; +the return type is either a pandas.Series, iff the column-indexer is a single key (eg. `'a'`) or a dios, iff not. +The row-indexer does not play any role in the return type choice. + +> **Note for the curious:** +> +> This is because a scalar (`.aloc[key]`) is translated to `.loc[key:key]` under the hood. + +Indexer types +------------- +In the following, the `.aloc`-specific indexers are listed. Any indexer that is not listed below (slice, boolean lists, ...), +but is known to work with `.loc`, is treated as if it were passed to `.loc`, which is what actually happens under the hood. + +Some indexers are linked to later sections, where a more detailed explanation and examples are given. + +*special [Column indexer](#select-columns-gracefully) are :* +- *list / array-like* (or any iterable object): Only labels that are present in the columns are used, others are + ignored. +- *pd.Series* : `.values` are taken from series and handled like a *list*. +- *scalar* (or any hashable obj) : Select a single column, if label is present, otherwise nothing. + + +*special [Row indexer](#selecting-rows-a-smart-way) are :* +- *list / array-like* (or any iterable object): Only rows whose indices are present in the index of the column are + used, others are ignored. A dios is returned. +- *scalar* (or any hashable obj) : Select a single row from a column, if the value is present in the index of + the column, otherwise nothing is selected. [1] +- *pd.Series* : align the index from the given Series with the column, which means only common indices are used. 
The + actual values of the series are ignored(!). +- *boolean pd.Series* : like *pd.Series* but only True values are evaluated. + False values are equivalent to missing indices. To treat a boolean series as a *normal* indexer series, as described + above, one can use `.aloc(usebool=False)[boolean pd.Series]`. + + +*special [2D-indexer](#the-power-of-2d-indexer) are :* +- `.aloc[boolean dios-like]` : works the same as `di[boolean dios-like]` (see there). + Brief: full align, select items, where the index is present and the value is True. +- `.aloc[dios-like, ...]` (with Ellipsis) : Align in columns and rows, ignore its values. Per common column, + the common indices are selected. The ellipsis forces `aloc` to ignore the values, so a boolean dios could be + treated as a non-boolean. Alternatively `.aloc(usebool=False)[boolean dios-like]` could be used.[2] +- `.aloc[nested list-like]` : The inner lists are used as `aloc`-*list*-row-indexer (see there) on all columns. + One list for one column, which implies that the outer list has the same length as the number of columns. + +*special handling of 1D-**values*** + +Values that are list- or array-like, which includes pd.Series, are set on all selected columns. A pd.Series aligns +like `s1.loc[:] = s2` does. See also the [cookbook](/docs/cookbook.md#broadcast-array-likes-to-multiple-columns). 
+ + +Indexer Table +------------- + +| example | type | on | like `.loc` | handling | conditions / hints | link | +| ------- | ---- | --- | ----------- | -------- | ------------------ | ---- | +|[Column indexer](#select-columns-gracefully)| +| `.aloc[any, 'a']` | scalar | columns |no | select graceful | - | [cols](#select-columns-gracefully)| +| `.aloc[any, 'b':'z']` | slice | columns |yes| slice | - | [cols](#select-columns-gracefully)| +| `.aloc[any, ['a','c']]` | list-like | columns |no | filter graceful | - | [cols](#select-columns-gracefully)| +| `.aloc[any [True,False]]` | bool list-like | columns |yes| take `True`'s | length must match nr of columns | [cols](#select-columns-gracefully)| +| `.aloc[any, s]` | pandas.Series | columns |no | like list, | only `s.values` are evaluated | [cols](#select-columns-gracefully)| +| `.aloc[any, bs]` | bool pandas.Series | columns |yes| like bool-list | see there | [cols](#select-columns-gracefully)| +|[Row indexer](#selecting-rows-a-smart-way)| +| `.aloc[7, any]` | scalar | rows |no | translate to `.loc[key:key]` | - | [rows](#selecting-rows-a-smart-way) | +| `.aloc[3:42, any]` | slice | rows |yes| slice | - | | +| `.aloc[[1,2,24], any]` | list-like | rows |no | filter graceful | - | [rows](#selecting-rows-a-smart-way) | +| `.aloc[[True,False], any]` | bool list-like | rows |yes| take `True`'s | length must match nr of (all selected) columns | [blist](#boolean-array-likes-as-row-indexer)| +| `.aloc[s, any]` | pandas.Series | rows |no | like `.loc[s.index]` | - | [ser](#pandasseries-and-boolean-pandasseries-as-row-indexer) | +| `.aloc[bs, any]` | bool pandas.Series | rows |no | align + just take `True`'s | evaluate `usebool`-keyword | [ser](#pandasseries-and-boolean-pandasseries-as-row-indexer)| +| `.aloc[[[s],[1,2,3]], any]` | nested list-like | both | ? 
| one row-indexer per column | outer length must match nr of (selected) columns | [nlist](#nested-lists-as-row-indexer) | +|[2D-indexer](#the-power-of-2d-indexer)| +| `.aloc[di]` | dios-like | both |no | full align | - | | +| `.aloc[di, ...]` | dios-like | both |no | full align | ellipsis has no effect | | +| `.aloc[di>5]` | bool dios-like | both |no | full align + take `True`'s | evaluate `usebool`-keyword | | +| `.aloc[di>5, ...]` | (bool) dios-like | both |no | full align, **no** bool evaluation | - | | + +Example dios +============ + +The dios used in the examples, unless stated otherwise: + +``` +>>> d + a | b | c | d | +===== | ==== | ===== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | +``` + +Select columns, gracefully +=========================== + +**Single columns** + +Use `.aloc[:, key]` to select a single column gracefully. +The underling pandas Series is returned, if the key exist. +Otherwise a empty pd.Series with `dtype=object` is returned. + +``` +>>> d.aloc[:, 'a'] +0 0 +1 7 +2 14 +3 21 +4 28 +Name: a, dtype: int64 + +>>> d.aloc[:, 'x'] +Series([], dtype: object) +``` + + +**Multiple columns** + +Just like selecting *single columns gracefully*, but with a array-like indexer. +A dios is returned, with a subset of the existing columns. +If no key is present a empty dios is returned. 
+ +``` +>>> d.aloc[:, ['c', 99, None, 'a', 'x', 'y']] + a | c | +===== | ===== | +0 0 | 4 7 | +1 7 | 5 17 | +2 14 | 6 27 | +3 21 | 7 37 | +4 28 | 8 47 | + +>>> d.aloc[:, ['x', 'y']] +Empty DictOfSeries +Columns: [] + +s = pd.Series(dict(a='a', b='x', c='c', foo='d')) +d.aloc[:, s] + a | c | d | +===== | ===== | ===== | +0 0 | 4 7 | 6 0 | +1 7 | 5 17 | 7 1 | +2 14 | 6 27 | 8 2 | +3 21 | 7 37 | 9 3 | +4 28 | 8 47 | 10 4 | +``` + +**Boolean indexing, indexing with pd.Series and slice indexer** + +**Boolean indexer**, for example `[True, 'False', 'True', 'False']`, must have the same length than the number +of columns, then only columns, where the indexer has a `True` value are selected. + +If the key is a **pandas.Series**, its *values* are used for indexing, especially the Series's index is ignored. If a +series has boolean values its treated like a boolean indexer, otherwise its treated as a array-like indexer. + +A easy way to select all columns, is, to use null-**slice**es, like `.aloc[:,:]` or even simpler `.aloc[:]`. +This is just like one would do, with `loc` or `iloc`. Of course slicing with boundaries also work, +eg `.loc[:, 'a':'f']`. + +>**See also** +> - [pandas slicing ranges](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges) +> - [pandas boolean indexing](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing) + + +Selecting Rows a smart way +========================== + +For scalar and array-like indexer with label values, the keys are handled gracefully, just like with +array-like column indexers. 
+ +``` +>>> d.aloc[1] + a | b | c | d | +==== | ======= | ======= | ======= | +1 7 | no data | no data | no data | + +>>> d.aloc[99] +Empty DictOfSeries +Columns: ['a', 'b', 'c', 'd'] + +>>> d.aloc[[3,6,7,18]] + a | b | c | d | +===== | ==== | ===== | ==== | +3 21 | 3 6 | 6 27 | 6 0 | + | 6 9 | 7 37 | 7 1 | +``` + +The length of columns can differ: +``` +>>> d.aloc[[3,6,7,18]].aloc[[3,6]] + a | b | c | d | +===== | ==== | ===== | ==== | +3 21 | 3 6 | 6 27 | 6 0 | + | 6 9 | | | +``` + +Boolean array-likes as row indexer +--------------------------------- + +For array-like indexer that hold boolean values, the length of the indexer and +the length of all column(s) to index must match. +``` +>>> d.aloc[[True,False,False,True,False]] + a | b | c | d | +===== | ==== | ===== | ==== | +0 0 | 2 5 | 4 7 | 6 0 | +3 21 | 5 8 | 7 37 | 9 3 | +``` +If the length does not match a `IndexError` is raised: +``` +>>> d.aloc[[True,False,False]] +Traceback (most recent call last): + ... + f"Boolean index has wrong length: " +IndexError: failed for column a: Boolean index has wrong length: 3 instead of 5 +``` + +This can be tricky, especially if columns have different length: +``` +>>> difflen + a | b | c | d | +===== | ==== | ===== | ==== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 6 27 | 7 1 | +2 14 | 4 7 | | 8 2 | + +>>> difflen.aloc[[False,True,False]] +Traceback (most recent call last): + ... + f"Boolean index has wrong length: " +IndexError: Boolean index has wrong length: 3 instead of 2 +``` + +pandas.Series and boolean pandas.Series as row indexer +------------------------------------------------------ + +When using a pandas.Series as row indexer with `aloc`, all its magic comes to light. +The index of the given series align itself with the index of each column separately and is this way used as a filter. 
+ +``` +>>> s = d['b'] + 100 +>>> s +2 105 +3 106 +4 107 +5 108 +6 109 +Name: b, dtype: int64 + +>>> d.aloc[s] + a | b | c | d | +===== | ==== | ===== | ==== | +2 14 | 2 5 | 4 7 | 6 0 | +3 21 | 3 6 | 5 17 | | +4 28 | 4 7 | 6 27 | | + | 5 8 | | | + | 6 9 | | | +``` + +As seen in the example above the series' values are ignored completely. The functionality +is similar to `s1.loc[s2.index]`, with `s1` and `s2` are pandas.Series's, and s2 is the indexer and s1 is one column +after the other. + +If the indexer series holds boolean values, these are **not** ignored. +The series align the same way as explained above, but additional only the `True` values are evaluated. +Thus `False`-values are treated like missing indices. The behavior here is analogous to `s1.loc[s2[s2].index]`. + +``` +>>> boolseries = d['b'] > 6 +>>> boolseries +2 False +3 False +4 True +5 True +6 True +Name: b, dtype: bool + +>>> d.aloc[boolseries] + a | b | c | d | +===== | ==== | ===== | ==== | +4 28 | 4 7 | 4 7 | 6 0 | + | 5 8 | 5 17 | | + | 6 9 | 6 27 | | +``` + +To evaluate boolean values is a very handy feature, as it can easily used with multiple conditions and also fits +nicely with writing those as one-liner: + +``` +>>> d.aloc[d['b'] > 6] + a | b | c | d | +===== | ==== | ===== | ==== | +4 28 | 4 7 | 4 7 | 6 0 | + | 5 8 | 5 17 | | + | 6 9 | 6 27 | | + +>>> d.aloc[(d['a'] > 6) & (d['b'] > 6)] + a | b | c | d | +===== | ==== | ==== | ======= | +4 28 | 4 7 | 4 7 | no data | +``` + + +>**Note:** +> +>Nevertheless, something like `d.aloc[d['a'] > d['b']]` do not work, because the comparison fails, +>as long as the two series objects not have the same index. But maybe one want to checkout +>[DictOfSeries.index_of()](/docs/methods_and_properties.md#diosdictofseriesindex_of). + + +Nested-lists as row indexer +--------------------------- + +It is possible to pass different array-like indexer to different columns, by using nested lists as indexer. 
+The outer list's length must match the number of columns of the dios. The items of the outer list, all must be +array-like and not further nested. For example list, pandas.Series, boolean lists or pandas.Series, numpy.arrays... +Every inner list-like item is applied as row indexer to the according column. + +``` +>>> d + a | b | c | d | +===== | ==== | ===== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | + +>>> d.aloc[ [d['a'], [True,False,True,False,False], [], [7,8,10]] ] + a | b | c | d | +===== | ==== | ======= | ===== | +0 0 | 2 5 | no data | 7 1 | +1 7 | 4 7 | | 8 2 | +2 14 | | | 10 4 | +3 21 | | | | +4 28 | | | | + +>>> ar = np.array([2,3]) +>>> d.aloc[[ar, ar+1, ar+2, ar+3]] + a | b | c | d | +===== | ==== | ===== | ==== | +2 14 | 3 6 | 4 7 | 6 0 | +3 21 | 4 7 | 5 17 | | +``` + +Even this looks like a 2D-indexer, that are explained in the next section, it is not. +In contrast to the 2D-indexer, we also can provide a column key, to pre-filter the columns. + +``` +>>> d.aloc[[ar, ar+1, ar+3], ['a','b','d']] + a | b | d | +===== | ==== | ==== | +2 14 | 3 6 | 6 0 | +3 21 | 4 7 | | +``` + + + +The power of 2D-indexer +======================= + +Overview: + +| | | +| ------ | ------ | +| `.aloc[bool-dios]` | 1. align columns, 2. align rows, 3. just take `True`'s -- [1] | +| `.aloc[dios, ...]` (use Ellipsis) | 1. align columns, 2. align rows, (3.) ignore values -- [1] | +[1] evaluate `usebool`-keyword + + +**T_O_D_O** + diff --git a/dox/methods_and_properties.md b/dox/methods_and_properties.md new file mode 100644 index 0000000000000000000000000000000000000000..c0a374b1ca155b5426e7d03b2b2e3cc3c481ba5b --- /dev/null +++ b/dox/methods_and_properties.md @@ -0,0 +1,253 @@ +Dios Methods and Properies +========================== + +Methods +------- + +Brief + - `copy(deep=True)` : Return a copy. 
See also [pandas.DataFrame.copy]( + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.copy.html) + - [`copy_empty()`](#diosdictofseriescopy_empty) : Return a new DictOfSeries object, with the same properties as the original. + - `all(axis=0)` : Return whether all elements are True, potentially over an axis. See also [pandas.DataFrame.all]( + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.all.html) + - `any(axis=0)` : Return whether any element is True, potentially over an axis. See also [pandas.DataFrame.any]( + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.any.html) + - `squeeze(axis=None)` : Squeeze 1-dimensional axis objects into scalars. + See also [pandas.DataFrame.squeeze](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.squeeze.html) + - [`to_df()`](#diosdictofseriesto_df) : Transform the Dios to a pandas.DataFrame + - `to_string(kwargs)` : Return a string representation of the Dios. + - [`apply()`](#diosdictofseriesapply) : Apply the given function to every column in the dios. + - `astype()` : Cast the data to the given data type. + - `isin()` : Return a boolean dios that indicates if the corresponding value is in the given array-like + - `isna()` : Return a boolean array that is `True` if the value is a NaN-value + - `notna()` : inverse of `isna()` + - `dropna()` : drop all NaN-values + - [`index_of()`](#diosdictofseriesindex_of): Return a single(!) Index that is constructed from all the indexes of the columns. + - `len(Dios)` : return the number of columns the dios has. + + +Properties +---------- +- `columns` : Column index +- `indexes` : Series of indexes of columns +- `lengths` : Series of lengths of columns +- `values` : An array of length of the columns, with arrays of values, as sub-arrays +- `dtypes` : Series of dtypes of columns +- `itype` : The index type the Dios accepts +- `empty` : True if the dios holds no data. 
Nevertheless the dios can have empty columns. + + +dios.DictOfSeries.copy_empty +---------------------------- + +`DictOfSeries.copy_empty(columns=True)` + + Return a new DictOfSeries object, with the same properties as the original. + If `columns=True`, the copy will have the same columns as the original, but they are empty. + +**Parameter**: + - **columns : bool, default True** + + If True, the copy keeps the (empty) columns of the original; if False, the copy has no columns. + +**Examples** +``` +>>> d + a | b | c | d | +===== | ==== | ===== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | + +>>> d.copy_empty() +Empty DictOfSeries +Columns: ['a', 'b', 'c', 'd'] + +>>> d.copy_empty(columns=False) +Empty DictOfSeries +Columns: [] +``` + + +dios.DictOfSeries.to_df +----------------------- + +`DictOfSeries.to_df()` + +Transform the Dios to a pandas.DataFrame. Missing common indices are filled with NaN's. + +**Examples** +``` +>>> d + a | b | c | d | +===== | ==== | ===== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | + +>>> d.to_df() +columns a b c d +0 0.0 NaN NaN NaN +1 7.0 NaN NaN NaN +2 14.0 5.0 NaN NaN +3 21.0 6.0 NaN NaN +4 28.0 7.0 7.0 NaN +5 NaN 8.0 17.0 NaN +6 NaN 9.0 27.0 0.0 +7 NaN NaN 37.0 1.0 +8 NaN NaN 47.0 2.0 +9 NaN NaN NaN 3.0 +10 NaN NaN NaN 4.0 +``` + + +dios.DictOfSeries.apply +----------------------- + +`DictOfSeries.apply(func, axis=0, raw=False, args=(), **kwds)` + +Apply the given function to every column in the dios. This is a very powerful tool to apply functions that +are defined on pandas.Series to multiple columns. + +**Parameters:** + - **func : function** + + Function to apply to each column or row. + + - **axis : {0 or ‘index’, 1 or ‘columns’}, default 0** + + Axis along which the function is applied: + - 0 or ‘index’: apply function to each column. + - ~~1 or ‘columns’: apply function to each row~~. 
**not implemented** + +- **raw : bool, default False** + + Determines if row or column is passed as a Series or ndarray object: + - `False` : passes each row or column as a Series to the function. + - `True` : the passed function will receive ndarray objects instead. If you are just applying a NumPy reduction function this will achieve much better performance. + +- **args : tuple** + + Positional arguments to pass to func in addition to the array/series. + +- ****kwds** + + Additional keyword arguments to pass as keywords arguments to func. + +**Returns: Series or DataFrame** + + - Result of applying func along the given axis of the DataFrame. + + +>**See also:** +> +>[pandas.DataFrame.apply](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html) + + +**Examples** + +``` +>>> d + a | b | c | d | +===== | ==== | ===== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | + +>>> d.apply(max) +columns +a 28 +b 9 +c 47 +d 4 +dtype: int64 + +>>> d.apply(pd.Series.count) +columns +a 5 +b 5 +c 5 +d 5 +dtype: int64 + +>>> d.apply(pd.Series.value_counts, normalize=True) + a | b | c | d | +======= | ====== | ======= | ====== | +7 0.2 | 7 0.2 | 7 0.2 | 4 0.2 | +14 0.2 | 6 0.2 | 37 0.2 | 3 0.2 | +21 0.2 | 5 0.2 | 47 0.2 | 2 0.2 | +28 0.2 | 9 0.2 | 27 0.2 | 1 0.2 | +0 0.2 | 8 0.2 | 17 0.2 | 0 0.2 | + +>>> d.apply(lambda s : 'high' if max(s) > 10 else 'low') +columns +a high +b low +c high +d low +dtype: object + +>>> d.apply(lambda s : ('high', max(s)) if min(s) > (max(s)//2) else ('low',max(s))) + a | b | c | d | +====== | ======= | ====== | ====== | +0 low | 0 high | 0 low | 0 low | +1 28 | 1 9 | 1 47 | 1 4 | +``` + + +dios.DictOfSeries.index_of +-------------------------- +`DictOfSeries.index_of(method='union)` + +Aggregate indexes of all columns to one index by a given method. 
+ + +**Parameters:** + - **method : str, default "union"** + + Aggregation method + - 'all' : get all indices from all columns + - 'shared' : get indices that are present in every columns + - 'uniques' : get indices that are only present in a single column + - 'union' : alias for 'all' + - 'intersection' : alias for 'shared' + - 'non-uniques' : get indices that are present in more than one column + + - **axis : {0 or ‘index’, 1 or ‘columns’}, default 0** + +**Returns: pandas.Index** + +The aggregated Index + + +**Examples** +``` +>>> di + a | b | c | d | +===== | ====== | ====== | ===== | +0 0 | 2 5 | 4 7 | 6 0 | +1 7 | 3 6 | 5 17 | 7 1 | +2 14 | 4 7 | 6 27 | 8 2 | +3 21 | 5 8 | 7 37 | 9 3 | +4 28 | 6 9 | 8 47 | 10 4 | +5 35 | 7 10 | 9 57 | 11 5 | +6 42 | 8 11 | 10 67 | 12 6 | +7 49 | 9 12 | 11 77 | 13 7 | +8 56 | 10 13 | 12 87 | 14 8 | +9 63 | 11 14 | 13 97 | 15 9 | + +>>> di.index_of() +RangeIndex(start=0, stop=16, step=1) + +>>> di.index_of("shared") +Int64Index([6, 7, 8, 9], dtype='int64') + +>>> di.index_of("uniques") +Int64Index([0, 1, 14, 15], dtype='int64') +``` diff --git a/dox/requirements_sphinx.txt b/dox/requirements_sphinx.txt index ddae004656c362376675b3b266915b3558f6d3df..aa1d9fc342ae8e97b823ca2aa59a8ec5cd510c08 100644 --- a/dox/requirements_sphinx.txt +++ b/dox/requirements_sphinx.txt @@ -3,6 +3,7 @@ attrs==19.3.0 Babel==2.8.0 certifi==2020.4.5.1 chardet==3.0.4 +commonmark==0.9.1 docutils==0.16 idna==2.9 imagesize==1.2.0 @@ -15,6 +16,7 @@ packaging==20.3 pandas==1.0.3 pluggy==0.13.1 py==1.8.1 +pydata-sphinx-theme==0.1.1 Pygments==2.6.1 pyparsing==2.4.7 pytest==5.4.1