diff --git a/Readme.md b/Readme.md index 4257c10db0607be3adf78e104aadc8e4a1dbe32e..7fe6d73cbaa39641c75024c0e026bb85aaa35b01 100644 --- a/Readme.md +++ b/Readme.md @@ -1,13 +1,110 @@ +DictOfSeries (soon renamed to SoS?) +=================================== + Features -------- - -* dictionary of Series (dios) -* fast as pd.DataFrame -* every 'column' has its own index +* almost as fast as pd.DataFrame +* every *column* has its own index * use very less memory then a disalignd pd.Dataframe -* `dios[var]` where `var` can be a list (or any iterable object) or a string -* `dios[slice]` where `slice` can be a row slicer -* `dios.loc[rowindexer]` act like `pandas.Series().loc[rowindexer]` for each series in the dios -* `dios.loc[rowindexer, columns]` like `dios.loc[rowindexer]` but for a subset of columns, which can be specified by a iterable, slice(on columns) or string -* `+,-,*,/,//,%` and `==,!=,>,>=,<=,<` and `~,&,|,^` and `is, in, len, all, any, empty, columns` are implemented -* also `pipe()` (and as alias `foreach()`) is implemented, which pass any `pandas.Series`-method to every series in the dios and return the resulting dios +* behaves much like pd.DataFrame +* additional align locator (`.aloc[]`) + + +Indexing +-------- + +- `di[]` and `di.loc[]`, `di.iloc[]` and `di.at[]`, `di.iat[]` - should behave exactly like + their counterparts from pd.DataFrame. + Most indexers are passed directly to the underlying columns-series or row-series. + +- on selecting operations, Dios simply drops rows that weren't selected, + instead of filling them with `nan`s as pd.DataFrame does. + +- on writing operations, analogous to selecting, only selected rows are changed; unselected rows keep + their value. + +- `dios[BoolDiosLike]` - like pd.DataFrame, dios accepts a boolean multiindexer (boolean pd.DataFrame + or boolean Dios). Columns and rows from the multiindexer align with the dios. + This means that only matching columns are selected/written; the same applies to rows. 
+ Nevertheless, columns that are empty after applying the indexer are preserved, with the effect + that the resulting Dios always has the same (column)-dimension as the initial Dios. + (This is exactly the same behavior as pd.DataFrame's handling of a multiindexer, + except that pd.DataFrame fills mismatching columns with NaNs.) + +- additionally, there is a `di.aloc[..]` indexing method. Unlike with `iloc` and `loc`, indexers and values + fully align if possible. This method also handles missing values gracefully. In contrast + to `di[BoolDiosLike]`, empty columns are **not** preserved on selecting. Briefly: + + Graceful handling of non-alignable indexers: + - **lists** (including non-boolean Series, only `ser.values` are used) + - as column indexer: only matching columns are used + - as row indexer: only matching rows are used in every series of the column + - **single labels** on columns or rows: used if they match + + Alignable indexers are: + - **boolean-series** (a missing index is treated like an existing `False` value) + - as **column indexer**: The index should contain column names. If the corresponding value is `True` and + the column exists, the column will be selected/written. + - as **row indexer**: The indexer will be applied on all (selected) columns. + On every column the index of the boolean-series is aligned with the index of the underlying series. + If the corresponding value is `True`, the row will be selected/written. + - **boolean-Dios**: works like `dios[BoolDiosLike]` (see above), but does not preserve empty columns on selecting. + - **pd.DataFrame**: like boolean-Dios + + Alignable values are: + - **series**: align with every column + - **Dios**: full align on columns and rows + - **pd.DataFrame**: like Dios + + +Properties +---------- +- columns +- dtype +- itype (see section Itype) +- empty + + +Methods and implied features +------- +These work mostly like the analogous methods from pd.DataFrame. 
+- copy() +- copy_empty() +- all() +- any() +- squeeze() +- to_df() +- apply() +- astype() +- memory_usage() +- `in` +- `is` +- `len(Dios)` + + +Operators and Comparators +--------- +- arithmetical: `+ - * ** // / %` and `abs()` +- boolean: `&^|~` +- comparators: `== != > >= < <=` + +Itype +----- +DictOfSeries holds multiple series, where possibly every series can have a different index length +and index type. Differing index lengths are handled with some aligning magic, or the operation simply fails if +aligning makes no sense (e.g. assigning the very same list to series of different lengths). +The bigger problem is the type of the index. If one series has an alphabetical index and another +a numeric index, selecting along columns can fail in every scenario. To keep track of the +types of index or to prohibit the inserting of a *not fitting* index type, +we introduce an `itype`. This can be set on creation of a Dios and also changed during usage. +On change of the itype, the indexes of all series in the dios are cast to a new fitting type, +if possible. Different cast mechanisms are available. + +If an itype prohibits certain types of index, but a series with such an index type is inserted, +an implicit cast is done, with or without a warning, or an error is raised. The warning/error policy +can be adjusted via global options. 
+ +Have fun :) + + + diff --git a/dios/dios.py b/dios/dios.py index 30311070d9ed969e2f8d877b021d8688074808be..74ecaf463b8191e8ae502a32e1b3bf327a626ed2 100644 --- a/dios/dios.py +++ b/dios/dios.py @@ -19,15 +19,14 @@ _is_dict_like = dcom.is_dict_like _is_number = dcom.is_number _is_hashable = dcom.is_hashable -import pandas.core.common as ccom -_is_bool_indexer = ccom.is_bool_indexer +from pandas.core.common import is_bool_indexer as _is_bool_indexer -# """ -# Unlike the example says, return lists False, not True -# >>is_iterator([1, 2, 3]) -# >>False -# """ -# from pandas.core.dtypes.common import is_iterator +""" +Unlike the example says, return lists False, not True +>>is_iterator([1, 2, 3]) +>>False +""" +from pandas.core.dtypes.common import is_iterator as _is_iterator def _is_list_like_not_nested(obj): @@ -95,9 +94,12 @@ class DictOfSeries: cast_to_itype(s, self.itype, policy=self._policy, inplace=True) def _init_insert_data(self, data, columns): + def incols(c): return c in columns if columns is not None else True + data = list(data) if _is_iterator(data) else data + if isinstance(data, dict) or _is_dios_like(data): for k in data: if incols(k): @@ -118,6 +120,7 @@ class DictOfSeries: def _insert(self, col, val): """Insert a fresh new value into self""" + val = list(val) if _is_iterator(val) else val if _is_dios_like(val): if len(val) > 1: raise ValueError(f"Cannot insert DictOfSeries " @@ -163,7 +166,7 @@ class DictOfSeries: def __getitem__(self, key): """ dios[key] -> dios/series """ - + key = list(key) if _is_iterator(key) else key if isinstance(key, tuple): raise KeyError("tuples are not allowed") elif _is_hashable(key): @@ -216,7 +219,7 @@ class DictOfSeries: def __setitem__(self, key, value): """ dios[key] = value """ - + key = list(key) if _is_iterator(key) else key if isinstance(key, tuple): raise KeyError(f"{key}. 
tuples are not allowed") diff --git a/dios/locator.py b/dios/locator.py index ee4fceb9c82f773db634a5e21b790f73916cfc3b..651334b1d63d54de87142f0f4f841b5f93944185 100644 --- a/dios/locator.py +++ b/dios/locator.py @@ -1,6 +1,10 @@ -from dios.dios import _is_dios_like, _is_bool_series, _is_list_like_not_nested -import pandas as pd +from dios.dios import ( + _is_dios_like, + _is_bool_series, + _is_list_like_not_nested, + _is_iterator) +import pandas as pd import pandas.core.common as ccom import pandas.core.dtypes.common as dcom _is_list_like = dcom.is_list_like @@ -20,6 +24,8 @@ class _Indexer: def _unpack_key(self, key): + key = list(key) if _is_iterator(key) else key + if isinstance(key, tuple): if len(key) > 2: raise KeyError("To many indexers")