Commit e4b4ba31 authored by Bert Palm

merged develop

parents 2540ea87 342dcea4
Pipeline #47051 failed with stage
in 2 minutes and 17 seconds
# ===========================================================
# preparation
# ===========================================================
variables:
  GIT_SUBMODULE_STRATEGY: recursive

default:
  image: python:3.8
  before_script:
    - pip install --upgrade pip
    - pip install pytest
    - pip install -r requirements.txt

# ===========================================================
# normal jobs (non scheduled)
# ===========================================================

# test saqc with python 3.7
python37:
  stage: test
  except:
    - schedules
  image: python:3.7
  script:
    - pytest tests/core tests/funcs tests/integration dios/test
    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv

# test saqc with python 3.8
python38:
  stage: test
  except:
    - schedules
  script:
    - pytest tests/core tests/funcs tests/integration dios/test
    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv

# test saqc with python 3.9
python39:
  stage: test
  except:
    - schedules
  image: python:3.9
  script:
    - pytest tests/core tests/funcs tests/integration
    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv

# check if everything is properly formatted
black:
  stage: test
  script:
    - pip install black
    - black --check .

# make (visual) coverage in gitlab merge request diffs
coverage:
  stage: test
  except:
    - schedules
  allow_failure: true
  script:
    - pip install pytest-cov coverage
    - pytest --cov=saqc tests/core tests/funcs
  after_script:
    - coverage xml
  # regex to find the coverage percentage in the job output
  coverage: '/^TOTAL.+?(\d+\%)$/'
  artifacts:
    when: always
    reports:
      cobertura: coverage.xml

# make html docu with sphinx
pages:
  stage: deploy
  only:
    - cookBux
  except:
    - schedules
  script:
    - cd sphinx-doc/
    - pip install -r requirements_sphinx.txt
    - make doc
    - cp -r _build/html ../public
  artifacts:
    paths:
      - public

# ===========================================================
# scheduled jobs
# ===========================================================

# fuzzy testing saqc
fuzzy:
  stage: test
  only:
    - schedules
  script:
    - pytest tests/fuzzy

# test lib saqc
testLib:
  stage: test
  only:
    - schedules
  script:
    - pytest tests/lib
@@ -79,10 +79,9 @@
## Breaking Changes
- register is now a decorator instead of a wrapper

# 1.5
coming soon ...

## Features
@@ -3,29 +3,105 @@ We recommend a virtual python environment for development. The setup process is
# Testing
SaQC comes with an extensive test suite based on [pytest](https://docs.pytest.org/en/latest/).
In order to run all tests execute `python -m pytest .`; for faster iteration a test run with
`python -m pytest --ignore test/lib test` is usually enough.
# Coding conventions

## Naming

### Code
We implement the following naming conventions (a short sketch follows the list):
- Classes: CamelCase
- Functions: camelCase
- Variables/Arguments: snake_case
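
A minimal, made-up sketch of these conventions:

```python
# made-up names, only to illustrate the naming conventions above
class RangeFlagger:                        # classes: CamelCase
    pass


def flagRange(data, lower, upper):         # functions: camelCase
    out_of_range = (data < lower) | (data > upper)   # variables/arguments: snake_case
    return out_of_range
```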
### Argument names in public function signatures

First, in contrast to variable names in code, argument names do not have to be
*talking*: to use and parameterize a function, one always has to read its
documentation anyway. Guessing the meaning of arguments from their names alone,
without reading the docs, will almost never work. That is why we are under no
obligation to make argument names (very) talkative.

Second, because the nature of a function is to provide a *simple* way to use
complex code, it is common to use simple and short names and to omit any
*irrelevant* information. For example, say we have a function that fits a
polynomial to some data and takes three arguments:
- the data input,
- a threshold that defines a cutoff point for a calculation on the polynomial and
- a third argument.

One could name the arguments `data, poly_cutoff_threshold, ...`, but much better
names would be `data, thresh, ...`, because the caller does not need the extra
information stuffed into the name. If the third argument is also some kind of
threshold, one can use `data, cutoff, thresh`: the *thresh-* part of the
`cutoff` parameter is not crucial, and the caller learns from the docstring that
it is a threshold (see the sketch below).

Third, underscores give a good hint that a name is getting wrong or overly
complex. No underscore is fine, one underscore is ok if the information is
*really necessary* (see above), but with two or more underscores one should
think of a better name or omit some information. Rarely it may be necessary to
use two underscores, but we consider it bad style. Using three or more
underscores is not allowed unless one writes up a reasoning and gets it signed
by at least as many core developers as underscores one wants to use.

In short, the name should give a *very, very rough idea* of the purpose of the
argument, but neither *explain* its usage nor its purpose.
It is not a shame to name a parameter just `n` or `alpha` etc. if, for example,
the algorithm (from the paper etc.) names it alike.
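
A minimal sketch of such a signature; `fitPolynomial` and its parameters are hypothetical and only illustrate the naming advice:

```python
# hypothetical function, only to illustrate the argument naming advice above
def fitPolynomial(data, cutoff, thresh):
    """
    Fit a polynomial to the given data.

    Parameters
    ----------
    data : dios.DictOfSeries
        The data to fit the polynomial to.
    cutoff : float
        Cutoff point for a calculation on the fitted polynomial.
    thresh : float
        A second threshold used by that calculation.
    """
    ...
```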
### Test Functions
- testnames: [testmodule_]flagTestName
## Formatting
We use [black](https://black.readthedocs.io/en/stable/) in its default settings.
Within the `saqc` root directory run `black .`.
## Imports
Only absolute imports are accepted.
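
For example (the exact module path is only illustrative):

```python
# accepted: absolute import
from saqc.funcs import flagRange

# not accepted: relative import
# from .funcs import flagRange
```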
# Development Workflow
## Repository Structure
- `master` - branch:
+ Stable and usually protected.
+ Regular merges from `develop` according to the [release cycle](#release-cycle). These merges get a tag, increasing at least the minor version.
+ Irregular merges from `develop` in case of critical bugs. Such merges increase at least the patch level.
+ Merges into `master` usually lead to a PyPI release
- `develop` - branch:
+ The main development branch, no hard stability requirements/guarantees
+ Merges into `develop` should mostly follow a Merge Request Workflow; minor changes can, however, be committed directly. Such minor changes include:
* Typos and white space changes
* Obvious bugs in features implemented by the committing developer
## Merge Request Workflow
- Most changes to `saqc` are integrated by merge requests from a feature branch into `develop`
- All merge requests need to be reviewed by at least one other core developer (currently @palmb, @luenensc and @schaefed).
- We implement the following Gitlab based review process:
+ The author assigns the Merge Request to one of the core developers. The reviewer should review the request within one week;
large requests may of course lead to longer review times.
+ Reviewer and Author discuss any issues using the Gitlab code review facilities:
* In case all concerns are resolved, the reviewer approves the Merge Request and assigns it back to the author.
* In case reviewer and author can't resolve their discussion, the Merge Request should be assigned to another reviewer.
The new reviewer is now in charge of coming to a decision by either approving, closing or going into another review iteration.
+ The author of an approved Merge Request:
* has the right and the duty to merge into the `develop` branch; any occurring conflicts need to be addressed by the author,
* is always highly encouraged to provide a summary of the changes introduced with the Merge Request in its description upon integration. This recommendation becomes an obligation in case of interface modifications or changes to supported and/or documented workflows.
## Release Cycle
- We employ a release cycle of roughly 4 weeks.
- To avoid the integration of untested and/or broken changes, the merge window closes one week before the intended
release date. Commits to `develop` after the merge window of a release closes need to be integrated during the subsequent release
cycle.
- The release cycle is organized by Gitlab Milestones; the expiration date of a certain milestone indicates the end of the
related merge window. The actual merge into `master` and the accompanying release are scheduled for the week after the
milestone's expiration date.
- Issues and Merge Requests can and should be associated with these milestones, as this helps in the organization of review activities.
@@ -58,12 +58,12 @@ dataset and the routines to inspect, quality control and/or process them.
The content of such a configuration could look like this:
```
varname    ; test
#----------;------------------------------------
SM2        ; shiftToFreq(freq="15Min")
SM2        ; flagMissing()
'SM(1|2)+' ; flagRange(min=10, max=60)
SM2        ; flagMad(window="30d", z=3.5)
```
As soon as the basic inputs, a dataset and the configuration file are As soon as the basic inputs, a dataset and the configuration file are
@@ -81,15 +81,16 @@ The following snippet implements the same configuration given above through
the Python-API:
```python
import numpy as np
from saqc import SaQC

saqc = (SaQC(data)
        .shiftToFreq("SM2", freq="15Min")
        .flagMissing("SM2")
        .flagRange("SM(1|2)+", regex=True, min=10, max=60)
        .flagMad("SM2", window="30d", z=3.5))

data, flags = saqc.getResult()
```
## Installation
__pycache__/
DictOfSeries
============
DictOfSeries is a pandas.Series of pandas.Series objects which aims to behave as similarly as possible to pandas.DataFrame.
Nomenclature
------------
- series/ser: instance of pandas.Series
- dios: instance of dios.DictOfSeries
- df: instance of pandas.DataFrame
- dios-like: a *dios* or a *df*
- alignable object: a *dios*, *df* or a *series*
Features
--------
* every *column* has its own index
* uses much less memory than a misaligned pandas.DataFrame
* behaves quite like a pandas.DataFrame
* additional align locator (`.aloc[]`), see the short sketch below
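
A short, hedged sketch of the align locator, assuming the example data from `dios.example_DictOfSeries()`: indexing with a boolean DictOfSeries keeps, per column, only the rows where the condition holds.

```
>>> from dios import example_DictOfSeries
>>> di = example_DictOfSeries()
>>> di.aloc[di > 9]   # per column, only the rows with values > 9 remain
```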
Install
-------
todo: PyPi
```
import dios
# Have fun :)
```
Documentation
-------------
The main documentation is on ReadTheDocs:
* [dios.rtfd.io](https://dios.rtfd.io)
but some docs are also available locally:
* [Indexing](/docs/doc_indexing.md)
* [Cookbook](/docs/doc_cookbook.md)
* [Itype](/docs/doc_itype.md)
TL;DR
-----
**get it**
```
>>> from dios import DictOfSeries
```
**empty**
```
>>> DictOfSeries()
Empty DictOfSeries
Columns: []
>>> DictOfSeries(columns=['x', 'y'])
Empty DictOfSeries
Columns: ['x', 'y']
>>> DictOfSeries(columns=['x', 'y'], index=[3,4,5])
x | y |
====== | ====== |
3 NaN | 3 NaN |
4 NaN | 4 NaN |
5 NaN | 5 NaN |
```
**with data**
```
>>> DictOfSeries([range(4), range(2), range(3)])
0 | 1 | 2 |
==== | ==== | ==== |
0 0 | 0 0 | 0 0 |
1 1 | 1 1 | 1 1 |
2 2 | | 2 2 |
3 3 | | |
>>> DictOfSeries(np.random.random([2,4]))
0 | 1 |
=========== | =========== |
0 0.112020 | 0 0.509881 |
1 0.108070 | 1 0.285779 |
2 0.851453 | 2 0.805933 |
3 0.138352 | 3 0.812339 |
>>> DictOfSeries(np.random.random([2,4]), columns=['a','b'], index=[11,12,13,14])
a | b |
============ | ============ |
11 0.394304 | 11 0.356206 |
12 0.943689 | 12 0.735356 |
13 0.791820 | 13 0.066947 |
14 0.759802 | 14 0.496321 |
>>> DictOfSeries(dict(today=['spam']*3, tomorrow=['spam']*2))
today | tomorrow |
======= | ========== |
0 spam | 0 spam |
1 spam | 1 spam |
2 spam | |
```
#!/usr/bin/env python
from .dios import *
from .lib import *
__all__ = [
"DictOfSeries",
"to_dios",
"pprint_dios",
"IntItype",
"FloatItype",
"NumItype",
"DtItype",
"ObjItype",
"ItypeWarning",
"ItypeCastWarning",
"ItypeCastError",
"is_itype",
"is_itype_subtype",
"is_itype_like",
"get_itype",
"cast_to_itype",
"CastPolicy",
"Opts",
"OptsFields",
"OptsFields",
"dios_options",
"example_DictOfSeries",
]
from . import pandas_bridge as pdextra
from .base import (
_DiosBase,
_is_dios_like,
_is_bool_dios_like,
)
import numpy as np
import pandas as pd
class _Indexer:
def __init__(self, obj: _DiosBase):
self.obj = obj
self._data = obj._data
def _unpack_key(self, key):
key = list(key) if pdextra.is_iterator(key) else key
if isinstance(key, tuple):
if len(key) > 2:
raise KeyError("To many indexers")
rowkey, colkey = key
else:
rowkey, colkey = key, slice(None)
if isinstance(rowkey, tuple) or isinstance(colkey, tuple):
raise KeyError(f"{key}. tuples are not allowed.")
rowkey = list(rowkey) if pdextra.is_iterator(rowkey) else rowkey
colkey = list(colkey) if pdextra.is_iterator(colkey) else colkey
return rowkey, colkey
def _set_value_multi_column(self, rowkey, colkey, value, xloc="loc"):
"""set value helper for loc and iloc"""
data = getattr(self._data, xloc)[colkey]
hashable_rkey = pdextra.is_hashable(rowkey)
dioslike_value = False
iter_value = False
if _is_dios_like(value):
dioslike_value = True
if hashable_rkey:
raise ValueError(f"Incompatible indexer with DictOfSeries")
elif pdextra.is_list_like(value):
value = value.values if isinstance(value, pd.Series) else value
iter_value = True
if len(value) != len(data):
raise ValueError(
f"shape mismatch: value array of shape (.., {len(value)}) could "
f"not be broadcast to indexing result of shape (.., {len(data)})"
)
c = "?"
try:
for i, c in enumerate(data.index):
dat = data.at[c]
dat_xloc = getattr(dat, xloc)
if dioslike_value:
# setting on an empty series fails, e.g. emptySer.loc[:] = [2, 1]
# len(scalar) would fail, but that cannot happen here, because
# the dios-like + hashable case was already rejected above
if len(dat_xloc[rowkey]) == 0:
continue
# unpack the value if necessary
if iter_value:
val = value[i]
elif dioslike_value:
val = value[c] if c in value else np.nan
else:
val = value
dat_xloc[rowkey] = val
except Exception as e:
raise type(e)(f"failed for column {c}: " + str(e)) from e
# #############################################################################
class _LocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
rowkey, colkey = self._unpack_key(key)
if _is_dios_like(rowkey) or _is_dios_like(colkey):
raise ValueError("Could not index with multidimensional key")
# simple optimisation
if pdextra.is_null_slice(rowkey) and pdextra.is_null_slice(colkey):
return self.obj.copy()
data = self._data.loc[colkey].copy()
# .loc[any, scalar] -> (a single) series
# .loc[scalar, scalar] -> (a single) value
if pdextra.is_hashable(colkey):
new = data.loc[rowkey]
# .loc[any, non-scalar]
else:
k = "?"
try:
for k in data.index:
data.at[k] = data.at[k].loc[rowkey]
except Exception as e:
raise type(e)(f"failed for column {k}: " + str(e)) from e
# .loc[scalar, non-scalar] -> column-indexed series
if pdextra.is_hashable(rowkey):
new = data
# .loc[non-scalar, non-scalar] -> dios
else:
new = self.obj.copy_empty(columns=False)
new._data = data
return new
def __setitem__(self, key, value):
rowkey, colkey = self._unpack_key(key)
if _is_dios_like(rowkey) or _is_dios_like(colkey):
raise ValueError("Cannot index with multi-dimensional key")
# .loc[any, scalar] - set on single column
if pdextra.is_hashable(colkey):
# .loc[dont-care, new-scalar] = val
if colkey not in self.obj.columns:
self.obj._insert(colkey, value)
# .loc[any, scalar] = multi-dim
elif _is_dios_like(value) or pdextra.is_nested_list_like(value):
raise ValueError("Incompatible indexer with multi-dimensional value")
# .loc[any, scalar] = val
else:
self._data.at[colkey].loc[rowkey] = value
# .loc[any, non-scalar] = any
else:
self._set_value_multi_column(rowkey, colkey, value, xloc="loc")
# #############################################################################
class _iLocIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __getitem__(self, key):
rowkey, colkey = self._unpack_key(key)
if _is_dios_like(rowkey) or _is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
# simple optimisation
if pdextra.is_null_slice(rowkey) and pdextra.is_null_slice(colkey):
return self.obj.copy()
data = self._data.iloc[colkey].copy()
# .iloc[any, int] -> single series
# .iloc[int, int] -> single value
if pdextra.is_integer(colkey):
new = data.iloc[rowkey]
# .iloc[any, non-int]
else:
k = "?"
try:
for k in data.index:
data.at[k] = data.at[k].iloc[rowkey]
except Exception as e:
raise type(e)(f"failed for column {k}: " + str(e)) from e
# .iloc[int, non-int] -> column-indexed series
if pdextra.is_integer(rowkey):
new = data
# .iloc[non-int, non-int] -> dios
else:
new = self.obj.copy_empty(columns=False)
new._data = data
return new
def __setitem__(self, key, value):
rowkey, colkey = self._unpack_key(key)
if _is_dios_like(rowkey) or _is_dios_like(colkey):
raise ValueError("Cannot index with multidimensional key")
# .iloc[any, int] = Any
if pdextra.is_integer(colkey):
if _is_dios_like(value) or pdextra.is_nested_list_like(value):
raise ValueError("Incompatible indexer with multi-dimensional value")
self._data.iat[colkey].iloc[rowkey] = value
# .iloc[any, non-int] = Any
else:
self._set_value_multi_column(rowkey, colkey, value, xloc="iloc")
# #############################################################################
class _aLocIndexer(_Indexer):
"""align Indexer
Automatically align (alignable) indexer on all possible axis,
and handle indexing with non-existent or missing keys gracefully.
Also align (alignable) values before setting them with .loc
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._usebool = True
def __call__(self, usebool=True):
"""We are called if the user want to set `usebool=False', which make
boolean alignable indexer treat as non-boolean alignable indexer.
Explanation: A boolean dios indexer align its indices with the indices
of the receiving dios like a non-boolean dios indexer also would do.
Additionally all rows with False values are kicked too. To disable
that `usebool=False` can be given."""
self._usebool = usebool
return self
def __getitem__(self, key):
rowkeys, colkeys, lowdim = self._unpack_key_aloc(key)
data = pd.Series(dtype="O", index=colkeys)
kws = dict(itype=self.obj.itype, cast_policy=self.obj._policy)
c = "?"
try:
for i, c in enumerate(data.index):
data.at[c] = self._data.at[c].loc[rowkeys[i]]
except Exception as e:
raise type(e)(f"failed for column {c}: " + str(e)) from e
if lowdim:
return data.squeeze()
else:
return self.obj._constructor(data=data, fastpath=True, **kws)
def __setitem__(self, key, value):
rowkeys, colkeys, _ = self._unpack_key_aloc(key)
def iter_self(colkeys, position=False):
c = "?"
try:
for i, c in enumerate(colkeys):
dat = self._data.at[c]
rk = rowkeys[i]
if len(dat.loc[rk]) == 0:
continue
yield dat, rk, i if position else c
except Exception as e:
raise type(e)(f"failed for column {c}: " + str(e)) from e
# align columns, for rows use series.loc to align
if _is_dios_like(value):
colkeys = value.columns.intersection(colkeys)
for dat, rk, c in iter_self(colkeys):
dat.loc[rk] = value[c]
# no align, no merci
elif pdextra.is_nested_list_like(value):
if len(colkeys) != len(value):
raise ValueError(
f"shape mismatch: values array of shape "
f"(.., {len(value)}) could not "
f"be broadcast to indexing result of "
f"shape (.., {len(colkeys)})"
)
for dat, rk, i in iter_self(colkeys, position=True):
dat.loc[rk] = value[i]
# align rows by using series.loc
elif isinstance(value, pd.Series):
for dat, rk, _ in iter_self(colkeys):
dat.loc[rk] = value
# no align, no merci
else:
for dat, rk, _ in iter_self(colkeys):
dat.loc[rk] = value
def _unpack_key_aloc(self, key):
"""
Return a list of row indexers and a list of existing(!) column labels.
Both lists always have the same length and may both be empty.
Note:
The items of the row indexer list should be passed to pd.Series.loc[]
"""
# if a single column-key is given, the caller may
# want to return a single Series, instead of a dios
lowdim = False
def keys_from_bool_dios_like(key):
if not _is_bool_dios_like(key):
raise ValueError("Must pass dios-like key with boolean values only.")
colkey = self.obj.columns.intersection(key.columns)
rowkey = []
for c in colkey:
b = key[c]
rowkey += [self._data.at[c].index.intersection(b[b].index)]
return rowkey, colkey, lowdim
def keys_from_dios_like(key):
colkey = self.obj.columns.intersection(key.columns)
rowkey = [self._data.at[c].index.intersection(key[c].index) for c in colkey]
return rowkey, colkey, lowdim
rowkey, colkey = self._unpack_key(key)
if _is_dios_like(colkey) or pdextra.is_nested_list_like(colkey):
raise ValueError("Could not index with multi-dimensional column key.")
# giving the ellipsis as column key, is an alias
# for giving `usebool=False`. see self.__call__()
if colkey is Ellipsis:
self._usebool = False
colkey = slice(None)
# .aloc[dios]
if _is_dios_like(rowkey):
if not pdextra.is_null_slice(colkey):
raise ValueError(
f"Could not index with a dios-like indexer as rowkey,"
f"and a column key of that type {type(colkey)}"
)
if self._usebool:
return keys_from_bool_dios_like(rowkey)
else:
return keys_from_dios_like(rowkey)
# handle gracefully: scalar
elif pdextra.is_hashable(colkey):
colkey = [colkey] if colkey in self.obj.columns else []
lowdim = True
# column-alignable: list-like, filter only existing columns
elif pdextra.is_list_like(colkey) and not pdextra.is_bool_indexer(colkey):
colkey = colkey.values if isinstance(colkey, pd.Series) else colkey
colkey = self.obj.columns.intersection(colkey)
# handle gracefully (automatically)
# just a simple optimisation
elif pdextra.is_null_slice(colkey):
colkey = self.obj.columns
# not alignable, fall back to .loc (boolean list/series, slice(..), etc.
else:
colkey = self._data.loc[colkey].index
if len(colkey) == 0: # (!) `if not colkey:` fails for pd.Index
return [], [], lowdim
rowkey = self._get_rowkey(rowkey, colkey)
return rowkey, colkey, lowdim
def _get_rowkey(self, rowkey, colkey, depth=0):
if pdextra.is_nested_list_like(rowkey) and depth == 0:
rowkey = rowkey.values if isinstance(rowkey, pd.Series) else rowkey
if len(rowkey) != len(colkey):
raise ValueError(
"Nested arrays indexer must have same (outer) "
"length than the number of selected columns."
)
indexer = []
for i, c in enumerate(colkey):
# recurse to get the row indexer from inner element
indexer += self._get_rowkey(rowkey[i], [c], depth=depth + 1)
rowkey = indexer
# row-alignable: pd.Series(), align rows to every series in colkey (columns)
elif isinstance(rowkey, pd.Series):
if self._usebool and pdextra.is_bool_indexer(rowkey):
rowkey = [
self._data.at[c].index.intersection(rowkey[rowkey].index)
for c in colkey
]
else:
rowkey = [
self._data.at[c].index.intersection(rowkey.index) for c in colkey
]
# handle gracefully: scalar, transform to row-slice
elif pdextra.is_hashable(rowkey):
rowkey = [slice(rowkey, rowkey)] * len(colkey)
# handle gracefully: list-like, filter only existing rows
# NOTE: dios.aloc[series.index] is processed here
elif pdextra.is_list_like(rowkey) and not pdextra.is_bool_indexer(rowkey):
rowkey = [self._data.at[c].index.intersection(rowkey) for c in colkey]
# not alignable
# the rowkey is processed by .loc someway in
# the calling function - (eg. slice(..), boolean list-like, etc.)
else:
rowkey = [rowkey] * len(colkey)
return rowkey
# #############################################################################
class _AtIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _check_key(self, key):
if not (
isinstance(key, tuple)
and len(key) == 2
and pdextra.is_hashable(key[0])
and pdextra.is_hashable(key[1])
):
raise KeyError(
f"{key}. `.at` takes exactly one scalar row-key "
"and one scalar column-key"
)
def __getitem__(self, key):
self._check_key(key)
return self._data.at[key[1]].at[key[0]]
def __setitem__(self, key, value):
self._check_key(key)
if _is_dios_like(value) or pdextra.is_nested_list_like(value):
raise TypeError(
".at[] cannot be used to set multi-dimensional values, use .aloc[] instead."
)
self._data.at[key[1]].at[key[0]] = value
# #############################################################################
class _iAtIndexer(_Indexer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _check_key(self, key):
if not (
isinstance(key, tuple)
and len(key) == 2
and pdextra.is_integer(key[0])
and pdextra.is_integer(key[1])
):
raise KeyError(
f"{key} `.iat` takes exactly one integer positional "
f"row-key and one integer positional scalar column-key"
)
def __getitem__(self, key):
self._check_key(key)
return self._data.iat[key[1]].iat[key[0]]
def __setitem__(self, key, value):
self._check_key(key)
if _is_dios_like(value) or pdextra.is_nested_list_like(value):
raise TypeError(
".iat[] cannot be used to set multi-dimensional values, use .aloc[] instead."
)
self._data.iat[key[1]].iat[key[0]] = value
import pandas as pd
import warnings
class ItypeWarning(RuntimeWarning):
pass
class ItypeCastWarning(ItypeWarning):
pass
class ItypeCastError(RuntimeError):
pass
class __Itype:
def __init__(self):
raise RuntimeError("a Itype class does not allow instances of itself.")
class DtItype(__Itype):
name = "datetime"
unique = True
subtypes = (pd.DatetimeIndex,)
min_pdindex = pd.DatetimeIndex([])
class IntItype(__Itype):
name = "integer"
unique = True
subtypes = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, int)
min_pdindex = pd.Int64Index([])
class FloatItype(__Itype):
name = "float"
subtypes = (pd.Float64Index, float)
unique = True
min_pdindex = pd.Float64Index([])
# class MultiItype(__Itype):
# name = "multi"
# subtypes = (pd.MultiIndex, )
# unique = ??
class NumItype(__Itype):
name = "numeric"
_subitypes = (IntItype, FloatItype)
subtypes = _subitypes + IntItype.subtypes + FloatItype.subtypes
unique = False
min_pdindex = pd.Float64Index([])
class ObjItype(__Itype):
name = "object"
unique = False
_subitypes = (DtItype, IntItype, FloatItype, NumItype, str)
_otheritypes = (
pd.CategoricalIndex,
pd.IntervalIndex,
pd.PeriodIndex,
pd.TimedeltaIndex,
pd.Index,
)
subtypes = _subitypes + _otheritypes + DtItype.subtypes + NumItype.subtypes
min_pdindex = pd.Index([])
def is_itype(obj, itype):
"""Check if obj is a instance of the given itype or its str-alias was given"""
# todo: iter through itype as it could be a tuple, if called like ``is_itype(o, (t1,t2))``
# user gave a Itype, like ``DtItype``
if type(obj) == type and issubclass(obj, itype):
return True
# user gave a string, like 'datetime'
if isinstance(obj, str) and obj == itype.name:
return True
return False
def is_itype_subtype(obj, itype):
"""Check if obj is a subclass or a instance of a subclass of the given itype"""
# user gave a subtype, like ``pd.DatetimeIndex``
if type(obj) == type and issubclass(obj, itype.subtypes):
return True
# user gave a instance of a subtype, like ``pd.Series(..).index``
if isinstance(obj, itype.subtypes):
return True
return False
def is_itype_like(obj, itype):
"""Check if obj is a subclass or a instance of the given itype or any of its subtypes"""
return is_itype(obj, itype) or is_itype_subtype(obj, itype)
def get_itype(obj):
"""
Return the Itype corresponding to the given object.

Parameters
----------
obj : {itype string, Itype, pandas.Index class, instance of pd.Index}
    The object to determine the fitting itype for.
Examples
--------
>>> get_itype("datetime")
<class 'dios.lib.DtItype'>
>>> s = pd.Series(index=pd.to_datetime([]))
>>> get_itype(s.index)
<class 'dios.lib.DtItype'>
>>> get_itype(DtItype)
<class 'dios.lib.DtItype'>
>>> get_itype(pd.DatetimeIndex)
<class 'dios.lib.DtItype'>
"""
if type(obj) == type and issubclass(obj, __Itype):
return obj
# check if it is the actual type, not a subtype
types = [DtItype, IntItype, FloatItype, NumItype, ObjItype]
for t in types:
if is_itype(obj, t):
return t
for t in types:
if is_itype_subtype(obj, t):
return t
raise ValueError(
f"{obj} is not a itype, nor any known subtype of a itype, nor a itype string alias"
)
def _itype_eq(a, b):
return is_itype(a, b)
def _itype_lt(a, b):
return is_itype_subtype(a, b)
def _itype_le(a, b):
return is_itype_like(a, b)
def _find_least_common_itype(iterable_of_series):
itypes = [NumItype, FloatItype, IntItype, DtItype]
tlist = [get_itype(s.index) for s in iterable_of_series]
found = ObjItype
if tlist:
for itype in itypes:
for t in tlist:
if _itype_le(t, itype):
continue
break
else:
found = itype
return found
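# A sketch of what _find_least_common_itype returns (illustrative, not exhaustive):
# for an int-indexed and a float-indexed series the least common itype is NumItype;
# as soon as a datetime-indexed series is added, only ObjItype fits all of them.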
################################################################################
# Casting
class CastPolicy:
force = "force"
save = "save"
never = "never"
_CAST_POLICIES = [CastPolicy.force, CastPolicy.save, CastPolicy.never]
def cast_to_itype(series, itype, policy="save", err="raise", inplace=False):
"""Cast a series (more explicit the type of the index) to fit the itype of a dios.
Return the casted series if successful, None otherwise.
Note:
This is very basic number-casting, so in most cases, information from
the old index will be lost after the cast.
"""
if policy not in _CAST_POLICIES:
raise ValueError(f"policy={policy}")
if err not in ["raise", "ignore"]:
raise ValueError(f"err={err}")
if not inplace:
series = series.copy()
itype = get_itype(itype)
if series.empty:
return pd.Series(index=itype.min_pdindex, dtype=series.dtype)
series.itype = get_itype(series.index)
# up-casting isn't necessary because a dios with a higher
# itype can always take lower itypes.
# series can have dt/int/float/mixed
# dt -> dt -> mixed
# int -> int -> num -> mixed
# float -> float -> num -> mixed
# mixed -> mixed
if _itype_le(series.itype, itype): # a <= b
return series
e = f"A series index of type '{type(series.index)}' cannot be casted to Itype '{itype.name}'"
# cast any -> dt always fail.
if is_itype(itype, DtItype):
pass
else:
e += f", as forbidden by the cast-policy '{policy}'."
if policy == CastPolicy.never:
pass
elif policy == CastPolicy.force:
# cast any (dt/float/mixed) -> int
if is_itype(itype, IntItype): # a == b
series.index = pd.RangeIndex(len(series))
return series
# cast any (dt/int/mixed) -> float
# cast any (dt/float/mixed) -> num
if is_itype(itype, FloatItype) or is_itype(itype, NumItype): # a == b or a == c
series.index = pd.Float64Index(range(len(series)))
return series
elif policy == CastPolicy.save:
# cast int -> float
if is_itype(itype, IntItype) and is_itype(
series.itype, FloatItype
): # a == b and c == d
series.index = series.index.astype(float)
return series
# cast float -> int, maybe if unique
if is_itype(itype, FloatItype) and is_itype(
series.itype, IntItype
): # a == b and c == d
series.index = series.index.astype(int)
if series.index.is_unique:
return series
e = (
f"The cast with policy {policy} from series index type '{type(series.index)}' to "
f"itype {itype.name} resulted in a non-unique index."
)
# cast mixed -> int/float always fail
if err == "raise":
raise ItypeCastError(e)
else:
return None
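# Illustrative sketch of the policies above: with policy=CastPolicy.force a
# datetime-indexed series can be cast to IntItype; its index is simply replaced
# by a RangeIndex, so (as the docstring notes) the original index information is lost.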
################################################################################
# OPTIONS
class OptsFields:
"""storage class for the keys in `dios_options`
Use like so: ``dios_options[OptsFields.X] = Opts.Y``.
See Also
--------
Opts: values for the options dict
dios_options: options dict for module
"""
mixed_itype_warn_policy = "mixed_itype_policy"
disp_max_rows = "disp_max_rows "
disp_min_rows = "disp_min_rows "
disp_max_cols = "disp_max_vars"
dios_repr = "dios_repr"
class Opts:
"""storage class for string values for `dios_options`
Use like so: ``dios_options[OptsFields.X] = Opts.Y``.
See Also
--------
OptsFields: keys for the options dict
dios_options: options dict for module
"""
itype_warn = "warn"
itype_err = "err"
itype_ignore = "ignore"
repr_aligned = "aligned"
repr_indexed = "indexed"
class __DocDummy(dict):
pass
dios_options = __DocDummy()
dios_options.update(
**{
OptsFields.disp_max_rows: 60,
OptsFields.disp_min_rows: 10,
OptsFields.disp_max_cols: 10,
OptsFields.mixed_itype_warn_policy: Opts.itype_warn,
OptsFields.dios_repr: Opts.repr_indexed,
}
)
opdoc = f"""Options dictionary for module `dios`.
Use like so: ``dios_options[OptsFields.X] = Opts.Y``.
**Items**:
* {OptsFields.dios_repr}: {{'indexed', 'aligned'}} default: 'indexed'
dios default representation:
* `indexed`: show every column with its own index
* `aligned`: transform to a pandas.DataFrame with the indexes merged together.
* {OptsFields.disp_max_rows} : int
Maximum number of rows before the representation of a DictOfSeries
is truncated to `disp_min_rows`.
* {OptsFields.disp_min_rows} : int
Minimum number of rows to display if `disp_max_rows` is exceeded.
* {OptsFields.disp_max_cols} : int
Maximum number of columns before the representation is truncated.
* {OptsFields.mixed_itype_warn_policy} : {{'warn', 'err', 'ignore'}}
How to inform the user about a mixed Itype.
See Also
--------
OptsFields: keys for the options dict
Opts: values for the options dict
"""
dios_options.__doc__ = opdoc
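# Example usage (a sketch): the options are plain dictionary entries, e.g.
#   dios_options[OptsFields.disp_max_rows] = 120
#   dios_options[OptsFields.dios_repr] = Opts.repr_aligned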
def _throw_MixedItype_err_or_warn(itype):
msg = (
f"Using '{itype.name}' as itype is not recommend. "
f"As soon as series with different index types are inserted,\n"
f"indexing and slicing will almost always fail. "
)
if dios_options[OptsFields.mixed_itype_warn_policy] in [
"ignore",
Opts.itype_ignore,
]:
pass
elif dios_options[OptsFields.mixed_itype_warn_policy] in [
"error",
"err",
Opts.itype_err,
]:
msg += "Suppress this error by specifying an unitary 'itype' or giving an 'index' to DictOfSeries."
raise ItypeCastError(msg)
else:
msg += "Silence this warning by specifying an unitary 'itype' or giving an 'index' to DictOfSeries."
warnings.warn(msg, ItypeWarning)
return
def example_DictOfSeries():
"""Return a example dios.
Returns
-------
DictOfSeries: an example
Examples
--------
>>> from dios import example_DictOfSeries
>>> di = example_DictOfSeries()
>>> di
a | b | c | d |
===== | ====== | ====== | ===== |
0 0 | 2 5 | 4 7 | 6 0 |
1 7 | 3 6 | 5 17 | 7 1 |
2 14 | 4 7 | 6 27 | 8 2 |
3 21 | 5 8 | 7 37 | 9 3 |
4 28 | 6 9 | 8 47 | 10 4 |
5 35 | 7 10 | 9 57 | 11 5 |
6 42 | 8 11 | 10 67 | 12 6 |
7 49 | 9 12 | 11 77 | 13 7 |
8 56 | 10 13 | 12 87 | 14 8 |
9 63 | 11 14 | 13 97 | 15 9 |
"""
from dios import DictOfSeries
a = pd.Series(range(0, 70, 7))
b = pd.Series(range(5, 15, 1))
c = pd.Series(range(7, 107, 10))
d = pd.Series(range(0, 10, 1))
for i, s in enumerate([a, b, c, d]):
s.index += i * 2
di = DictOfSeries(dict(a=a, b=b, c=c, d=d))
return di.copy()
# do not import dios-stuff here
import operator as op
_OP1_MAP = {
op.inv: "~",
op.neg: "-",
op.abs: "abs()",
}
_OP2_COMP_MAP = {
op.eq: "==",
op.ne: "!=",
op.le: "<=",
op.ge: ">=",
op.gt: ">",
op.lt: "<",
}
_OP2_BOOL_MAP = {
op.and_: "&",
op.or_: "|",
op.xor: "^",
}
_OP2_ARITH_MAP = {
op.add: "+",
op.sub: "-",
op.mul: "*",
op.pow: "**",
}
_OP2_DIV_MAP = {
op.mod: "%",
op.truediv: "/",
op.floordiv: "//",
}
OP_MAP = _OP2_COMP_MAP.copy()
OP_MAP.update(_OP2_BOOL_MAP)
OP_MAP.update(_OP2_ARITH_MAP)
OP_MAP.update(_OP2_DIV_MAP)
OP_MAP.update(_OP1_MAP)
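# Example (a sketch): OP_MAP maps an operator function to its textual symbol,
# e.g. OP_MAP[op.add] == "+" and OP_MAP[op.inv] == "~", which lets callers
# render an operator in its usual notation.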
#!/usr/bin/env python
__author__ = "Bert Palm"
__email__ = "bert.palm@ufz.de"
__copyright__ = "Copyright 2020, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
from pandas.core.common import (
is_bool_indexer,
is_null_slice,
)
from pandas.core.dtypes.common import (
is_nested_list_like,
)
from pandas.api.types import (
is_list_like,
is_hashable,
is_integer,
is_dict_like,
is_scalar,
# Note: unlike the example in the pandas docs suggests, is_iterator
# returns False for lists:
# >>> is_iterator([1, 2, 3])
# False
is_iterator,
)
# ignore everything
_api
_build
_static
*.automodsumm
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile clean
clean:
rm -rf _build _static _api
rm -f *.automodsumm
mkdir _static
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath(".."))
# -- Project information -----------------------------------------------------
project = "dios"
copyright = "2020, Bert Palm"
author = "Bert Palm"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
# "sphinx.ext.doctest",
# "sphinx.ext.extlinks",
# "sphinx.ext.todo",
# "sphinx.ext.intersphinx",
# "sphinx.ext.coverage",
# "sphinx.ext.mathjax",
# "sphinx.ext.ifconfig",
"sphinx.ext.autosectionlabel",
# link source code
"sphinx.ext.viewcode",
# add support for NumPy style docstrings
"sphinx.ext.napoleon",
# doc the whole module
"sphinx_automodapi.automodapi",
"sphinxcontrib.fulltoc",
# markdown sources support
"recommonmark",
"sphinx_markdown_tables",
]
numpydoc_show_class_members = False
automodsumm_inherited_members = True
automodapi_inheritance_diagram = False
automodapi_toctreedirnm = "_api"
# automodsumm_writereprocessed = True
autosectionlabel_prefix_document = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
source_suffix = [".rst", ".md"]
# -- Options for HTML output -------------------------------------------------
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "nature"
# use pandas theme
# html_theme = "pydata_sphinx_theme"
# html_theme_options = {
# }
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
API
====
.. automodapi:: dios
:include-all-objects:
:no-heading: