diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 134fadf21f0066f65afebb3f2d89f48350684666..7a8fcc4e8109c933d04b672129aeb0b826c6eced 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,3 +1,6 @@
+variables:
+  GIT_SUBMODULE_STRATEGY: recursive
+
 before_script:
   - export DEBIAN_FRONTEND=noninteractive
   - apt-get -qq update
@@ -11,24 +14,14 @@ before_script:
   - eval "$(pyenv init -)"
 
 
-test:python36:
-  script:
-    - pyenv install 3.6.9
-    - pyenv shell 3.6.9
-    - pip install --upgrade pip
-    - pip install -r requirements.txt
-    - python -m pytest test
-    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv
-
-
 test:python37:
   script:
     - pyenv install 3.7.5
     - pyenv shell 3.7.5
     - pip install --upgrade pip
     - pip install -r requirements.txt
-    - python -m pytest test
-    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv
+    - python -m pytest --ignore test/lib test
+    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv
 
 
 test:python38:
@@ -37,5 +30,23 @@ test:python38:
     - pyenv shell 3.8.0
     - pip install --upgrade pip
     - pip install -r requirements.txt
-    - python -m pytest test
-    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv
+    - python -m pytest --ignore test/lib test
+    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv
+
+# Build the HTML documentation with sphinx
+pages:
+  stage: deploy
+  script:
+    - pyenv install 3.8.0
+    - pyenv shell 3.8.0
+    - pip install --upgrade pip
+    - pip install -r requirements.txt
+    - cd sphinx-doc/
+    - pip install -r requirements_sphinx.txt
+    - make html
+    - cp -r _build/html ../public
+  artifacts:
+    paths:
+      - public
+  only:
+    - develop
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 004e41e57478191f15b5eb1fb5522a48024410ee..10d3465f6d14b3a3ded89c4039ee39363a41a284 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,7 +61,28 @@
 
 # 1.4
 
-coming soon ...
+## Features
+- added the data processing module `proc_functions`
+- `flagCrossValidation` implemented
+- CLI: added support for parquet files
+
+## Bugfixes
+- `spikes_flagRaise` - fixed overestimation of the value course average
+- `spikes_flagRaise` - raise check window now closed on both sides
+
+## Refactorings
+- renamed `spikes_oddWater` to `spikes_flagMultivarScores`
+- added the STRAY auto-thresholding algorithm to `spikes_flagMultivarScores`
+- added an "unflagging" postprocessing step to `spikes_flagMultivarScores`
+- improved and extended masking
+
+## Breaking Changes
+- register is now a decorator instead of a wrapper
+
+
+# 1.5
+
+coming soon...
 
 ## Features
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d10ea5b19bab0fe4d1a0adcda964defdc3162f1f..90a3e1f9826b61dc44c39346dafa71898b19dec4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,19 +1,6 @@
 # Development Environment
-We recommend an virtual python environment for development. The setup process consists of the follwing simply steps:
-
-1. Create a fresh environment with:
-   ```sh
-   python -m venv saqc_dev
-   ```
-2. Activate the created environment
-   ```
-   source saqc_dev/bin/activate
-   ```
-3. Install the dependencies
-   ```sh
-   python -m pip install -r requirements.txt
-   ```
- 
+We recommend a virtual Python environment for development. The setup process is described in detail in our [GettingStarted](docs/GettingStarted.md).
+
 # Testing
 SaQC comes with an extensive test suite based on [pytest](https://docs.pytest.org/en/latest/).
 In order to run all tests execute:
@@ -26,7 +13,7 @@ python -m pytest .
 ## Naming
 
 ### Code
-We follow the follwing naming conventions
+We follow these naming conventions:
 - Classes: CamelCase
 - Functions: camelCase
 - Variables/Arguments: snake_case
@@ -36,9 +23,9 @@ We follow the follwing naming conventions
  
 ## Formatting
 We use (black)[https://black.readthedocs.io/en/stable/] with a line length if 120 characters.
-Within the `SaQC` root directory run `black -l 120`
+Within the `SaQC` root directory run `black -l 120`.
 
 ## Imports
-Only absolute imports are accepted
+Only absolute imports are accepted.
 
 
diff --git a/README.md b/README.md
index 308250cbd333bffbff1d5aea96fcfc80a793929d..35c04b031c760a99ab2068823cf0d97a89129622 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,11 @@
+[![pipeline status](https://git.ufz.de/rdm-software/saqc/badges/develop/pipeline.svg)](https://git.ufz.de/rdm-software/saqc/-/commits/develop)
+
 # System for automated Quality Control (SaQC)
 
 Quality Control of numerical data requires a significant amount of
 domain knowledge and practical experience. Finding a robust setup of
 quality tests that identifies as many suspicious values as possible, without
-removing valid data, is usually a time-consuming and iterative endeavor,
+removing valid data, is usually a time-consuming endeavor,
 even for experts.
 
 SaQC is both, a Python framework and a command line application, that
@@ -13,8 +15,8 @@ and simple configuration system.
 
 Below its user interface, SaQC is highly customizable and extensible.
 A modular structure and well-defined interfaces make it easy to extend
-the system with custom quality checks and even core components, like
-the flagging scheme, are exchangeable.
+the system with custom quality checks. Furthermore, even core components like
+the flagging scheme are exchangeable.
 
 ![SaQC Workflow](ressources/images/readme_image.png "SaQC Workflow")
 
@@ -30,35 +32,66 @@ data processing.
 
 The main objective of SaQC is to bridge this gap by allowing both
 parties to focus on their strengths: The data collector/owner should be
-able to express his/her ideas in an easy and succinct way, while the actual
+able to express his/her ideas in an easy way, while the actual
 implementation of the algorithms is left to the respective developers.
 
 
 ## How?
-The most import aspect of SaQC, the [general configuration](docs/ConfigurationFiles.md)
-of the system, is text-based. All the magic takes place in a semicolon-separated
-table file listing the variables within the dataset and the routines to inspect,
-quality control and/or modify them.
 
-```
-varname    ; test                                ; plot
-#----------;-------------------------------------;------
-SM2        ; harm_shift2Grid(freq="15Min")       ; False
-SM2        ; flagMissing(nodata=NAN)             ; False
-'SM(1|2)+' ; flagRange(min=10, max=60)           ; False
-SM2        ; spikes_flagMad(window="30d", z=3.5) ; True
-```
+`SaQC` is both a command line application controlled by a text-based configuration file and a Python
+module with a simple API.
 
 While a good (but still growing) number of predefined and highly configurable
 [functions](docs/FunctionIndex.md) are included and ready to use, SaQC
-additionally ships with a python based for quality control but also general
-purpose data processing
-[extension language](docs/GenericFunctions.md).
+additionally ships with a Python-based
+[extension language](docs/GenericFunctions.md) for quality control and general
+purpose data processing.
 
-For a more specific round trip to some of SaQC's possibilities, please refer to
+For a more specific round trip to some of SaQC's possibilities, we refer to
 our [GettingStarted](docs/GettingStarted.md).
 
 
+### SaQC as a command line application
+Most of the magic is controlled by a
+[semicolon-separated text file](saqc/docs/ConfigurationFiles.md) listing the variables of the
+dataset and the routines to inspect, quality control and/or process them.
+The content of such a configuration could look like this:
+
+```
+varname    ; test                                
+#----------;------------------------------------
+SM2        ; harm_shift2Grid(freq="15Min")       
+SM2        ; flagMissing(nodata=NAN)             
+'SM(1|2)+' ; flagRange(min=10, max=60)           
+SM2        ; spikes_flagMad(window="30d", z=3.5)
+```
+
+As soon as the basic inputs, a dataset and a configuration file, are
+prepared, running SaQC is as simple as:
+```sh
+saqc \
+    --config path_to_configuration.txt \
+    --data path_to_data.csv \
+    --outfile path_to_output.csv
+```
+
+### SaQC as a python module
+
+The following snippet implements the same configuration given above through
+the Python API:
+
+```python
+import numpy as np
+
+from saqc import SaQC, SimpleFlagger
+
+saqc = (SaQC(SimpleFlagger(), data)
+        .harm_shift2Grid("SM2", freq="15Min")
+        .flagMissing("SM2", nodata=np.nan)
+        .flagRange("SM(1|2)+", regex=True, min=10, max=60)
+        .spikes_flagMad("SM2", window="30d", z=3.5))
+        
+data, flagger = saqc.getResult()
+```
+
 ## Installation
 
 ### Python Package Index
@@ -67,6 +100,7 @@ can be installed using [pip](https://pip.pypa.io/en/stable/):
 ```sh
 python -m pip install saqc
 ```
+For a more detailed installation guide, see [GettingStarted](docs/GettingStarted.md).
 
 ### Anaconda
 Currently we don't provide pre-build conda packages but the installing of `SaQC`
@@ -86,30 +120,11 @@ straightforward:
 The latest development version is directly available from the
 [gitlab](https://git.ufz.de/rdm-software/saqc) server of the
 [Helmholtz Center for Environmental Research](https://www.ufz.de/index.php?en=33573).
-More details on how to setup an respective environment are available
-[here](CONTRIBUTING.md#development-environment)
+More details on how to install from the GitLab server are available
+[here](docs/GettingStarted.md).
 
 ### Python version
-The minimum Python version required is 3.6.
-
-
-## Usage
-### Command line interface (CLI)
-SaQC provides a basic CLI to get you started. As soon as the basic inputs,
-a dataset and the [configuration file](saqc/docs/ConfigurationFiles.md) are
-prepared, running SaQC is as simple as:
-```sh
-saqc \
-    --config path_to_configuration.txt \
-    --data path_to_data.csv \
-    --outfile path_to_output.csv
-```
-
-
-### Integration into larger workflows
-The main function is [exposed](saqc/core/core.py#L79) and can be used in within
-your own programs.
-
+The minimum Python version required is 3.7.
 
 ## License
 Copyright(c) 2019,
diff --git a/docs/ConfigurationFiles.md b/docs/ConfigurationFiles.md
index aa60017d3411c1b3e15ffe069cf991432348e2d3..64743bb40d211db2a51adfb5ad07c64bbcbdd7be 100644
--- a/docs/ConfigurationFiles.md
+++ b/docs/ConfigurationFiles.md
@@ -1,11 +1,10 @@
 # Configuration Files
-The behaviour of SaQC is completely controlled by a text based configuration file.
+The behaviour of SaQC can be completely controlled by a text-based configuration file.
 
 ## Format
-SaQC expects its configuration files to be semicolon-separated text files with a
+SaQC expects configuration files to be semicolon-separated text files with a
 fixed header. Each row of the configuration file lists
-one variable and one or several test functions, which will be evaluated to
-procduce a result for the given variable.
+one variable and one or several test functions that are applied to the given variable.
 
 
 ### Header names
@@ -13,11 +12,11 @@ procduce a result for the given variable.
 The header names are basically fixed, but if you really insist in custom
 configuration headers have a look [here](saqc/core/config.py).
 
-| Name    | Data Type                                    | Description            | Optional |
+| Name    | Data Type                                    | Description            | Required |
 |---------|----------------------------------------------|------------------------|----------|
-| varname | string                                       | name of a variable     | no       |
-| test    | [function notation](#test-function-notation) | test function          | no       |
-| plot    | boolean (`True`/`False`)                     | plot the test's result | yes      |
+| varname | string                                       | name of a variable     | yes      |
+| test    | [function notation](#test-function-notation) | test function          | yes      |
+| plot    | boolean (`True`/`False`)                     | plot the test's result | no       |
 
 
 ### Test function notation
@@ -27,15 +26,15 @@ many other programming languages and looks like this:
 flagRange(min=0, max=100)
 ```
 Here the function `flagRange` is called and the values `0` and `100` are passed
-to the parameters `min` and `max` respectively. As we (currently) value readablity
+to the parameters `min` and `max`, respectively. As we value readability
 of the configuration more than conciseness of the extrension language, only
 keyword arguments are supported. That means that the notation `flagRange(0, 100)`
 is not a valid replacement for the above example.
 
 ## Examples
 ### Single Test
-Every row lists one test per variable, if you want to call multiple tests on
-a specific variable (and you probably want to), list them in separate rows
+Every row lists one test per variable. If you want to call multiple tests on
+a specific variable (and you probably want to), list them in separate rows:
 ```
 varname | test
 #-------|----------------------------------
@@ -47,7 +46,7 @@ y       | flagRange(min=-10, max=40)
 
 ### Multiple Tests
 A row lists multiple tests for a specific variable in separate columns. All test
-columns need to share the common prefix `test`.
+columns need to share the common prefix `test`:
 
 ```
 varname ; test_1                     ; test_2                    ; test_3
@@ -70,11 +69,9 @@ x       ; constants_flagBasic(window="3h")
 
 ### Plotting
 As the process of finding a good quality check setup is somewhat experimental, SaQC
-provides a possibility to plot the results of the test functions. In
-order to opt-into this feture add the optional columns `plot` and set it
-to `True` whenever you want to see the result of the evaluation. These plots are
-meant to provide a quick and easy visual evaluation of the test setup and not to
-yield 'publication-ready' results
+provides a possibility to plot the results of the test functions. To use this feature, add the optional column `plot` and set it
+to `True` for all results you want to plot. These plots are
+meant to provide a quick and easy visual evaluation of the test.
 ```
 varname ; test                             ; plot
 #-------;----------------------------------;-----
@@ -84,13 +81,12 @@ x       ; constants_flagBasic(window="3h") ; True
 y       ; flagRange(min=-10, max=40)`      ;
 ```
 
-### Regular Expressions
-Some of the most basic tests (e.g. checks for missing values or range tests) but
-also the more elaborated functions available (e.g. aggregation or interpolation
+### Regular Expressions in `varname` column
+Some of the tests (e.g. checks for missing values, range tests or interpolation
 functions) are very likely to be used on all or at least several variables of
 the processed dataset. As it becomes quite cumbersome to list all these
 variables seperately, only to call the same functions with the same
-parameters over and over again, SaQC supports regular expressions
+parameters, SaQC supports regular expressions
 within the `varname` column. Please not that a `varname` needs to be quoted 
 (with `'` or `"`) in order to be interpreted as a regular expression.
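+
+A minimal configuration sketch (the variable names are placeholders): the quoted
+pattern below applies `flagRange` to every variable matching `SM1` or `SM2`.
+
+```
+varname    ; test
+#----------;---------------------------
+'SM(1|2)+' ; flagRange(min=10, max=60)
+```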
 
diff --git a/docs/Customizations.md b/docs/Customizations.md
index 464b36322129e7a00b104b6b791273eab6fa186f..af438fcf6dc622f0a3aacfa679fc8ab7712de83c 100644
--- a/docs/Customizations.md
+++ b/docs/Customizations.md
@@ -2,28 +2,26 @@
 SaQC comes with a continuously growing number of pre-implemented
 [quality check and processing routines](docs/FunctionIndex.md) and 
 flagging schemes. 
-For any sufficiently large use case however, chances are high, that the 
+For any sufficiently large use case, however, it is very likely that the 
 functions provided won't fulfill all your needs and requirements.
 
-Acknowledging our insufficiency to address all (im-)possible use cases, we 
-designed the system in a way, that makes it's extension and customization as 
-simple as possible. The main extensions options, namely 
+Acknowledging the impossibility of addressing all imaginable use cases, we 
+designed the system to allow for extensions and customizations. The main extension options, namely 
 [quality check routines](#custom-quality-check-routines)
 and the [flagging scheme](#custom-flagging-schemes)
 are described within this documents.
 
 ## Custom quality check routines
 In case you are missing quality check routines, you are of course very
-welcome to file an feature request issue on the project's
+welcome to file a feature request issue on the project's
 [gitlab repository](https://git.ufz.de/rdm-software/saqc). However, if 
-you are more the no-biggie-I-get-this-done-by-myself type of person,
+you are more the "no-way-I-get-this-done-by-myself" type of person,
 SaQC provides two ways to integrate custom routines into the system:
 1. The [extension language](docs/GenericFunctions.md)
 2. An [interface](#interface) to the evaluation machinery
 
 ### Interface
-In order to make a function usable within the evaluation framework of SaQC it needs 
-to implement the following interface:
+In order to make a function usable within the evaluation framework of SaQC, the following interface is needed:
 
 ```python
 def yourTestFunction(
@@ -32,37 +30,35 @@ def yourTestFunction(
    flagger: saqc.flagger.BaseFlagger,
    *args: Any,
    **kwargs: Any
-   ) -> (pd.DataFrame, saqc.flagger.BaseFlagger)
+   ) -> (dios.DictOfSeries, saqc.flagger.BaseFlagger)
 ```
 
 #### Argument Descriptions
 
 | Name      | Description                                                                                      |
 |-----------|--------------------------------------------------------------------------------------------------|
-| `data`    | The actual dataset                                                                               |
-| `field`   | The field/column within `data`, the function is checking/processing                              |
-| `flagger` | A instance of a flagger, responsible for the translation of test results into quality attributes |
-| `args`    | Any other arguments needed to parameterize the function                                          |
-| `kwargs`  | Any other keyword arguments needed to parameterize the function                                  |
+| `data`    | The actual dataset.                                                                               |
+| `field`   | The field/column within `data` that the function is checking/processing.                              |
+| `flagger` | An instance of a flagger, responsible for the translation of test results into quality attributes. |
+| `args`    | Any other arguments needed to parameterize the function.                                          |
+| `kwargs`  | Any other keyword arguments needed to parameterize the function.                                  |
 
 ### Integrate into SaQC
 In order make your function available to the system it needs to be registered. We provide the decorator 
-[`register`](saqc/functions/register.py) in the module `saqc.functions.register`, to integrate your 
-test functions into SaQC. A complete, yet useless example might
-look like that:
+[`register`](saqc/functions/register.py) in the module `saqc.functions.register` to integrate your 
+test functions into SaQC. Here is a complete dummy example:
 
 ```python
 from saqc.functions.register import register
 
-@register()
+@register
 def yourTestFunction(data, field, flagger, *args, **kwargs):
     return data, flagger
 ```
 
 ### Example
 The function [`flagRange`](saqc/funcs/functions.py) provides a simple, yet complete implementation of 
-a quality check routine. You might want to look into its implementation before you start writing your
-own.
+a quality check routine. You might want to look into its implementation as a reference for your own.
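+
+Below is a hypothetical sketch of such a routine. It is not part of SaQC: the
+function name and the `thresh` parameter are made up, and the `setFlags(field, loc=...)`
+call assumes the flagger API used by the built-in functions - check `flagRange` and
+`saqc.flagger.BaseFlagger` for the authoritative interface.
+
+```python
+from saqc.functions.register import register
+
+
+@register
+def flagAboveThresh(data, field, flagger, thresh, **kwargs):
+    # mark every value of `field` exceeding the given threshold
+    mask = data[field] > thresh
+    # translate the boolean mask into flags (signature assumed, see note above)
+    flagger = flagger.setFlags(field, loc=mask, **kwargs)
+    return data, flagger
+```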
 
 
 ## Custom flagging schemes
diff --git a/docs/FlaggingSchemes.md b/docs/FlaggingSchemes.md
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..edca5253ca8747048e2c4f3738dc9da522726222 100644
--- a/docs/FlaggingSchemes.md
+++ b/docs/FlaggingSchemes.md
@@ -0,0 +1,10 @@
+# DMP flagging scheme
+
+## Possible flags
+
+The DMP scheme produces the following flag constants:
+
+*    "ok"
+*    "doubtfull"
+*    "bad"
+
diff --git a/docs/GenericFunctions.md b/docs/GenericFunctions.md
index ba3bfc3fc5fc577734778abc00bd4bd1a19faeb8..9f91d4fd53385f2b7f7f795ad3b83c5ee21085bc 100644
--- a/docs/GenericFunctions.md
+++ b/docs/GenericFunctions.md
@@ -2,11 +2,11 @@
 
 ## Generic Flagging Functions
 
-Generic flagging functions provide a way to leverage cross-variable quality
+Generic flagging functions offer a way to express cross-variable quality
 constraints and to implement simple quality checks directly within the configuration.
 
 ### Why?
-The underlying idea is, that in most real world datasets many errors
+In most real-world datasets, many errors
 can be explained by the dataset itself. Think of a an active, fan-cooled
 measurement device: no matter how precise the instrument may work, problems
 are to be expected when the fan stops working or the power supply 
@@ -23,9 +23,8 @@ flagGeneric(func=<expression>, flag=<flagging_constant>)
 ```
 where `<expression>` is composed of the [supported constructs](#supported-constructs)
 and `<flag_constant>` is one of the predefined
-[flagging constants](ParameterDescriptions.md#flagging-constants) (default: `BAD`)
-Generic flagging functions are expected to evaluate to a boolean value, i.e. only 
-constructs returning `True` or `False` are accepted. All other expressions will
+[flagging constants](ParameterDescriptions.md#flagging-constants) (default: `BAD`).
+Generic flagging functions are expected to return a boolean value, i.e. `True` or `False`. All other expressions will
 fail during the runtime of `SaQC`.
 
 
@@ -34,7 +33,7 @@ fail during the runtime of `SaQC`.
 #### Simple comparisons
 
 ##### Task
-Flag all values of variable `x` when variable `y` falls below a certain threshold
+Flag all values of `x` where `y` falls below 0.
 
 ##### Configuration file
 ```
@@ -46,7 +45,7 @@ x       ; flagGeneric(func=y < 0)
 #### Calculations
 
 ##### Task
-Flag all values of variable `x` that exceed 3 standard deviations of variable `y`
+Flag all values of `x` that exceed 3 standard deviations of `y`.
 
 ##### Configuration file
 ```
@@ -57,7 +56,7 @@ x       ; flagGeneric(func=x > std(y) * 3)
 #### Special functions
 
 ##### Task
-Flag variable `x` where variable `y` is flagged and variable `x` has missing values
+Flag all values of `x` where `y` is flagged and `z` has missing values.
 
 ##### Configuration file
 ```
@@ -67,7 +66,7 @@ x       ; flagGeneric(func=isflagged(y) & ismissing(z))
 ```
 
 #### A real world example
-Let's consider a dataset like the following:
+Let's consider the following dataset:
 
 | date             | meas | fan | volt |
 |------------------|------|-----|------|
@@ -78,11 +77,11 @@ Let's consider a dataset like the following:
 | ...              |      |     |      |
 
 ##### Task
-Flag variable `meas` where variable `fan` equals 0 and variable `volt`
+Flag `meas` where `fan` equals 0 and `volt`
 is lower than `12.0`.
 
 ##### Configuration file
-We can directly implement the condition as follows:
+There are various options. We can directly implement the condition as follows:
 ```
 varname ; test
 #-------;-----------------------------------------------
diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md
index ea68522c44179f7d2897bb44413c940803e4e64f..c034992821df5a9fa9f310748d801ccfeeb3c7f3 100644
--- a/docs/GettingStarted.md
+++ b/docs/GettingStarted.md
@@ -1,7 +1,7 @@
 # Getting started with SaQC
 
-This "getting started" assumes that you have Python version 3.6 or 3.7
-installed.
+Requirements: this tutorial assumes that you have Python version 3.7 or newer
+installed, and that both your operating system and Python installation are 64-bit.
 
 ## Contents
 
@@ -25,7 +25,10 @@ for your needs is using the Python Package Index (PyPI). Following good Python
 practice, you will first want to create a new virtual environment that you
 install SaQC into by typing the following in your console:
 
-```sh	
+
+##### On Unix/Mac-systems
+
+```sh
 # if you have not installed venv yet, do so:
 python3 -m pip install --user virtualenv
 	
@@ -34,23 +37,69 @@ cd YOURDIR
 	
 # create virtual environment called "env_saqc"
 python3 -m venv env_saqc
-	
+
 # activate the virtual environment
 source env_saqc/bin/activate
 ```
 
-Note that these instructions are for Unix/Mac-systems, the commands will be a
-little different for Windows.
+##### On Windows-systems
+
+```sh	
+# if you have not installed venv yet, do so:
+py -3 -m pip install --user virtualenv
+	
+# move to the directory where you want to create your virtual environment
+cd YOURDIR
+	
+# create virtual environment called "env_saqc"
+py -3 -m venv env_saqc
+
+# move to the Scripts directory in "env_saqc"
+cd env_saqc/Scripts
+
+# activate the virtual environment
+./activate
+```
 
 ## 2. Get SaQC
 
-Now get saqc via PyPI as well:
+### Via PyPI
+
+Type the following:
+
+##### On Unix/Mac-systems
+
 
 ```sh
-python -m pip install saqc
+python3 -m pip install saqc
+```
+
+##### On Windows-systems
+
+
+```sh
+py -3 -m pip install saqc
+```
+
+
+### From Gitlab repository
+
+Download SaQC directly from the [GitLab-repository](https://git.ufz.de/rdm/saqc) to make sure you use the most recent version:
+
+```sh
+# clone gitlab - repository
+git clone https://git.ufz.de/rdm-software/saqc
+
+# move into the cloned repository
+cd saqc 
+
+# install all required packages 
+pip install -r requirements.txt
+
+# install all required submodules
+git submodule update --init --recursive 
 ```
 
-or download it directly from the [GitLab-repository](https://git.ufz.de/rdm/saqc).
 
 ## 3. Training tour
 
@@ -98,10 +147,21 @@ flags that are set during one test are always passed on to the subsequent one.
 ### Run SaQC
 
 Remember to have your virtual environment activated:
+
+##### On Unix/Mac-systems
+
 ```sh
 source env_saqc/bin/activate
 ```
 
+##### On Windows
+
+```sh
+cd env_saqc/Scripts
+./activate
+```
+
+
 Via your console, move into the folder you downloaded saqc into:
 ```sh
 cd saqc
@@ -109,11 +169,18 @@ cd saqc
 
 From here, you can run saqc and tell it to run the tests from the toy
 config-file on the toy dataset via the `-c` and `-d` options:
+##### On Unix/Mac-systems
 ```sh
-saqc -c ressources/data/myconfig.csv -d ressources/data/data.csv
+python3 -m saqc -c ressources/data/myconfig.csv -d ressources/data/data.csv
 ```
+##### On Windows
+```sh
+py -3 -m saqc -c ressources/data/myconfig.csv -d ressources/data/data.csv
+```
+
+If you installed saqc via PyPI, you can omit the `python -m` prefix and simply call `saqc`.
 
-Which will output this plot:
+The command will output this plot:
 
 ![Toy Plot](../ressources/images/example_plot_1.png "Toy Plot")
 
diff --git a/docs/ParameterDescriptions.md b/docs/ParameterDescriptions.md
index 15eccf64ac0609267f0fcefe8e3973f472112c59..8fcfa0511100177240701bb9338174bf4dfde27a 100644
--- a/docs/ParameterDescriptions.md
+++ b/docs/ParameterDescriptions.md
@@ -25,8 +25,8 @@ The following flag constants are available and can be used to mark the quality o
 | `BAD`       | At least on test failed on the values and is therefore considered to be invalid               |
 | `UNFLAGGED` | The value has not got a flag yet. This might mean, that all tests passed or that no tests ran |
 
-How these aliases will be translated into 'real' flags (output of SaQC) dependes on the flagger implementation
-and might range from numerical values to string concstants
+How these aliases will be translated into 'real' flags (output of SaQC) depends on the [flagging scheme](FlaggingSchemes.md)
+and might range from numerical values to string constants.
 
 ### Numerical Constants
 | Alias    | Description  |
diff --git a/docs/funcs/ConstantDetection.md b/docs/funcs/ConstantDetection.md
index 9fd8185df05dde06af4acc9ba4f5cad9ebab8031..a0f4e580945ca503fdf8608f99fa7e974e72b575 100644
--- a/docs/funcs/ConstantDetection.md
+++ b/docs/funcs/ConstantDetection.md
@@ -17,7 +17,7 @@ constants_flagBasic(window, thresh=0)
 | parameter | data type                                                             | default value | description                                                                                                                  |
 |-----------|-----------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------|
 | window    | integer/[offset string](docs/ParameterDescriptions.md#offset-strings) |               | The minimum count or duration in which the values must be constant to be considered as plateau candidates. See condition (1) |
-| thresh    | float                                                                 |             0 | The maximum difference between values to still considered as constant. See condition (2)                                     |
+| thresh    | float                                                                 |             0 | The maximum difference between values to still be considered constant. See condition (2)                                     |
 
 This functions flags plateaus/series of constant values of length `window` if
 their difference is smaller than `thresh`.
diff --git a/docs/funcs/FormalDescriptions.md b/docs/funcs/FormalDescriptions.md
new file mode 100644
index 0000000000000000000000000000000000000000..12f286add9f9c5a488025d00b6d232eb02c2f38c
--- /dev/null
+++ b/docs/funcs/FormalDescriptions.md
@@ -0,0 +1,86 @@
+# Mathematical descriptions
+
+A collection of detailed mathematical descriptions.
+
+## Index
+
+- [spikes_flagRaise](#spikes_flagraise)
+- [spikes_flagSpektrumBased](#spikes_flagspektrumbased)
+- [breaks_flagSpektrumBased](#breaks_flagspektrumbased)
+- [sm_flagConstants](#sm_flagconstants)
+
+
+## spikes_flagRaise
+
+The value $`x_{k}`$ of a time series $`x`$ with associated 
+timestamps $`t_i`$ is flagged as a raise, if:
+
+1. There is a value $`x_{s}`$ preceding $`x_{k}`$ within `raise_window` range, so that:
+    * $` M = |x_k - x_s | > `$  `thresh` $` > 0`$ 
+2. The weighted average $`\mu^*`$ of the values preceding $`x_{k}`$ within `average_window` range indicates that $`x_{k}`$ does not return from an outlier-like value course, meaning that:  
+    * $` x_k > \mu^* + ( M `$ / `mean_raise_factor` $`)`$  
+3. Additionally, if `min_slope` is not `None`, $`x_{k}`$ is checked for being sufficiently divergent from its predecessor $`x_{k-1}`$, i.e. it is additionally checked whether: 
+    * $`x_k - x_{k-1} > `$ `min_slope` 
+    * $`t_k - t_{k-1} > `$ `min_slope_weight`*`intended_freq`
+
+The weighted average $`\mu^*`$ is calculated with weights $`w_{i}`$, defined by: 
+* $`w_{i} = (t_i - t_{i-1})`$ / `intended_freq`, if $`(t_i - t_{i-1})`$ < `intended_freq` and $`w_i =1`$ otherwise. 
+
+
+## spikes_flagSpektrumBased
+
+The value $`x_{k}`$ of a time series $`x_t`$ with 
+timestamps $`t_i`$ is considered a spike, if:
+
+1. The quotient to its preceding data point exceeds a certain bound:
+    * $` |\frac{x_k}{x_{k-1}}| > 1 + `$ `raise_factor`, or
+    * $` |\frac{x_k}{x_{k-1}}| < 1 - `$ `raise_factor`
+2. The quotient of the second derivative $`x''`$, at the preceding
+   and subsequent timestamps is close enough to 1:
+    * $` |\frac{x''_{k-1}}{x''_{k+1}} | > 1 - `$ `deriv_factor`, and
+    * $` |\frac{x''_{k-1}}{x''_{k+1}} | < 1 + `$ `deriv_factor`
+3. The dataset $`X = x_i, ..., x_{k-1}, x_{k+1}, ..., x_j`$, with 
+   $`|t_{k-1} - t_i| = |t_j - t_{k+1}| =`$ `noise_window` fulfills the 
+   following condition: 
+   `noise_func`$`(X) <`$ `noise_thresh`
+
+## breaks_flagSpektrumBased
+
+A value $`x_k`$ of a time series $`x_t`$ with timestamps $`t_i`$, is considered to be a break, if:
+
+1. $`x_k`$ represents a sufficiently large relative jump:
+
+   $`|\frac{x_k - x_{k-1}}{x_k}| >`$ `thresh_rel`
+
+2. $`x_k`$ represents a sufficient absolute jump:
+
+   $`|x_k - x_{k-1}| >`$ `thresh_abs`
+
+3. The dataset $`X = x_i, ..., x_{k-1}, x_{k+1}, ..., x_j`$, with $`|t_{k-1} - t_i| = |t_j - t_{k+1}| =`$ `first_der_window`
+   fulfills the following condition:
+   
+   $`|x'_k| >`$ `first_der_factor` $` \cdot \bar{X} `$
+   
+   where $`\bar{X}`$ denotes the arithmetic mean of $`X`$.
+
+4. The ratio (last/this) of the second derivatives is close to 1:
+
+   $` 1 -`$ `scnd_der_ratio_margin_1` $`< |\frac{x''_{k-1}}{x''_{k}}| < 1 + `$`scnd_der_ratio_margin_1`
+
+5. The ratio (this/next) of the second derivatives is sufficiently high:
+
+   $`|\frac{x''_{k}}{x''_{k+1}}| > `$`scnd_der_ratio_margin_2`
+   
+## sm_flagConstants   
+
+Any set of consecutive values
+$`x_k,..., x_{k+n}`$ of a time series $`x`$ is flagged, if:
+
+1. $`n > `$`window`
+2. $`\sigma(x_k, x_{k+1},..., x_{k+n}) < `$`thresh`
+3. $`\max(x'_{k-n-s}, x'_{k-n-s+1},..., x'_{k-n+s}) \geq`$ `deriv_min`, with $`s`$ denoting periods per `precipitation_window`
+4. $`\min(x'_{k-n-s}, x'_{k-n-s+1},..., x'_{k-n+s}) \leq`$ `deriv_max`, with $`s`$ denoting periods per `precipitation_window`
+5. $`\mu(x_k, x_{k+1},..., x_{k+n}) \le \max(x) \cdot`$ `tolerance`   
\ No newline at end of file
diff --git a/docs/funcs/Miscellaneous.md b/docs/funcs/Miscellaneous.md
index 8865a60606a6a1d72ef97047e9b2d5a85a3fa4fb..f5ad537241171171d76dc89e44f1a3b809198cd9 100644
--- a/docs/funcs/Miscellaneous.md
+++ b/docs/funcs/Miscellaneous.md
@@ -7,11 +7,13 @@ A collection of unrelated quality check functions.
 - [flagRange](#flagrange)
 - [flagSeasonalRange](#flagseasonalrange)
 - [flagIsolated](#flagisolated)
+- [flagPattern](#flagpattern)
 - [flagMissing](#flagmissing)
 - [clearFlags](#clearflags)
 - [forceFlags](#forceflags)
 
 
+
 ## flagRange
 
 ```
@@ -88,9 +90,32 @@ flagMissing(nodata=NaN)
 | --------- | ---------- | -------------- | -----------                       |
 | nodata    | any        | `NAN`          | A value that defines missing data |
 
-
 The function flags all values indicating missing data.
 
+
+
+
+## flagPattern
+
+```                            
+flagPattern(ref_datafield, sample_freq = '15 Min', method = 'dtw', min_distance = None)
+``` 
+
+
+| parameter             | data type                                                     | default value | description                                                                                                                                                |
+|-----------------------|---------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ref_datafield         | string                                                        |               | Name of the reference data field that holds the pattern                                          |
+| sample_freq           | string                                                        | `"15 Min"`    | Sample frequency to which the data is harmonized                                   |
+| method                | string                                                        | `"dtw"`       | "dtw" for Dynamic Time Warping (DTW), "wavelet" for the Wavelet Pattern Recognition Algorithm                                                          |
+| min_distance          | float                                                         | `None`        | For the DTW algorithm: the minimum distance between two graphs for them to be classified as "different"                                      |
+
+
+Implementation of the pattern recognition algorithms introduced in [Pattern Recognition](https://git.ufz.de/rdm-software/saqc/-/wikis/Pattern-Recognition). 
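+
+A minimal configuration sketch (`x` and `x_pattern` are placeholder variable names;
+the parameters correspond to the table above):
+
+```
+varname ; test
+#-------;------------------------------------------------------
+x       ; flagPattern(ref_datafield="x_pattern", method="dtw")
+```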
+
+
+
+
+
 ## clearFlags
 
 ```
diff --git a/docs/funcs/SoilMoisture.md b/docs/funcs/SoilMoisture.md
index 107371e2de4eebd398c984bfcb3d77cb16b50215..705456715f6f2ca78525df44a0bb86ca304c4b76 100644
--- a/docs/funcs/SoilMoisture.md
+++ b/docs/funcs/SoilMoisture.md
@@ -31,7 +31,7 @@ sm_flagSpikes(raise_factor=0.15, deriv_factor=0.2,
 | smooth_window   | [offset string](docs/ParameterDescriptions.md#offset-strings) | `"3h"`        |
 | smooth_poly_deg | integer                                                       | `2`           |
 
-The Function is a wrapper around `spikes_flagSpektrumBased`
+The function is a wrapper around `spikes_flagSpektrumBased`
 with a set of default parameters referring to [1]. For a complete description of 
 the algorithm and the available parameters please refer to the documentation of 
 [flagSpikes_spektrumBased](docs/funcs/SpikeDetection.md#spikes_spektrumbased)
@@ -63,7 +63,7 @@ sm_flagBreaks(thresh_rel=0.1, thresh_abs=0.01,
 | smooth_poly_deg       | integer                                                       | `2`           |
 
 
-The Function is a wrapper around `breaks_flagSpektrumBased`
+The function is a wrapper around `breaks_flagSpektrumBased`
 with a set of default parameters referring to [1]. For a complete description of 
 the algorithm and the available parameters please refer to the documentation of 
 [breaks_spektrumBased](docs/funcs/BreakDetection.md#breaks_spektrumbased).
@@ -119,7 +119,7 @@ NOTE:
 - The time series is expected to be harmonized to an
   [equidistant frequency grid](docs/funcs/TimeSeriesHarmonization.md)
 
-This Function is based on [1] and all default parameter values are taken from this publication.
+This function is based on [1] and all default parameter values are taken from this publication.
 
 [1] Dorigo, W. et al: Global Automated Quality Control of In Situ Soil Moisture Data
     from the international Soil Moisture Network. 2013. Vadoze Zone J.
@@ -143,7 +143,7 @@ This function flags soil moisture values if the soil temperature
 (given in `soil_temp_variable`) drops below `frost_thresh`
 within a period of +/- `window`.
 
-This Function is an implementation of the soil temperature based flagging
+This function is an implementation of the soil temperature based flagging
 presented in [1] and all default parameter values are taken from this
 publication.
 
@@ -199,7 +199,7 @@ is flagged, if:
    $` y_{k-j} + y_{k-j+1} + ... + y_{k} \le `$ `sensor_depth` $`\cdot`$ `sensor_accuracy` $`\cdot`$ `soil_porosity`
    
 
-This Function is an implementation of the precipitation based flagging
+This function is an implementation of the precipitation based flagging
 presented in [1] and all default parameter values are taken from this
 publication.
 
@@ -222,7 +222,7 @@ sm_flagRandomForest(references, window_values, window_flags, path)
 | path          | string                    |               | Path to the respective model object, i.e. its name and the respective value of the grouping variable. e.g. "models/model_0.2.pkl" |
 
 
-This Function uses pre-trained machine-learning model objects for flagging. 
+This function uses pre-trained machine-learning model objects for flagging. 
 This requires training a model by use of the [training script](../ressources/machine_learning/train_machine_learning.py) provided. 
 For flagging, inputs to the model are the data of the variable of interest, 
 data of reference variables and the automatic flags that were assigned by other 
diff --git a/environment.yml b/environment.yml
index c08ddee3afdb51bd620f7b7c36e338dac72d42df..aabe58efcf1dab30195217d3e75e53abe969cf63 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,6 +10,10 @@ dependencies:
   - click
   - scikit-learn
   - pyarrow
+  - PyWavelets
   - pip
   - pip:
     - python-intervals
+    - dtw
+    - mlxtend
+    - outlier-utils
diff --git a/requirements.txt b/requirements.txt
index 654dadb61ba251d53ae2d9c51f93079eddfed1d5..23e059f68308f73f7ac60e41726d1f791a686ce0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,28 +1,35 @@
-attrs==19.3.0
-Click==7.0
+attrs==20.3.0
+Click==7.1.2
 cycler==0.10.0
-importlib-metadata==1.5.0
-joblib==0.14.1
-kiwisolver==1.1.0
-llvmlite==0.31.0
-matplotlib==3.1.3
-more-itertools==8.2.0
-numba==0.48.0
-numpy==1.18.1
-packaging==20.1
-pandas==1.0.1
+dios==0.6.0
+dtw==1.4.0
+kiwisolver==1.3.1
+llvmlite==0.35.0
+importlib-metadata==3.3.0
+joblib==1.0.0
+matplotlib==3.3.3
+mlxtend==0.18.0
+more-itertools==8.6.0
+numba==0.52.0
+numpy==1.19.4
+outlier==0.2
+utils==1.0.1
+outlier-utils==0.0.3
+packaging==20.8
+pandas==1.1.4
 pluggy==0.13.1
-py==1.8.1
-pyarrow==0.16.0
-pyparsing==2.4.6
+pyparsing==2.4.7
+py==1.10.0
+pyarrow==1.0.1
 pytest-lazy-fixture==0.6.3
-pytest==5.3.5
+pytest==6.2.1
 python-dateutil==2.8.1
-python-intervals==1.10.0
-pytz==2019.3
-scikit-learn==0.22.1
-scipy==1.4.1
-six==1.14.0
-wcwidth==0.1.8
-zipp==2.2.0
+python-intervals==1.10.0.post1
+pytz==2020.5
+PyWavelets==1.1.1
+zipp==3.4.0
+wcwidth==0.2.5
+scipy==1.6.0
+scikit-learn==0.23.2
+six==1.15.0
 astor==0.8.1
diff --git a/ressources/data/config_ci.csv b/ressources/data/config_ci.csv
index 58ec29a58eb50edde95a73b78c8383688b937de5..f631338ade105552e37c61d16ea72aab50dab106 100644
--- a/ressources/data/config_ci.csv
+++ b/ressources/data/config_ci.csv
@@ -1,7 +1,7 @@
 varname;test;plot
 SM2;harm_shift2Grid(freq="15Min");False
-SM1;flagRange(min=10, max=60);False
+'.*';flagRange(min=10, max=60);False
 SM2;flagMissing(nodata=NAN);False
 SM2;flagRange(min=10, max=60);False
 SM2;spikes_flagMad(window="30d", z=3.5);False
-Dummy;flagGeneric(func=isflagged(SM1) & isflagged(SM2) & (SM1 > 0))
+Dummy;flagGeneric(func=(isflagged(SM1) | isflagged(SM2)))
diff --git a/ressources/machine_learning/train_machine_learning.py b/ressources/machine_learning/train_machine_learning.py
index e7f9272dab21e11ec18e19620dcca5c664887e4b..13f80d21e0cc7fd7096e1704913b603816de2f00 100644
--- a/ressources/machine_learning/train_machine_learning.py
+++ b/ressources/machine_learning/train_machine_learning.py
@@ -75,7 +75,7 @@ def trainML(
 
     def _refCalc(reference, window_values):
         # Helper function for calculation of moving window values
-        outdata = pd.DataFrame()
+        outdata = dios.DictOfSeries()
         name = reference.name
         # derive gradients from reference series
         outdata[name + "_Dt_1"] = reference - reference.shift(1)  # gradient t vs. t-1
@@ -97,7 +97,7 @@ def trainML(
     # define Test/Training
     data = data.assign(TeTr="Tr")
     # create empty df for training data
-    traindata = pd.DataFrame()
+    traindata = dios.DictOfSeries()
     # calculate windows
     for sensor_id in data[sensor_field].unique():
         print(sensor_id)
@@ -194,7 +194,7 @@ def trainML(
 
     endtime = time.time()
     print("TIME ELAPSED: " + str(datetime.timedelta(seconds=endtime - starttime)) + " hours")
-    outinfo_df = pd.DataFrame.from_records(
+    outinfo_df = dios.DictOfSeries.from_records(
         outinfo_df,
         columns=[
             group_field,
diff --git a/saqc/__init__.py b/saqc/__init__.py
index bf7de72ee0f4a61580ee9122d237fb0cb321e9f5..ddc4f2f02f3121c21a65a3f60c43986e657a2413 100644
--- a/saqc/__init__.py
+++ b/saqc/__init__.py
@@ -1,8 +1,8 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-__version__ = "1.3.0"
+__version__ = "1.4"
 
-from saqc.core.core import run
+from saqc.core.core import SaQC
 from saqc.flagger import *
-from saqc.funcs import register
+from saqc.core.register import register
diff --git a/saqc/__main__.py b/saqc/__main__.py
index 20cb2ec562be34054307c06f4015afcaf23f5009..806377faa01a955c8105ace70425a706ae5ebdbc 100644
--- a/saqc/__main__.py
+++ b/saqc/__main__.py
@@ -1,14 +1,22 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import logging
+from functools import partial
+from pathlib import Path
+
 import click
 
 import numpy as np
 import pandas as pd
+import pyarrow as pa
+import pyarrow.parquet
 
-from saqc.core import run
+from saqc.core import SaQC
 from saqc.flagger import CategoricalFlagger
-from saqc.flagger.dmpflagger import DmpFlagger, FlagFields
+from saqc.flagger.dmpflagger import DmpFlagger
+
+
+logger = logging.getLogger("SaQC")
 
 
 FLAGGERS = {
@@ -18,6 +26,43 @@ FLAGGERS = {
 }
 
 
+def _setup_logging(loglvl):
+    logger.setLevel(loglvl)
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter("[%(asctime)s][%(name)s][%(levelname)s]: %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+
+def setupIO(nodata):
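+    # map file extensions to reader/writer callables;
+    # `nodata` is used as the NA representation when writing CSV output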
+    reader = {
+        ".csv"     : partial(pd.read_csv, index_col=0, parse_dates=True),
+        ".parquet" : pd.read_parquet
+    }
+
+    writer = {
+        ".csv" : partial(pd.DataFrame.to_csv, header=True, index=True, na_rep=nodata),
+        ".parquet" : lambda df, outfile: pa.parquet.write_table(pa.Table.from_pandas(df), outfile)
+    }
+    return reader, writer
+
+
+def readData(reader_dict, fname):
+    extension = Path(fname).suffix
+    reader = reader_dict.get(extension)
+    if not reader:
+        raise ValueError(f"Unsupported file format '{extension}', use one of {tuple(reader.keys())}")
+    return reader(fname)
+
+
+def writeData(writer_dict, df, fname):
+    extension = Path(fname).suffix
+    writer = writer_dict.get(extension)
+    if not writer:
+        raise ValueError(f"Unsupported file format '{extension}', use one of {tuple(writer.keys())}")
+    writer(df, fname)
+
+
 @click.command()
 @click.option(
     "-c", "--config", type=click.Path(exists=True), required=True, help="path to the configuration file",
@@ -36,34 +81,35 @@ FLAGGERS = {
 @click.option("--fail/--no-fail", default=True, help="whether to stop the program run on errors")
 def main(config, data, flagger, outfile, nodata, log_level, fail):
 
-    data = pd.read_csv(data, index_col=0, parse_dates=True,)
+    _setup_logging(log_level)
+    reader, writer = setupIO(nodata)
 
-    data_result, flagger_result = run(
-        config_file=config,
-        flagger=FLAGGERS[flagger],
-        data=data,
-        nodata=nodata,
-        log_level=log_level,
-        error_policy="raise" if fail else "warn",
-    )
+    data = readData(reader, data)
 
-    if outfile:
-        flags = flagger_result.getFlags()
-        flags_out = flags.where((flags.isnull() | flagger_result.isFlagged()), flagger_result.GOOD)
+    saqc = SaQC(flagger=FLAGGERS[flagger], data=data, nodata=nodata, error_policy="raise" if fail else "warn",)
 
-        if isinstance(flagger_result, DmpFlagger):
-            flags = flagger_result._flags
-            flags.loc[flags_out.index, (slice(None), FlagFields.FLAG)] = flags_out.values
-            flags_out = flags
+    data_result, flagger_result = saqc.readConfig(config).getResult(raw=True)
 
-        if not isinstance(flags_out.columns, pd.MultiIndex):
-            flags_out.columns = pd.MultiIndex.from_product([flags.columns, ["flag"]])
+    if outfile:
+        data_result = data_result.to_df()
+        flags = flagger_result.flags.to_df()
+        flags_flagged = flagger_result.isFlagged().to_df()
 
-        data_result.columns = pd.MultiIndex.from_product([data_result.columns, ["data"]])
+        flags_out = flags.where((flags.isnull() | flags_flagged), flagger_result.GOOD)
+        fields = {"data": data_result, "flags": flags_out}
 
-        # flags_out.columns = flags_out.columns.map("_".join)
-        data_out = data_result.join(flags_out)
-        data_out.sort_index(axis="columns").to_csv(outfile, header=True, index=True, na_rep=nodata)
+        if isinstance(flagger_result, DmpFlagger):
+            fields["quality_flag"] = fields.pop("flags")
+            fields["quality_comment"] = flagger_result.comments.to_df()
+            fields["quality_cause"] = flagger_result.causes.to_df()
+
+        out = (
+            pd.concat(fields.values(), axis=1, keys=fields.keys())
+            .reorder_levels(order=[1, 0], axis=1)
+            .sort_index(axis=1, level=0, sort_remaining=False)
+        )
+        out.columns = out.columns.rename(["", ""])
+        writeData(writer, out, outfile)
 
 
 if __name__ == "__main__":
diff --git a/saqc/core/__init__.py b/saqc/core/__init__.py
index c319709cfae130c3d142f355e1ea43d99db29bdf..2f42342431e06979befbf39bbdf2e300b38ef2aa 100644
--- a/saqc/core/__init__.py
+++ b/saqc/core/__init__.py
@@ -1,4 +1,5 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from saqc.core.core import run
+from saqc.core.core import SaQC, logger
+from saqc.core.register import register
diff --git a/saqc/core/config.py b/saqc/core/config.py
index 89a598f86051a0e0904e49c3f66c90b90538577a..e6a9e9f6307f8b2de08f560df56dc691e30f8e86 100644
--- a/saqc/core/config.py
+++ b/saqc/core/config.py
@@ -6,13 +6,6 @@ class Fields:
     VARNAME = "varname"
     START = "start_date"
     END = "end_date"
-    TESTS = "test*"
+    TEST = "test"
     PLOT = "plot"
     LINENUMBER = "line"
-
-
-class Params:
-    FLAG_GENERIC = "flagGeneric"
-    PROC_GENERIC = "procGeneric"
-    GENERIC_ARGS = "func_arguments"
-    FUNC = "func"
diff --git a/saqc/core/core.py b/saqc/core/core.py
index ff54e84c8b3a4a5ec80bbbc7cc58204562c0890e..13b7a7087c29012ef486d3a62c1897006570727f 100644
--- a/saqc/core/core.py
+++ b/saqc/core/core.py
@@ -1,172 +1,412 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
+
+"""
+TODOS:
+  - integrate plotting into the api
+  - `data` and `flagger` as arguments to `getResult`
+"""
+
 import logging
+from copy import deepcopy
+from typing import Any, Dict, List, Optional, Tuple, Callable, Sequence
+from dataclasses import dataclass, replace
 
-import numpy as np
 import pandas as pd
+import dios
+import numpy as np
+import timeit
+import inspect
 
-from saqc.core.reader import readConfig, checkConfig
-from saqc.core.config import Fields
-from saqc.core.evaluator import evalExpression
 from saqc.lib.plotting import plotHook, plotAllHook
-from saqc.lib.tools import combineDataFrames
 from saqc.flagger import BaseFlagger, CategoricalFlagger, SimpleFlagger, DmpFlagger
+from saqc.core.register import FUNC_MAP
+from saqc.funcs.proc_functions import proc_copy
 
 
 logger = logging.getLogger("SaQC")
 
 
-def _collectVariables(meta, data):
-    """
-    find every relevant variable
-    """
-    # NOTE: get to know every variable from meta
-    variables = list(data.columns)
-    for idx, configrow in meta.iterrows():
-        varname = configrow[Fields.VARNAME]
-        # assign = configrow[Fields.ASSIGN]
-        if varname in variables:
-            continue
-        # if (varname in data):  # or (varname not in variables and assign is True):
-        variables.append(varname)
-    return variables
+@dataclass
+class FuncCtrl:
+    "ctrl_kws"
+    masking: str          # one of: "none", "field", "all"
+    plot: bool
+    lineno: Optional[int] = None
+    expr: Optional[str] = None
+    inplace: bool = False
+    to_mask: Any = None   # flagger.FLAG constants or a list of those
+
+
+@dataclass
+class Func:
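+    "A single deferred test function call, queued on `SaQC._to_call` and executed by `evaluate`"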
+    name: str
+    func: Callable[[pd.DataFrame, str, BaseFlagger, Any], Tuple[pd.DataFrame, BaseFlagger]]
+    field: str
+    kwargs: Dict[str, Any]
+    ctrl: FuncCtrl
+    regex: bool = False
+    target: Optional[str] = None
+    args: Tuple[Any] = tuple()
+
+
+def _handleErrors(exc, func, policy):
+    msg = f"Execution failed. Variable: '{func.field}', "
+    if func.ctrl.lineno is not None and func.ctrl.expr is not None:
+        msg += f"Config line {func.ctrl.lineno}: '{func.ctrl.expr}', "
+    else:
+        msg += f"Function: {func.name}(), parameters: '{func.kwargs}', "
+    msg += f"Exception:\n{type(exc).__name__}: {exc}"
+
+    if policy == "ignore":
+        logger.debug(msg)
+    elif policy == "warn":
+        logger.warning(msg)
+    else:
+        logger.error(msg)
+        raise exc
+
+
+def _prepInput(flagger, data, flags):
+    dios_like = (dios.DictOfSeries, pd.DataFrame)
 
+    if isinstance(data, pd.Series):
+        data = data.to_frame()
 
-def _checkInput(data, flags, flagger):
-    if not isinstance(data, pd.DataFrame):
-        raise TypeError("data must be of type pd.DataFrame")
+    if not isinstance(data, dios_like):
+        raise TypeError("'data' must be of type pd.Series, pd.DataFrame or dios.DictOfSeries")
 
-    if isinstance(data.index, pd.MultiIndex):
-        raise TypeError("the index of data is not allowed to be a multiindex")
+    if isinstance(data, pd.DataFrame):
+        if isinstance(data.index, pd.MultiIndex) or isinstance(data.columns, pd.MultiIndex):
+            raise TypeError("'data' should not use MultiIndex")
+        data = dios.to_dios(data)
 
-    if isinstance(data.columns, pd.MultiIndex):
-        raise TypeError("the columns of data is not allowed to be a multiindex")
+    if not hasattr(data.columns, "str"):
+        raise TypeError("expected dataframe columns of type string")
 
     if not isinstance(flagger, BaseFlagger):
+        # NOTE: we should generate that list automatically,
+        #       it won't ever be complete otherwise
         flaggerlist = [CategoricalFlagger, SimpleFlagger, DmpFlagger]
-        raise TypeError(f"flagger must be of type {flaggerlist} or any inherit class from {BaseFlagger}")
+        raise TypeError(f"'flagger' must be of type {flaggerlist} or a subclass of {BaseFlagger}")
 
-    if flags is None:
-        return
-
-    if not isinstance(flags, pd.DataFrame):
-        raise TypeError("flags must be of type pd.DataFrame")
+    if flags is not None:
+        if not isinstance(flags, dios_like):
+            raise TypeError("'flags' must be of type dios.DictOfSeries or pd.DataFrame")
 
-    if isinstance(data.index, pd.MultiIndex):
-        raise TypeError("the index of data is not allowed to be a multiindex")
+        if isinstance(flags, pd.DataFrame):
+            if isinstance(flags.index, pd.MultiIndex) or isinstance(flags.columns, pd.MultiIndex):
+                raise TypeError("'flags' should not use MultiIndex")
+            flags = dios.to_dios(flags)
 
-    if len(data) != len(flags):
-        raise ValueError("the index of flags and data has not the same length")
+        # NOTE: do not test all columns as they do not necessarily need to be the same
+        cols = flags.columns & data.columns
+        if not (flags[cols].lengths == data[cols].lengths).all():
+            raise ValueError("the length of 'flags' and 'data' need to be equal")
 
-    # NOTE: do not test columns as they not necessarily must be the same
+    if flagger.initialized:
+        diff = data.columns.difference(flagger.getFlags().columns)
+        if not diff.empty:
+            raise ValueError("Missing columns in 'flagger': '{list(diff)}'")
 
+    return data, flags
 
-def _handleErrors(exc, configrow, test, policy):
-    line = configrow[Fields.LINENUMBER]
-    msg = f"config, line {line}, test: '{test}' failed with:\n{type(exc).__name__}: {exc}"
-    if policy == "ignore":
-        logger.debug(msg)
-    elif policy == "warn":
-        logger.warning(msg)
-    else:
-        raise Exception(msg)
 
+def _setup():
+    # NOTE:
+    # the import is needed to trigger the registration
+    # of the built-in (test-)functions
+    import saqc.funcs
 
-def _setup(loglevel):
+    # warnings
     pd.set_option("mode.chained_assignment", "warn")
     np.seterr(invalid="ignore")
 
-    # logging setting
-    logger.setLevel(loglevel)
-    handler = logging.StreamHandler()
-    formatter = logging.Formatter("[%(asctime)s][%(name)s][%(levelname)s]: %(message)s")
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-
-
-def run(
-    config_file: str,
-    flagger: BaseFlagger,
-    data: pd.DataFrame,
-    flags: pd.DataFrame = None,
-    nodata: float = np.nan,
-    log_level: str = "INFO",
-    error_policy: str = "raise",
-) -> (pd.DataFrame, BaseFlagger):
-
-    _setup(log_level)
-    _checkInput(data, flags, flagger)
-    config = readConfig(config_file, data)
-
-    # split config into the test and some 'meta' data
-    tests = config.filter(regex=Fields.TESTS)
-    meta = config[config.columns.difference(tests.columns)]
-
-    # prepapre the flags
-    flag_cols = _collectVariables(meta, data)
-    flagger = flagger.initFlags(data=pd.DataFrame(index=data.index, columns=flag_cols))
-    if flags is not None:
-        flagger = flagger.setFlagger(flagger.initFlags(flags=flags))
+
+_setup()
+
+
+class SaQC:
+    def __init__(self, flagger, data, flags=None, nodata=np.nan, to_mask=None, error_policy="raise"):
+        data, flags = _prepInput(flagger, data, flags)
+        self._data = data
+        self._nodata = nodata
+        self._to_mask = to_mask
+        self._flagger = self._initFlagger(data, flagger, flags)
+        self._error_policy = error_policy
+        # NOTE: will be filled by calls to `_wrap`
+        self._to_call: List[Func] = []  # todo fix the access everywhere
+
+    def _initFlagger(self, data, flagger, flags):
+        """ Init the internal flagger object.
+
+        Ensures that all data columns are present and that user-passed flags
+        from a flags frame and/or an already initialised flagger are used.
+        If columns overlap, the passed flagger object takes precedence.
+        """
+        # ensure all data columns
+        merged = flagger.initFlags(data)
+        if flags is not None:
+            merged = merged.merge(flagger.initFlags(flags=flags), inplace=True)
+        if flagger.initialized:
+            merged = merged.merge(flagger, inplace=True)
+        return merged
+
+    def readConfig(self, fname):
+        from saqc.core.reader import readConfig
+        out = deepcopy(self)
+        out._to_call.extend(readConfig(fname, self._flagger))
+        return out
+
+    def _expandFields(self, func, variables) -> Sequence[Func]:
+        if not func.regex:
+            return [func]
+
+        out = []
+        for field in variables[variables.str.match(func.field)]:
+            out.append(replace(func, field=field))
+        return out
+
+    def evaluate(self):
+        """
+        Realize all the registered calculations and return an updated SaQC object.
+
+        Returns
+        -------
+        An updated SaQC object incorporating the requested computations.
+        """
+
+        # NOTE: It would be nicer to separate the plotting into its own
+        #       method instead of intermingling it with the computation
+        data, flagger = self._data, self._flagger
+
+        for func in self._to_call:
+            for func in self._expandFields(func, data.columns.union(flagger._flags.columns)):
+                logger.debug(f"processing: {func.field}, {func.name}, {func.kwargs}")
+
+                try:
+                    t0 = timeit.default_timer()
+                    data_result, flagger_result = _saqcCallFunc(func, data, flagger)
+
+                except Exception as e:
+                    t1 = timeit.default_timer()
+                    logger.debug(f"{func.name} failed after {t1 - t0} sec")
+                    _handleErrors(e, func, self._error_policy)
+                    continue
+                else:
+                    t1 = timeit.default_timer()
+                    logger.debug(f"{func.name} finished after {t1 - t0} sec")
+
+                if func.ctrl.plot:
+                    plotHook(
+                        data_old=data,
+                        data_new=data_result,
+                        flagger_old=flagger,
+                        flagger_new=flagger_result,
+                        sources=[],
+                        targets=[func.field],
+                        plot_name=func.name,
+                    )
+
+                data = data_result
+                flagger = flagger_result
+
+        if any([fdump.ctrl.plot for fdump in self._to_call]):
+            plotAllHook(data, flagger)
+
+        # This is much faster for big datasets than throwing everything into the constructor,
+        # simply because of _initFlagger -> merge() -> mergeDios() over all columns.
+        new = SaQC(SimpleFlagger(), dios.DictOfSeries(), nodata=self._nodata, error_policy=self._error_policy)
+        new._flagger, new._data = flagger, data
+        return new
+
+    def getResult(self, raw=False):
+        """
+        Realize the registered calculations and return the results.
+
+        Returns
+        -------
+        data, flagger: (DictOfSeries, DictOfSeries)
+        """
+
+        realization = self.evaluate()
+        data, flagger = realization._data, realization._flagger
+        if raw is False:
+            return data.to_df(), flagger.toFrame()
+        return data, flagger
+
+    def _wrap(self, func_name):
+        def inner(field: str, *args, target: str = None, regex: bool = False, to_mask=None, plot=False, inplace=False, **kwargs):
+
+            kwargs.setdefault('nodata', self._nodata)
+
+            func = FUNC_MAP[func_name]["func"]
+
+            ctrl_kws = FuncCtrl(
+                masking=FUNC_MAP[func_name]["masking"],
+                to_mask=to_mask or self._to_mask,
+                plot=plot,
+                inplace=inplace,
+                )
+
+            func_dump = Func(
+                name=func_name,
+                func=func,
+                field=field,
+                target=target if target is not None else field,
+                regex=regex,
+                args=args,
+                kwargs=kwargs,
+                ctrl=ctrl_kws,
+            )
+
+            out = self if inplace else self.copy()
+            out._to_call.append(func_dump)
+
+            return out
+
+        return inner
+
+    def __getattr__(self, key):
+        """
+        All failing attribute accesses are redirected to
+        __getattr__. We use this mechanism to expose the
+        registered functions as `SaQC` methods without
+        actually implementing them.
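+
+        E.g. an access like `qc.flagDummy` (name illustrative) returns the
+        deferred-call wrapper built by `_wrap("flagDummy")`, provided
+        "flagDummy" is a key in `FUNC_MAP`.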
+        """
+        if key not in FUNC_MAP:
+            raise AttributeError(f"no such attribute: '{key}'")
+        return self._wrap(key)
+
+    def copy(self):
+        return deepcopy(self)
+
+
+def _saqcCallFunc(func_dump, data, flagger):
 
     # NOTE:
-    # this checks comes late, but the compilation of
-    # user-test needs fully prepared flags
-    checkConfig(config, data, flagger, nodata)
+    # We ensure that all columns in data have an equivalent column in flags;
+    # the flagger may hold additional columns, though.
+    assert data.columns.difference(flagger.getFlags().columns).empty
+
+    field = func_dump.field
+    target = func_dump.target
+    to_mask = func_dump.ctrl.to_mask
+    masking = func_dump.ctrl.masking
+
+    if (target != field) and (func_dump.regex is False):
+        data, flagger = proc_copy(data, field, flagger, target)
+        field = target
+
+    if masking == 'all':
+        columns = data.columns
+    elif masking == 'none':
+        columns = []
+    elif masking == 'field':
+        columns = [field]
+    else:
+        raise ValueError(f"wrong use of `register(masking={masking})`")
+
+    # warn if the user explicitly passes `to_mask=..` to a function that is
+    # decorated with `register(masking='none')`, in which case `to_mask` is ignored.
+    if masking == 'none' and to_mask not in (None, []):
+        logging.warning("`to_mask` is given, but the test ignores masking. Please refer to the documentation: TODO")
+    to_mask = flagger.BAD if to_mask is None else to_mask
+
+    data_in, mask = _maskData(data, flagger, columns, to_mask)
+    data_result, flagger_result = func_dump.func(
+        data_in, field, flagger,
+        *func_dump.args, func_name=func_dump.name, **func_dump.kwargs)
+    data_result = _unmaskData(data, mask, data_result, flagger_result, to_mask)
+
+    # we check the passed function-kwargs after the actual call, because now "hard" errors would already have been
+    # raised (e.g. `TypeError: got multiple values for argument 'data'`, when the user passes data=...)
+    _warnForUnusedKwargs(func_dump, flagger)
+
+    return data_result, flagger_result
+
+
+def _maskData(data, flagger, columns, to_mask):
+    # TODO: this is heavily undertested
+    mask = flagger.isFlagged(field=columns, flag=to_mask, comparator='==')
+    data = data.copy()
+    for c in columns:
+        col_mask = mask[c].values
+        if np.any(col_mask):
+            col_data = data[c].values.astype(np.float64)
+            col_data[col_mask] = np.nan
+            data[c] = col_data
+    return data, mask
+
+
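+# Worked sketch of the mask/unmask round trip (values illustrative):
+#   data['x'] = [1.0, 2.0, 3.0], flags of 'x' = [UNFLAGGED, BAD, UNFLAGGED], to_mask = BAD
+#   _maskData   -> masked data_in['x'] = [1.0, nan, 3.0], mask['x'] = [False, True, False]
+#   _unmaskData -> restores the original 2.0 where the position is still flagged,
+#                  the test left a NaN there and the index is unchanged.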
+def _unmaskData(data_old, mask_old, data_new, flagger_new, to_mask):
+    # TODO: this is heavily undertested
 
     # NOTE:
-    # the outer loop runs over the flag tests, the inner one over the
-    # variables. Switching the loop order would complicate the
-    # reference to flags from other variables within the dataset
-    for _, testcol in tests.iteritems():
-
-        # NOTE: just an optimization
-        if testcol.dropna().empty:
-            continue
-
-        for idx, configrow in meta.iterrows():
-
-            # store config params in some handy variables
-            varname = configrow[Fields.VARNAME]
-            start_date = configrow[Fields.START]
-            end_date = configrow[Fields.END]
-
-            func = testcol[idx]
-            if pd.isnull(func):
-                continue
-
-            if varname not in data and varname not in flagger.getFlags():
-                continue
-
-            # NOTE:
-            # time slicing support is currently disabled
-            # prepare the data for the tests
-            # data_chunk = data.loc[start_date:end_date]
-            data_chunk = data
-            if data_chunk.empty:
-                continue
-            flagger_chunk = flagger.getFlagger(loc=data_chunk.index)
-
-            try:
-                # actually run the tests
-                data_chunk_result, flagger_chunk_result = evalExpression(
-                    func, data=data_chunk, field=varname, flagger=flagger_chunk, nodata=nodata,
-                )
-            except Exception as e:
-                _handleErrors(e, configrow, func, error_policy)
-                continue
+    # we only need to consider columns that were masked
+    # and are still present in the new data.
+    # this throws out:
+    #  - any newly assigned columns
+    #  - columns that were excluded from masking
+    columns = mask_old.dropempty().columns.intersection(data_new.dropempty().columns)
+    mask_new = flagger_new.isFlagged(field=columns, flag=to_mask, comparator="==")
+
+    for col in columns:
+        was_masked = mask_old[col]
+        is_masked = mask_new[col]
+
+        # if the index changed we just go with the new data.
+        # A test should use `register(masking='none')` if it changes
+        # the index but does not want to end up with NaNs at all flagged locations.
+        if was_masked.index.equals(is_masked.index):
+            mask = was_masked.values & is_masked.values & data_new[col].isna().values
+
+            # reapplying old values on masked positions
+            if np.any(mask):
+                data = np.where(mask, data_old[col].values, data_new[col].values)
+                data_new[col] = pd.Series(data=data, index=is_masked.index)
+
+    return data_new
+
+
+def _warnForUnusedKwargs(func_dump, flagger):
+    """ Warn for unused kwargs, passed to a SaQC.function.
+
+    Parameters
+    ----------
+    func_dump: Func
+        SaQC-internal data structure that holds all function info.
+    flagger: saqc.flagger.BaseFlagger
+        Flagger object.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    A single warning is emitted via the logging module if any unused
+    keyword arguments are detected, naming each of them.
+    """
+    sig_kws = inspect.signature(func_dump.func).parameters
 
-            if configrow[Fields.PLOT]:
-                plotHook(
-                    data_chunk_result, flagger_chunk, flagger_chunk_result, varname, func,
-                )
+    # we need to ignore kwargs that are injected or
+    # used to control the flagger
+    ignore = flagger.signature + ('nodata',)
+
+    missing = []
+    for kw in func_dump.kwargs:
+        # there is no need to check the parameter kind
+        # (KEYWORD_ONLY, VAR_KEYWORD or POSITIONAL_OR_KEYWORD),
+        # because an invalid call would have raised an error beforehand.
+        if kw not in sig_kws and kw not in ignore:
+            missing.append(kw)
 
-            # NOTE:
-            # time slicing support is currently disabled
-            # flagger = flagger.setFlagger(flagger_chunk_result)
-            # data = combineDataFrames(data, data_chunk_result)
-            flagger = flagger_chunk_result
-            data = data_chunk_result
+    if missing:
+        missing = ', '.join(missing)
+        logging.warning(f"Unused argument(s): {missing}")
 
-    plotAllHook(data, flagger)
 
-    return data, flagger
diff --git a/saqc/core/evaluator/__init__.py b/saqc/core/evaluator/__init__.py
deleted file mode 100644
index 9376d550d128f9783216d32ed27f0c74617f6196..0000000000000000000000000000000000000000
--- a/saqc/core/evaluator/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from saqc.core.evaluator.evaluator import (
-    compileExpression,
-    evalExpression,
-    compileTree,
-    parseExpression,
-    initLocalEnv,
-    evalCode,
-)
-
-from saqc.core.evaluator.checker import DslChecker, ConfigChecker
-
-from saqc.core.evaluator.transformer import DslTransformer, ConfigTransformer
diff --git a/saqc/core/evaluator/checker.py b/saqc/core/evaluator/checker.py
deleted file mode 100644
index 3e116a920a300f84a60fea807ddbe29c7eb1abb2..0000000000000000000000000000000000000000
--- a/saqc/core/evaluator/checker.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import ast
-
-from saqc.funcs.register import FUNC_MAP
-from saqc.core.config import Params
-
-
-class DslChecker(ast.NodeVisitor):
-
-    SUPPORTED = (
-        ast.Expression,
-        ast.UnaryOp,
-        ast.BinOp,
-        ast.BitOr,
-        ast.BitAnd,
-        ast.Num,
-        ast.Compare,
-        ast.Add,
-        ast.Sub,
-        ast.Mult,
-        ast.Div,
-        ast.Pow,
-        ast.Mod,
-        ast.USub,
-        ast.Eq,
-        ast.NotEq,
-        ast.Gt,
-        ast.Lt,
-        ast.GtE,
-        ast.LtE,
-        ast.Invert,
-        ast.Name,
-        ast.Load,
-        ast.Call,
-    )
-
-    def __init__(self, environment):
-        self.environment = environment
-
-    def visit_Call(self, node):
-        func_name = node.func.id
-        if func_name not in self.environment:
-            raise NameError(f"unspported function: '{func_name}'")
-        self.generic_visit(node)
-
-    def visit_Name(self, node):
-        name = node.id
-        if name not in self.environment and name not in self.environment["variables"]:
-            raise NameError(f"unknown variable: '{name}'")
-        self.generic_visit(node)
-
-    def generic_visit(self, node):
-        if not isinstance(node, self.SUPPORTED):
-            raise TypeError(f"invalid expression: '{node}'")
-        return super().generic_visit(node)
-
-
-class ConfigChecker(ast.NodeVisitor):
-
-    SUPPORTED_NODES = (
-        ast.Call,
-        ast.Num,
-        ast.Str,
-        ast.keyword,
-        ast.NameConstant,
-        ast.UnaryOp,
-        ast.Name,
-        ast.Load,
-        ast.Expression,
-        ast.Subscript,
-        ast.Index,
-        ast.USub,
-    )
-
-    SUPPORTED_ARGUMENTS = (
-        ast.Str,
-        ast.Num,
-        ast.NameConstant,
-        ast.Call,
-        ast.UnaryOp,
-        ast.USub,
-        ast.Name,
-    )
-
-    def __init__(self, environment, pass_parameter):
-        self.pass_parameter = pass_parameter
-        self.environment = environment
-        self.func_name = None
-
-    def visit_Call(self, node):
-
-        func_name = node.func.id
-        if func_name not in FUNC_MAP:
-            raise NameError(f"unknown test function: '{func_name}'")
-        if node.args:
-            raise TypeError("only keyword arguments are supported")
-        self.func_name = func_name
-        return self.generic_visit(node)
-
-    def visit_keyword(self, node):
-        key, value = node.arg, node.value
-        if self.func_name in (Params.FLAG_GENERIC, Params.PROC_GENERIC) and key == Params.FUNC:
-            DslChecker(self.environment).visit(value)
-            return
-
-        if key not in FUNC_MAP[self.func_name].signature + self.pass_parameter:
-            raise TypeError(f"unknown function parameter '{node.arg}'")
-
-        if not isinstance(value, self.SUPPORTED_ARGUMENTS):
-            raise TypeError(f"invalid argument type '{type(value)}'")
-
-        if isinstance(value, ast.Name) and value.id not in self.environment:
-            raise NameError(f"unknown variable: {value.id}")
-
-        return self.generic_visit(node)
-
-    def generic_visit(self, node):
-        if not isinstance(node, self.SUPPORTED_NODES):
-            raise TypeError(f"invalid node: '{node}'")
-        return super().generic_visit(node)
diff --git a/saqc/core/evaluator/evaluator.py b/saqc/core/evaluator/evaluator.py
deleted file mode 100644
index 550b655d0beaf8760deaab3a98f19d896b5fefb7..0000000000000000000000000000000000000000
--- a/saqc/core/evaluator/evaluator.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import ast
-import logging
-
-from functools import partial
-from typing import Any, Dict
-
-import astor
-import numpy as np
-import pandas as pd
-
-from saqc.flagger.baseflagger import BaseFlagger
-from saqc.core.config import Params
-from saqc.funcs.register import FUNC_MAP
-from saqc.core.evaluator.checker import ConfigChecker
-from saqc.core.evaluator.transformer import ConfigTransformer
-
-
-logger = logging.getLogger("SaQC")
-
-
-def _dslIsFlagged(flagger, field, flag=None, comparator=None):
-    if comparator is None:
-        return flagger.isFlagged(field, flag=flag)
-    return flagger.isFlagged(field, flag=flag, comparator=comparator)
-
-
-def initLocalEnv(data: pd.DataFrame, field: str, flagger: BaseFlagger, nodata: float) -> Dict[str, Any]:
-
-    return {
-        # general
-        "data": data,
-        "field": field,
-        "flagger": flagger,
-        "this": field,
-        # transformation only
-        "variables": set(flagger.getFlags().columns.tolist()),
-        "nolookup": set(["isflagged"]),  # no variable lookup for flagger based functions,
-        # missing values/data
-        "NAN": np.nan,
-        "NODATA": nodata,
-        # flags
-        "GOOD": flagger.GOOD,
-        "BAD": flagger.BAD,
-        "UNFLAGGED": flagger.UNFLAGGED,
-        # special functions
-        "ismissing": lambda data: ((data == nodata) | pd.isnull(data)),
-        "isflagged": partial(_dslIsFlagged, flagger),
-        # math
-        "abs": np.abs,
-        "exp": np.exp,
-        "log": np.log,
-        "sqrt": np.sqrt,
-        "sin": np.sin,
-        "cos": np.cos,
-        "tan": np.tan,
-        "max": np.nanmax,
-        "min": np.nanmin,
-        "mean": np.nanmean,
-        "sum": np.nansum,
-        "std": np.nanstd,
-        "len": lambda data: np.array(len(data)),
-    }
-
-
-def parseExpression(expr: str) -> ast.AST:
-    tree = ast.parse(expr, mode="eval")
-    return tree
-
-
-def compileTree(tree: ast.Expression):
-    return compile(ast.fix_missing_locations(tree), "<ast>", mode="eval")
-
-
-def evalCode(code, global_env=None, local_env=None):
-    return eval(code, global_env or {}, local_env or {})
-
-
-def compileExpression(expr, data, field, flagger, nodata=np.nan):
-    local_env = initLocalEnv(data, field, flagger, nodata)
-    tree = parseExpression(expr)
-    ConfigChecker(local_env, flagger.signature).visit(tree)
-    transformed_tree = ConfigTransformer(local_env).visit(tree)
-    src = astor.to_source(transformed_tree).strip()
-    logger.debug(f"calling transformed function:\n{src}")
-    return local_env, compileTree(transformed_tree)
-
-
-def evalExpression(expr, data, field, flagger, nodata=np.nan):
-    # mask the already flagged value to make all the functions
-    # called on the way through the evaluator ignore flagged values
-    mask = flagger.isFlagged()
-    data_in = data.copy()
-    data_in[mask] = np.nan
-    local_env, code = compileExpression(expr, data_in, field, flagger, nodata)
-    data_result, flagger_result = evalCode(code, FUNC_MAP, local_env)
-    # reinject the original values, as we don't want to loose them
-    data_result[mask] = data[mask]
-    return data_result, flagger_result
diff --git a/saqc/core/evaluator/transformer.py b/saqc/core/evaluator/transformer.py
deleted file mode 100644
index a304a010fc05c3e26a4c6110bf1caf1b4d85f8ee..0000000000000000000000000000000000000000
--- a/saqc/core/evaluator/transformer.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import ast
-
-from typing import Dict, Any
-from contextlib import contextmanager
-
-from saqc.core.config import Params
-
-
-class DslTransformer(ast.NodeTransformer):
-    def __init__(self, environment: Dict[str, Any]):
-        self.environment = environment
-
-    def visit_Call(self, node):
-        new_args = node.args
-        for a in new_args:
-            a.lookup = node.func.id not in self.environment["nolookup"]
-
-        node = ast.Call(func=node.func, args=new_args, keywords=[])
-        return self.generic_visit(node)
-
-    def visit_Name(self, node):
-
-        # NOTE:
-        #
-        # There are different categories of name nodes:
-        #
-        # 1. Names that need a lookup in the global/local eval
-        #    environment (e.g. function names, dsl constants, ...)
-        #    -> nodes need to leave visit_Name unaltered
-        # 2. Names that need a lookup in the 'data' DataFrame
-        #    -> nodes need to be rewritten int ast.Subscript
-        # 3. Names that should be treated as constants and be passed to
-        #    functions requiring a 'field' parameter (e.g. 'isflagged')
-        #    -> nodes need to be rewritten to ast.Constant/ast.Str
-        #
-        # TODO:
-        #
-        # The differentiation between these categories is done based
-        # on the two variables out of 'self.environment', namely
-        # 'nolookup' and 'variables' in two different methods
-        # ('vsisit_Call' and 'visit_Name'). This continues to feel hacky
-        # and I really like to see a cleaner solution for that problem
-
-        name = node.id
-
-        if name == "this":
-            name = self.environment["this"]
-
-        if name in self.environment["variables"]:
-            # determine further tree-transformation path by target
-            if getattr(node, "lookup", True):
-                value = ast.Constant(value=name)
-                node = ast.Subscript(
-                    value=ast.Name(id="data", ctx=ast.Load()), slice=ast.Index(value=value), ctx=ast.Load(),
-                )
-            else:
-                node = ast.Constant(value=name)
-
-        return node
-
-
-class ConfigTransformer(ast.NodeTransformer):
-    def __init__(self, environment):
-        self.environment = environment
-        self.func_name = None
-
-    def visit_Call(self, node):
-        self.func_name = node.func.id
-
-        new_args = [
-            ast.Name(id="data", ctx=ast.Load()),
-            ast.Name(id="field", ctx=ast.Load()),
-            ast.Name(id="flagger", ctx=ast.Load()),
-        ]
-        node = ast.Call(func=node.func, args=new_args + node.args, keywords=node.keywords)
-
-        return self.generic_visit(node)
-
-    def visit_keyword(self, node):
-        key, value = node.arg, node.value
-
-        if self.func_name in (Params.FLAG_GENERIC, Params.PROC_GENERIC) and key == Params.FUNC:
-            dsl_transformer = DslTransformer(self.environment)
-            value = dsl_transformer.visit(value)
-            return ast.keyword(arg=key, value=value)
-
-        return self.generic_visit(node)
diff --git a/saqc/core/reader.py b/saqc/core/reader.py
index ebbd8da19c9d3b71e2a7b099e9491cc86d8afc09..9e07640f23a1a225ab558dc5f01e1dee095aec86 100644
--- a/saqc/core/reader.py
+++ b/saqc/core/reader.py
@@ -1,145 +1,111 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import ast
 
-import re
-import logging
-from csv import reader
-from typing import Dict, List, Any, Union, Iterable, Iterator, Tuple
-from contextlib import contextmanager
-from io import StringIO, TextIOWrapper
+import numpy as np
 
 import pandas as pd
 
 from saqc.core.config import Fields as F
-from saqc.core.evaluator import compileExpression
-from saqc.flagger import BaseFlagger
-
-
-logger = logging.getLogger("SaQC")
-
-
-# typing declarations
-Config = Iterable[Dict[str, Any]]
-Filename = Union[StringIO, str]
-
-
-CONFIG_TYPES = {
-    F.VARNAME: str,
-    F.START: pd.to_datetime,
-    F.END: pd.to_datetime,
-    F.TESTS: str,
-    F.PLOT: lambda v: str(v).lower() == "true",
-    F.LINENUMBER: int,
-}
-
-
-def _raise(config_row, exc, msg, field=None):
-    line_number = config_row[F.LINENUMBER]
-    base_msg = f"configuration error in line {line_number}"
-    if field:
-        base_msg += f", column '{field}'"
-    msg = base_msg + ":\n" + msg
-    raise exc(msg)
-
-
-@contextmanager
-def _open(fname: Filename) -> Union[StringIO, TextIOWrapper]:
-    if isinstance(fname, StringIO):
-        yield fname
-    else:
-        f = open(fname)
-        yield f
-        f.close()
-
-
-def _matchKey(keys: Iterable[str], fuzzy_key: str) -> str:
-    for key in keys:
-        if re.match(fuzzy_key, key):
-            return key
-
-
-def _castRow(row: Dict[str, Any]):
-    out = {}
-    for row_key, row_value in row.items():
-        for fuzzy_key, func in CONFIG_TYPES.items():
-            if re.match(fuzzy_key, row_key):
-                try:
-                    out[row_key] = func(row_value)
-                except ValueError:
-                    _raise(row, ValueError, f"invalid value: '{row_value}'")
-    return out
-
-
-def _expandVarnameWildcards(config: Config, data: pd.DataFrame) -> Config:
-    def isQuoted(string):
-        return bool(re.search(r"'.*'|\".*\"", string))
-
-    new = []
-    for row in config:
-        varname = row[F.VARNAME]
-        if varname and isQuoted(varname):
-            pattern = varname[1:-1]
-            expansion = data.columns[data.columns.str.match(pattern)]
-            if not len(expansion):
-                logger.warning(f"no match for regular expression '{pattern}'")
-            for var in expansion:
-                new.append({**row, F.VARNAME: var})
-        else:
-            new.append(row)
-    return new
-
-
-def _clearRows(rows: Iterable[List[str]], comment: str = "#") -> Iterator[Tuple[str, List[Any]]]:
-    for i, row in enumerate(rows):
-        row = [c.strip() for c in row]
-        if any(row) and not row[0].lstrip().startswith(comment):
-            row = [c.split(comment)[0].strip() for c in row]
-            yield i, row
-
-
-def readConfig(fname: Filename, data: pd.DataFrame, sep: str = ";", comment: str = "#") -> pd.DataFrame:
-    defaults = {F.VARNAME: "", F.START: data.index.min(), F.END: data.index.max(), F.PLOT: False}
-
-    with _open(fname) as f:
-        rdr = reader(f, delimiter=";")
-
-        rows = _clearRows(rdr)
-        _, header = next(rows)
-
-        config = []
-        for n, row in rows:
-            row = dict(zip(header, row))
-            row = _castRow({**defaults, **row, F.LINENUMBER: n + 1})
-            config.append(row)
-
-    expanded = _expandVarnameWildcards(config, data)
-    return pd.DataFrame(expanded)
-
-
-def checkConfig(config_df: pd.DataFrame, data: pd.DataFrame, flagger: BaseFlagger, nodata: float) -> pd.DataFrame:
-
-    for _, config_row in config_df.iterrows():
-
-        var_name = config_row[F.VARNAME]
-        if pd.isnull(config_row[F.VARNAME]) or not var_name:
-            _raise(
-                config_row, SyntaxError, f"non-optional column '{F.VARNAME}' is missing or empty",
+from saqc.core.visitor import ConfigFunctionParser
+from saqc.core.core import Func, FuncCtrl
+from saqc.core.register import FUNC_MAP
+
+from saqc.lib.tools import isQuoted
+
+COMMENT = "#"
+EMPTY = "None"
+
+
+def _handleEmptyLines(df):
+    if F.VARNAME not in df.columns:
+        # at least the first line was empty, so we search the header
+        df = df.reset_index()
+        i = (df == F.VARNAME).first_valid_index()
+        df.columns = df.iloc[i]
+        df = df.iloc[i + 1 :]
+
+    # mark empty lines
+    mask = (df.isnull() | (df == "")).all(axis=1)
+    df.loc[mask] = EMPTY
+    return df
+
+
+def _handleComments(df):
+    # mark commented lines
+    df.loc[df[F.VARNAME].str.startswith(COMMENT)] = EMPTY
+
+    for col in df:
+        try:
+            df[col] = df[col].str.split(COMMENT, expand=True).iloc[:, 0].str.strip()
+        except AttributeError:
+            # NOTE:
+            # if `df[col]` is not of type string, we know, that
+            # there are no comments and the `.str` access fails
+            pass
+
+    return df
+
+
+def _injectOptionalColumns(df):
+    # inject optional columns
+    if F.PLOT not in df:
+        empty = (df == EMPTY).all(axis=1)
+        df[F.PLOT] = "False"
+        df[empty] = EMPTY
+    return df
+
+
+def _parseConfig(df, flagger):
+
+    funcs = []
+    for lineno, (_, target, expr, plot) in enumerate(df.itertuples()):
+        if target == "None" or pd.isnull(target) or pd.isnull(expr):
+            continue
+
+        regex = False
+        if isQuoted(target):
+            regex = True
+            target = target[1:-1]
+
+        tree = ast.parse(expr, mode="eval")
+        func_name, kwargs = ConfigFunctionParser(flagger).parse(tree.body)
+        f = Func(
+            field=kwargs.get("field", target),
+            target=target,
+            name=func_name,
+            func=FUNC_MAP[func_name]["func"],
+            kwargs=kwargs,
+            regex=regex,
+            ctrl=FuncCtrl(
+                masking=FUNC_MAP[func_name]["masking"],
+                plot=plot,
+                lineno=lineno+2,
+                expr=expr
             )
-
-        test_fields = config_row.filter(regex=F.TESTS).dropna()
-        if test_fields.empty:
-            _raise(
-                config_row, SyntaxError, f"at least one test needs to be given for variable",
-            )
-
-        for col, expr in test_fields.iteritems():
-            if not expr:
-                _raise(config_row, SyntaxError, f"field '{col}' may not be empty")
-            try:
-                compileExpression(expr, data, var_name, flagger, nodata)
-            except (TypeError, NameError, SyntaxError) as exc:
-                _raise(
-                    config_row, type(exc), exc.args[0] + f" (failing statement: '{expr}')", col,
-                )
-    return config_df
+        )
+        funcs.append(f)
+    return funcs
+
+
+def readConfig(fname, flagger):
+    df = pd.read_csv(
+        fname,
+        sep=r"\s*;\s*",
+        engine="python",
+        dtype=str,
+        quoting=3,
+        keep_default_na=False,  # don't replace "" by nan
+        skip_blank_lines=False,
+    )
+
+    df = _handleEmptyLines(df)
+    df = _injectOptionalColumns(df)
+    df = _handleComments(df)
+
+    df[F.VARNAME] = df[F.VARNAME].replace(r"^\s*$", np.nan, regex=True)
+    df[F.TEST] = df[F.TEST].replace(r"^\s*$", np.nan, regex=True)
+    df[F.PLOT] = df[F.PLOT].replace({"False": "", EMPTY: "", np.nan: ""})
+    df = df.astype({F.PLOT: bool})
+    return _parseConfig(df, flagger)
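+
+
+# Sketch of the semicolon-separated layout this reader expects (the function
+# names are illustrative, the actual header spelling is defined in
+# `saqc.core.config.Fields`):
+#
+#   varname ; test        ; plot
+#   x       ; flagDummy() ; False
+#   'x|y'   ; flagDummy() ;
+#
+# A quoted varname is treated as a regular expression and expanded against
+# the data/flags columns once the queued functions are evaluated.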
diff --git a/saqc/core/register.py b/saqc/core/register.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c9802559897cb9b3c78ef6bc3318967349f345b
--- /dev/null
+++ b/saqc/core/register.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+from typing import Dict, Any
+
+# NOTE:
+# the global SaQC function store,
+# will be filled by calls to register
+FUNC_MAP: Dict[str, Any] = {}
+
+
+def register(masking='all'):
+    def inner(func):
+        FUNC_MAP[func.__name__] = {"func": func, "masking": masking}
+        return func
+    return inner
+
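+
+# Usage sketch (the function below is illustrative, not part of SaQC):
+#
+#   @register(masking='field')
+#   def flagDummy(data, field, flagger, **kwargs):
+#       flagger = flagger.setFlags(field)
+#       return data, flagger
+#
+# After importing the defining module, FUNC_MAP["flagDummy"] holds
+# {"func": flagDummy, "masking": "field"} and the test becomes available
+# to SaQC and the config parser.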
diff --git a/saqc/core/visitor.py b/saqc/core/visitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..79a285f7b0bd6afadde9a424cc678d734ed12fd7
--- /dev/null
+++ b/saqc/core/visitor.py
@@ -0,0 +1,217 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import ast
+
+import numpy as np
+import pandas as pd
+
+from saqc.core.register import FUNC_MAP
+import saqc.lib.ts_operators as ts_ops
+
+
+ENVIRONMENT = {
+    "NAN": np.nan,
+    "abs": np.abs,
+    "max": np.nanmax,
+    "min": np.nanmin,
+    "mean": np.nanmean,
+    "sum": np.nansum,
+    "std": np.nanstd,
+    "len": len,
+    "exp": np.exp,
+    "log": np.log,
+    "var": np.nanvar,
+    "median": np.nanmedian,
+    "first": ts_ops.first,
+    "last": ts_ops.last,
+    "count": ts_ops.count,
+    "deltaT": ts_ops.deltaT,
+    "id": ts_ops.identity,
+    "diff": ts_ops.difference,
+    "relDiff": ts_ops.relativeDifference,
+    "deriv": ts_ops.derivative,
+    "rateOfChange": ts_ops.rateOfChange,
+    "scale": ts_ops.scale,
+    "normScale": ts_ops.normScale,
+    "meanStandardize": ts_ops.standardizeByMean,
+    "medianStandardize": ts_ops.standardizeByMedian,
+    "zLog": ts_ops.zeroLog,
+}
+
+RESERVED = {"GOOD", "BAD", "UNFLAGGED", "NODATA"}
+
+
+class ConfigExpressionParser(ast.NodeVisitor):
+
+    """
+    Generic configuration functions will be rewritten as lambda functions
+    and variables that need a lookup in `data` will act as arguments, e.g.:
+      `flagGeneric(func=(x != NODATA) & (y < 3))`
+      will be rewritten to
+      `lambda x, y: (x != NODATA) & (y < 3)`
+
+    The main purpose of this class is to identify all such lambda arguments
+    and check the given expression for accordance with the restrictions
+    imposed onto generic functions.
+    """
+
+    SUPPORTED = (
+        ast.Str,
+        ast.Expression,
+        ast.UnaryOp,
+        ast.BinOp,
+        ast.BitOr,
+        ast.BitAnd,
+        ast.Num,
+        ast.Compare,
+        ast.Add,
+        ast.Sub,
+        ast.Mult,
+        ast.Div,
+        ast.Pow,
+        ast.Mod,
+        ast.USub,
+        ast.Eq,
+        ast.NotEq,
+        ast.Gt,
+        ast.Lt,
+        ast.GtE,
+        ast.LtE,
+        ast.Invert,
+        ast.Name,
+        ast.Load,
+        ast.Call,
+    )
+
+    def __init__(self, node):
+        self._args = []
+        self.visit(node)
+        if not self._args:
+            # NOTE:
+            # we assume, that we are not dealing with an
+            # expression as we couldn't find any arguments
+            raise TypeError("not a valid expression")
+
+    @property
+    def args(self):
+        return tuple(dict.fromkeys(self._args))
+
+    def visit_Call(self, node):
+        # only non-keyword arguments allowed
+        # in generic functions
+        for n in node.args:
+            self.visit(n)
+
+    def visit_Name(self, node):
+        # NOTE:
+        # the assumption is that anything not in
+        # ENVIRONMENT + RESERVED needs a lookup in `data`
+        name = node.id
+        if name not in ENVIRONMENT and name not in RESERVED:
+            self._args.append(name)
+        self.generic_visit(node)
+
+    def generic_visit(self, node):
+        if not isinstance(node, self.SUPPORTED):
+            raise TypeError(f"invalid expression: '{node}'")
+        return super().generic_visit(node)
+
+
+class ConfigFunctionParser(ast.NodeVisitor):
+
+    SUPPORTED_NODES = (
+        ast.Call,
+        ast.Num,
+        ast.Str,
+        ast.keyword,
+        ast.NameConstant,
+        ast.UnaryOp,
+        ast.Name,
+        ast.Load,
+        ast.Expression,
+        ast.Subscript,
+        ast.Index,
+        ast.USub,
+        ast.List,
+    )
+
+    def __init__(self, flagger):
+
+        self.kwargs = {}
+        self.environment = {
+            "GOOD": flagger.GOOD,
+            "BAD": flagger.BAD,
+            "UNFLAGGED": flagger.UNFLAGGED,
+            **ENVIRONMENT,
+        }
+
+    def parse(self, node):
+        func = self.visit_Call(node)
+        return func, self.kwargs
+
+    def visit_Call(self, node):
+        if not isinstance(node, ast.Call):
+            raise TypeError("expected function call")
+
+        if node.args:
+            raise TypeError("only keyword arguments are supported")
+
+        func_name = node.func.id
+        if func_name not in FUNC_MAP:
+            raise NameError(f"unknown function '{func_name}'")
+
+        self.generic_visit(node)
+        return func_name
+
+    def visit_keyword(self, node):
+
+        k, v = node.arg, node.value
+        check_tree = True
+
+        # NOTE: if the value is not a constant or a plain variable,
+        #       we expect a generic expression and rewrite it into a lambda
+        try:
+            visitor = ConfigExpressionParser(v)
+            args = ast.arguments(
+                posonlyargs=[],
+                kwonlyargs=[],
+                kw_defaults=[],
+                defaults=[],
+                args=[ast.arg(arg=a, annotation=None) for a in visitor.args],
+                kwarg=None,
+                vararg=None,
+            )
+            v = ast.Lambda(args=args, body=v)
+            # NOTE:
+            # don't pass the generated functions down
+            # to the checks implemented in this class...
+            check_tree = False
+        except TypeError:
+            pass
+
+        vnode = ast.Assign(targets=[ast.Name(id=k, ctx=ast.Store())], value=v)
+
+        # NOTE:
+        # in order to get concrete values out of the AST
+        # we compile and evaluate the keyword (e.g. max=100)
+        # into the dictionary `self.kwargs`
+        # -> after all keywords were visited we end up with
+        #    a dictionary holding all the passed arguments as
+        #    real python objects
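+        #    e.g. the keyword `flag=BAD` becomes the assignment `flag = BAD`,
+        #    which is executed against a copy of `self.environment`, leaving
+        #    `self.kwargs` with {"flag": flagger.BAD}.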
+        co = compile(
+            ast.fix_missing_locations(ast.Interactive(body=[vnode])),
+            "<ast>",
+            mode="single"
+        )
+        # NOTE: only pass a copy to not clutter the self.environment
+        exec(co, {**self.environment}, self.kwargs)
+
+        # let's do some more validity checks
+        if check_tree:
+            self.generic_visit(v)
+
+    def generic_visit(self, node):
+        if not isinstance(node, self.SUPPORTED_NODES):
+            raise TypeError(f"invalid node: '{node}'")
+        return super().generic_visit(node)
diff --git a/saqc/flagger/__init__.py b/saqc/flagger/__init__.py
index 3c942296fd455c1d1632a5880afa5759f394c787..dd5b607158f13f3922cdf734f21fa98be19a96cb 100644
--- a/saqc/flagger/__init__.py
+++ b/saqc/flagger/__init__.py
@@ -6,3 +6,4 @@ from saqc.flagger.categoricalflagger import CategoricalFlagger
 from saqc.flagger.simpleflagger import SimpleFlagger
 from saqc.flagger.dmpflagger import DmpFlagger
 from saqc.flagger.continuousflagger import ContinuousFlagger
+from saqc.flagger.positionalflagger import PositionalFlagger
diff --git a/saqc/flagger/baseflagger.py b/saqc/flagger/baseflagger.py
index 21f4d8aa36e50b1d695c34a8d19b2f9f48659b18..b46515d68543bcfe1f4caf987440c621d4821005 100644
--- a/saqc/flagger/baseflagger.py
+++ b/saqc/flagger/baseflagger.py
@@ -3,15 +3,15 @@
 
 import operator as op
 from copy import deepcopy
-from collections import OrderedDict
 from abc import ABC, abstractmethod
-from typing import TypeVar, Union, Any
 
-import numpy as np
-import pandas as pd
+from typing import TypeVar, Union, Any, List, Optional
 
-from saqc.lib.tools import toSequence, assertScalar, assertDataFrame
+import pandas as pd
+import numpy as np
+import dios
 
+from saqc.lib.tools import assertScalar, mergeDios, toSequence, customRoller
 
 COMPARATOR_MAP = {
     "!=": op.ne,
@@ -22,39 +22,63 @@ COMPARATOR_MAP = {
     "<": op.lt,
 }
 
-
-BaseFlaggerT = TypeVar("BaseFlaggerT")
-PandasT = Union[pd.Series, pd.DataFrame]
 # TODO: get some real types here (could be tricky...)
 LocT = Any
-IlocT = Any
 FlagT = Any
+diosT = dios.DictOfSeries
+BaseFlaggerT = TypeVar("BaseFlaggerT")
+PandasT = Union[pd.Series, diosT]
+FieldsT = Union[str, List[str]]
 
 
 class BaseFlagger(ABC):
     @abstractmethod
     def __init__(self, dtype):
-        # NOTE: the type of the _flags DataFrame
+        # NOTE: the type of the _flags DictOfSeries
         self.dtype = dtype
+        self.extra_defaults = dict()
         # NOTE: the arggumens of setFlags supported from
         #       the configuration functions
         self.signature = ("flag",)
-        self._flags: pd.DataFrame
+        self._flags: Optional[diosT] = None
+
+    @property
+    def initialized(self):
+        return self._flags is not None
 
-    def initFlags(self, data: pd.DataFrame = None, flags: pd.DataFrame = None) -> BaseFlaggerT:
+    @property
+    def flags(self):
+        return self._flags.copy()
+
+    def initFlags(self, data: diosT = None, flags: diosT = None) -> BaseFlaggerT:
         """
         initialize a flagger based on the given 'data' or 'flags'
-        if 'data' is not None: return a flagger with flagger.UNFALGGED values
+        if 'data' is not None: return a flagger with flagger.UNFLAGGED values
         if 'flags' is not None: return a flagger with the given flags
         """
 
         if data is None and flags is None:
             raise TypeError("either 'data' or 'flags' are required")
+
+        if data is not None and flags is not None:
+            raise TypeError("either 'data' or 'flags' can be given")
+
         if data is not None:
-            flags = pd.DataFrame(data=self.UNFLAGGED, index=data.index, columns=data.columns)
-        return self._copy(self._assureDtype(flags))
+            if not isinstance(data, diosT):
+                data = dios.DictOfSeries(data)
+
+            flags = dios.DictOfSeries(columns=data.columns)
+            for c in flags.columns:
+                flags[c] = pd.Series(self.UNFLAGGED, index=data[c].index)
+        else:
+            if not isinstance(flags, diosT):
+                flags = dios.DictOfSeries(flags)
+
+        flags = flags.astype(self.dtype)
+        newflagger = self.copy(flags=flags)
+        return newflagger
 
-    def setFlagger(self, other: BaseFlaggerT):
+    def merge(self, other: BaseFlaggerT, subset: Optional[List] = None, join: str = "merge", inplace=False):
         """
         Merge the given flagger 'other' into self
         """
@@ -62,137 +86,395 @@ class BaseFlagger(ABC):
         if not isinstance(other, self.__class__):
             raise TypeError(f"flagger of type '{self.__class__}' needed")
 
-        this = self._flags
-        other = other._flags
+        if inplace:
+            self._flags = mergeDios(self._flags, other._flags, subset=subset, join=join)
+            return self
+        else:
+            return self.copy(flags=mergeDios(self._flags, other._flags, subset=subset, join=join))
+
+    def slice(self, field: FieldsT = None, loc: LocT = None, drop: FieldsT = None, inplace=False) -> BaseFlaggerT:
+        """ Return a potentially trimmed down copy of self. """
+        if drop is not None:
+            if field is not None:
+                raise TypeError("either 'field' or 'drop' can be given, but not both")
+            field = self._flags.columns.drop(drop, errors="ignore")
+        flags = self.getFlags(field=field, loc=loc).to_dios()
+
+        if inplace:
+            self._flags = flags
+            return self
+        else:
+            return self.copy(flags=flags)
+
+    def toFrame(self):
+        """ Return a pd.DataFrame holding the flags
+        Return
+        ------
+        frame: pandas.DataFrame
+
+        Note
+        ----
+        This is a convenience funtion hiding the implementation detail dios.DictOfSeries.
+        Subclasses with special flag structures (i.e. DmpFlagger) should overwrite the
+        this methode in order to provide a usefull user output.
+        """
+        return self._flags.to_df()
+
+    def getFlags(self, field: FieldsT = None, loc: LocT = None, full=False):
+        """ Return a potentially, to `loc`, trimmed down version of flags.
+
+        Parameters
+        ----------
+        field : str, list of str or None, default None
+            Field(s) to request.
+        loc :
+            limit result to specific rows.
+        full : bool, default False
+            If True, an additional dict is returned, holding all extras that
+            the flagger may specify. These extras can be fed back to the
+            flagger with `setFlags(..., with_extra=True)`.
+
+        Return
+        ------
+        flags: pandas.Series or dios.DictOfSeries
+            If field is a scalar a series is returned, otherwise a dios.
+        extras: dict
+            Present only if `full=True`. A dict that holds all extra information.
+
+        Note
+        ----
+        This is more or less a __getitem__(key)-like function, where
+        self._flags is accessed and key is a single key or a tuple.
+        Either key is [loc] or [loc, field]. loc can also be a 2D-key,
+        i.e. a boolean dios.
+
+        The resulting dict (full=True) can be fed to setFlags to update extra columns,
+        but field must be a scalar then, because setFlags can only process a scalar field.
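+
+        Examples
+        --------
+        A minimal sketch (the field name is illustrative):
+
+        >>> flags = flagger.getFlags("x")                    # a pd.Series
+        >>> flags, extra = flagger.getFlags("x", full=True)  # series plus extras dict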
+        """
+
+        # loc should be a valid 2D-indexer and
+        # then field must be None. Otherwise aloc
+        # will fail and throw the correct Error.
+        if isinstance(loc, diosT) and field is None:
+            indexer = loc
+
+        else:
+            loc = slice(None) if loc is None else loc
+            field = slice(None) if field is None else self._check_field(field)
+            indexer = (loc, field)
+
+        # this is a bug in `dios.aloc`, which may return a shallow copied dios, if `slice(None)` is passed
+        # as row indexer. This is because pandas `.loc` returns a shallow copy if a null-slice is passed to a series.
+        flags = self._flags.aloc[indexer].copy()
+        if full:
+            return flags, {}
+        else:
+            return flags
 
-        flags = this.reindex(
-            index=this.index.union(other.index),
-            columns=this.columns.union(other.columns, sort=False),
-            fill_value=self.UNFLAGGED,
-        )
+    def setFlags(
+            self,
+            field: str,
+            loc: LocT = None,
+            flag: FlagT = None,
+            force: bool = False,
+            inplace: bool = False,
+            with_extra: bool = False,
+            flag_after: Union[str, int] = None,
+            flag_before: Union[str, int] = None,
+            win_flag: FlagT = None,
+            **kwargs
+    ) -> BaseFlaggerT:
+        """Overwrite existing flags at loc.
 
-        for key, values in other.iteritems():
-            flags.loc[other.index, key] = values
+        If `force=False` (default) only flags with a lower priority are overwritten,
+        otherwise, if `force=True`, flags are overwritten unconditionally.
 
-        return self._copy(self._assureDtype(flags))
+        Examples
+        --------
+        One can use this to update extra columns without knowing their names, e.g. like so:
 
-    def getFlagger(self, field: str = None, loc: LocT = None, iloc: IlocT = None) -> BaseFlaggerT:
-        """
-        return a potentially trimmed down copy of self
+        >>> field = 'var0'
+        >>> flags, extra = flagger.getFlags(field, full=True)
+        >>> newflags = magic_that_alter_index(flags)
+        >>> for k, v in extra.items():
+        ...     extra[k] = magic_that_alter_index(v)
+        >>> flagger = flagger.setFlags(field, flag=newflags, with_extra=True, **extra)
         """
-        assertScalar("field", field, optional=True)
-        mask = self._locatorMask(field=slice(None), loc=loc, iloc=iloc)
-        flags = self._flags.loc[mask, field or slice(None)]
-        if isinstance(flags, pd.Series):
-            flags = flags.to_frame()
-        return self._copy(flags)
-
-    def getFlags(self, field: str = None, loc: LocT = None, iloc: IlocT = None) -> PandasT:
-        """
-        return a copy of a potentially trimmed down 'self._flags' DataFrame
-        """
-        assertScalar("field", field, optional=True)
-        field = field or slice(None)
-        flags = self._flags.copy()
-        mask = self._locatorMask(field, loc, iloc)
-        return flags.loc[mask, field]
 
-    def setFlags(
-        self, field: str, loc: LocT = None, iloc: IlocT = None, flag: FlagT = None, force: bool = False, **kwargs,
-    ) -> BaseFlaggerT:
-        assertScalar("field", field, optional=False)
+        assert "iloc" not in kwargs, "deprecated keyword, `iloc=slice(i:j)`. Use eg. `loc=srs.index[i:j]` instead."
 
-        flag = self.BAD if flag is None else self._checkFlag(flag)
+        assertScalar("field", self._check_field(field), optional=False)
+        flag = self.BAD if flag is None else flag
+        out = self if inplace else deepcopy(self)
 
-        this = self.getFlags(field=field)
-        other = self._broadcastFlags(field=field, flag=flag)
+        if with_extra and not isinstance(flag, pd.Series):
+            raise ValueError("flags must be pd.Series if `with_extras=True`.")
 
-        mask = self._locatorMask(field, loc, iloc)
-        if not force:
-            mask &= (this < other).values
+        trimmed = self.getFlags(field=field, loc=loc)
+        if force:
+            mask = pd.Series(True, index=trimmed.index, dtype=bool)
+        else:
+            mask = trimmed < flag
+
+        # set flags of the test
+        out._flags.aloc[mask, field] = flag
+
+        # calc and set window flags
+        if flag_after is not None or flag_before is not None:
+            win_mask, win_flag = self._getWindowMask(field, mask, flag_after, flag_before, win_flag, flag, force)
+            out._flags.aloc[win_mask, field] = win_flag
 
-        out = deepcopy(self)
-        out._flags.loc[mask, field] = other[mask]
         return out
 
-    def clearFlags(self, field: str, loc: LocT = None, iloc: IlocT = None, **kwargs) -> BaseFlaggerT:
+    def _getWindowMask(self, field, mask, flag_after, flag_before, win_flag, flag, force):
+        """ Return a mask which is True where the additional window flags should get set.
+
+        Parameters
+        ----------
+        field : str
+            column identifier.
+        mask : boolean pd.Series
+            identified locations where flags were set
+        flag_after : offset or int
+            set additional flags after each flag that was set
+        flag_before : offset or int
+            set additional flags before each flag that was set
+        win_flag : any
+            Should be a valid flag of the flagger or None. Defaults to `flag` if None.
+        flag : any
+            The flag that was used by flagger.setFlags(). Only used to determine `win_flag` if the latter is None.
+        force : bool
+            If True, the additional flags specified by `flag_after` and `flag_before` are set unconditionally
+            and so could also overwrite worse flags.
+
+        Returns
+        -------
+        mask: boolean pandas.Series
+            locations where additional flags should be set. The mask has the same (complete) length as `.flags[field]`
+        win_flag: the flag to set
+
+        Raises
+        ------
+        ValueError : If `win_flag` is None and `flag` is not a scalar.
+        ValueError : If `win_flag` is not a valid flagger flag.
+        """
+
+        # win_flag defaults to flag if not explicitly given
+        if win_flag is None:
+            win_flag = flag
+            if not np.isscalar(win_flag):
+                raise ValueError("win_flag (None) cannot default to flag, if flag is not a scalar. "
+                                 "Pls specify `win_flag` or omit `flag_after` and `flag_before`.")
+        else:
+            if not self.isValidFlag(win_flag):
+                raise ValueError(f"invalid win_flag: {win_flag}")
+
+        # blow up the mask to the whole size of flags
+        base = mask.reindex_like(self._flags[field]).fillna(False)
+        before, after = False, False
+
+        if flag_before is not None:
+            closed = 'both'
+            if isinstance(flag_before, int):
+                flag_before, closed = flag_before + 1, None
+            r = customRoller(base, window=flag_before, min_periods=1, closed=closed, expand=True, forward=True)
+            before = r.sum().astype(bool)
+
+        if flag_after is not None:
+            closed = 'both'
+            if isinstance(flag_after, int):
+                flag_after, closed = flag_after + 1, None
+            r = customRoller(base, window=flag_after, min_periods=1, closed=closed, expand=True)
+            after = r.sum().astype(bool)
+
+        # does not include base, to avoid overriding flags that were just set
+        # by the test, because flag and win_flag may differ.
+        mask = ~base & (after | before)
+
+        # also do not overwrite worse flags
+        if not force:
+            mask &= self.getFlags(field) < win_flag
+
+        return mask, win_flag
+
+    def clearFlags(self, field: str, loc: LocT = None, inplace=False, **kwargs) -> BaseFlaggerT:
         assertScalar("field", field, optional=False)
-        return self.setFlags(field=field, loc=loc, iloc=iloc, flag=self.UNFLAGGED, force=True, **kwargs)
-
-    def isFlagged(
-        self, field=None, loc: LocT = None, iloc: IlocT = None, flag: FlagT = None, comparator: str = ">", **kwargs,
-    ) -> PandasT:
-        assertScalar("field", field, optional=True)
-        assertScalar("flag", flag, optional=True)
-        self._checkFlag(flag)
-        flag = self.GOOD if flag is None else flag
-        flags = self.getFlags(field, loc, iloc, **kwargs)
+        if "force" in kwargs:
+            raise ValueError("Keyword 'force' is not allowed here.")
+        if "flag" in kwargs:
+            raise ValueError("Keyword 'flag' is not allowed here.")
+        return self.setFlags(field=field, loc=loc, flag=self.UNFLAGGED, force=True, inplace=inplace, **kwargs)
+
+    def isFlagged(self, field=None, loc: LocT = None, flag: FlagT = None, comparator: str = ">") -> PandasT:
+        """
+        Returns boolean data that indicate where data has been flagged.
+
+        Parameters
+        ----------
+        field : str, list-like, default None
+            The field(s)/column(s) of the data to be tested if flagged.
+            If None all columns are used.
+
+        loc : mask, slice, pd.Index, etc., default None
+            The location/rows of the data to be tested if flagged.
+            If None all rows are used.
+
+        flag : str, category, list-like, default None
+            The flag(s) that define data as flagged. If None, `flagger.GOOD`
+            is used.
+
+        comparator : {'<', '<=', '==', '!=', '>=', '>'}, default '>'
+            Defines how the comparison is done. The `flags` are always on the
+            left-hand-side, thus, the flag to compare is always on the right-
+            hand-side. E.g. a call with all defaults returns the equivalent
+            of `flagger.getFlags() > flagger.GOOD`.
+
+        Returns
+        -------
+        pandas.Series or dios.DictOfSeries : Return Series if field is a scalar,
+        otherwise DictOfSeries.
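+
+        Examples
+        --------
+        A minimal sketch (the field name is illustrative):
+
+        >>> flagger.isFlagged("x")                                    # flags > GOOD
+        >>> flagger.isFlagged("x", flag=flagger.BAD, comparator=">=")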
+        """
+        if isinstance(flag, pd.Series):
+            raise TypeError("flag: pd.Series is not allowed")
+        flags_to_compare = set(toSequence(flag, self.GOOD))
+
+        flags = self.getFlags(field, loc)
         cp = COMPARATOR_MAP[comparator]
-        flagged = pd.notna(flags) & cp(flags, flag)
+
+        # notna() to prevent nans from becoming True,
+        # e.g.: `np.nan != 0 -> True`
+        flagged = flags.notna()
+
+        # passing an empty list must result
+        # in an everywhere-False result
+        if len(flags_to_compare) == 0:
+            flagged[:] = False
+        else:
+            for f in flags_to_compare:
+                if not self.isValidFlag(f):
+                    raise ValueError(f"invalid flag: {f}")
+                flagged &= cp(flags, f)
+
         return flagged
 
-    def _copy(self, flags: pd.DataFrame = None) -> BaseFlaggerT:
-        out = deepcopy(self)
-        if flags is not None:
-            out._flags = flags
+    def copy(self, flags=None) -> BaseFlaggerT:
+        if flags is None:
+            out = deepcopy(self)
+        else:
+            # if flags is given and self.flags is big,
+            # this hack will bring some speed improvement
+            # NOTE: there should be a nicer way to do this,
+            #       why not through a constructor method?
+            saved = self._flags
+            self._flags = None
+            out = deepcopy(self)
+            out._flags = flags.copy()
+            self._flags = saved
         return out
 
-    def _locatorMask(self, field: str = None, loc: LocT = None, iloc: IlocT = None) -> PandasT:
-        field = field or slice(None)
-        locator = [l for l in (loc, iloc, slice(None)) if l is not None][0]
-        index = self._flags.index
-        mask = pd.Series(data=np.zeros(len(index), dtype=bool), index=index)
-        mask[locator] = True
-        return mask
+    def isValidFlag(self, flag: FlagT) -> bool:
+        """
+        Check if given flag is known to this flagger.
 
-    def _broadcastFlags(self, field: str, flag: FlagT) -> pd.Series:
+        Parameters
+        ----------
+        flag: str
+            The flag to be checked.
 
-        this = self.getFlags(field)
+        Returns
+        -------
+        bool
+        """
+        # This is a very rudimentary fallback for the check;
+        # child flaggers should implement a better version of it.
+        return flag == self.BAD or flag == self.GOOD or flag == self.UNFLAGGED or self.isSUSPICIOUS(flag)
+
+    def replaceField(self, field, flags, inplace=False, **kwargs):
+        """ Replace or delete all data for a given field.
+
+        Parameters
+        ----------
+        field : str
+            The field to replace / delete. If the field already exists, the respective data
+            is replaced, otherwise the data is inserted under the respective field name.
+        flags : pandas.Series or None
+            If None, the series denoted by `field` will be deleted. Otherwise
+            a series of flags (dtype flagger.dtype) that will replace the series
+            currently stored under `field`.
+        inplace : bool, default False
+            If False, a modified copy of the flagger is returned; otherwise the flagger itself is modified.
+        **kwargs : dict
+            ignored.
+
+        Returns
+        -------
+        flagger: saqc.flagger.BaseFlagger
+            The flagger object itself (if `inplace=True`) or a modified copy of it.
+
+        Raises
+        ------
+        ValueError: (delete) if field does not exist
+        TypeError: (replace / insert) if flags are not pd.Series
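+
+        Examples
+        --------
+        A minimal sketch (the column name "x" is illustrative only):
+
+        >>> new = pd.Series(flagger.UNFLAGGED, index=flagger.getFlags("x").index)
+        >>> flagger = flagger.replaceField("x", flags=new)   # replace
+        >>> flagger = flagger.replaceField("x", flags=None)  # delete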
+        """
 
-        if np.isscalar(flag):
-            flag = np.full_like(this, flag)
+        assertScalar("field", field, optional=False)
 
-        return pd.Series(data=flag, index=this.index, name=field, dtype=self.dtype)
+        out = self if inplace else deepcopy(self)
 
-    def _checkFlag(self, flag):
-        if flag is not None and not self._isDtype(flag):
-            raise TypeError(f"invalid flag value '{flag}' for flagger 'self.__class__'")
-        return flag
+        # delete
+        if flags is None:
+            if field not in self._flags:
+                raise ValueError(f"{field}: field does not exist")
+            del out._flags[field]
 
-    def _assureDtype(self, flags):
-        # NOTE: building up new DataFrames is significantly
-        #       faster than assigning into existing ones
-        if isinstance(flags, pd.Series):
-            return flags.astype(self.dtype)
-        tmp = OrderedDict()
-        for c in flags.columns:
-            tmp[c] = flags[c].astype(self.dtype)
-        return pd.DataFrame(tmp)
+        # insert / replace
+        else:
+            if not isinstance(flags, pd.Series):
+                raise TypeError(f"`flags` must be pd.Series.")
+            out._flags[field] = flags.astype(self.dtype)
+        return out
 
-    @abstractmethod
-    def _isDtype(self, flag) -> bool:
-        pass
+    def _check_field(self, field):
+        """ Check if (all) field(s) in self._flags. """
+
+        # wait for outcome of
+        # https://git.ufz.de/rdm-software/saqc/issues/46
+        failed = []
+        if isinstance(field, str):
+            if field not in self._flags:
+                failed += [field]
+        else:
+            try:
+                for f in field:
+                    if f not in self._flags:
+                        failed += [f]
+            # not iterable, probably a slice or
+            # some other indexer we don't have to check
+            except TypeError:
+                pass
+
+        if failed:
+            raise ValueError(f"key(s) missing in flags: {failed}")
+        return field
 
     @property
     @abstractmethod
     def UNFLAGGED(self) -> FlagT:
         """ Return the flag that indicates unflagged data """
-        pass
 
     @property
     @abstractmethod
     def GOOD(self) -> FlagT:
         """ Return the flag that indicates the very best data """
-        pass
 
     @property
     @abstractmethod
     def BAD(self) -> FlagT:
         """ Return the flag that indicates the worst data """
-        pass
 
     @abstractmethod
     def isSUSPICIOUS(self, flag: FlagT) -> bool:
         """ Return bool that indicates if the given flag is valid, but neither
         UNFLAGGED, BAD, nor GOOD."""
-        pass
diff --git a/saqc/flagger/categoricalflagger.py b/saqc/flagger/categoricalflagger.py
index eb5384d2bdea5a5ff24323ae416fd440474a87f7..20d2680343bfc659a5e95809732852b26e913a23 100644
--- a/saqc/flagger/categoricalflagger.py
+++ b/saqc/flagger/categoricalflagger.py
@@ -6,7 +6,7 @@ from collections import OrderedDict
 import pandas as pd
 
 from saqc.flagger.baseflagger import BaseFlagger
-from saqc.lib.tools import assertDataFrame
+from saqc.lib.tools import assertDictOfSeries
 
 
 class Flags(pd.CategoricalDtype):
@@ -25,14 +25,6 @@ class CategoricalFlagger(BaseFlagger):
         super().__init__(dtype=Flags(flags))
         self._categories = self.dtype.categories
 
-    def _isDtype(self, flag) -> bool:
-        """
-        not needed here, move out
-        """
-        if isinstance(flag, pd.Series):
-            return isinstance(flag.dtype, pd.CategoricalDtype) and flag.dtype == self.dtype
-        return flag in self.dtype.categories
-
     @property
     def UNFLAGGED(self):
         return self._categories[0]
diff --git a/saqc/flagger/dmpflagger.py b/saqc/flagger/dmpflagger.py
index dfba7412589cfac3c2da8318ebdd57a8ebd98789..d4ff7cd5b4d54171df97a66d9d5d13b3812172e8 100644
--- a/saqc/flagger/dmpflagger.py
+++ b/saqc/flagger/dmpflagger.py
@@ -1,15 +1,19 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
-import subprocess
+
 import json
 from copy import deepcopy
-from collections import OrderedDict
-from typing import Union, Sequence
+from typing import TypeVar, Optional, List
 
 import pandas as pd
 
+import dios
+
+from saqc.flagger.baseflagger import diosT
 from saqc.flagger.categoricalflagger import CategoricalFlagger
-from saqc.lib.tools import assertDataFrame, toSequence, assertScalar
+from saqc.lib.tools import assertScalar, mergeDios, mutateIndex
+
+DmpFlaggerT = TypeVar("DmpFlaggerT")
 
 
 class Keywords:
@@ -31,86 +35,214 @@ FLAGS = ["NIL", "OK", "DOUBTFUL", "BAD"]
 
 
 class DmpFlagger(CategoricalFlagger):
-    def __init__(self):
+    def __init__(self, **kwargs):
         super().__init__(FLAGS)
         self.flags_fields = [FlagFields.FLAG, FlagFields.CAUSE, FlagFields.COMMENT]
-        version = subprocess.run(
-            "git describe --tags --always --dirty", shell=True, check=False, stdout=subprocess.PIPE,
-        ).stdout
-        self.project_version = version.decode().strip()
+        self.extra_defaults = dict(cause=FLAGS[0], comment="")
         self.signature = ("flag", "comment", "cause", "force")
+
+        self._global_comments = kwargs
         self._flags = None
+        self._causes = None
+        self._comments = None
+
+    @property
+    def causes(self):
+        return self._causes
+
+    @property
+    def comments(self):
+        return self._comments
+
+    def toFrame(self):
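+        # assemble flags, causes and comments into one frame with a
+        # (variable, flag-field) column MultiIndex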
+        out = pd.concat(
+            [self._flags.to_df(), self._causes.to_df(), self._comments.to_df()],
+            axis=1,
+            keys=[FlagFields.FLAG, FlagFields.CAUSE, FlagFields.COMMENT],
+        )
+        out = out.reorder_levels(order=[1, 0], axis=1).sort_index(axis=1, level=0, sort_remaining=False)
+        return out
 
-    def initFlags(self, data: pd.DataFrame = None, flags: pd.DataFrame = None):
+    def initFlags(self, data: dios.DictOfSeries = None, flags: dios.DictOfSeries = None):
         """
         initialize a flagger based on the given 'data' or 'flags'
         if 'data' is not None: return a flagger with flagger.UNFALGGED values
         if 'flags' is not None: return a flagger with the given flags
         """
 
-        if data is not None:
-            flags = pd.DataFrame(data="", columns=self._getColumnIndex(data.columns), index=data.index,)
-            flags.loc[:, self._getColumnIndex(data.columns, [FlagFields.FLAG])] = self.UNFLAGGED
-        elif flags is not None:
-            if not isinstance(flags.columns, pd.MultiIndex):
-                cols = flags.columns
-                flags = flags.copy()
-                flags.columns = self._getColumnIndex(cols, [FlagFields.FLAG])
-                flags = flags.reindex(columns=self._getColumnIndex(cols), fill_value="")
+        # implicitly sets self._flags and makes a deepcopy of self (i.e. the DmpFlagger)
+        newflagger = super().initFlags(data=data, flags=flags)
+        newflagger._causes = newflagger._flags.astype(str)
+        newflagger._comments = newflagger._flags.astype(str)
+        newflagger._causes[:], newflagger._comments[:] = "", ""
+        return newflagger
+
+    def slice(self, field=None, loc=None, drop=None, inplace=False):
+        newflagger = super().slice(field=field, loc=loc, drop=drop, inplace=inplace)
+        flags = newflagger._flags
+        newflagger._causes = self._causes.aloc[flags, ...]
+        newflagger._comments = self._comments.aloc[flags, ...]
+        return newflagger
+
+    def merge(self, other: DmpFlaggerT, subset: Optional[List] = None, join: str = "merge", inplace=False):
+        assert isinstance(other, DmpFlagger)
+        flags = mergeDios(self._flags, other._flags, subset=subset, join=join)
+        causes = mergeDios(self._causes, other._causes, subset=subset, join=join)
+        comments = mergeDios(self._comments, other._comments, subset=subset, join=join)
+        if inplace:
+            self._flags = flags
+            self._causes = causes
+            self._comments = comments
+            return self
         else:
-            raise TypeError("either 'data' or 'flags' are required")
-
-        return self._copy(self._assureDtype(flags))
-
-    def getFlagger(self, field=None, loc=None, iloc=None):
-        # NOTE: we need to preserve all indexing levels
-        assertScalar("field", field, optional=True)
-        variables = self._flags.columns.get_level_values(ColumnLevels.VARIABLES).drop_duplicates()
-        cols = toSequence(field, variables)
-        out = super().getFlagger(field, loc, iloc)
-        out._flags.columns = self._getColumnIndex(cols)
-        return out
-
-    def getFlags(self, field=None, loc=None, iloc=None):
-        assertScalar("field", field, optional=True)
-        field = field or slice(None)
-        mask = self._locatorMask(field, loc, iloc)
-        flags = self._flags.xs(FlagFields.FLAG, level=ColumnLevels.FLAGS, axis=1).copy()
-        return super()._assureDtype(flags.loc[mask, field])
+            return self._construct_new(flags, causes, comments)
+
+    def getFlags(self, field=None, loc=None, full=False):
+        # if loc is a valid 2D-indexer, field must be None.
+        # Otherwise aloc will fail and raise the
+        # appropriate error.
+        if isinstance(loc, diosT) and field is None:
+            indexer = loc
+        else:
+            loc = slice(None) if loc is None else loc
+            field = slice(None) if field is None else self._check_field(field)
+            indexer = (loc, field)
+
+        # `dios.aloc` may return a shallow-copied dios if `slice(None)` is passed as row indexer,
+        # hence the explicit copy. This is because pandas `.loc` returns a shallow copy if a null-slice is passed to a series.
+        flags = self._flags.aloc[indexer].copy()
+
+        if full:
+            causes = self._causes.aloc[indexer].copy()
+            comments = self._comments.aloc[indexer].copy()
+            return flags, dict(cause=causes, comment=comments)
+        else:
+            return flags
+
+    def setFlags(
+        self,
+        field,
+        loc=None,
+        flag=None,
+        cause="OTHER",
+        comment="",
+        force=False,
+        inplace=False,
+        with_extra=False,
+        flag_after=None,
+        flag_before=None,
+        win_flag=None,
+        **kwargs
+    ):
+        assert "iloc" not in kwargs, "deprecated keyword, iloc"
+        assertScalar("field", self._check_field(field), optional=False)
+
+        out = self if inplace else deepcopy(self)
+
+        if with_extra:
+            for val in [comment, cause, flag]:
+                if not isinstance(val, pd.Series):
+                    raise TypeError(f"`flag`, `cause`, `comment` must be pd.Series, if `with_extra=True`.")
+            assert flag.index.equals(comment.index) and flag.index.equals(cause.index)
 
-    def setFlags(self, field, loc=None, iloc=None, flag=None, force=False, comment="", cause="", **kwargs):
-        assertScalar("field", field, optional=True)
+        else:
+            flag = self.BAD if flag is None else flag
+            comment = json.dumps(
+                {**self._global_comments,
+                 "comment": comment,
+                 "test": kwargs.get("func_name", "")}
+            )
+
+        flags = self.getFlags(field=field, loc=loc)
+        if force:
+            mask = pd.Series(True, index=flags.index, dtype=bool)
+        else:
+            mask = flags < flag
 
-        flag = self.BAD if flag is None else self._checkFlag(flag)
+        # set flags of the test
+        out._flags.aloc[mask, field] = flag
+        out._causes.aloc[mask, field] = cause
+        out._comments.aloc[mask, field] = comment
 
-        comment = json.dumps({"comment": comment, "commit": self.project_version, "test": kwargs.get("func_name", ""),})
+        # calc and set window flags
+        if flag_after is not None or flag_before is not None:
+            win_mask, win_flag = self._getWindowMask(field, mask, flag_after, flag_before, win_flag, flag, force)
+            out._flags.aloc[win_mask, field] = win_flag
+            out._causes.aloc[win_mask, field] = cause
+            out._comments.aloc[win_mask, field] = comment
 
-        this = self.getFlags(field=field)
-        other = self._broadcastFlags(field=field, flag=flag)
-        mask = self._locatorMask(field, loc, iloc)
-        if not force:
-            mask &= (this < other).values
+        return out
 
-        out = deepcopy(self)
-        out._flags.loc[mask, field] = other[mask], cause, comment
+    def replaceField(self, field, flags, inplace=False, cause=None, comment=None, **kwargs):
+        """ Replace or delete all data for a given field.
+
+        Parameters
+        ----------
+        field : str
+            The field to replace / delete. If the field already exists, the respective data
+            is replaced, otherwise the data is inserted under the respective field name.
+        flags : pandas.Series or None
+            If None, the series denoted by `field` will be deleted. Otherwise
+            a series of flags (dtype flagger.dtype) that will replace the series
+            currently stored under `field`.
+        cause : pandas.Series
+            A series of causes (dtype str).
+        comment : pandas.Series
+            A series of comments (dtype str).
+        inplace : bool, default False
+            If False, a modified copy of the flagger is returned; otherwise the flagger itself is modified.
+        **kwargs : dict
+            ignored.
+
+        Returns
+        -------
+        flagger: saqc.flagger.BaseFlagger
+            The flagger object itself (if `inplace=True`) or a modified copy of it.
+
+        Raises
+        ------
+        ValueError: (delete) if field does not exist
+        TypeError: (replace / insert) if flags, causes, comments are not pd.Series
+        AssertionError: (replace / insert) if flags, causes, comments do not share the same index
+
+        Notes
+        -----
+        If deletion is requested (`flags=None`), `cause` and `comment` are ignored.
+
+        If `flags` is not None, flags, causes and comments must share the same index;
+        each is implicitly cast to the respective dtype.
+        """
+        assertScalar("field", field, optional=False)
+        out = self if inplace else deepcopy(self)
+        causes, comments = cause, comment
+
+        # delete
+        if flags is None:
+            if field not in self._flags:
+                raise ValueError(f"{field}: field does not exist")
+            del out._flags[field]
+            del out._comments[field]
+            del out._causes[field]
+
+        # insert / replace
+        else:
+            for val in [flags, causes, comments]:
+                if not isinstance(val, pd.Series):
+                    raise TypeError(f"`flag`, `cause`, `comment` must be pd.Series.")
+            assert flags.index.equals(comments.index) and flags.index.equals(causes.index)
+            out._flags[field] = flags.astype(self.dtype)
+            out._causes[field] = causes.astype(str)
+            out._comments[field] = comments.astype(str)
         return out
 
-    def _getColumnIndex(
-        self, cols: Union[str, Sequence[str]], fields: Union[str, Sequence[str]] = None
-    ) -> pd.MultiIndex:
-        cols = toSequence(cols)
-        fields = toSequence(fields, self.flags_fields)
-        return pd.MultiIndex.from_product([cols, fields], names=[ColumnLevels.VARIABLES, ColumnLevels.FLAGS])
-
-    def _assureDtype(self, flags):
-        # NOTE: building up new DataFrames is significantly
-        #       faster than assigning into existing ones
-        tmp = OrderedDict()
-        for (var, flag_field) in flags.columns:
-            col_data = flags[(var, flag_field)]
-            if flag_field == FlagFields.FLAG:
-                col_data = col_data.astype(self.dtype)
-            else:
-                col_data = col_data.astype(str)
-            tmp[(var, flag_field)] = col_data
-        return pd.DataFrame(tmp, columns=flags.columns, index=flags.index)
+    def _construct_new(self, flags, causes, comments) -> DmpFlaggerT:
+        new = DmpFlagger()
+        new._global_comments = self._global_comments
+        new._flags = flags
+        new._causes = causes
+        new._comments = comments
+        return new
+
+    @property
+    def SUSPICIOUS(self):
+        return FLAGS[-2]
diff --git a/saqc/flagger/positionalflagger.py b/saqc/flagger/positionalflagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..00af0b2b1c9b678ab0a778740d14599022f949d6
--- /dev/null
+++ b/saqc/flagger/positionalflagger.py
@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import json
+from copy import deepcopy
+
+import pandas as pd
+
+from dios import DictOfSeries
+from saqc.flagger.baseflagger import BaseFlagger, COMPARATOR_MAP
+from saqc.flagger.dmpflagger import DmpFlagger
+from saqc.lib.tools import toSequence, assertScalar
+
+
+FLAGS = ("-1", "0", "1", "2")
+
+
+class PositionalFlagger(BaseFlagger):
+    def __init__(self):
+        super().__init__(dtype=str)
+
+    def setFlags(
+            self,
+            field: str,
+            loc=None,
+            position=-1,
+            flag=None,
+            force: bool = False,
+            inplace: bool = False,
+            with_extra=False,
+            flag_after=None,
+            flag_before=None,
+            win_flag=None,
+            **kwargs
+    ):
+        assertScalar("field", field, optional=False)
+
+        # prepping
+        flag = str(self.BAD if flag is None else flag)
+        self.isValidFlag(flag, fail=True)
+        out = self if inplace else deepcopy(self)
+        out_flags = out._flags[field]
+
+        idx = self.getFlags(field, loc).index
+        mask = pd.Series(True, index=idx, dtype=bool)
+        mask = mask.reindex_like(out_flags).fillna(False)
+
+        # replace unflagged with the magic starter '9'
+        out_flags = out_flags.str.replace(f"^{self.UNFLAGGED}", "9", regex=True)
+
+        # bring all flags to the desired length
+        # length = position # if position > 0 else out_flags.str.len().max()
+        if position == -1:
+            length = position = out_flags.str.len().max()
+        else:
+            length = position = position + 1
+        out_flags = out_flags.str.pad(length + 1, fillchar=self.GOOD, side="right")
+
+        # we rigorously overwrite existing flags
+        new_flags = out_flags.str[position]
+        new_flags.loc[mask] = flag
+
+        # calc window flags
+        if flag_after is not None or flag_before is not None:
+            win_mask, win_flag = self._getWindowMask(field, mask, flag_after, flag_before, win_flag, flag, force)
+            new_flags.loc[win_mask] = win_flag
+
+        out._flags[field] = out_flags.str[:position] + new_flags + out_flags.str[position+1:]
+        return out
+
+    def isFlagged(self, field=None, loc=None, flag=None, comparator=">"):
+
+        field = slice(None) if field is None else field
+        flags = self._getMaxFlag(field, loc).astype(int)
+        flags = flags.loc[:, field]
+
+        # notna() prevents NaNs from becoming True,
+        # e.g.: `np.nan != 0 -> True`
+        flagged = flags.notna()
+        flags_to_compare = set(toSequence(flag, self.GOOD))
+        if not flags_to_compare:
+            flagged[:] = False
+            return flagged
+
+        cp = COMPARATOR_MAP[comparator]
+        for f in flags_to_compare:
+            self.isValidFlag(f, fail=True)
+            flagged &= cp(flags, int(f))
+        return flagged
+
+    def isValidFlag(self, flag, fail=False):
+        check = flag in FLAGS
+        if check is False and fail is True:
+            raise ValueError(f"invalid flag {flag}, given values should be in '{FLAGS}'")
+        return check
+
+    def _getMaxFlag(self, field, loc):
+
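+        # per column: treat the magic starter '9' as GOOD ('0') and reduce each
+        # flagged entry's positional flag string to its highest (worst) digit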
+        data = {}
+        flags = self.getFlags(field, loc)
+        if isinstance(flags, pd.Series):
+            flags = flags.to_frame()
+        for col_name, col in flags.iteritems():
+            mask = col != self.UNFLAGGED
+            col = col.str.replace("^9", "0", regex=True)
+            col[mask] = col[mask].apply(lambda x: max(list(x)))
+            data[col_name] = col
+        return DictOfSeries(data)
+
+    def toDmpFlagger(self):
+        self = PositionalFlagger().initFlags(flags=self._flags)
+        dmp_flagger = DmpFlagger().initFlags(data=self._flags)
+        flag_map = {
+            self.BAD: dmp_flagger.BAD,
+            self.SUSPICIOUS: dmp_flagger.SUSPICIOUS,
+            self.GOOD: dmp_flagger.GOOD,
+        }
+        for pos_flag, dmp_flag in flag_map.items():
+            loc = self.isFlagged(flag=pos_flag, comparator="==")
+            dmp_flagger._flags.aloc[loc] = dmp_flag
+
+        dmp_flagger._comments.loc[:] = self._flags.to_df().applymap(lambda v: json.dumps({"flag": v}))
+        dmp_flagger._causes.loc[:] = "OTHER"
+        return dmp_flagger
+
+    @property
+    def UNFLAGGED(self):
+        return FLAGS[0]
+
+    @property
+    def GOOD(self):
+        return FLAGS[1]
+
+    @property
+    def SUSPICIOUS(self):
+        return FLAGS[2]
+
+    @property
+    def BAD(self):
+        return FLAGS[3]
+
+    def isSUSPICIOUS(self, flag):
+        return flag == self.SUSPICIOUS
+
diff --git a/saqc/funcs/__init__.py b/saqc/funcs/__init__.py
index dd99e37239d85533f38cd5a0780087f96c8335db..e5c5153cbfef7a22c55982abbf7ae8cb369ffe74 100644
--- a/saqc/funcs/__init__.py
+++ b/saqc/funcs/__init__.py
@@ -2,10 +2,12 @@
 # -*- coding: utf-8 -*-
 
 # imports needed to make the functions register themself
-from .register import register
-from .functions import *
-from .breaks_detection import *
-from .constants_detection import *
-from .soil_moisture_tests import *
-from .spikes_detection import *
-from .harm_functions import *
+from saqc.core.register import register
+from saqc.funcs.functions import *
+from saqc.funcs.breaks_detection import *
+from saqc.funcs.constants_detection import *
+from saqc.funcs.soil_moisture_tests import *
+from saqc.funcs.spikes_detection import *
+from saqc.funcs.harm_functions import *
+from saqc.funcs.modelling import *
+from saqc.funcs.proc_functions import *
diff --git a/saqc/funcs/breaks_detection.py b/saqc/funcs/breaks_detection.py
index e8b5d1a79610e5c09eaa1cd521c2dc0a674518b9..d6da3d33d97f98bb2377244622c709f6487a5f8a 100644
--- a/saqc/funcs/breaks_detection.py
+++ b/saqc/funcs/breaks_detection.py
@@ -3,14 +3,90 @@
 
 import numpy as np
 import pandas as pd
+import dios
 
 from scipy.signal import savgol_filter
 
-from saqc.funcs.register import register
-from saqc.lib.tools import retrieveTrustworthyOriginal
+from saqc.core.register import register
+from saqc.lib.tools import retrieveTrustworthyOriginal, detectDeviants
 
 
-@register()
+@register(masking='all')
+def breaks_flagRegimeAnomaly(data, field, flagger, cluster_field, norm_spread, linkage_method='single',
+                             metric=lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y)),
+                             norm_frac=0.5, set_cluster=True, set_flags=True, **kwargs):
+    """
+    A function to flag values belonging to an anomalous regime with respect to the modelled regimes of `field`.
+
+    "Normality" is determined in terms of a maximum spreading distance that regimes must not exceed with respect
+    to a certain metric and linkage method.
+
+    In addition, a range of regimes is only considered "normal", if it models more than a fraction of `norm_frac`
+    of the valid samples in `field`.
+
+    Note that you must detect the regime changepoints prior to calling this function.
+
+    Note that it is possible to perform hypothesis tests for regime equality by passing a p-value calculating
+    function as the metric and selecting linkage method "complete".
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    cluster_field : str
+        The name of the column in data, holding the cluster labels for the samples in field (has to be indexed
+        equal to field).
+    norm_spread : float
+        A threshold denoting the value level up to which clusters are agglomerated.
+    linkage_method : {"single", "complete", "average", "weighted", "centroid", "median", "ward"}, default "single"
+        The linkage method used for hierarchical (agglomerative) clustering of the variables.
+    metric : Callable[[numpy.array, numpy.array], float], default lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y))
+        A metric function for calculating the dissimilarity between 2 regimes. Defaults to just the difference in mean.
+    norm_frac : float
+        Has to be in [0,1]. Determines the minimum fraction of samples
+        the "normal" group has to comprise to actually be the normal group.
+    set_cluster : bool, default True
+        If True, all data considered "anomalous" gets assigned a negative cluster label.
+    set_flags : bool, default True
+        Whether or not to flag abnormal values (do not flag them if you want to correct them
+        afterwards, because flagged values usually are not visible to further tests).
+
+    kwargs
+
+    Returns
+    -------
+
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger
+        The flagger object, holding flags and additional informations related to `data`.
+        Flags values may have changed, relatively to the flagger input.
+
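+    Examples
+    --------
+    A minimal sketch (the variable names are illustrative; the cluster labels are assumed
+    to have been assigned by a preceding regime/changepoint detection step):
+
+    >>> data, flagger = breaks_flagRegimeAnomaly(data, "level", flagger,
+    ...                                          cluster_field="level_cluster",
+    ...                                          norm_spread=1.0)
+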
+    """
+
+    clusterser = data[cluster_field]
+    cluster = np.unique(clusterser)
+    cluster_dios = dios.DictOfSeries({i: data[field][clusterser == i] for i in cluster})
+    plateaus = detectDeviants(cluster_dios, metric, norm_spread, norm_frac, linkage_method, 'samples')
+
+    if set_flags:
+        for p in plateaus:
+            flagger = flagger.setFlags(field, loc=cluster_dios.iloc[:, p].index, **kwargs)
+
+    if set_cluster:
+        for p in plateaus:
+            if cluster[p] > 0:
+                clusterser[clusterser == cluster[p]] = -cluster[p]
+
+    data[cluster_field] = clusterser
+    return data, flagger
+
+
+@register(masking='field')
 def breaks_flagSpektrumBased(
     data,
     field,
@@ -27,57 +103,77 @@ def breaks_flagSpektrumBased(
     **kwargs
 ):
 
-    """ This Function is an generalization of the Spectrum based break flagging mechanism as presented in:
-
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
+    """
+    The function is a generalization of the Spectrum based break flagging mechanism presented in [1] (see References).
 
     The function flags breaks (jumps/drops) in input measurement series by evaluating its derivatives.
     A measurement y_t is flagged a, break, if:
 
-    (1) y_t is changing relatively to its preceeding value by at least (100*rel_change_rate_min) percent
-    (2) y_(t-1) is difffering from its preceeding value, by a margin of at least "thresh_abs"
-    (3) Absolute first derivative |(y_t)'| has to be at least "first_der_factor" times as big as the arithmetic middle
-        over all the first derivative values within a 2 times "first_der_window_size" hours window, centered at t.
+    (1) y_t is changing relative to its preceding value by at least (100*`thresh_rel`) percent
+    (2) y_(t-1) differs from its preceding value by a margin of at least `thresh_abs`
+    (3) The absolute first derivative |(y_t)'| has to be at least `first_der_factor` times as big as the arithmetic mean
+        over all first derivative values within a 2 times `first_der_window_range` hours window, centered at t.
     (4) The ratio of the second derivatives at t and t+1 has to be "aproximately" 1.
-        ([1-scnd__der_ration_margin_1, 1+scnd_ratio_margin_1])
-    (5) The ratio of the second derivatives at t+1 and t+2 has to be larger than scnd_der_ratio_margin_2
+        ([1-`scnd_der_ratio_margin_1`, 1+`scnd_der_ratio_margin_1`])
+    (5) The ratio of the second derivatives at t+1 and t+2 has to be larger than `scnd_der_ratio_margin_2`
 
     NOTE 1: As no reliable statement about the plausibility of the meassurements before and after the jump is possible,
     only the jump itself is flagged. For flagging constant values following upon a jump, use a flagConstants test.
 
     NOTE 2: All derivatives in the reference publication are obtained by applying a Savitzky-Golay filter to the data
-    before differentiating. However, i was not able to reproduce satisfaction of all the conditions for synthetically
-    constructed breaks.
-    Especially condition [4] and [5]! This is because smoothing distributes the harshness of the break over the
-    smoothing window. Since just taking the differences as derivatives did work well for my empirical data set,
-    the parameter "smooth" defaults to "raw". That means, that derivatives will be obtained by just using the
-    differences series.
-    You are free of course, to change this parameter to "savgol" and play around with the associated filter options.
-    (see parameter description below)
-
-
-
-
-       :param data:                        The pandas dataframe holding the data-to-be flagged.
-                                           Data must be indexed by a datetime series and be harmonized onto a
-                                           time raster with seconds precision (skips allowed).
-       :param flags:                       A dataframe holding the flags/flag-entries associated with "data".
-       :param field:                       Fieldname of the Soil moisture measurements field in data.
-       :param flagger:                     A flagger - object. (saqc.flagger.X)
-       :param smooth:                      Bool. Method for obtaining dataseries' derivatives.
-                                           False: Just take series step differences (default)
-                                           True: Smooth data with a Savitzky Golay Filter before differentiating.
-       :param smooth_window:               Offset string. Size of the filter window, used to calculate the derivatives.
-                                           (relevant only, if: smooth is True)
-       :param smooth_poly_deg:             Integer. Polynomial order, used for smoothing with savitzk golay filter.
-                                           (relevant only, if: smooth_func='savgol')
-       :param thresh_rel                   Float in [0,1]. See (1) of function descritpion above to learn more
-       :param thresh_abs                   Float > 0. See (2) of function descritpion above to learn more.
-       :param first_der_factor             Float > 0. See (3) of function descritpion above to learn more.
-       :param first_der_window_range        Offset_String. See (3) of function description to learn more.
-       :param scnd_der_ratio_margin_1      Float in [0,1]. See (4) of function descritpion above to learn more.
-       :param scnd_der_ratio_margin_2      Float in [0,1]. See (5) of function descritpion above to learn more.
+    before differentiating.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    thresh_rel : float, default 0.1
+        Float in [0,1]. See (1) of the function description above to learn more.
+    thresh_abs : float, default 0.01
+        Float > 0. See (2) of the function description above to learn more.
+    first_der_factor : float, default 10
+        Float > 0. See (3) of the function description above to learn more.
+    first_der_window_range : str, default '12h'
+        Offset string. See (3) of the function description to learn more.
+    scnd_der_ratio_margin_1 : float, default 0.05
+        Float in [0,1]. See (4) of the function description above to learn more.
+    scnd_der_ratio_margin_2 : float, default 10
+        Float > 0. See (5) of the function description above to learn more.
+    smooth : bool, default True
+        Method for obtaining dataseries' derivatives.
+        * False: Just take series step differences (default)
+        * True: Smooth data with a Savitzky Golay Filter before differentiating.
+    smooth_window : {None, str}, default 2
+        Effective only if `smooth` = True
+        Offset string. Size of the filter window, used to calculate the derivatives.
+    smooth_poly_deg : int, default 2
+        Effective only if `smooth` = True.
+        Polynomial order, used for smoothing with the Savitzky-Golay filter.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional informations related to `data`.
+        Flags values may have changed, relatively to the flagger input.
+
+    References
+    ----------
+    The Function is a generalization of the Spectrum based break flagging mechanism as presented in:
+
+    [1] Dorigo,W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the international Soil Moisture Network. 2013. Vadoze Zone J.
+        doi:10.2136/vzj2012.0097.
+
+    Find a brief mathematical description of the function here:
+
+    [2] https://git.ufz.de/rdm-software/saqc/-/blob/testfuncDocs/docs/funcs
+        /FormalDescriptions.md#breaks_flagspektrumbased
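+
+    Examples
+    --------
+    A minimal sketch relying on the documented defaults (the column name "sm" is illustrative):
+
+    >>> data, flagger = breaks_flagSpektrumBased(data, "sm", flagger)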
     """
 
     # retrieve data series input at its original sampling rate
@@ -91,7 +187,7 @@ def breaks_flagSpektrumBased(
     # relative - change - break criteria testing:
     abs_change = np.abs(dataseries.shift(+1) - dataseries)
     breaks = (abs_change > thresh_abs) & (abs_change / dataseries > thresh_rel)
-    breaks = breaks[breaks == True]
+    breaks = breaks[breaks]
 
     # First derivative criterion
     smoothing_periods = int(np.ceil((smooth_window.seconds / data_rate.n)))
@@ -150,8 +246,6 @@ def breaks_flagSpektrumBased(
         else:
             breaks[brake] = False
 
-    breaks = breaks[breaks == True]
-
-    flagger = flagger.setFlags(field, breaks.index, **kwargs)
+    flagger = flagger.setFlags(field, breaks, **kwargs)
 
     return data, flagger
diff --git a/saqc/funcs/constants_detection.py b/saqc/funcs/constants_detection.py
index d6577d61a552b660f2d787f4ef40a8f2af89ff8f..d402056901b32bd78997f89fb2671305f7dfe2ae 100644
--- a/saqc/funcs/constants_detection.py
+++ b/saqc/funcs/constants_detection.py
@@ -4,42 +4,67 @@
 import numpy as np
 import pandas as pd
 
-from saqc.funcs.register import register
+from saqc.core.register import register
 from saqc.lib.ts_operators import varQC
-from saqc.lib.tools import retrieveTrustworthyOriginal
+from saqc.lib.tools import retrieveTrustworthyOriginal, customRoller
 
 
-@register()
+@register(masking='field')
 def constants_flagBasic(data, field, flagger, thresh, window, **kwargs):
     """
+    This function flags plateaus/series of constant values of length `window`, if
+    their maximum total change is smaller than `thresh`.
+
+    That is, any interval of values y(t),...,y(t+n) is flagged, if:
+
+    (1) n > `window`
+    (2) |y(t + i) - y(t + j)| < `thresh`, for all i,j in [0, 1, ..., n]
+
     Flag values are (semi-)constant.
 
-    :param data: dataframe
-    :param field: column in data
-    :param flagger: saqc flagger obj
-    :param thresh: the difference between two values must be below that
-    :param window: sliding window
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    thresh : float
+        Upper bound for the maximum total change of an interval to be flagged constant.
+    window : str
+        Lower bound for the size of an interval to be flagged constant.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional informations related to `data`.
+        Flags values may have changed, relatively to the flagger input.
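+
+    Examples
+    --------
+    A minimal sketch (the column name "x" and the parameter values are illustrative):
+
+    >>> data, flagger = constants_flagBasic(data, "x", flagger, thresh=0.1, window="1h")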
     """
-    d = data[field]
-
-    # find all constant values in a row with a forward search
-    r = d.rolling(window=window)
-    mask = (r.max() - r.min() <= thresh) & (r.count() > 1)
 
-    # backward rolling for offset windows hack
-    bw = mask[::-1].copy()
-    bw.index = bw.index.max() - bw.index
+    d = data[field]
+    if not isinstance(window, str):
+        raise TypeError('window must be an offset string.')
 
-    # propagate the mask(!), backwards
-    bwmask = bw.rolling(window=window).sum() > 0
+    # min_periods=2 ensures that at least two non-nan values are present
+    # in each window and also min() == max() == d[i] is not possible.
+    kws = dict(window=window, min_periods=2, expand=False)
 
-    mask |= bwmask[::-1].values
+    # find all consecutive constant values in one direction...
+    r = customRoller(d, **kws)
+    m1 = r.max() - r.min() <= thresh
+    # and in the other
+    r = customRoller(d, forward=True, **kws)
+    m2 = r.max() - r.min() <= thresh
+    mask = m1 | m2
 
     flagger = flagger.setFlags(field, mask, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def constants_flagVarianceBased(
     data, field, flagger, window="12h", thresh=0.0005, max_missing=None, max_consec_missing=None, **kwargs
 ):
@@ -47,24 +72,37 @@ def constants_flagVarianceBased(
     """
     Function flags plateaus/series of constant values. Any interval of values y(t),..y(t+n) is flagged, if:
 
-    (1) n > "plateau_interval_min"
-    (2) variance(y(t),...,y(t+n) < thresh
-
-    :param data:                        The pandas dataframe holding the data-to-be flagged.
-                                        Data must be indexed by a datetime series and be harmonized onto a
-                                        time raster with seconds precision (skips allowed).
-    :param field:                       Fieldname of the Soil moisture measurements field in data.
-    :param flagger:                     A flagger - object. (saqc.flagger.X)
-    :param window:                      Offset String. Only intervals of minimum size "window" have the
-                                        chance to get flagged as constant intervals
-    :param thresh:                      Float. The upper barrier, the variance of an interval mus not exceed, if the
-                                        interval wants to be flagged a plateau.
-    :param max_missing:                 maximum number of nan values tolerated in an interval, for retrieving a valid
-                                        variance from it. (Intervals with a number of nans exceeding "max_missing"
-                                        have no chance to get flagged a plateau!)
-    :param max_consec_missing:          Maximum number of consecutive nan values allowed in an interval to retrieve a
-                                        valid  variance from it. (Intervals with a number of nans exceeding
-                                        "max_missing" have no chance to get flagged a plateau!)
+    (1) n > `window`
+    (2) variance(y(t),...,y(t+n)) < `thresh`
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    window : str
+        Only intervals of minimum size `window` have the chance to get flagged as constant intervals.
+    thresh : float
+        The upper bound the variance of an interval must not exceed for the interval to be flagged a plateau.
+    max_missing : {None, int}, default None
+        Maximum number of nan values tolerated in an interval, for retrieving a valid
+        variance from it. (Intervals with a number of nans exceeding "max_missing"
+        have no chance to get flagged a plateau!)
+    max_consec_missing : {None, int}, default None
+        Maximum number of consecutive nan values allowed in an interval to retrieve a
+        valid  variance from it. (Intervals with a number of nans exceeding
+        "max_consec_missing" have no chance to get flagged a plateau!)
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional informations related to `data`.
+        Flags values may have changed, relatively to the flagger input.
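+
+    Examples
+    --------
+    A minimal sketch using the documented defaults (the column name "x" is illustrative):
+
+    >>> data, flagger = constants_flagVarianceBased(data, "x", flagger, window="12h", thresh=0.0005)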
     """
 
     dataseries, data_rate = retrieveTrustworthyOriginal(data, field, flagger)
diff --git a/saqc/funcs/functions.py b/saqc/funcs/functions.py
index 0000ac6c001f5d9ae0c31f36d326b7a181feabbd..c93731500a732099cfaf71a4b37d6e753ab50644 100644
--- a/saqc/funcs/functions.py
+++ b/saqc/funcs/functions.py
@@ -1,91 +1,408 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+from functools import partial
+from inspect import signature
+
+import dios
 import numpy as np
 import pandas as pd
+import scipy
+import itertools
+import collections
+import numba
+from mlxtend.evaluate import permutation_test
+from scipy import stats
+from scipy.cluster.hierarchy import linkage, fcluster
+
+
+from saqc.lib.tools import groupConsecutives, detectDeviants
+from saqc.lib.tools import groupConsecutives, seasonalMask
+from saqc.funcs.proc_functions import proc_fork, proc_drop, proc_projectFlags
+from saqc.funcs.modelling import modelling_mask
 
-from saqc.lib.tools import sesonalMask, flagWindow, groupConsecutives
+from saqc.core.register import register
+from saqc.core.visitor import ENVIRONMENT
+from dios import DictOfSeries
+from typing import Any
 
-from saqc.funcs.register import register
 
+def _dslIsFlagged(flagger, var, flag=None, comparator=">="):
+    """
+    helper function for `flagGeneric`
+    """
+    return flagger.isFlagged(var.name, flag=flag, comparator=comparator)
 
-@register()
-def procGeneric(data, field, flagger, func, **kwargs):
+
+def _execGeneric(flagger, data, func, field, nodata):
     # TODO:
-    # - add new fields to te flagger
-    data[field] = func.squeeze()
+    # - check series.index compatibility
+    # - field is only needed to translate 'this' parameters
+    #    -> maybe we could do the translation on the tree instead
+
+    sig = signature(func)
+    args = []
+    for k, v in sig.parameters.items():
+        k = field if k == "this" else k
+        if k not in data:
+            raise NameError(f"variable '{k}' not found")
+        args.append(data[k])
+
+    globs = {
+        "isflagged": partial(_dslIsFlagged, flagger),
+        "ismissing": lambda var: ((var == nodata) | pd.isnull(var)),
+        "mask": lambda cond: data[cond.name].mask(cond),
+        "this": field,
+        "NODATA": nodata,
+        "GOOD": flagger.GOOD,
+        "BAD": flagger.BAD,
+        "UNFLAGGED": flagger.UNFLAGGED,
+        **ENVIRONMENT,
+    }
+    func.__globals__.update(globs)
+    return func(*args)
+
+
+@register(masking='all')
+def procGeneric(data, field, flagger, func, nodata=np.nan, **kwargs):
+    """
+    generate/process data with generically defined functions.
+
+    The functions can depend on any of the fields present in data.
+
+    Formally, what the function does, is the following:
+
+    1.  Let F be a Callable, depending on fields f_1, f_2,...f_K, (F = F(f_1, f_2,...f_K))
+        Then, for every timestamp t_i that occurs in at least one of the timeseries data[f_j] (outer join),
+        the value v_i is computed via:
+        v_i = F(data[f_1][t_i], data[f_2][t_i], ..., data[f_K][t_i]), if all data[f_j][t_i] do exist
+        v_i = `nodata`, if at least one of the data[f_j][t_i] is missing.
+    2.  The result is stored to data[field] (gets generated if not present)
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, where you want the result from the generic expressions processing to be written to.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    func : Callable
+        The data processing function with parameter names that will be
+        interpreted as data column entries.
+        See the examples section to learn more.
+    nodata : any, default np.nan
+        The value that indicates missing/invalid data
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        The shape of the data may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional Informations related to `data`.
+        The flags shape may have changed relatively to the input flagger.
+
+    Examples
+    --------
+    Some examples on what to pass to the func parameter:
+    To compute the sum of the variables "temperature" and "uncertainty", you would pass the function:
+
+    >>> lambda temperature, uncertainty: temperature + uncertainty
+
+    You also can pass numpy and pandas functions:
+
+    >>> lambda temperature, uncertainty: np.round(temperature) * np.sqrt(uncertainty)
+
+    """
+    data[field] = _execGeneric(flagger, data, func, field, nodata).squeeze()
+    # NOTE:
+    # The flags to `field` will be (re-)set to UNFLAGGED
+    # That leads to the following problem:
+    # flagger.merge merges the given flaggers, if
+    # `field` did already exist before the call to `procGeneric`
+    # but with a differing index, we end up with:
+    # len(data[field]) != len(flagger.getFlags(field))
+    # see: test/funcs/test_generic_functions.py::test_procGenericMultiple
+
+    # TODO:
+    # We need a way to simply overwrite a given flagger column, maybe
+    # an optional keyword to merge ?
+    flagger = flagger.merge(flagger.initFlags(data[field]))
     return data, flagger
 
 
-@register()
-def flagGeneric(data, field, flagger, func, **kwargs):
+@register(masking='all')
+def flagGeneric(data, field, flagger, func, nodata=np.nan, **kwargs):
+    """
+    a function to flag a data column by evaluation of a generic expression.
+
+    The expression can depend on any of the fields present in data.
+
+    Formally, what the function does, is the following:
+
+    Let X be an expression, depending on fields f_1, f_2,...f_K, (X = X(f_1, f_2,...f_K))
+    Then, for every timestamp t_i in data[field]:
+    data[field][t_i] is flagged if X(data[f_1][t_i], data[f_2][t_i], ..., data[f_K][t_i]) is True.
+
+    Note that all value series included in the expression to evaluate must be labeled identically to `field`.
+
+    Note that the expression is passed in the form of a Callable and that this Callable's parameter names are
+    interpreted as actual names in the data header. See the examples section to get an idea.
+
+    Note that all the numpy functions are available within the generic expressions.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, where you want the result from the generic expressions evaluation to be projected
+        to.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    func : Callable
+        The expression that is to be evaluated is passed in form of a callable, with parameter names that will be
+        interpreted as data column entries. The Callable must return a boolean array-like.
+        See the examples section to learn more.
+    nodata : any, default np.nan
+        The value that indicates missing/invalid data
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional Informations related to `data`.
+        Flags values may have changed relatively to the flagger input.
+
+    Examples
+    --------
+    Some examples on what to pass to the func parameter:
+    To flag the variable `field`, if the sum of the variables
+    "temperature" and "uncertainty" is below zero, you would pass the function:
+
+    >>> lambda temperature, uncertainty: temperature + uncertainty < 0
+
+    There is the reserved name 'this', which always refers to `field`. So, to flag `field` if it is negative, you can
+    also pass:
+
+    >>> lambda this: this < 0
+
+    If you want to make the flagging dependent on flags already present in the data, you can use the built-in
+    ``isflagged`` method. For example, to flag 'temperature' if 'level' is flagged, you would use:
+
+    >>> lambda level: isflagged(level)
+
+    You can furthermore specify a flagging level you want to compare the flags to. For example, to flag
+    'temperature' if 'level' is flagged at a level named 'doubtfull' or worse, use:
+
+    >>> lambda level: isflagged(level, flag='doubtfull', comparator='<=')
+
+    If you are unsure about the flagging level names of the used flagger, you can use the reserved keywords BAD,
+    UNFLAGGED and GOOD to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example:
+
+    >>> lambda level: isflagged(level, flag=UNFLAGGED, comparator='==')
+
+    Your expression is also allowed to include pandas and numpy functions:
+
+    >>> lambda level: np.sqrt(level) > 7
+    """
     # NOTE:
     # The naming of the func parameter is pretty confusing
     # as it actually holds the result of a generic expression
-    mask = func.squeeze()
+    mask = _execGeneric(flagger, data, func, field, nodata).squeeze()
     if np.isscalar(mask):
         raise TypeError(f"generic expression does not return an array")
     if not np.issubdtype(mask.dtype, np.bool_):
         raise TypeError(f"generic expression does not return a boolean array")
-    flagger = flagger.setFlags(field, mask, **kwargs)
+
+    if field not in flagger.getFlags():
+        flagger = flagger.merge(flagger.initFlags(data=pd.Series(index=mask.index, name=field)))
+
+    # if flagger.getFlags(field).empty:
+    #     flagger = flagger.merge(
+    #         flagger.initFlags(
+    #             data=pd.Series(name=field, index=mask.index, dtype=np.float64)))
+    flagger = flagger.setFlags(field=field, loc=mask, **kwargs)
     return data, flagger
 
 
-@register()
-def flagRange(data, field, flagger, min, max, **kwargs):
+@register(masking='field')
+def flagRange(data, field, flagger, min=-np.inf, max=np.inf, **kwargs):
+    """
+    Function flags values not covered by the closed interval [`min`, `max`].
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    min : float
+        Lower bound for valid data.
+    max : float
+        Upper bound for valid data.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional Informations related to `data`.
+        Flags values may have changed relatively to the flagger input.
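+
+    Examples
+    --------
+    A minimal sketch (the column name "x" and the bounds are illustrative):
+
+    >>> data, flagger = flagRange(data, "x", flagger, min=0.0, max=100.0)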
+    """
+
+    # using .values is very much faster
     datacol = data[field].values
     mask = (datacol < min) | (datacol > max)
     flagger = flagger.setFlags(field, mask, **kwargs)
     return data, flagger
 
 
-@register()
+
+@register(masking='field')
 def flagMissing(data, field, flagger, nodata=np.nan, **kwargs):
+    """
+    The function flags all values indicating missing data.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    nodata : any, default np.nan
+        A value that defines missing data.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional Informations related to `data`.
+        Flags values may have changed relatively to the flagger input.
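+
+    Examples
+    --------
+    A minimal sketch (the column name "x" and the nodata value are illustrative):
+
+    >>> data, flagger = flagMissing(data, "x", flagger, nodata=-9999)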
+    """
+
     datacol = data[field]
     if np.isnan(nodata):
         mask = datacol.isna()
     else:
-        mask = datacol[datacol == nodata]
+        mask = datacol == nodata
 
     flagger = flagger.setFlags(field, loc=mask, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def flagSesonalRange(
-    data, field, flagger, min, max, startmonth=1, endmonth=12, startday=1, endday=31, **kwargs,
+        data, field, flagger, min, max, startmonth=1, endmonth=12, startday=1, endday=31, **kwargs,
 ):
-    smask = sesonalMask(data.index, startmonth, startday, endmonth, endday)
+    """
+    Function applies a range check onto data chunks (seasons).
 
-    d = data.loc[smask, [field]]
-    if d.empty:
-        return data, flagger
+    The data chunks to be tested are defined by annual seasons that range from a starting date,
+    to an ending date, whereas the dates are defined by month and day number.
 
-    _, flagger_range = flagRange(d, field, flagger.getFlagger(loc=d.index), min=min, max=max, **kwargs)
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    min : float
+        Lower bound for valid data.
+    max : float
+        Upper bound for valid data.
+    startmonth : int
+        Starting month of the season to flag.
+    endmonth : int
+        Ending month of the season to flag.
+    startday : int
+        Starting day of the season to flag.
+    endday : int
+        Ending day of the season to flag.
 
-    if not flagger_range.isFlagged(field).any():
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
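+
+    Examples
+    --------
+    A minimal sketch, assuming `data` and `flagger` are prepared and "x" is a column in `data`.
+    To range-check "x" only within the summer months (June 1st to August 31st) of every year:
+
+    >>> _, fl = flagSesonalRange(data, "x", flagger, min=0, max=40,
+    ...                          startmonth=6, startday=1, endmonth=8, endday=31)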
+    """
+    if data[field].empty:
         return data, flagger
 
-    flagger = flagger.setFlagger(flagger_range)
+    newfield = f"{field}_masked"
+    start = f"{startmonth:02}-{startday:02}T00:00:00"
+    end = f"{endmonth:02}-{endday:02}T00:00:00"
+
+    data, flagger = proc_fork(data, field, flagger, suffix="_masked")
+    data, flagger = modelling_mask(data, newfield, flagger, mode='seasonal', season_start=start, season_end=end,
+                                   include_bounds=True)
+    data, flagger = flagRange(data, newfield, flagger, min=min, max=max, **kwargs)
+    data, flagger = proc_projectFlags(data, field, flagger, method='match', source=newfield)
+    data, flagger = proc_drop(data, newfield, flagger)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def clearFlags(data, field, flagger, **kwargs):
     flagger = flagger.clearFlags(field, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def forceFlags(data, field, flagger, flag, **kwargs):
-    flagger = flagger.clearFlags(field).setFlags(field, flag=flag, **kwargs)
+    flagger = flagger.clearFlags(field).setFlags(field, flag=flag, inplace=True, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def flagIsolated(
-    data, field, flagger, gap_window, group_window, **kwargs,
+        data, field, flagger, gap_window, group_window, **kwargs,
 ):
+    """
+    The function flags arbitrarily large groups of values, if they are surrounded by sufficiently
+    large data gaps. A gap is defined as a group of missing and/or flagged values.
+
+    A series of values x_k,x_(k+1),...,x_(k+n), with associated timestamps t_k,t_(k+1),...,t_(k+n),
+    is considered to be isolated, if:
+
+    1. t_(k+n) - t_k < `group_window`
+    2. None of the x_j with 0 < t_k - t_j < `gap_window` is both valid and unflagged (preceding gap).
+    3. None of the x_j with 0 < t_j - t_(k+n) < `gap_window` is both valid and unflagged (succeeding gap).
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    gap_window : str
+        The minimum size of the gap before and after a group of valid values that makes this group considered an
+        isolated group. See conditions (2) and (3).
+    group_window : str
+        The maximum temporal extension allowed for a group that is isolated by gaps of size `gap_window`,
+        to actually get flagged as an isolated group. See condition (1).
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
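+
+    Examples
+    --------
+    A minimal sketch, assuming `data` and `flagger` are prepared and "x" is a column in `data`.
+    Both windows are passed as pandas offset strings. To flag groups that span at most one hour
+    and are separated from the rest of the valid data by at least one day on both sides:
+
+    >>> _, fl = flagIsolated(data, "x", flagger, gap_window="1D", group_window="1H")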
+    """
 
     gap_window = pd.tseries.frequencies.to_offset(gap_window)
     group_window = pd.tseries.frequencies.to_offset(group_window)
@@ -99,9 +416,9 @@ def flagIsolated(
             start = srs.index[0]
             stop = srs.index[-1]
             if stop - start <= group_window:
-                left = mask[start - gap_window : start].iloc[:-1]
+                left = mask[start - gap_window: start].iloc[:-1]
                 if left.all():
-                    right = mask[stop : stop + gap_window].iloc[1:]
+                    right = mask[stop: stop + gap_window].iloc[1:]
                     if right.all():
                         flags[start:stop] = True
 
@@ -110,6 +427,579 @@ def flagIsolated(
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def flagDummy(data, field, flagger, **kwargs):
+    """
+    Function does nothing but return data and flagger unchanged.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+    """
+    return data, flagger
+
+
+@register(masking='field')
+def flagForceFail(data, field, flagger, **kwargs):
+    """
+    Function raises a runtime error.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+
+    """
+    raise RuntimeError("Works as expected :D")
+
+
+@register(masking='field')
+def flagUnflagged(data, field, flagger, **kwargs):
+    """
+    Function sets the flagger.GOOD flag to all values flagged better than flagger.GOOD.
+    If there is an entry 'flag' in the kwargs dictionary passed, the
+    function sets the kwargs['flag'] flag to all values flagged better than kwargs['flag'].
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    kwargs : Dict
+        If kwargs contains a 'flag' entry, kwargs['flag'] is set; if no entry 'flag' is present,
+        'flagger.GOOD' is set.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
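+
+    Examples
+    --------
+    A minimal sketch, assuming `data` and `flagger` are prepared and "x" is a column in `data`.
+    To assign the GOOD flag to every value of "x" that no test has flagged so far:
+
+    >>> _, fl = flagUnflagged(data, "x", flagger)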
+    """
+
+    flag = kwargs.pop('flag', flagger.GOOD)
+    flagger = flagger.setFlags(field, flag=flag, **kwargs)
+    return data, flagger
+
+
+@register(masking='field')
+def flagGood(data, field, flagger, **kwargs):
+    """
+    Function sets the flagger.GOOD flag to all values flagged better than flagger.GOOD.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+
+    """
+    kwargs.pop('flag', None)
+    return flagUnflagged(data, field, flagger, **kwargs)
+
+
+@register(masking='field')
+def flagManual(data, field, flagger, mdata, mflag: Any = 1, method="plain", **kwargs):
+    """
+    Flag data by given, "manually generated" data.
+
+    The data is flagged at locations where `mdata` is equal to a provided flag (`mflag`).
+    The format of mdata can be an indexed object, like pd.Series, pd.DataFrame or dios.DictOfSeries,
+    but it can also be a plain list- or array-like.
+    How indexed mdata is aligned to data is specified via the `method` parameter.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    mdata : {pd.Series, pd.DataFrame, DictOfSeries, str}
+        The "manually generated" data.
+    mflag : scalar
+        The flag that indicates data points in `mdata`, whose projection onto data should be flagged.
+    method : {'plain', 'ontime', 'left-open', 'right-open'}, default 'plain'
+        Defines how mdata is projected on data. Except for the 'plain' method, the methods assume mdata to have an
+        index.
+
+        * 'plain': mdata must have the same length as data and is projected one-to-one on data.
+        * 'ontime': works only with indexed mdata. mdata entries are matched with data entries that have the same index.
+        * 'right-open': mdata defines intervals, values are to be projected on.
+          The intervals are defined by any two consecutive timestamps t_1 and t_2 in mdata.
+          The value at t_1 gets projected onto all data timestamps t with t_1 <= t < t_2.
+        * 'left-open': like 'right-open', but the projected interval now covers all t with t_1 < t <= t_2.
+
+    Returns
+    -------
+    data, flagger: original data, modified flagger
+    
+    Examples
+    --------
+    An example for mdata
+    >>> mdata = pd.Series([1,0,1], index=pd.to_datetime(['2000-02', '2000-03', '2001-05']))
+    >>> mdata
+    2000-02-01    1
+    2000-03-01    0
+    2001-05-01    1
+    dtype: int64
+
+    On *daily* data, with the 'ontime' method, only the provided timestamps are used.
+    Bear in mind that only exact timestamps apply; any offset will result in ignoring
+    the timestamp.
+    >>> _, fl = flagManual(data, field, flagger, mdata, mflag=1, method='ontime')
+    >>> fl.isFlagged(field)
+    2000-01-31    False
+    2000-02-01    True
+    2000-02-02    False
+    2000-02-03    False
+    ..            ..
+    2000-02-29    False
+    2000-03-01    True
+    2000-03-02    False
+    Freq: D, dtype: bool
+
+    With the 'right-open' method, the mdata is forward filled:
+    >>> _, fl = flagManual(data, field, flagger, mdata, mflag=1, method='right-open')
+    >>> fl.isFlagged(field)
+    2000-01-31    False
+    2000-02-01    True
+    2000-02-02    True
+    ..            ..
+    2000-02-29    True
+    2000-03-01    False
+    2000-03-02    False
+    Freq: D, dtype: bool
+
+    With the 'left-open' method, backward filling is used:
+    >>> _, fl = flagManual(data, field, flagger, mdata, mflag=1, method='left-open')
+    >>> fl.isFlagged(field)
+    2000-01-31    False
+    2000-02-01    False
+    2000-02-02    True
+    ..            ..
+    2000-02-29    True
+    2000-03-01    True
+    2000-03-02    False
+    Freq: D, dtype: bool
+    """
+    dat = data[field]
+    if isinstance(mdata, str):
+        # todo import path type in mdata, use
+        #  s = pd.read_csv(mdata, index_col=N, usecol=[N,N,..]) <- use positional
+        #  use a list-arg in config to get the columns
+    #  at last, fall through to next checks
+        raise NotImplementedError("giving a path is currently not supported")
+
+    if isinstance(mdata, (pd.DataFrame, DictOfSeries)):
+        mdata = mdata[field]
+
+    hasindex = isinstance(mdata, (pd.Series, pd.DataFrame, DictOfSeries))
+    if not hasindex and method != "plain":
+        raise ValueError("mdata has no index")
+
+    if method == "plain":
+        if hasindex:
+            mdata = mdata.to_numpy()
+        if len(mdata) != len(dat):
+            raise ValueError("mdata must have the same length as data")
+        mdata = pd.Series(mdata, index=dat.index)
+    elif method == "ontime":
+        pass  # reindex will do the job later
+    elif method in ["left-open", "right-open"]:
+        mdata = mdata.reindex(dat.index.union(mdata.index))
+
+        # -->)[t0-->)[t1--> (ffill)
+        if method == "right-open":
+            mdata = mdata.ffill()
+
+        # <--t0](<--t1](<-- (bfill)
+        if method == "left-open":
+            mdata = mdata.bfill()
+    else:
+        raise ValueError(method)
+
+    mask = mdata == mflag
+    mask = mask.reindex(dat.index).fillna(False)
+    flagger = flagger.setFlags(field=field, loc=mask, **kwargs)
+    return data, flagger
+
+
+@register(masking='all')
+def flagCrossScoring(data, field, flagger, fields, thresh, cross_stat='modZscore', **kwargs):
+    """
+    Function checks for outliers relative to the "horizontal" input data axis.
+
+    For `fields` :math:`=[f_1,f_2,...,f_N]` and timestamps :math:`[t_1,t_2,...,t_K]`, the following steps are taken
+    for outlier detection:
+
+    1. All timestamps :math:`t_i`, for which there is at least one :math:`f_k` with :math:`data[f_k]` having no entry
+       at :math:`t_i`, are excluded from the following process (inner join of the :math:`f_i` fields).
+    2. For every :math:`1 <= i <= K`, the value
+       :math:`m_i = median(\\{data[f_1][t_i], data[f_2][t_i], ..., data[f_N][t_i]\\})` is calculated.
+    3. For every :math:`1 <= i <= K`, the set
+       :math:`\\{data[f_1][t_i] - m_i, data[f_2][t_i] - m_i, ..., data[f_N][t_i] - m_i\\}` is tested for outliers with the
+       specified method (`cross_stat` parameter).
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        A dummy parameter.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    fields : List[str]
+        List of fieldnames in data, determining which variables are to be included into the flagging process.
+    thresh : float
+        Threshold that the outlier score of a value must exceed in order to be flagged an outlier.
+    cross_stat : {'modZscore', 'Zscore'}, default 'modZscore'
+        Method used for calculating the outlier scores.
+
+        * ``'modZscore'``: Median based "sigma"-ish approach. See References [1].
+        * ``'Zscore'``: Score values by how many standard deviations they differ from the mean.
+          See References [1].
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the input flagger.
+
+    References
+    ----------
+    [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
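+
+    Examples
+    --------
+    A minimal sketch, assuming `data` holds the variables "x", "y" and "z" on a common time base
+    and `flagger` is prepared (the threshold value is illustrative only):
+
+    >>> _, fl = flagCrossScoring(data, "x", flagger, fields=["x", "y", "z"],
+    ...                          thresh=3.5, cross_stat="modZscore")
+
+    A plain callable can be passed as well, e.g. ``cross_stat=np.mean``; in that case the absolute
+    deviations from that statistic are compared against `thresh` directly.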
+    """
+
+    df = data[fields].loc[data[fields].index_of('shared')].to_df()
+
+    if isinstance(cross_stat, str):
+        if cross_stat == 'modZscore':
+            MAD_series = df.subtract(df.median(axis=1), axis=0).abs().median(axis=1)
+            diff_scores = ((0.6745 * (df.subtract(df.median(axis=1), axis=0))).divide(MAD_series, axis=0)).abs()
+        elif cross_stat == 'Zscore':
+            diff_scores = (df.subtract(df.mean(axis=1), axis=0)).divide(df.std(axis=1), axis=0).abs()
+        else:
+            raise ValueError(cross_stat)
+    else:
+        try:
+            stat = getattr(df, cross_stat.__name__)(axis=1)
+        except AttributeError:
+            stat = df.aggregate(cross_stat, axis=1)
+        diff_scores = df.subtract(stat, axis=0).abs()
+
+    mask = diff_scores > thresh
+    for var in fields:
+        flagger = flagger.setFlags(var, mask[var], **kwargs)
+
+    return data, flagger
+
+@register(masking='all')
+def flagDriftFromNorm(data, field, flagger, fields, segment_freq, norm_spread, norm_frac=0.5,
+                      metric=lambda x, y: scipy.spatial.distance.pdist(np.array([x, y]),
+                                                                       metric='cityblock') / len(x),
+                      linkage_method='single', **kwargs):
+    """
+    The function flags value courses that significantly deviate from a group of normal value courses.
+
+    "Normality" is determined in terms of a maximum spreading distance, that members of a normal group must not exceed.
+    In addition, only a group is considered "normal" if it contains more then `norm_frac` percent of the
+    variables in "fields".
+
+    See the Notes section for a more detailed presentation of the algorithm
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        A dummy parameter.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    fields : List[str]
+        List of fieldnames in data, determining which variables are to be included into the flagging process.
+    segment_freq : str
+        An offset string, determining the size of the separate data chunks that the algorithm is applied on
+        piecewise.
+    norm_spread : float
+        A parameter limiting the maximum "spread" of the timeseries, allowed in the "normal" group. See Notes section
+        for more details.
+    norm_frac : float, default 0.5
+        Has to be in [0,1]. Determines the minimum fraction of variables the "normal" group has to comprise in order
+        to actually be the normal group. The higher that value, the more stable the algorithm will be with respect
+        to false positives. The behaviour for values below 0.5 is untested.
+    metric : Callable[[numpy.ndarray, numpy.ndarray], float]
+        A distance function. It should be a function of two 1-dimensional arrays and return a float scalar value.
+        This value is interpreted as the distance of the two input arrays. The default is the averaged Manhattan metric.
+        See the Notes section to get an idea of why this could be a good choice.
+    linkage_method : {"single", "complete", "average", "weighted", "centroid", "median", "ward"}, default "single"
+        The linkage method used for hierarchical (agglomerative) clustering of the timeseries.
+        See the Notes section for more details.
+        The keyword gets passed on to scipy.cluster.hierarchy.linkage. See its documentation to learn more about the different
+        keywords (References [1]).
+        See wikipedia for an introduction to hierarchical clustering (References [2]).
+    kwargs
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the input flagger.
+
+    Notes
+    -----
+    The following steps are performed for every data "segment" of length `segment_freq` in order to find the
+    "abnormal" data:
+
+    1. Calculate the distances :math:`d(x_i, x_j)` for all :math:`x_i`, :math:`x_j` in parameter `fields`
+       (with :math:`d` denoting the distance function passed to the parameter `metric`).
+    2. Calculate a dendrogram with a hierarchical linkage algorithm, specified by the parameter `linkage_method`.
+    3. Flatten the dendrogram at the level where the agglomeration costs exceed the value given by the parameter
+       `norm_spread`.
+    4. Check if there is a cluster containing more than a fraction of `norm_frac` of the variables in `fields`.
+
+        1. If yes: flag all the variables that are not in that cluster (inside the segment).
+        2. If no: flag nothing.
+
+    The main parameter giving control over the algorithm's behavior is the `norm_spread` parameter, which determines
+    the maximum spread of a normal group by limiting the costs a cluster agglomeration must not exceed in any
+    linkage step.
+    For singleton clusters, these costs just equal half the distance the timeseries in the clusters have to
+    each other. So no two timeseries that are more than 2 * `norm_spread` apart from each other can get clustered
+    together.
+    When timeseries get clustered together, the distance of this new cluster to all other timeseries/clusters is
+    calculated according to the linkage method specified by `linkage_method`. By default, it is the minimum distance
+    the members of the clusters have to each other.
+    Having that in mind, it is advisable to choose a distance function that can be well interpreted in the unit
+    dimension of the measurement and whose interpretation is invariant over the length of the timeseries.
+    That is why the averaged Manhattan metric is set as the metric default, since it corresponds to the
+    averaged value distance two timeseries have (as opposed to, for example, the Euclidean distance).
+
+    References
+    ----------
+    Documentation of the underlying hierarchical clustering algorithm:
+        [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
+    Introduction to Hierarchical clustering:
+        [2] https://en.wikipedia.org/wiki/Hierarchical_clustering
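+
+    Examples
+    --------
+    A minimal sketch, assuming `data` holds several comparable variables on a common time base and
+    `flagger` is prepared (names and parameter values are illustrative only). The variables are
+    compared day by day and, within every day, each variable that does not belong to the majority
+    cluster gets flagged for that day:
+
+    >>> _, fl = flagDriftFromNorm(data, "t1", flagger,
+    ...                           fields=["t1", "t2", "t3", "t4"],
+    ...                           segment_freq="1D", norm_spread=0.5)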
+    """
+
+    data_to_flag = data[fields].to_df()
+    data_to_flag.dropna(inplace=True)
+    segments = data_to_flag.groupby(pd.Grouper(freq=segment_freq))
+    for segment in segments:
+        if segment[1].shape[0] <= 1:
+            continue
+        drifters = detectDeviants(segment[1], metric, norm_spread, norm_frac, linkage_method, 'variables')
+
+        for var in drifters:
+            flagger = flagger.setFlags(fields[var], loc=segment[1].index, **kwargs)
+
+    return data, flagger
+
+@register(masking='all')
+def flagDriftFromReference(data, field, flagger, fields, segment_freq, thresh,
+                      metric=lambda x, y: scipy.spatial.distance.pdist(np.array([x, y]),
+                                                                    metric='cityblock')/len(x),
+                       **kwargs):
+    """
+    The function flags value courses that deviate from a reference course by a margin exceeding a certain threshold.
+
+    The deviation is measured by the distance function passed to parameter metric.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The reference variable, the deviation from which determines the flagging.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    fields : List[str]
+        List of fieldnames in data, determining which variables are to be included into the flagging process.
+    segment_freq : str
+        An offset string, determining the size of the separate data chunks that the algorithm is applied on
+        piecewise.
+    thresh : float
+        The maximum distance by which normal variables may deviate from the reference variable.
+    metric : Callable[[numpy.ndarray, numpy.ndarray], float]
+        A distance function. It should be a function of two 1-dimensional arrays and return a float scalar value.
+        This value is interpreted as the distance of the two input arrays. The default is the averaged Manhattan metric.
+        See the Notes section to get an idea of why this could be a good choice.
+    kwargs
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the input flagger.
+
+    Notes
+    -----
+    It is advisable to choose a distance function that can be well interpreted in the unit
+    dimension of the measurement and whose interpretation is invariant over the length of the timeseries.
+    That is why the averaged Manhattan metric is set as the metric default, since it corresponds to the
+    averaged value distance two timeseries have (as opposed to, for example, the Euclidean distance).
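+
+    Examples
+    --------
+    A minimal sketch, assuming `data` and `flagger` are prepared (names and parameter values are
+    illustrative only). Here "t1" serves as the reference course; "t2" and "t3" get flagged for
+    every week in which their averaged distance to "t1" exceeds 0.5:
+
+    >>> _, fl = flagDriftFromReference(data, "t1", flagger, fields=["t1", "t2", "t3"],
+    ...                                segment_freq="1W", thresh=0.5)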
+    """
+
+    if field not in fields:
+        fields.append(field)
+    var_num = len(fields)
+    # build the frame only after the reference field was appended, so its column is present below
+    data_to_flag = data[fields].to_df()
+    data_to_flag.dropna(inplace=True)
+    segments = data_to_flag.groupby(pd.Grouper(freq=segment_freq))
+
+    for segment in segments:
+
+        if segment[1].shape[0] <= 1:
+            continue
+        for i in range(var_num):
+            dist = metric(segment[1].iloc[:, i].values, segment[1].loc[:, field].values)
+            if dist > thresh:
+                flagger = flagger.setFlags(fields[i], loc=segment[1].index, **kwargs)
+
+    return data, flagger
+
+
+@register(masking='all')
+def flagDriftScale(data, field, flagger, fields_scale1, fields_scale2, segment_freq, norm_spread, norm_frac=0.5,
+                      metric=lambda x, y: scipy.spatial.distance.pdist(np.array([x, y]),
+                                                                                    metric='cityblock')/len(x),
+                      linkage_method='single', **kwargs):
+
+
+    """
+    The function linearly rescales one set of variables to another set of variables with a different scale and then
+    flags value courses that significantly deviate from a group of normal value courses.
+
+    The two sets of variables can be linearly scaled one to another and hence the scaling transformation is performed
+    via linear regression: a linear regression is performed on each pair of variables, giving a slope and an intercept.
+    The transformation is then calculated as the median of all the calculated slopes and intercepts.
+
+    Once the transformation is performed, the function flags those values that deviate from a group of normal values.
+    "Normality" is determined in terms of a maximum spreading distance that members of a normal group must not exceed.
+    In addition, a group is only considered "normal" if it contains more than a fraction of `norm_frac` of the
+    variables in `fields`.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        A dummy parameter.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+    fields_scale1 : List[str]
+        List of fieldnames in data to be included into the flagging process which are scaled according to scaling
+        scheme 1.
+    fields_scale2 : List[str]
+        List of fieldnames in data to be included into the flagging process which are scaled according to scaling
+        scheme 2.
+    segment_freq : str
+        An offset string, determining the size of the separate data chunks that the algorithm is applied on
+        piecewise.
+    norm_spread : float
+        A parameter limiting the maximum "spread" of the timeseries, allowed in the "normal" group. See Notes section
+        for more details.
+    norm_frac : float, default 0.5
+        Has to be in [0,1]. Determines the minimum fraction of variables the "normal" group has to comprise in order
+        to actually be the normal group. The higher that value, the more stable the algorithm will be with respect
+        to false positives. The behaviour for values below 0.5 is untested.
+    metric : Callable[[numpy.ndarray, numpy.ndarray], float]
+        A distance function. It should be a function of two 1-dimensional arrays and return a float scalar value.
+        This value is interpreted as the distance of the two input arrays. The default is the averaged Manhattan metric.
+        See the Notes section to get an idea of why this could be a good choice.
+    linkage_method : {"single", "complete", "average", "weighted", "centroid", "median", "ward"}, default "single"
+        The linkage method used for hierarchical (agglomerative) clustering of the timeseries.
+        See the Notes section for more details.
+        The keyword gets passed on to scipy.cluster.hierarchy.linkage. See its documentation to learn more about the different
+        keywords (References [1]).
+        See wikipedia for an introduction to hierarchical clustering (References [2]).
+    kwargs
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the input flagger.
+
+    References
+    ----------
+    Documentation of the underlying hierarchical clustering algorithm:
+        [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
+    Introduction to Hierarchical clustering:
+        [2] https://en.wikipedia.org/wiki/Hierarchical_clustering
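+
+    Examples
+    --------
+    A minimal sketch, assuming `data` and `flagger` are prepared (names and parameter values are
+    illustrative only). Suppose "u1" and "u2" are recorded in one scale (e.g. raw sensor voltage)
+    and "t1" and "t2" in another (e.g. degree Celsius); the first group gets linearly rescaled to
+    the second before the drift check is performed:
+
+    >>> _, fl = flagDriftScale(data, "u1", flagger,
+    ...                        fields_scale1=["u1", "u2"], fields_scale2=["t1", "t2"],
+    ...                        segment_freq="1D", norm_spread=0.5)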
+    """
+
+    fields = fields_scale1 + fields_scale2
+    data_to_flag = data[fields].to_df()
+    data_to_flag.dropna(inplace=True)
+
+    convert_slope = []
+    convert_intercept = []
+
+    for field1 in fields_scale1:
+        for field2 in fields_scale2:
+            slope, intercept, r_value, p_value, std_err = stats.linregress(data_to_flag[field1], data_to_flag[field2])
+            convert_slope.append(slope)
+            convert_intercept.append(intercept)
+
+    factor_slope = np.median(convert_slope)
+    factor_intercept = np.median(convert_intercept)
+
+    dat = dios.DictOfSeries()
+    for field1 in fields_scale1:
+        dat[field1] = factor_intercept + factor_slope * data_to_flag[field1]
+    for field2 in fields_scale2:
+        dat[field2] = data_to_flag[field2]
+
+    dat_to_flag = dat[fields].to_df()
+
+    segments = dat_to_flag.groupby(pd.Grouper(freq=segment_freq))
+    for segment in segments:
+        if segment[1].shape[0] <= 1:
+            continue
+        drifters = detectDeviants(segment[1], metric, norm_spread, norm_frac, linkage_method, 'variables')
+        for var in drifters:
+            flagger = flagger.setFlags(fields[var], loc=segment[1].index, **kwargs)
+
     return data, flagger
diff --git a/saqc/funcs/harm_functions.py b/saqc/funcs/harm_functions.py
index bdd7909a7a1e92675d81eea50f31366cf88e4eb5..49762412c2e0473d0604b59209739cd7641f0ce5 100644
--- a/saqc/funcs/harm_functions.py
+++ b/saqc/funcs/harm_functions.py
@@ -1,904 +1,351 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import pandas as pd
+
 import numpy as np
 import logging
-
-from saqc.funcs.functions import flagMissing
-from saqc.funcs.register import register
-from saqc.lib.tools import toSequence, getFuncFromInput
-
+from saqc.core.register import register
+from saqc.funcs.proc_functions import (
+    proc_interpolateGrid,
+    proc_shift,
+    proc_fork,
+    proc_resample,
+    proc_projectFlags,
+    proc_drop,
+    proc_rename,
+    ORIGINAL_SUFFIX,
+)
 
 logger = logging.getLogger("SaQC")
 
 
-class Heap:
-    INDEX = "initial_ts"
-    DATA = "original_data"
-    FLAGGER = "original_flagger"
-    FREQ = "freq"
-    METHOD = "reshape_method"
-    DROP = "drop_flags"
-
-
-HARM_2_DEHARM = {
-    "fshift": "invert_fshift",
-    "bshift": "invert_bshift",
-    "nshift": "invert_nearest",
-    "fagg": "invert_fshift",
-    "bagg": "invert_bshift",
-    "nagg": "invert_nearest",
-    "fagg_no_deharm": "regain",
-    "bagg_no_deharm": "regain",
-    "nagg_no_deharm": "regain",
-}
-
-
-def harmWrapper(heap={}):
-    # NOTE:
-    # (1) - harmonization will ALWAYS flag flagger.BAD all the np.nan values and afterwards DROP ALL
-    #       flagger.BAD flagged values from flags frame for further flagging!!!!!!!!!!!!!!!!!!!!!
-    def harmonize(
-        data,
-        field,
-        flagger,
-        freq,
-        inter_method,
-        reshape_method,
-        inter_agg="mean",
-        inter_order=1,
-        inter_downcast=False,
-        reshape_agg="max",
-        reshape_missing_flag=None,
-        reshape_shift_comment=False,
-        drop_flags=None,
-        data_missing_value=np.nan,
-        **kwargs,
-    ):
-
-        # get funcs from strings:
-        inter_agg = getFuncFromInput(inter_agg)
-        reshape_agg = getFuncFromInput(reshape_agg)
-
-        # for some tingle tangle reasons, resolving the harmonization will not be sound, if not all missing/np.nan
-        # values get flagged initially:
-        data, flagger = flagMissing(data, field, flagger, nodata=data_missing_value, **kwargs)
-        # and dropped for harmonization:
-        if drop_flags is not None:
-            if flagger.BAD not in drop_flags:
-                drop_flags.append(flagger.BAD)
-
-        # before sending the current flags and data frame to the future (for backtracking reasons), we clear it
-        # from merge-nans that just resulted from harmonization of other variables!
-        dat_col, flagger_merged = _fromMerged(data, flagger, field)
-
-        # now we send the flags frame in its current shape to the future:
-        heap[field] = {
-            Heap.DATA: dat_col,
-            Heap.FLAGGER: flagger_merged,
-            Heap.FREQ: freq,
-            Heap.METHOD: reshape_method,
-            Heap.DROP: drop_flags,
-        }
-
-        # furthermore we need to memorize the initial timestamp to ensure output format will equal input format.
-        if Heap.INDEX not in heap.keys():
-            heap.update({Heap.INDEX: dat_col.index})
-
-        # now we can manipulate it without loosing information gathered before harmonization
-        dat_col, flagger_merged_clean = _outsortCrap(dat_col, field, flagger_merged, drop_flags=drop_flags,)
-
-        # interpolation! (yeah)
-        dat_col, chunk_bounds = _interpolateGrid(
-            dat_col,
-            freq,
-            method=inter_method,
-            order=inter_order,
-            agg_method=inter_agg,
-            total_range=(heap[Heap.INDEX][0], heap[Heap.INDEX][-1]),
-            downcast_interpolation=inter_downcast,
-        )
-
-        # flags now have to be carefully adjusted according to the changes/shifts we did to data
-        flagger_merged_clean_reshaped = _reshapeFlags(
-            flagger_merged_clean,
-            field,
-            ref_index=dat_col.index,
-            method=reshape_method,
-            agg_method=reshape_agg,
-            missing_flag=reshape_missing_flag,
-            set_shift_comment=reshape_shift_comment,
-            block_flags=chunk_bounds,
-            **kwargs,
-        )
-
-        # finally we happily blow up the data and flags frame again,
-        # to release them on their ongoing journey through saqc.
-        data, flagger_out = _toMerged(
-            data, flagger, field, data_to_insert=dat_col, flagger_to_insert=flagger_merged_clean_reshaped, **kwargs
-        )
-
-        return data, flagger_out
-
-    def deharmonize(data, field, flagger, co_flagging=False, **kwargs):
-
-        # Check if there is backtracking information available for actual harmonization resolving
-        if field not in heap:
-            logger.warning(
-                'No backtracking data for resolving harmonization of "{}". Reverse projection of flags gets'
-                " skipped!".format(field)
-            )
-            return data, flagger
-
-        # get some deharm configuration infos from the heap:
-        harm_info = heap.pop(field)
-        resolve_method = HARM_2_DEHARM[harm_info[Heap.METHOD]]
-
-        # retrieve data and flags from the merged saqc-conform data frame (and by that get rid of blow-up entries).
-        dat_col, flagger_merged = _fromMerged(data, flagger, field)
-
-        # reconstruct the drops that were performed before harmonization
-        drops, flagger_original_clean = _outsortCrap(
-            dat_col, field, harm_info[Heap.FLAGGER], drop_flags=harm_info[Heap.DROP], return_drops=True,
-        )
-
-        # with reconstructed pre-harmonization flags-frame -> perform the projection of the flags calculated for
-        # the harmonized timeseries, onto the original timestamps
-        flagger_back = _backtrackFlags(
-            flagger_merged,
-            flagger_original_clean,
-            harm_info[Heap.FREQ],
-            track_method=resolve_method,
-            co_flagging=co_flagging,
-        )
-        flags_back = flagger_back.getFlags()
-
-        # now: re-insert the pre-harmonization-drops
-        flags_col = flags_back.reindex(flags_back.index.join(drops.index, how="outer"))
-        # due to assignment reluctants with 1-d-dataframes we are squeezing:
-        flags_col = flags_col.squeeze(axis=1)
-        drops = drops.squeeze(axis=1)
-        flags_col.loc[drops.index] = drops
-
-        # but to stick with the policy of always having flags as pd.DataFrames we blow up the flags col again:
-        if isinstance(flags_col, pd.Series):
-            flags_col = flags_col.to_frame()
-        flagger_back_full = flagger.initFlags(flags=flags_col)
-
-        dat_col = harm_info[Heap.DATA].reindex(flags_col.index, fill_value=np.nan)
-        dat_col.name = field
-        # transform the result into the form, data travels through saqc:
-        data, flagger_out = _toMerged(
-            data, flagger, field, dat_col, flagger_back_full, target_index=heap[Heap.INDEX], **kwargs
-        )
-        # clear heap if nessecary:
-        if len(heap) == 1 and Heap.INDEX in heap:
-            del heap[Heap.INDEX]
-
-        # bye bye data
-        return data, flagger_out
-
-    return harmonize, deharmonize
-
-
-harm_harmonize, harm_deharmonize = harmWrapper()
-register()(harm_harmonize)
-register()(harm_deharmonize)
-
-
-# (de-)harmonize helper
-def _outsortCrap(
-    data, field, flagger, drop_flags=None, return_drops=False,
-):
-
-    """Harmonization gets the more easy, the more data points we can exclude from crowded sampling intervals.
-    Depending on passed key word options the function will remove nan entries and as-suspicious-flagged values from
-    the data and the flags passed. In deharmonization the function is used to reconstruct original flags field shape.
-
-    :param data:            pd.Series. ['data'].
-    :param flagger:         saqc.flagger.
-    :param drop_suspicious: Boolean. Default = True. If True, only values that are flagged GOOD or UNFLAGGED get
-                            processed.
-    :param drop_bad:        Boolean. Default = True. If True, BAD-flagged values get dropped from data.
-    :param drop_list:       List or None. Default = None. List of flags that shall be dropped from data. If None is
-                            passed (default), list based data dropping is omitted.
-    :param return_drops:    Boolean. Default = False. If True, return the drops only. If False, return the data and
-                            flags without drops.
-    :return:                If return_drops=False. (default) Returns data, flags tuple with values-to-be-dropped
-                            dropped.
-                            If return_drops=True. Returns the dropped flags.
-    """
-
-    drop_mask = pd.Series(data=False, index=data.index)
-
-    drop_flags = toSequence(drop_flags, default=flagger.BAD)
-    for drop_flag in drop_flags:
-        drop_mask = drop_mask | flagger.isFlagged(field, flag=drop_flag, comparator="==")
-
-    flagger_out = flagger.getFlagger(loc=~drop_mask)
-    if return_drops:
-        return flagger.getFlags(loc=drop_mask), flagger_out
-    return data[~drop_mask], flagger_out
-
-
-def _makeGrid(t0, t1, freq, name=None):
+@register(masking='none')
+def harm_shift2Grid(data, field, flagger, freq, method="nshift", to_drop=None, **kwargs):
     """
-    Returns a frequency grid, covering the date range of 'data'.
-    :param data:    pd.Series. ['data']
-    :param freq:    Offset String. Intended Sampling frequency.
-    :return:        pd.Series. ['data'].
+    A method to "regularize" data by shifting data points forward/backward to a regular timestamp.
+
+    A series of data is considered "regular", if it is sampled regularly (= having uniform sampling rate).
+
+    Method keywords:
+
+    * ``'nshift'``: every grid point gets assigned the nearest value in its range (*range = +/-(freq/2)*).
+    * ``'bshift'``: every grid point gets assigned its first succeeding value, if there is one available in the
+      succeeding sampling interval.
+    * ``'fshift'``: every grid point gets assigned its ultimately preceding value, if there is one available in
+      the preceding sampling interval.
+
+    Note: the flags associated with every datapoint will just get shifted with them.
+
+    Note: if there is no valid data (existing and not-na) available in a sampling interval assigned to a regular
+    timestamp by the selected method, nan gets assigned to this timestamp. The associated flag will be of value
+    ``flagger.UNFLAGGED``.
+
+    Note: all data nans get excluded from shifting by default. If `to_drop` is None, all *BAD* flagged values get
+    excluded as well.
+
+    Note: the method will likely and significantly alter values and shape of ``data[field]``. The original data is kept
+    in the data dios and assigned to the fieldname ``field + '_original'``.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The field name of the column, holding the data-to-be-regularized.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        The frequency of the grid you want to shift your data to.
+    method : {'nshift', 'bshift', 'fshift'}, default 'nshift'
+        Specifies if datapoints get propagated forwards, backwards or to the nearest grid timestamp.
+        See the description above for details.
+    to_drop : {List[str], str}, default None
+        Flag types you want to drop before shifting - effectively excluding values that are flagged
+        with a flag in `to_drop` from the shifting process. The default (None) results in flagger.BAD
+        values being dropped initially.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relative to the flagger input.
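+
+    Examples
+    --------
+    A minimal sketch, assuming `data` holds an irregularly sampled series "x" and `flagger` is
+    prepared. To move every observation onto the nearest point of a regular 15 minute grid:
+
+    >>> data, flagger = harm_shift2Grid(data, "x", flagger, freq="15Min", method="nshift")
+
+    Afterwards ``data["x"]`` is sampled at "15Min" and the original series is kept under
+    ``data["x_original"]``.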
     """
 
-    harm_start = t0.floor(freq=freq)
-    harm_end = t1.ceil(freq=freq)
-    return pd.date_range(start=harm_start, end=harm_end, freq=freq, name=name)
-
-
-def _insertGrid(data, freq):
-    """
-    Depending on the frequency, the data has to be harmonized to, the passed data series gets reindexed with an index,
-    containing the 'original' entries and additionally, if not present, the equidistant entries of the frequency grid.
-    :param data:    pd.Series. ['data']
-    :param freq:    Offset String. Intended Sampling frequency.
-    :return:        pd.Series. ['data'].
-    """
-
-    return data.reindex(
-        data.index.join(_makeGrid(data.index[0], data.index[-1], freq, name=data.index.name), how="outer",)
+    data, flagger = proc_fork(data, field, flagger)
+    data, flagger = proc_shift(
+        data, field, flagger, freq, method, to_drop=to_drop, empty_intervals_flag=flagger.UNFLAGGED, **kwargs
     )
+    return data, flagger
 
 
-def _interpolateGrid(
-    data, freq, method, order=1, agg_method=sum, total_range=None, downcast_interpolation=False,
+@register(masking='none')
+def harm_aggregate2Grid(
+        data, field, flagger, freq, value_func, flag_func=np.nanmax, method="nagg", to_drop=None, **kwargs
 ):
-    """The function calculates grid point values for a passed pd.Series (['data']) by applying
-    the selected interpolation/fill method. (passed to key word 'method'). The interpolation will apply for grid points
-    only, that have preceding (forward-aggregation/forward-shifts) or succeeding (backward-aggregation/backward-shift)
-    values, or both ("real" interpolations, like linear, polynomial, ...-> see documentation below).
-
-    Data must be cleared from nans before entering here.
-
-    Methods:
-    All Methods calculate new values at grid points, if there is no data available for that sampling point.
-
-    1. "real" INTERPOLATIONS
-
-    There are available all the interpolation methods from the pandas.interpolate() method and they are applicable by
-    the very same key words, that you would pass to pd.Series.interpolates's method parameter.
-
-    Be careful with pd.Series.interpolate's 'nearest' and 'pad':
-    To just fill grid points forward/backward or from the nearest point - and
-    assign grid points, that refer to missing data, a nan value, the use of "fshift", "bshift" and "nshift" is
-    recommended, to ensure the result expected. (The methods diverge in some special cases).
-
-    To make an index-aware linear interpolation, "times" has to be passed - NOT 'linear'.
-
-    2. SHIFTS:
-
-    'fshift'        -  every grid point gets assigned its ultimately preceeding value - if there is one available in
-                    the preceeding sampling interval.
-    'bshift'        -  every grid point gets assigned its first succeeding value - if there is one available in the
-                    succeeding sampling interval.
-    'nshift' -  every grid point gets assigned the nearest value in its range ( range = +/-(freq/2) ).
-
-    3. AGGREGATIONS
-
-    'nagg'   - all values in the range (+/- freq/2) of a grid point get aggregated with agg_method and assigned
-                    to it.
-    'bagg'          - all values in a sampling interval get aggregated with agg_method and the result gets assigned to
-                    the last grid point
-    'fagg'          - all values in a sampling interval get aggregated with agg_method and the result gets assigned to
-                    the next grid point
-
-    :param data:        pd.DataFrame. ['data'].
-    :param freq:        Offset String. the grid frequency.
-    :param method:      String. Method you want to interpolate with. See function doc above.
-    :param order:       Integer. Default = 1. If an interpolation method is selected that needs
-                        to know about its "order", this is where you pass it. (For example 'polynomial', 'spline')
-    :param agg_method:  Func. Default = sum. If an aggregation method is selected for grid point filling,
-                        you need to pass the aggregation method to this very parameter. Note that it should be able
-                        to handle empty argument series passed as well as np.nan passed.
-    :param total_range  2-Tuple of pandas Timestamps.
-                        The total range of all the data in the Dataframe that is currently processed. If not
-                        None, the resulting harmonization grid of the current data column will range over the total
-                        Data-range. This ensures not having nan-entries in the flags dataframe after harmonization.
-    :return:            pd.DataFrame. ['data'].
     """
-
-    chunk_bounds = None
-    aggregations = ["nagg", "bagg", "fagg"]
-    shifts = ["fshift", "bshift", "nshift"]
-    interpolations = [
-        "linear",
-        "time",
-        "nearest",
-        "zero",
-        "slinear",
-        "quadratic",
-        "cubic",
-        "spline",
-        "barycentric",
-        "polynomial",
-        "krogh",
-        "piecewise_polynomial",
-        "spline",
-        "pchip",
-        "akima",
-    ]
-    data = data.copy()
-    ref_index = _makeGrid(data.index[0], data.index[-1], freq, name=data.index.name)
-    if total_range is not None:
-        total_index = _makeGrid(total_range[0], total_range[1], freq, name=data.index.name)
-
-    # Aggregations:
-    if method in aggregations:
-        if method == "nagg":
-            # all values within a grid points range (+/- freq/2, closed to the left) get aggregated with 'agg method'
-            # some timestamp acrobatics to feed the base keyword properly
-            seconds_total = pd.Timedelta(freq).total_seconds()
-            seconds_string = str(int(seconds_total)) + "s"
-            # calculate the series of aggregated values
-            data = data.resample(seconds_string, base=seconds_total / 2, loffset=pd.Timedelta(freq) / 2).apply(
-                agg_method
-            )
-
-        elif method == "bagg":
-            # all values in a sampling interval get aggregated with agg_method and assigned to the last grid point
-            data = data.resample(freq).apply(agg_method)
-        # if method is fagg
-        else:
-            # all values in a sampling interval get aggregated with agg_method and assigned to the next grid point
-            data = data.resample(freq, closed="right", label="right").apply(agg_method)
-        # some consistency cleanup:
-        if total_range is None:
-            data = data.reindex(ref_index)
-
-    # Shifts
-    elif method in shifts:
-        if method == "fshift":
-            direction = "ffill"
-            tolerance = pd.Timedelta(freq)
-
-        elif method == "bshift":
-            direction = "bfill"
-            tolerance = pd.Timedelta(freq)
-        # if method = nshift
-        else:
-            direction = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        data = data.reindex(ref_index, method=direction, tolerance=tolerance)
-
-    # Interpolations:
-    elif method in interpolations:
-
-        # account for annoying case of subsequent frequency alligned values, differing exactly by the margin
-        # 2*freq:
-        spec_case_mask = data.asfreq(freq).dropna().index.to_series()
-        spec_case_mask = (spec_case_mask - spec_case_mask.shift(1)) == 2 * pd.Timedelta(freq)
-        spec_case_mask = spec_case_mask[spec_case_mask]
-        if not spec_case_mask.empty:
-            spec_case_mask = spec_case_mask.tshift(-1, freq)
-
-        data = _insertGrid(data, freq)
-        data, chunk_bounds = _interpolate(
-            data, method, order=order, inter_limit=2, downcast_interpolation=downcast_interpolation,
-        )
-
-        # exclude falsely interpolated values:
-        data[spec_case_mask.index] = np.nan
-
-        if total_range is None:
-            data = data.asfreq(freq, fill_value=np.nan)
-
-    else:
-        methods = "\n".join([", ".join(shifts), ", ".join(aggregations), ", ".join(interpolations)])
-        raise ValueError(f"Unknown interpolation method: '{method}', please select from:\n{methods}")
-
-    if total_range is not None:
-        data = data.reindex(total_index)
-
-    return data, chunk_bounds
-
-
-def _interpolate(data, method, order=2, inter_limit=2, downcast_interpolation=False):
-    """
-    The function interpolates nan-values (and nan-grids) in timeseries data. It can be passed all the method keywords
-    from the pd.Series.interpolate method and will than apply this very methods. Note, that the inter_limit keyword
-    really restricts the interpolation to chunks, not containing more than "inter_limit" nan entries
-    (thereby opposing the limit keyword of pd.Series.interpolate).
-
-    :param data:                    pd.Series. The data series to be interpolated
-    :param method:                  String. Method keyword designating interpolation method to use.
-    :param order:                   Integer. If your desired interpolation method needs an order to be passed -
-                                    here you pass it.
-    :param inter_limit:             Integer. Default = 2. Limit up to wich nan - gaps in the data get interpolated.
-                                    Its default value suits an interpolation that only will apply on an inserted
-                                    frequency grid.
-    :param downcast_interpolation:  Boolean. Default False. If True:
-                                    If a data chunk not contains enough values for interpolation of the order "order",
-                                    the highest order possible will be selected for that chunks interpolation."
-    :return:
-    """
-
-    gap_mask = (data.rolling(inter_limit, min_periods=0).apply(lambda x: np.sum(np.isnan(x)), raw=True)) != inter_limit
-
-    if inter_limit == 2:
-        gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
-    else:
-        gap_mask = (
-            gap_mask.replace(True, np.nan).fillna(method="bfill", limit=inter_limit).replace(np.nan, True).astype(bool)
-        )
-    # start end ending points of interpolation chunks have to be memorized to block their flagging:
-    chunk_switches = gap_mask.astype(int).diff()
-    chunk_starts = chunk_switches[chunk_switches == -1].index
-    chunk_ends = chunk_switches[(chunk_switches.shift(-1) == 1)].index
-    chunk_bounds = chunk_starts.join(chunk_ends, how="outer", sort=True)
-
-    data = data[gap_mask]
-
-    if method in ["linear", "time"]:
-
-        data.interpolate(method=method, inplace=True, limit=1, limit_area="inside")
-
-    else:
-        dat_name = data.name
-        gap_mask = (~gap_mask).cumsum()
-        data = pd.merge(gap_mask, data, how="inner", left_index=True, right_index=True)
-
-        def _interpolWrapper(x, wrap_order=order, wrap_method=method):
-            if x.count() > wrap_order:
-                try:
-                    return x.interpolate(method=wrap_method, order=int(wrap_order))
-                except (NotImplementedError, ValueError):
-                    logger.warning(
-                        "Interpolation with method {} is not supported at order {}. "
-                        "Interpolation will be performed with order {}".format(
-                            method, str(wrap_order), str(wrap_order - 1)
-                        )
-                    )
-                    return _interpolWrapper(x, int(wrap_order - 1), wrap_method)
-            elif x.size < 3:
-                return x
-            else:
-                if downcast_interpolation:
-                    return _interpolWrapper(x, int(x.count() - 1), wrap_method)
-                else:
-                    return x
-
-        data = data.groupby(data.columns[0]).transform(_interpolWrapper)
-        # squeezing the 1-dimensional frame resulting from groupby for consistency reasons
-        data = data.squeeze(axis=1)
-        data.name = dat_name
-    return data, chunk_bounds
-
-
-def _reshapeFlags(
-    flagger,
-    field,
-    ref_index,
-    method="fshift",
-    agg_method=max,
-    missing_flag=None,
-    set_shift_comment=True,
-    block_flags=None,
-    **kwargs,
-):
-    """To continue processing flags after harmonization/interpolation, old pre-harm flags have to be distributed onto
-    new grid.
-
-    There are the following methods available for flags projection. Note, that not every combination of flags projection
-    and interpolation method will lead to useful results. (For example, interpolating with 'fshift' and projecting with
-    bfill' would be a bad approach obviously.):
-
-    Passed aggregation methods shall return a valid flag for empty sampling intervals, or the value np.nan
-    - since np.nan values will be replaced by "missing_flag" anyway.
-
-    'fshift'/'bshift'   - forward/backward projection. Only the very
-                        first/last flag will be projected onto the last/next grid point. Extra flag fields like comment,
-                        just get shifted along with the flag. Only inserted flags for empty intervals will take the
-                        **kwargs argument.
-                        Set 'set_shift_comment' to True,  to apply kwargs** to all flags (currently really slow)
-    'fagg'/'bagg'       - All flags, referring to a sampling intervals measurements get aggregated forward/backward
-                        with the agg_method selected.
-
-    'nshift'     - every grid point gets assigned the nearest flag in its range
-                        ( range = grid_point +/-(freq/2) ).Extra flag fields like comment,
-                        just get shifted along with the flag. Only inserted flags for empty intervals will take the
-                        **kwargs argument.
-
-    'nagg'         - every grid point gets assigned the aggregation (by agg_method), of all the flags in its range.
-
-    :param flagger:     saqc.flagger. The flagger, the passed flags frame refer to.
-    :param method:      String. Default = 'fshift'. A methods keyword. (see func doc above)
-    :param agg_method:  Func. Default = max. method, multiple flags shall be aggregated with, if an aggregation method
-                        is selected for flags projection.
-    :param missing_flag:Integer. Default = -1. If there were no flags referring to the harmonized interval, this
-                        parameter determines wich flag will be inserted into the reshaped flags frame by selecting
-                        flagger.flags[missing_flag]. The parameter defaults to the worst flag that can be thought of, in
-                        terms of the used flagger.
-    :param set_shift_comment:   Boolean. Default = False. The shifting methods for flags projection are really fast,
-                        however, the methods used, do not allow to 'reflag' and apply eventually passed **kwargs.
-                        Setting set_shift_comment to True, **kwargs will be applied, but the whole process will slow
-                        down significantly.
-    :block_flags:       DatetimeIndex. A DatetimeIndex containing labels that will get the "nan-flag" assigned.
-                        This option mainly is introduced to account for the backtracking inconsistencies at the end
-                        and beginning of interpolation chunks.
-    :return: flags:     pd.Series/pd.DataFrame. The reshaped pandas like Flags object, referring to the harmonized data.
+    A method to "regularize" data by aggregating (resampling) data at a regular timestamp.
+
+    A series of data is considered "regular", if it is sampled regularly (= having uniform sampling rate).
+
+    The data will therefore be aggregated with a function, specified by the `value_func` parameter, and
+    the result gets projected onto the new timestamps with a method, specified by `method`.
+
+    The following method (keywords) are available:
+
+    * ``'nagg'``: (aggregation to nearest) - all values in the range (+/- freq/2) of a grid point get aggregated with
+      `value_func` and assigned to it. Flags get aggregated by `flag_func` and assigned the same way.
+    * ``'bagg'``: (backwards aggregation) - all values in a sampling interval get aggregated with `value_func` and the
+      result gets assigned to the last regular timestamp. Flags get aggregated by `flag_func` and assigned the same way.
+    * ``'fagg'``: (forward aggregation) - all values in a sampling interval get aggregated with `value_func` and the
+      result gets assigned to the next regular timestamp. Flags get aggregated by `flag_func` and assigned the same way.
+
+    Note that, if there is no valid data (existing and not-na) available in a sampling interval assigned to a regular
+    timestamp by the selected method, nan gets assigned to this timestamp. The associated flag will be of value
+    ``flagger.UNFLAGGED``.
+
+    Note: the method will likely and significantly alter values and shape of ``data[field]``. The original data is kept
+    in the data dios and assigned to the fieldname ``field + '_original'``.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-regularized.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        The sampling frequency the data is to be aggregated (resampled) at.
+    value_func : Callable
+        The function you want to use for aggregation.
+    flag_func : Callable
+        The function you want to aggregate the flags with. It should be capable of operating on the flags dtype
+        (usually ordered categorical).
+    method : {'fagg', 'bagg', 'nagg'}, default 'nagg'
+        Specifies which intervals are to be aggregated for a certain timestamp (preceding, succeeding or
+        "surrounding" interval). See the description above for more details.
+    to_drop : {List[str], str}, default None
+        Flagtypes you want to drop before aggregation - effectively excluding values that are flagged
+        with a flag in to_drop from the aggregation process. Default results in flagger.BAD
+        values being dropped initially.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relative to the flagger input.
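+
+    Examples
+    --------
+    A minimal call sketch (the field name "meas", the frequency and the aggregation function are only
+    illustrative - you have to provide a populated ``data`` object and a matching ``flagger`` yourself):
+
+    >>> import numpy as np
+    >>> data, flagger = harm_aggregate2Grid(data, "meas", flagger, freq="10min", value_func=np.mean)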
     """
 
-    missing_flag = missing_flag or flagger.BAD
-    aggregations = [
-        "nagg",
-        "bagg",
-        "fagg",
-        "nagg_no_deharm",
-        "bagg_no_deharm",
-        "fagg_no_deharm",
-    ]
-    shifts = ["fshift", "bshift", "nshift"]
-
-    freq = ref_index.freq
-
-    if method in shifts:
-        # forward/backward projection of every intervals last/first flag - rest will be dropped
-        if method == "fshift":
-            direction = "ffill"
-            tolerance = pd.Timedelta(freq)
-
-        elif method == "bshift":
-            direction = "bfill"
-            tolerance = pd.Timedelta(freq)
-        # varset for nshift
-        else:
-            direction = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        flags = flagger.getFlags().reindex(ref_index, tolerance=tolerance, method=direction, fill_value=np.nan)
-
-        # if you want to keep previous comments - only newly generated missing flags get commented:
-        flags_series = flags.squeeze()
-
-        flagger_new = flagger.initFlags(flags=flags).setFlags(
-            field, loc=flags_series.isna(), flag=missing_flag, force=True, **kwargs
-        )
-
-        if set_shift_comment:
-            flagger_new = flagger_new.setFlags(field, flag=flags_series, force=True, **kwargs)
-
-    elif method in aggregations:
-        # prepare resampling keywords
-        if method in ["fagg", "fagg_no_deharm"]:
-            closed = "right"
-            label = "right"
-            base = 0
-            freq_string = freq
-        elif method in ["bagg", "bagg_no_deharm"]:
-            closed = "left"
-            label = "left"
-            base = 0
-            freq_string = freq
-        # var sets for 'nagg':
-        else:
-            closed = "left"
-            label = "left"
-            seconds_total = pd.Timedelta(freq).total_seconds()
-            base = seconds_total / 2
-            freq_string = str(int(seconds_total)) + "s"
-            i_start = flagger.getFlags().index[0]
-            if abs(i_start - i_start.floor(freq)) <= pd.Timedelta(freq) / 2:
-                shift_correcture = 1
-            else:
-                shift_correcture = -1
-
-        # resampling the flags series with aggregation method
-        flags = (
-            flagger.getFlags()
-            # NOTE: otherwise the datetime index will get lost
-            .squeeze()
-            .resample(freq_string, closed=closed, label=label, base=base)
-            # NOTE: breaks for non categorical flaggers
-            .apply(lambda x: agg_method(x) if not x.empty else missing_flag)
-            .astype(flagger.dtype)
-        )
-
-        if method == "nagg":
-            flags = flags.shift(periods=shift_correcture, freq=pd.Timedelta(freq) / 2)
-
-        # some consistency clean up to ensure new flags frame matching new data frames size:
-        if ref_index[0] != flags.index[0]:
-            flags = pd.Series(data=flagger.BAD, index=[ref_index[0]]).astype(flagger.dtype).append(flags)
-        if ref_index[-1] != flags.index[-1]:
-            flags = flags.append(pd.Series(data=flagger.BAD, index=[ref_index[-1]]).astype(flagger.dtype))
-
-        # block flagging/backtracking of chunk_starts/chunk_ends
-        if block_flags is not None:
-            flags[block_flags] = np.nan
-
-        flagger_new = flagger.initFlags(flags=flags.to_frame(name=field))
-
-    else:
-        methods = ", ".join(shifts + ["\n"] + aggregations)
-        raise ValueError(
-            "Passed reshaping method keyword:'{}', is unknown. Please select from: \n '{}'.".format(method, methods)
-        )
-
-    # block flagging/backtracking of chunk_starts/chunk_ends
-    if block_flags is not None:
-        flagger_new = flagger_new.setFlags(
-            field, loc=block_flags, flag=pd.Series(np.nan, index=block_flags).astype(flagger_new.dtype), force=True,
-        )
-    return flagger_new
-
-
-def _backtrackFlags(flagger_post, flagger_pre, freq, track_method="invert_fshift", co_flagging=False):
-
-    # in the case of "real" up/downsampling - evaluating the harm flags against the original flags makes no sence!
-    if track_method in ["regain"]:
-        return flagger_pre
-
-    # NOTE: PROBLEM flager_pre carries one value ib exces (index: -3)
-    flags_post = flagger_post.getFlags()
-    flags_pre = flagger_pre.getFlags()
-    flags_header = flags_post.columns
-    if track_method in ["invert_fshift", "invert_bshift", "invert_nearest"] and co_flagging is True:
-        if track_method == "invert_fshift":
-            method = "bfill"
-            tolerance = pd.Timedelta(freq)
-        elif track_method == "invert_bshift":
-            method = "ffill"
-            tolerance = pd.Timedelta(freq)
-        # var set for "invert nearest"
-        else:
-            # NOTE: co_flagging bug path
-            method = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        flags_post = flags_post.reindex(flags_pre.index, method=method, tolerance=tolerance)
-        replacement_mask = flags_post.squeeze() > flags_pre.squeeze()
-        # there is a mysterious problem when assigning 1-d-dataframes - so we squeeze:
-        flags_pre = flags_pre.squeeze(axis=1)
-        flags_post = flags_post.squeeze(axis=1)
-        flags_pre.loc[replacement_mask] = flags_post.loc[replacement_mask]
-
-    if track_method in ["invert_fshift", "invert_bshift", "invert_nearest"] and co_flagging is False:
-        if track_method == "invert_fshift":
-            method = "backward"
-            tolerance = pd.Timedelta(freq)
-        elif track_method == "invert_bshift":
-            method = "forward"
-            tolerance = pd.Timedelta(freq)
-        # var set for 'invert nearest'
-        else:
-            method = "nearest"
-            tolerance = pd.Timedelta(freq) / 2
-
-        flags_post = pd.merge_asof(
-            flags_post,
-            pd.DataFrame(flags_pre.index.values, index=flags_pre.index, columns=["pre_index"]),
-            left_index=True,
-            right_index=True,
-            tolerance=tolerance,
-            direction=method,
-        )
-
-        flags_post.dropna(subset=["pre_index"], inplace=True)
-        flags_post.set_index(["pre_index"], inplace=True)
-
-        # restore flag shape
-        flags_post.columns = flags_header
-
-        replacement_mask = flags_post.squeeze() > flags_pre.loc[flags_post.index, :].squeeze()
-        # there is a mysterious problem when assigning 1-d-dataframes - so we squeeze:
-        flags_pre = flags_pre.squeeze(axis=1)
-        flags_post = flags_post.squeeze(axis=1)
-        flags_pre.loc[replacement_mask[replacement_mask].index] = flags_post.loc[replacement_mask]
-
-    # sticking to the nomenklatura of always-DF for flags:
-    if isinstance(flags_pre, pd.Series):
-        flags_pre = flags_pre.to_frame()
-
-    return flagger_pre.initFlags(flags=flags_pre)
-
-
-def _fromMerged(data, flagger, fieldname):
-    # we need a not-na mask for the flags data to be retrieved:
-    mask = flagger.getFlags(fieldname).notna()
-    return data.loc[mask[mask].index, fieldname], flagger.getFlagger(field=fieldname, loc=mask)
-
-
-def _toMerged(data, flagger, fieldname, data_to_insert, flagger_to_insert, target_index=None, **kwargs):
-
-    data = data.copy()
-    flags = flagger._flags
-    flags_to_insert = flagger_to_insert._flags
-
-    if isinstance(data, pd.Series):
-        data = data.to_frame()
-
-    data.drop(fieldname, axis="columns", errors="ignore", inplace=True)
-    flags.drop(fieldname, axis="columns", errors="ignore", inplace=True)
-
-    # first case: there is no data, the data-to-insert would have
-    # to be merged with, and also are we not deharmonizing:
-    if (data.empty) and (target_index is None):
-        return data_to_insert.to_frame(name=fieldname), flagger_to_insert
-
-    # if thats not the case: generate the drop mask for the remaining data:
-    mask = data.isna().all(axis=1)
-    # we only want to drop lines, that do not have to be re-inserted in the merge:
-    drops = mask[mask].index.difference(data_to_insert.index)
-    # clear mask, but keep index
-    mask[:] = True
-    # final mask:
-    mask[drops] = False
-
-    # if we are not "de-harmonizing":
-    if target_index is None:
-        # erase nan rows in the data, that became redundant because of harmonization and merge with data-to-insert:
-        data = pd.merge(data[mask], data_to_insert, how="outer", left_index=True, right_index=True)
-        flags = pd.merge(flags[mask], flags_to_insert, how="outer", left_index=True, right_index=True)
-        return data, flagger.initFlags(flags=flags)
-
-    else:
-        # trivial case: there is only one variable ("reindexing to make sure shape matches pre-harm shape"):
-        if data.empty:
-            data = data_to_insert.reindex(target_index).to_frame(name=fieldname)
-            flags = flags_to_insert.reindex(target_index, fill_value=flagger.UNFLAGGED)
-            return data, flagger.initFlags(flags=flags)
-        # annoying case: more than one variables:
-        # erase nan rows resulting from harmonization but keep/regain those, that were initially present in the data:
-        new_index = data[mask].index.join(target_index, how="outer")
-        data = data.reindex(new_index)
-        flags = flags.reindex(new_index, fill_value=flagger.UNFLAGGED)
-        data = pd.merge(data, data_to_insert, how="outer", left_index=True, right_index=True)
-        flags = pd.merge(flags, flags_to_insert, how="outer", left_index=True, right_index=True)
-
-        # internally harmonization memorizes its own manipulation by inserting nan flags -
-        # those we will now assign the flagger.bad flag by the "missingTest":
-        return flagMissing(data, fieldname, flagger.initFlags(flags=flags), nodata=np.nan, **kwargs)
-
-
-@register()
-def harm_shift2Grid(data, field, flagger, freq, method="nshift", drop_flags=None, **kwargs):
-    return harm_harmonize(
-        data, field, flagger, freq, inter_method=method, reshape_method=method, drop_flags=drop_flags, **kwargs,
-    )
-
-
-@register()
-def harm_aggregate2Grid(
-    data, field, flagger, freq, value_func, flag_func="max", method="nagg", drop_flags=None, **kwargs,
-):
-    return harm_harmonize(
+    data, flagger = proc_fork(data, field, flagger)
+    data, flagger = proc_resample(
         data,
         field,
         flagger,
         freq,
-        inter_method=method,
-        reshape_method=method,
-        inter_agg=value_func,
-        reshape_agg=flag_func,
-        drop_flags=drop_flags,
+        agg_func=value_func,
+        flag_agg_func=flag_func,
+        method=method,
+        empty_intervals_flag=flagger.UNFLAGGED,
+        to_drop=to_drop,
+        all_na_2_empty=True,
         **kwargs,
     )
+    return data, flagger
 
 
-@register()
-def harm_linear2Grid(data, field, flagger, freq, method="nagg", func="max", drop_flags=None, **kwargs):
-    return harm_harmonize(
-        data,
-        field,
-        flagger,
-        freq,
-        inter_method="time",
-        reshape_method=method,
-        reshape_agg=func,
-        drop_flags=drop_flags,
-        **kwargs,
+@register(masking='none')
+def harm_linear2Grid(data, field, flagger, freq, to_drop=None, **kwargs):
+    """
+    A method to "regularize" data by interpolating linearly the data at regular timestamp.
+
+    A series of data is considered "regular", if it is sampled regularly (= having uniform sampling rate).
+
+    Interpolated values will get assigned the worst flag within freq-range.
+
+    Note: the method will likely and significantly alter values and shape of ``data[field]``. The original data is kept
+    in the data dios and assigned to the fieldname ``field + '_original'``.
+
+    Note that the data only gets interpolated at those (regular) timestamps that have a valid (existing and
+    not-na) datapoint preceding them and one succeeding them within freq range.
+    Regular timestamps that do not satisfy this condition get nan assigned, and the associated flag will be of value
+    ``flagger.UNFLAGGED``.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-regularized.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        An offset string. The frequency of the grid you want to interpolate your data at.
+    to_drop : {List[str], str}, default None
+        Flagtypes you want to drop before interpolation - effectively excluding values that are flagged
+        with a flag in to_drop from the interpolation process. Default results in flagger.BAD
+        values being dropped initially.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relative to the flagger input.
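+
+    Examples
+    --------
+    A minimal call sketch (the field name "meas" and the frequency are only illustrative):
+
+    >>> data, flagger = harm_linear2Grid(data, "meas", flagger, freq="15min")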
+    """
+
+    data, flagger = proc_fork(data, field, flagger)
+    data, flagger = proc_interpolateGrid(
+        data, field, flagger, freq, "time", to_drop=to_drop, empty_intervals_flag=flagger.UNFLAGGED, **kwargs
     )
+    return data, flagger
 
 
-@register()
-def harm_interpolate2Grid(
-    data, field, flagger, freq, method, order=1, flag_method="nagg", flag_func="max", drop_flags=None, **kwargs,
-):
-    return harm_harmonize(
+@register(masking='none')
+def harm_interpolate2Grid(data, field, flagger, freq, method, order=1, to_drop=None, **kwargs):
+    """
+    A method to "regularize" data by interpolating the data at regular timestamp.
+
+    A series of data is considered "regular", if it is sampled regularly (= having uniform sampling rate).
+
+    Interpolated values will get assigned the worst flag within freq-range.
+
+    All the interpolation methods from the pandas.Series.interpolate method are available and can be selected
+    by the very same keywords.
+
+    Note, that, to perform a timestamp aware, linear interpolation, you have to pass ``'time'`` as `method`,
+    and NOT ``'linear'``.
+
+    Note: the `method` will likely and significantly alter values and shape of ``data[field]``. The original data is
+    kept in the data dios and assigned to the fieldname ``field + '_original'``.
+
+    Note that the data only gets interpolated at those (regular) timestamps that have a valid (existing and
+    not-na) datapoint preceding them and one succeeding them within freq range.
+    Regular timestamps that do not satisfy this condition get nan assigned, and the associated flag will be of value
+    ``flagger.UNFLAGGED``.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-regularized.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        An offset string. The frequency of the grid you want to interpolate your data at.
+    method : {"linear", "time", "nearest", "zero", "slinear", "quadratic", "cubic", "spline", "barycentric",
+        "polynomial", "krogh", "piecewise_polynomial", "spline", "pchip", "akima"}: string
+        The interpolation method you want to apply.
+    order : int, default 1
+        If your selected interpolation method can be performed at different *orders* - here you pass the desired
+        order.
+    to_drop : {List[str], str}, default None
+        Flagtypes you want to drop before interpolation - effectively excluding values that are flagged
+        with a flag in `to_drop` from the interpolation process. Default results in ``flagger.BAD``
+        values being dropped initially.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relative to the flagger input.
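+
+    Examples
+    --------
+    A minimal call sketch (the field name, frequency and interpolation settings are only illustrative):
+
+    >>> data, flagger = harm_interpolate2Grid(data, "meas", flagger, freq="15min", method="polynomial", order=2)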
+    """
+
+    data, flagger = proc_fork(data, field, flagger)
+    data, flagger = proc_interpolateGrid(
         data,
         field,
         flagger,
         freq,
-        inter_method=method,
+        method=method,
         inter_order=order,
-        reshape_method=flag_method,
-        reshape_agg=flag_func,
-        drop_flags=drop_flags,
+        to_drop=to_drop,
+        empty_intervals_flag=flagger.UNFLAGGED,
         **kwargs,
     )
+    return data, flagger
 
 
-@register()
-def harm_downsample(
-    data,
-    field,
-    flagger,
-    sample_freq,
-    agg_freq,
-    sample_func="mean",
-    agg_func="mean",
-    invalid_flags=None,
-    max_invalid=None,
-    **kwargs,
-):
-
-    agg_func = getFuncFromInput(agg_func)
-
-    if max_invalid is None:
-        max_invalid = np.inf
-
-    if sample_func is not None:
-        sample_func = getFuncFromInput(sample_func)
-
-    # define the "fastest possible" aggregator
-    if sample_func is None:
-        if max_invalid < np.inf:
-
-            def aggregator(x):
-                if x.isna().sum() < max_invalid:
-                    return agg_func(x)
-                else:
-                    return np.nan
-
-        else:
-
-            def aggregator(x):
-                return agg_func(x)
-
-    else:
-
-        dummy_resampler = pd.Series(np.nan, index=[pd.Timedelta("1min")]).resample("1min")
-        if hasattr(dummy_resampler, sample_func.__name__):
-
-            sample_func_name = sample_func.__name__
-            if max_invalid < np.inf:
-
-                def aggregator(x):
-                    y = getattr(x.resample(sample_freq), sample_func_name)()
-                    if y.isna().sum() < max_invalid:
-                        return agg_func(y)
-                    else:
-                        return np.nan
-
-            else:
-
-                def aggregator(x):
-                    return agg_func(getattr(x.resample(sample_freq), sample_func_name)())
-
-        else:
-            if max_invalid < np.inf:
-
-                def aggregator(x):
-                    y = x.resample(sample_freq).apply(sample_func)
-                    if y.isna().sum() < max_invalid:
-                        return agg_func(y)
-                    else:
-                        return np.nan
-
-            else:
-
-                def aggregator(x):
-                    return agg_func(x.resample(sample_freq).apply(sample_func))
+@register(masking='none')
+def harm_deharmonize(data, field, flagger, method, to_drop=None, **kwargs):
+    """
+    This function "undoes" regularization by regaining the original data and projecting the
+    flags calculated for the regularized data onto the original ones.
+
+    Afterwards the regularized data is removed from the data dios and ``'field'`` will be associated
+    with the original data "again".
+
+    Wherever the flags in the original data are "better" then the regularized flags projected on them,
+    they get overridden with this regularized flags value.
+
+    Which regularized flags are to be projected onto which original flags is controlled by the `method` parameter.
+
+    Generally, if you regularized with the method "X", you should pass the method "inverse_X" to the deharmonization.
+    If you regularized with an interpolation, the method "inverse_interpolation" would be the appropriate choice.
+    Also, you should pass the same `to_drop` keyword.
+
+    The deharm methods in detail:
+    ("original_flags" are associated with the original data that is to be regained,
+    "regularized_flags" are associated with the regularized data that is to be "deharmonized",
+    "freq" refers to the regularized datas sampling frequencie)
+
+    * ``'inverse_nagg'``: all original_flags within the range *+/- freq/2* of a regularized_flag, get assigned this
+      regularized flags value. (if regularized_flags > original_flag)
+    * ``'inverse_bagg'``: all original_flags succeeding a regularized_flag within the range of "freq", get assigned this
+      regularized flags value. (if regularized_flag > original_flag)
+    * ``'inverse_fagg'``: all original_flags preceding a regularized_flag within the range of "freq", get assigned this
+      regularized flags value. (if regularized_flag > original_flag)
+
+    * ``'inverse_interpolation'``: all original_flags within the range *+/- freq* of a regularized_flag, get assigned this
+      regularized flags value (if regularized_flag > original_flag).
+
+    * ``'inverse_nshift'``: That original_flag within the range +/- *freq/2*, that is nearest to a regularized_flag,
+      gets the regularized flags value. (if regularized_flag > original_flag)
+    * ``'inverse_bshift'``: That original_flag succeeding a regularized_flag within the range freq, that is nearest to a
+      regularized_flag, gets assigned this regularized flags value. (if regularized_flag > original_flag)
+    * ``'inverse_fshift'``: That original_flag preceding a regularized_flag within the range freq, that is nearest to a
+      regularized_flag, gets assigned this regularized flags value. (if regularized_flag > original_flag)
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-deharmonized.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    method : {'inverse_fagg', 'inverse_bagg', 'inverse_nagg', 'inverse_fshift', 'inverse_bshift', 'inverse_nshift',
+            'inverse_interpolation'}
+        The method used for projection of regularized flags onto original flags. See description above for more
+        details.
+    to_drop : {List[str], str}, default None
+        Flagtypes you want to drop before interpolation - effectively excluding values that are flagged
+        with a flag in to_drop from the interpolation process. Default results in flagger.BAD
+        values being dropped initially.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relative to the flagger input.
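+
+    Examples
+    --------
+    A minimal call sketch, undoing a previously applied linear regularization (the field name and the
+    frequency are only illustrative):
+
+    >>> data, flagger = harm_linear2Grid(data, "meas", flagger, freq="15min")
+    >>> # ... run some tests on the regularized "meas" ...
+    >>> data, flagger = harm_deharmonize(data, "meas", flagger, method="inverse_interpolation")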
+    """
 
-    return harm_harmonize(
-        data,
-        field,
-        flagger,
-        agg_freq,
-        inter_method="bagg",
-        reshape_method="bagg_no_deharm",
-        inter_agg=aggregator,
-        reshape_agg="max",
-        drop_flags=invalid_flags,
-        **kwargs,
-    )
+    newfield = str(field) + ORIGINAL_SUFFIX
+    data, flagger = proc_projectFlags(data, newfield, flagger, method, source=field, to_drop=to_drop, **kwargs)
+    data, flagger = proc_drop(data, field, flagger)
+    data, flagger = proc_rename(data, newfield, flagger, field)
+    return data, flagger
diff --git a/saqc/funcs/modelling.py b/saqc/funcs/modelling.py
new file mode 100644
index 0000000000000000000000000000000000000000..59f169c521583b41b83c5781741ae1efa5836f05
--- /dev/null
+++ b/saqc/funcs/modelling.py
@@ -0,0 +1,576 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+import numpy as np
+import numba
+from saqc.core.register import register
+from saqc.lib.ts_operators import (
+    polyRoller,
+    polyRollerNoMissing,
+    polyRollerNumba,
+    polyRollerNoMissingNumba,
+    polyRollerIrregular,
+    count
+)
+from saqc.lib.tools import seasonalMask, customRoller
+import logging
+
+logger = logging.getLogger("SaQC")
+
+
+@register(masking='field')
+def modelling_polyFit(data, field, flagger, winsz, polydeg, numba="auto", eval_flags=True, min_periods=0, **kwargs):
+    """
+    Function fits a polynomial model to the data and returns the residues.
+
+    The residue for value x is calculated by fitting a polynomial of degree "polydeg" to a data slice
+    of size "winsz", which has x at its center.
+
+    Note, that the residues will be stored to the `field` field of the input data, so that the original data, the
+    polynomial is fitted to, gets overridden.
+
+    Note that, if data[field] is not aligned to an equidistant frequency grid, the window size passed
+    has to be an offset string. Also numba boost options don't apply for irregularly sampled
+    timeseries.
+
+    Note that calculating the residues tends to be quite costly, because a function fitting is performed for every
+    sample. To improve performance, consider the following possibilities:
+
+    In case your data is sampled at an equidistant frequency grid:
+
+    (1) If you know your data to have no significant number of missing values, or if you do not want to
+        calculate residues for windows containing missing values anyway, performance can be increased by setting
+        min_periods=winsz.
+
+    (2) If your data consists of more than around 200000 samples, setting numba=True will boost the
+        calculations up to a factor of 5 (for sample size > 300000) - however for lower sample sizes,
+        numba will slow down the calculations, also up to a factor of 5, for sample size < 50000.
+        By default (numba='auto'), numba is set to True, if the data sample size exceeds 200000.
+
+    In case your data is not sampled at an equidistant frequency grid:
+
+    (1) Harmonization/resampling of your data will have a noticeable impact on the polynomial fitting's performance -
+        since the numba boost doesn't apply for irregularly sampled data in the current implementation.
+
+    Note, that in the current implementation, the initial and final winsz/2 values do not get fitted.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-modelled.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    winsz : {str, int}
+        The size of the window you want to use for fitting. If an integer is passed, the size
+        refers to the number of periods for every fitting window. If an offset string is passed,
+        the size refers to the total temporal extension. The window will be centered around the value-to-be-fitted.
+        For regularly sampled timeseries the period number will be cast down to an odd number if
+        even.
+    polydeg : int
+        The degree of the polynomial used for fitting
+    numba : {True, False, "auto"}, default "auto"
+        Whether or not to apply numba's just-in-time compilation onto the poly fit function. This will noticeably
+        increase the speed of calculation, if the sample size is sufficiently high.
+        If "auto" is selected, numba compatible fit functions get applied for data consisting of > 200000 samples.
+    eval_flags : bool, default True
+        Whether or not to assign new flags to the calculated residuals. If True, a residual gets assigned the worst
+        flag present in the interval, the data for its calculation was obtained from.
+    min_periods : {int, np.nan}, default 0
+        The minimum number of periods that have to be available in every value's fitting surroundings for the polynomial
+        fit to be performed. If there are not enough values, np.nan gets assigned. Default (0) results in fitting
+        regardless of the number of values present (results in overfitting for too sparse intervals). To automatically
+        set the minimum number of periods to the number of values in an offset defined window size, pass np.nan.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
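+    Examples
+    --------
+    A minimal call sketch (the field name, window size and polynomial degree are only illustrative):
+
+    >>> data, flagger = modelling_polyFit(data, "meas", flagger, winsz="1h", polydeg=2)
+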
+    """
+    if data[field].empty:
+        return data, flagger
+    data = data.copy()
+    to_fit = data[field]
+    flags = flagger.getFlags(field)
+    if not to_fit.index.freqstr:
+        if isinstance(winsz, int):
+            raise NotImplementedError("Integer based window size is not supported for not-harmonized" "sample series.")
+        # get interval centers
+        centers = np.floor((to_fit.rolling(pd.Timedelta(winsz) / 2, closed="both", min_periods=min_periods).count()))
+        centers = centers.drop(centers[centers.isna()].index)
+        centers = centers.astype(int)
+        residues = to_fit.rolling(pd.Timedelta(winsz), closed="both", min_periods=min_periods).apply(
+            polyRollerIrregular, args=(centers, polydeg)
+        )
+
+        def center_func(x, y=centers):
+            pos = x.index[int(len(x) - y[x.index[-1]])]
+            return y.index.get_loc(pos)
+
+        centers_iloc = centers.rolling(winsz, closed="both").apply(center_func, raw=False).astype(int)
+        temp = residues.copy()
+        for k in centers_iloc.iteritems():
+            residues.iloc[k[1]] = temp[k[0]]
+        residues[residues.index[0] : residues.index[centers_iloc[0]]] = np.nan
+        residues[residues.index[centers_iloc[-1]] : residues.index[-1]] = np.nan
+    else:
+        if isinstance(winsz, str):
+            winsz = int(np.floor(pd.Timedelta(winsz) / pd.Timedelta(to_fit.index.freqstr)))
+        if winsz % 2 == 0:
+            winsz = int(winsz - 1)
+        if numba == "auto":
+            if to_fit.shape[0] < 200000:
+                numba = False
+            else:
+                numba = True
+
+        val_range = np.arange(0, winsz)
+        center_index = int(np.floor(winsz / 2))
+        if min_periods < winsz:
+            if min_periods > 0:
+                to_fit = to_fit.rolling(winsz, min_periods=min_periods, center=True).apply(
+                    lambda x, y: x[y], raw=True, args=(center_index,)
+                )
+
+            # we need a missing value marker that is not nan, because nan values don't get passed by pandas' rolling
+            # method
+            miss_marker = to_fit.min()
+            miss_marker = np.floor(miss_marker - 1)
+            na_mask = to_fit.isna()
+            to_fit[na_mask] = miss_marker
+            if numba:
+                residues = to_fit.rolling(winsz).apply(
+                    polyRollerNumba,
+                    args=(miss_marker, val_range, center_index, polydeg),
+                    raw=True,
+                    engine="numba",
+                    engine_kwargs={"no_python": True},
+                )
+                # due to a tiny bug - rolling with center=True doesn't work when using the numba engine.
+                residues = residues.shift(-int(center_index))
+            else:
+                residues = to_fit.rolling(winsz, center=True).apply(
+                    polyRoller, args=(miss_marker, val_range, center_index, polydeg), raw=True
+                )
+            residues[na_mask] = np.nan
+        else:
+            # we only fit fully populated intervals:
+            if numba:
+                residues = to_fit.rolling(winsz).apply(
+                    polyRollerNoMissingNumba,
+                    args=(val_range, center_index, polydeg),
+                    engine="numba",
+                    engine_kwargs={"no_python": True},
+                    raw=True,
+                )
+                # due to a tiny bug - rolling with center=True doesn't work when using the numba engine.
+                residues = residues.shift(-int(center_index))
+            else:
+                residues = to_fit.rolling(winsz, center=True).apply(
+                    polyRollerNoMissing, args=(val_range, center_index, polydeg), raw=True
+                )
+
+    residues = residues - to_fit
+    data[field] = residues
+    if eval_flags:
+        num_cats, codes = flags.factorize()
+        num_cats = pd.Series(num_cats, index=flags.index).rolling(winsz, center=True, min_periods=min_periods).max()
+        nan_samples = num_cats[num_cats.isna()]
+        num_cats.drop(nan_samples.index, inplace=True)
+        to_flag = pd.Series(codes[num_cats.astype(int)], index=num_cats.index)
+        to_flag = to_flag.align(nan_samples)[0]
+        to_flag[nan_samples.index] = flags[nan_samples.index]
+        flagger = flagger.setFlags(field, to_flag.values, **kwargs)
+
+    return data, flagger
+
+
+@register(masking='field')
+def modelling_rollingMean(data, field, flagger, winsz, eval_flags=True, min_periods=0, center=True, **kwargs):
+    """
+    Models the data with the rolling mean and returns the residues.
+
+    Note, that the residues will be stored to the `field` field of the input data, so that the data that is modelled
+    gets overridden.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-modelled.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional Informations related to `data`.
+    winsz : {int, str}
+        The size of the window you want to roll with. If an integer is passed, the size
+        refers to the number of periods for every fitting window. If an offset string is passed,
+        the size refers to the total temporal extension.
+        For regularly sampled timeseries, the period number will be cast down to an odd number if
+        center = True.
+    eval_flags : bool, default True
+        Whether or not to assign new flags to the calculated residuals. If True, a residual gets assigned the worst
+        flag present in the interval, the data for its calculation was obtained from.
+        Currently not implemented in combination with not-harmonized timeseries.
+    min_periods : int, default 0
+        The minimum number of periods that have to be available in every value's fitting surroundings for the mean
+        fitting to be performed. If there are not enough values, np.nan gets assigned. Default (0) results in fitting
+        regardless of the number of values present.
+    center : bool, default True
+        Whether or not to center the window, the mean is calculated over, around the reference value. If False,
+        the reference value is placed at the right end of the window (classic rolling mean with lag).
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
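+
+    Examples
+    --------
+    A minimal call sketch (the field name and window size are only illustrative):
+
+    >>> data, flagger = modelling_rollingMean(data, "meas", flagger, winsz="1h")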
+    """
+
+    data = data.copy()
+    to_fit = data[field]
+    flags = flagger.getFlags(field)
+    if to_fit.empty:
+        return data, flagger
+
+    # starting with the annoying case: finding the rolling interval centers of not-harmonized input time series:
+    if (to_fit.index.freqstr is None) and center:
+        if isinstance(winsz, int):
+            raise NotImplementedError(
+                "Integer based window size is not supported for not-harmonized"
+                'sample series when rolling with "center=True".'
+            )
+        # get interval centers
+        centers = np.floor((to_fit.rolling(pd.Timedelta(winsz) / 2, closed="both", min_periods=min_periods).count()))
+        centers = centers.drop(centers[centers.isna()].index)
+        centers = centers.astype(int)
+        means = to_fit.rolling(pd.Timedelta(winsz), closed="both", min_periods=min_periods).mean()
+
+        def center_func(x, y=centers):
+            pos = x.index[int(len(x) - y[x.index[-1]])]
+            return y.index.get_loc(pos)
+
+        centers_iloc = centers.rolling(winsz, closed="both").apply(center_func, raw=False).astype(int)
+        temp = means.copy()
+        for k in centers_iloc.iteritems():
+            means.iloc[k[1]] = temp[k[0]]
+        # last values are false, due to structural reasons:
+        means[means.index[centers_iloc[-1]] : means.index[-1]] = np.nan
+
+    # everything is more easy if data[field] is harmonized:
+    else:
+        if isinstance(winsz, str):
+            winsz = int(np.floor(pd.Timedelta(winsz) / pd.Timedelta(to_fit.index.freqstr)))
+        if (winsz % 2 == 0) & center:
+            winsz = int(winsz - 1)
+
+        means = to_fit.rolling(window=winsz, center=center, closed="both").mean()
+
+    residues = means - to_fit
+    data[field] = residues
+    if eval_flags:
+        num_cats, codes = flags.factorize()
+        num_cats = pd.Series(num_cats, index=flags.index).rolling(winsz, center=True, min_periods=min_periods).max()
+        nan_samples = num_cats[num_cats.isna()]
+        num_cats.drop(nan_samples.index, inplace=True)
+        to_flag = pd.Series(codes[num_cats.astype(int)], index=num_cats.index)
+        to_flag = to_flag.align(nan_samples)[0]
+        to_flag[nan_samples.index] = flags[nan_samples.index]
+        flagger = flagger.setFlags(field, to_flag.values, **kwargs)
+
+    return data, flagger
+
+
+def modelling_mask(data, field, flagger, mode, mask_var=None, season_start=None, season_end=None,
+                   include_bounds=True):
+    """
+    This function realizes masking within saqc.
+
+    Due to some inner saqc mechanics, it is not straightforwardly possible to exclude
+    values or datachunks from flagging routines. This function replaces flags with the np.nan
+    value, wherever values are to get masked. Furthermore, the masked values get replaced by
+    np.nan, so that they don't affect calculations.
+
+    Here is a recipe on how to apply a flagging function only onto a masked chunk of the variable field:
+
+    1. duplicate "field" in the input data (proc_fork)
+    2. mask the duplicated data (modelling_mask)
+    3. apply the tests you only want to be applied onto the masked data chunks (saqc_tests)
+    4. project the flags, calculated on the duplicated and masked data, onto the original field data
+        (proc_projectFlags or flagGeneric)
+    5. drop the duplicated data (proc_drop)
+
+    To see an implemented example, check out flagSeasonalRange in the saqc.functions module. A minimal call
+    sketch is also given at the end of the Examples section below.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-masked.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    mode : {"seasonal", "mask_var"}
+        The masking mode.
+        - "seasonal": parameters "season_start", "season_end" are evaluated to generate a seasonal (periodical) mask
+        - "mask_var": data[mask_var] is expected to be a boolean valued timeseries and is used as mask.
+    mask_var : {None, str}, default None
+        Only effective if mode == "mask_var"
+        Fieldname of the column, holding the data that is to be used as mask. (must be a boolean series)
+        Neither the series' length nor its labels have to match data[field]'s index and length. An inner join of the
+        indices will be calculated and values get masked where the values of the inner join are "True".
+    season_start : {None, str}, default None
+        Only effective if mode == "seasonal"
+        String denoting starting point of every period. Formally, it has to be a truncated instance of "mm-ddTHH:MM:SS".
+        Has to be of same length as `season_end` parameter.
+        See examples section below for some examples.
+    season_end : {None, str}, default None
+        Only effective if mode == "seasonal"
+        String denoting the end point of every period. Formally, it has to be a truncated instance of "mm-ddTHH:MM:SS".
+        Has to be of same length as the `season_start` parameter.
+        See examples section below for some examples.
+    include_bounds : bool, default True
+        Whether or not to include the mask-defining bounds in the mask.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+
+    Examples
+    --------
+    The `season_start` and `season_end` parameters provide a convenient way to generate seasonal / date-periodic masks.
+    They have to be strings of the forms: "mm-ddTHH:MM:SS", "ddTHH:MM:SS" , "HH:MM:SS", "MM:SS" or "SS"
+    (mm=month, dd=day, HH=hour, MM=minute, SS=second)
+    Single digit specifications have to be given with leading zeros.
+    `season_start` and `season_end` strings have to be of same length (refer to the same periodicity)
+    The highest date unit gives the period.
+    For example:
+
+    >>> season_start = "01T15:00:00"
+    >>> season_end = "13T17:30:00"
+
+    Will result in all values sampled between 15:00 at the first and 17:30 at the 13th of every month getting masked.
+
+    >>> season_start = "01:00"
+    >>> season_end = "04:00"
+
+    All the values between the first and 4th minute of every hour get masked.
+
+    >>> season_start = "01-01T00:00:00"
+    >>> season_end = "01-03T00:00:00"
+
+    Mask January and February of every year. Masking is always inclusive, so in this case the mask will
+    include 00:00:00 at the first of March. To exclude this one, pass:
+
+    >>> season_start = "01-01T00:00:00"
+    >>> season_end = "02-28T23:59:59"
+
+    To mask intervals that wrap over a period's boundary, like nights or winter, exchange the sequence of season start
+    and season end. For example, to mask night hours between 22:00:00 in the evening and 06:00:00 in the morning, pass:
+
+    >>> season_start = "22:00:00"
+    >>> season_end = "06:00:00"
+
+    When inclusive_selection="season", all above examples work the same way, only that you now
+    determine wich values NOT TO mask (=wich values are to constitute the "seasons").
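+
+    A minimal call sketch, masking the night hours of an (illustrative) field "meas":
+
+    >>> data, flagger = modelling_mask(data, "meas", flagger, mode="seasonal",
+    ...                                season_start="22:00:00", season_end="06:00:00")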
+    """
+    data = data.copy()
+    datcol_idx = data[field].index
+
+    if mode == 'seasonal':
+        to_mask = seasonalMask(datcol_idx, season_start, season_end, include_bounds)
+    elif mode == 'mask_var':
+        idx = data[mask_var].index.intersection(datcol_idx)
+        to_mask = data.loc[idx, mask_var]
+    else:
+        raise ValueError("Keyword passed as masking mode is unknown ({})!".format(mode))
+
+    data.aloc[to_mask, field] = np.nan
+    flagger = flagger.setFlags(field, loc=to_mask, flag=np.nan, force=True)
+
+    return data, flagger
+
+
+@numba.jit(parallel=True, nopython=True)
+def _slidingWindowSearchNumba(data_arr, bwd_start, fwd_end, split, stat_func, thresh_func, num_val):
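+    # evaluate stat_func and thresh_func on every backward/forward window pair: x holds the data left of the
+    # split point, y the data right of it; the caller compares both result arrays to locate changepoints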
+    stat_arr = np.zeros(num_val)
+    thresh_arr = np.zeros(num_val)
+    for win_i in numba.prange(0, num_val-1):
+        x = data_arr[bwd_start[win_i]:split[win_i]]
+        y = data_arr[split[win_i]:fwd_end[win_i]]
+        stat_arr[win_i] = stat_func(x, y)
+        thresh_arr[win_i] = thresh_func(x, y)
+    return stat_arr, thresh_arr
+
+
+def _slidingWindowSearch(data_arr, bwd_start, fwd_end, split, stat_func, thresh_func, num_val):
+    stat_arr = np.zeros(num_val)
+    thresh_arr = np.zeros(num_val)
+    for win_i in range(0, num_val-1):
+        x = data_arr[bwd_start[win_i]:split[win_i]]
+        y = data_arr[split[win_i]:fwd_end[win_i]]
+        stat_arr[win_i] = stat_func(x, y)
+        thresh_arr[win_i] = thresh_func(x, y)
+    return stat_arr, thresh_arr
+
+
+def _reduceCPCluster(stat_arr, thresh_arr, start, end, obj_func, num_val):
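+    # collapse clusters of changepoint candidates: within every reduction window [start, end), only the
+    # position selected by obj_func (e.g. the index of the maximal test statistic) is kept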
+    out_arr = np.zeros(shape=num_val, dtype=bool)
+    for win_i in numba.prange(0, num_val):
+        s, e = start[win_i], end[win_i]
+        x = stat_arr[s:e]
+        y = thresh_arr[s:e]
+        pos = s + obj_func(x, y) + 1
+        out_arr[s:e] = False
+        out_arr[pos] = True
+    return out_arr
+
+
+@register(masking='field')
+def modelling_changePointCluster(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd,
+                                 fwd_window=None, min_periods_fwd=None, closed='both', try_to_jit=True,
+                                 reduce_window=None, reduce_func=lambda x, y: x.argmax(), flag_changepoints=False,
+                                 model_by_resids=False, **kwargs):
+    """
+    Assigns labels to the data, aiming to reflect continuous regimes of the processes the data is assumed to be
+    generated by.
+    The regime change point detection is based on a sliding window search.
+
+    Note, that the cluster labels will be stored to the `field` field of the input data, so that the data that is
+    clustered gets overridden.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-clustered.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+    stat_func : Callable[numpy.array, numpy.array]
+        A function that assigns a value to every twin window. Left window content will be passed to first variable,
+        right window content will be passed to the second.
+    thresh_func : Callable[numpy.array, numpy.array]
+        A function that determines the value level, exceeding which qualifies a timestamp's stat_func value as denoting
+        a changepoint.
+    bwd_window : str
+        The left (backwards facing) window's temporal extension (freq-string).
+    min_periods_bwd : {str, int}
+        Minimum number of periods that have to be present in a backwards facing window, for a changepoint test to be
+        performed.
+    fwd_window : {None, str}, default None
+        The right (forward facing) window's temporal extension (freq-string).
+    min_periods_fwd : {None, str, int}, default None
+        Minimum number of periods that have to be present in a forward facing window, for a changepoint test to be
+        performed.
+    closed : {'right', 'left', 'both', 'neither'}, default 'both'
+        Determines the closure of the sliding windows.
+    reduce_window : {None, False, str}, default None
+        The sliding window search method is not an exact changepoint search method and usually there won't be
+        a single changepoint detected, but a "region" of change around a changepoint.
+        If `reduce_window` is not False, for every window of size `reduce_window`, there
+        will be selected the value with index `reduce_func(x, y)` and the others will be dropped.
+        If `reduce_window` is None, the reduction window size equals the
+        twin window size the changepoints have been detected with.
+    reduce_func : Callable[numpy.array, numpy.array], default lambda x, y: x.argmax()
+        A function that must return an index value upon input of two arrays x and y.
+        First input parameter will hold the result from the stat_func evaluation for every
+        reduction window. Second input parameter holds the result from the thresh_func evaluation.
+        The default reduction function just selects the value that maximizes the stat_func.
+    flag_changepoints : bool, default False
+        If True, the points where a change in the data modelling regime is detected get flagged bad.
+    model_by_resids : bool, default False
+        If True, the data is replaced by the stat_funcs results instead of regime labels.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
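+    Examples
+    --------
+    A minimal call sketch (the field name, window sizes and the two test functions are only illustrative - any
+    pair of functions mapping two numpy arrays to a score and a threshold will do):
+
+    >>> import numpy as np
+    >>> stat = lambda x, y: np.abs(np.mean(x) - np.mean(y))
+    >>> thresh = lambda x, y: 0.5
+    >>> data, flagger = modelling_changePointCluster(data, "meas", flagger, stat, thresh,
+    ...                                              bwd_window="30min", min_periods_bwd=5)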
+    """
+    data = data.copy()
+    data_ser = data[field].dropna()
+    center = False
+    var_len = data_ser.shape[0]
+    if fwd_window is None:
+        fwd_window = bwd_window
+    if min_periods_fwd is None:
+        min_periods_fwd = min_periods_bwd
+    if reduce_window is None:
+        reduce_window = f"{int(pd.Timedelta(bwd_window).total_seconds() + pd.Timedelta(fwd_window).total_seconds())}s"
+
+    roller = customRoller(data_ser, window=bwd_window)
+    bwd_start, bwd_end = roller.window.get_window_bounds(len(data_ser), min_periods=min_periods_bwd, closed=closed)
+
+    roller = customRoller(data_ser, window=fwd_window, forward=True)
+    fwd_start, fwd_end = roller.window.get_window_bounds(len(data_ser), min_periods=min_periods_fwd, closed=closed)
+
+    min_mask = ~((fwd_end - fwd_start <= min_periods_fwd) | (bwd_end - bwd_start <= min_periods_bwd))
+    fwd_end = fwd_end[min_mask]
+    split = bwd_end[min_mask]
+    bwd_start = bwd_start[min_mask]
+    masked_index = data_ser.index[min_mask]
+    check_len = len(fwd_end)
+    data_arr = data_ser.values
+
+    if try_to_jit:
+        jit_sf = numba.jit(stat_func, nopython=True)
+        jit_tf = numba.jit(thresh_func, nopython=True)
+        try:
+            jit_sf(data_arr[bwd_start[0]:bwd_end[0]], data_arr[fwd_start[0]:fwd_end[0]])
+            jit_tf(data_arr[bwd_start[0]:bwd_end[0]], data_arr[fwd_start[0]:fwd_end[0]])
+            stat_func = jit_sf
+            thresh_func = jit_tf
+            try_to_jit = True
+        except numba.core.errors.TypingError:
+            try_to_jit = False
+            logger.warning('Could not jit passed statistic - omitting jitting!')
+
+    if try_to_jit:
+        stat_arr, thresh_arr = _slidingWindowSearchNumba(data_arr, bwd_start, fwd_end, split, stat_func, thresh_func,
+                                                    check_len)
+    else:
+        stat_arr, thresh_arr = _slidingWindowSearch(data_arr, bwd_start, fwd_end, split, stat_func, thresh_func,
+                                                    check_len)
+    result_arr = stat_arr > thresh_arr
+
+    if model_by_resids:
+        residues = pd.Series(np.nan, index=data[field].index)
+        residues[masked_index] = stat_arr
+        data[field] = residues
+        flagger = flagger.setFlags(field, flag=flagger.UNFLAGGED, force=True, **kwargs)
+        return data, flagger
+
+    det_index = masked_index[result_arr]
+    detected = pd.Series(True, index=det_index)
+    if reduce_window is not False:
+        l = detected.shape[0]
+        roller = customRoller(detected, window=reduce_window)
+        start, end = roller.window.get_window_bounds(num_values=l, min_periods=1, closed='both', center=True)
+
+        detected = _reduceCPCluster(stat_arr[result_arr], thresh_arr[result_arr], start, end, reduce_func, l)
+        det_index = det_index[detected]
+
+    cluster = pd.Series(False, index=data[field].index)
+    cluster[det_index] = True
+    cluster = cluster.cumsum()
+    # (better to start cluster labels with number one)
+    cluster += 1
+    data[field] = cluster
+    flagger = flagger.setFlags(field, flag=flagger.UNFLAGGED, force=True, **kwargs)
+    if flag_changepoints:
+        flagger = flagger.setFlags(field, loc=det_index)
+    return data, flagger
diff --git a/saqc/funcs/pattern_rec.py b/saqc/funcs/pattern_rec.py
new file mode 100644
index 0000000000000000000000000000000000000000..83f392df76a9c0dfc5a1a1af2e7fce108c92caf9
--- /dev/null
+++ b/saqc/funcs/pattern_rec.py
@@ -0,0 +1,152 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pandas as pd
+import dtw
+import pywt
+from mlxtend.evaluate import permutation_test
+
+from saqc.core.register import register
+from saqc.lib.tools import customRoller
+
+
+@register(masking='field')
+def flagPattern_wavelet(data, field, flagger, ref_field, widths=(1, 2, 4, 8), waveform='mexh', **kwargs):
+    """
+    Pattern recognition via wavelets.
+
+    The steps are:
+     1. work on chunks returned by a moving window
+     2. each chunk is compared to the given pattern, using the wavelet algorithm as presented in [1]
+     3. if the compared chunk is equal to the given pattern it gets flagged
+
+    Parameters
+    ----------
+
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column you want to correct.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    ref_field: str
+        The fieldname in `data` which holds the pattern.
+    widths: tuple of int
+        Widths for wavelet decomposition. [1] recommends a dyadic scale. Default: (1, 2, 4, 8)
+    waveform: str
+        Wavelet to be used for decomposition. Default: 'mexh'. See [2] for a list.
+
+    kwargs
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relatively to the flagger input.
+
+
+    References
+    ----------
+
+    The underlying pattern recognition algorithm using wavelets is documented here:
+    [1] Maharaj, E.A. (2002): Pattern Recognition of Time Series using Wavelets. In: Härdle W., Rönz B. (eds) Compstat. Physica, Heidelberg, 978-3-7908-1517-7.
+
+    The documentation of the python package used for the wavelet decomposition can be found here:
+    [2] https://pywavelets.readthedocs.io/en/latest/ref/cwt.html#continuous-wavelet-families
+    """
+
+    ref = data[ref_field].to_numpy()
+    cwtmat_ref, _ = pywt.cwt(ref, widths, waveform)
+    wavepower_ref = np.power(cwtmat_ref, 2)
+    len_width = len(widths)
+
+    def func(x, y):
+        return x.sum() / y.sum()
+
+    def isPattern(chunk):
+        cwtmat_chunk, _ = pywt.cwt(chunk, widths, waveform)
+        wavepower_chunk = np.power(cwtmat_chunk, 2)
+
+        # Permutation test on Powersum of matrix
+        for i in range(len_width):
+            x = wavepower_ref[i]
+            y = wavepower_chunk[i]
+            pval = permutation_test(x, y, method='approximate', num_rounds=200, func=func, seed=0)
+            if min(pval, 1 - pval) > 0.01:
+                return True
+        return False
+
+    dat = data[field]
+    sz = len(ref)
+    mask = customRoller(dat, window=sz, min_periods=sz).apply(isPattern, raw=True)
+
+    flagger = flagger.setFlags(field, loc=mask, **kwargs)
+    return data, flagger
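+
+
+# Usage sketch (illustrative only): the field names are hypothetical and `data`/`flagger`
+# are assumed to be an already initialised dios.DictOfSeries / saqc flagger pair that
+# holds both columns.
+def _sketch_flagPattern_wavelet(data, flagger):
+    # flag all chunks of "level_raw" whose wavelet power spectrum matches the
+    # reference pattern stored in "level_pattern"
+    return flagPattern_wavelet(data, "level_raw", flagger, ref_field="level_pattern",
+                               widths=(1, 2, 4, 8), waveform="mexh")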
+
+
+@register(masking='field')
+def flagPattern_dtw(data, field, flagger, ref_field, max_distance=0.03, normalize=True, **kwargs):
+    """ Pattern Recognition via Dynamic Time Warping.
+
+    The steps are:
+     1. work on chunks returned by a moving window
+     2. each chunk is compared to the given pattern, using the dynamic time warping algorithm as presented in [1]
+     3. if the compared chunk is equal to the given pattern it gets flagged
+
+    Parameters
+    ----------
+
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column you want to correct.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    ref_field: str
+        The fieldname in `data` which holds the pattern.
+    max_distance: float
+        Maximum dtw-distance between a data chunk and the pattern for the chunk to be recognized as the pattern. Default: 0.03
+    normalize: bool
+        Whether to normalize the dtw-distance (see [1]). Default: True
+
+
+    kwargs
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relatively to the flagger input.
+
+
+    References
+    ----------
+    A nice description of the underlying Dynamic Time Warping algorithm can be found here:
+
+    [1] https://cran.r-project.org/web/packages/dtw/dtw.pdf
+    """
+    ref = data[ref_field]
+    ref_var = ref.var()
+
+    def func(a, b):
+        return np.linalg.norm(a - b)
+
+    def isPattern(chunk):
+        dist, *_ = dtw.dtw(chunk, ref, func)
+        if normalize:
+            dist /= ref_var
+        return dist < max_distance
+
+    dat = data[field]
+    sz = len(ref)
+    mask = customRoller(dat, window=sz, min_periods=sz).apply(isPattern, raw=True)
+
+    flagger = flagger.setFlags(field, loc=mask, **kwargs)
+    return data, flagger
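+
+
+# Usage sketch (illustrative only): field names are hypothetical; `data`/`flagger` are
+# assumed to be an initialised dios.DictOfSeries / saqc flagger pair holding both columns.
+def _sketch_flagPattern_dtw(data, flagger):
+    # flag all chunks of "level_raw" whose (normalized) dtw-distance to the reference
+    # pattern in "level_pattern" stays below max_distance
+    return flagPattern_dtw(data, "level_raw", flagger, ref_field="level_pattern",
+                           max_distance=0.05, normalize=True)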
diff --git a/saqc/funcs/proc_functions.py b/saqc/funcs/proc_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa6974c781e90c49520c63300bfae6c15af426ae
--- /dev/null
+++ b/saqc/funcs/proc_functions.py
@@ -0,0 +1,1265 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+import numpy as np
+from saqc.core.register import register
+from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq, shift2Freq, expModelFunc
+from saqc.funcs.breaks_detection import breaks_flagRegimeAnomaly
+from saqc.funcs.modelling import modelling_changePointCluster
+from saqc.lib.tools import toSequence, mergeDios, dropper, mutateIndex, detectDeviants, evalFreqStr
+import dios
+import functools
+from scipy.optimize import curve_fit
+from sklearn.linear_model import LinearRegression
+from sklearn.utils import resample
+
+
+ORIGINAL_SUFFIX = "_original"
+
+METHOD2ARGS = {
+    "inverse_fshift": ("backward", pd.Timedelta),
+    "inverse_bshift": ("forward", pd.Timedelta),
+    "inverse_nshift": ("nearest", lambda x: pd.Timedelta(x) / 2),
+    "inverse_fagg": ("bfill", pd.Timedelta),
+    "inverse_bagg": ("ffill", pd.Timedelta),
+    "inverse_nagg": ("nearest", lambda x: pd.Timedelta(x) / 2),
+    "match": (None, lambda x: "0min"),
+}
+
+
+@register(masking='field')
+def proc_rollingInterpolateMissing(
+    data, field, flagger, winsz, func=np.median, center=True, min_periods=0, interpol_flag="UNFLAGGED", **kwargs
+):
+    """
+    Interpolates missing values (nan values present in the data) by assigning them the aggregation result of
+    a window surrounding them.
+
+    Note that in the current implementation, center=True can only be used with integer window sizes - furthermore
+    note that integer window sizes can yield skewed aggregation results for non-harmonized or irregular data.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-interpolated.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    winsz : int, str
+        The size of the window, the aggregation is computed from. Either counted in periods number (Integer passed),
+        or defined by a total temporal extension (offset String passed).
+    func : Callable
+        The function used for aggregation.
+    center : bool, default True
+        Whether or not the window the aggregation is computed over is centered around the value to be interpolated.
+    min_periods : int
+        Minimum number of valid (not np.nan) values that have to be available in a window for its aggregation to be
+        computed.
+    interpol_flag : {'GOOD', 'BAD', 'UNFLAGGED', str}, default 'UNFLAGGED'
+        Flag that is to be inserted for the interpolated values. You can either pass one of the three major flag-classes
+        or specify directly a certain flag from the passed flagger.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relatively to the flagger input.
+    """
+
+    data = data.copy()
+    datcol = data[field]
+    roller = datcol.rolling(window=winsz, center=center, min_periods=min_periods)
+    try:
+        func_name = func.__name__
+        if func_name[:3] == "nan":
+            func_name = func_name[3:]
+        rolled = getattr(roller, func_name)()
+    except AttributeError:
+        rolled = roller.apply(func)
+
+    na_mask = datcol.isna()
+    interpolated = na_mask & ~rolled.isna()
+    datcol[na_mask] = rolled[na_mask]
+    data[field] = datcol
+
+    if interpol_flag:
+        if interpol_flag in ["BAD", "UNFLAGGED", "GOOD"]:
+            interpol_flag = getattr(flagger, interpol_flag)
+        flagger = flagger.setFlags(field, loc=interpolated, force=True, flag=interpol_flag, **kwargs)
+
+    return data, flagger
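+
+
+# Usage sketch (illustrative only): "temp_raw" is a hypothetical field name and
+# `data`/`flagger` an initialised dios.DictOfSeries / saqc flagger pair.
+def _sketch_proc_rollingInterpolateMissing(data, flagger):
+    # fill nan values in "temp_raw" with the median of a centered 5-value window,
+    # requiring at least 3 valid values per window
+    return proc_rollingInterpolateMissing(data, "temp_raw", flagger, winsz=5,
+                                          func=np.median, center=True, min_periods=3)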
+
+
+@register(masking='field')
+def proc_interpolateMissing(
+    data,
+    field,
+    flagger,
+    method,
+    inter_order=2,
+    inter_limit=2,
+    interpol_flag="UNFLAGGED",
+    downgrade_interpolation=False,
+    not_interpol_flags=None,
+    **kwargs
+):
+
+    """
+    Function to interpolate nan values in the data.
+
+    All the interpolation methods of ``pandas.Series.interpolate`` are available and are selected by the very same
+    keywords that you would pass to the ``method`` parameter of ``pd.Series.interpolate``.
+
+    Note that the `inter_limit` keyword restricts the interpolation to gaps that do not contain more than
+    `inter_limit` successive nan entries.
+
+    Note that the function differs from ``proc_interpolateGrid`` in that it ONLY interpolates nan values that
+    were already present in the passed data.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-interpolated.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    method : {"linear", "time", "nearest", "zero", "slinear", "quadratic", "cubic", "spline", "barycentric",
+        "polynomial", "krogh", "piecewise_polynomial", "spline", "pchip", "akima"}: string
+        The interpolation method you want to apply.
+    inter_order : int, default 2
+        If your selected interpolation method can be performed at different 'orders', pass the desired
+        order here.
+    inter_limit : int, default 2
+        Maximum number of consecutive 'nan' values allowed for a gap to be interpolated.
+    interpol_flag : {'GOOD', 'BAD', 'UNFLAGGED', str}, default 'UNFLAGGED'
+        Flag that is to be inserted for the interpolated values. You can either pass one of the three major flag-classes
+        or specify directly a certain flag from the passed flagger.
+    downgrade_interpolation : bool, default False
+        If interpolation can not be performed at `inter_order` - (not enough values or not implemented at this order) -
+        automatically try to interpolate at order `inter_order` :math:`- 1`.
+    not_interpol_flags : {None, str, List[str]}, default None
+        A list of flags or a single Flag, marking values, you want NOT to be interpolated.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relatively to the flagger input.
+    """
+
+    data = data.copy()
+    inter_data = interpolateNANs(
+        data[field],
+        method,
+        order=inter_order,
+        inter_limit=inter_limit,
+        downgrade_interpolation=downgrade_interpolation,
+        return_chunk_bounds=False,
+    )
+    interpolated = data[field].isna() & inter_data.notna()
+
+    if not_interpol_flags:
+        for f in toSequence(not_interpol_flags):
+            if f in ["BAD", "UNFLAGGED", "GOOD"]:
+                f = getattr(flagger, f)
+            is_flagged = flagger.isFlagged(flag=f)[field]
+            cond = is_flagged & interpolated
+            inter_data.mask(cond, np.nan, inplace=True)
+        interpolated &= inter_data.notna()
+
+    if interpol_flag:
+        if interpol_flag in ["BAD", "UNFLAGGED", "GOOD"]:
+            interpol_flag = getattr(flagger, interpol_flag)
+        flagger = flagger.setFlags(field, loc=interpolated, force=True, flag=interpol_flag, **kwargs)
+
+    data[field] = inter_data
+    return data, flagger
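+
+
+# Usage sketch (illustrative only): "temp_raw" is a hypothetical field name.
+def _sketch_proc_interpolateMissing(data, flagger):
+    # time-weighted interpolation of nan-gaps no longer than 3 consecutive values;
+    # interpolated values get the UNFLAGGED flag
+    return proc_interpolateMissing(data, "temp_raw", flagger, method="time",
+                                   inter_limit=3, interpol_flag="UNFLAGGED")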
+
+
+@register(masking='field')
+def proc_interpolateGrid(
+        data,
+        field,
+        flagger,
+        freq,
+        method,
+        inter_order=2,
+        to_drop=None,
+        downgrade_interpolation=False,
+        empty_intervals_flag=None,
+        grid_field=None,
+        inter_limit=2,
+        freq_check=None,
+        **kwargs):
+
+    """
+    Function to interpolate the data at regular (equidistant) timestamps (or Grid points).
+
+    Note that the interpolation will only be calculated for grid timestamps that have a preceding AND a succeeding
+    valid data value within "freq" range.
+
+    Note that the function differs from proc_interpolateMissing by returning a whole new data set, only containing
+    samples at the interpolated, equidistant timestamps (of frequency "freq").
+
+    Note that it is possible to interpolate irregular "grids" (with no frequencies). In fact, any date index
+    can be the target of the interpolation. Just pass the field name of the variable holding the index
+    you want to interpolate to "grid_field". 'freq' is then used to determine the maximum gap size for
+    a grid point to be interpolated.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-interpolated.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        An Offset String, interpreted as the frequency of
+        the grid you want to interpolate your data at.
+    method : {"linear", "time", "nearest", "zero", "slinear", "quadratic", "cubic", "spline", "barycentric",
+        "polynomial", "krogh", "piecewise_polynomial", "spline", "pchip", "akima"}: string
+        The interpolation method you want to apply.
+    inter_order : integer, default 2
+        If your selected interpolation method can be performed at different 'orders', pass the desired
+        order here.
+    to_drop : {None, str, List[str]}, default None
+        Flags that refer to values you want to drop before interpolation - effectively excluding grid points from
+        the interpolation that are only surrounded by values carrying a flag listed in `to_drop`. The default
+        results in the flagger's *BAD* flag being the drop flag.
+    downgrade_interpolation : bool, default False
+        If interpolation can not be performed at `inter_order` - (not enough values or not implemented at this order) -
+        automatically try to interpolate at order `inter_order` :math:`- 1`.
+    empty_intervals_flag : str, default None
+        A Flag, that you want to assign to those values in the resulting equidistant sample grid, that were not
+        surrounded by valid data in the original dataset, and thus were not interpolated. Default automatically assigns
+        ``flagger.BAD`` flag to those values.
+    grid_field : String, default None
+        Use the timestamp of another variable as (not necessarily regular) "grid" to be interpolated.
+    inter_limit : Integer, default 2
+        Maximum number of consecutive grid values allowed to be interpolated. If set
+        to *n*, chunks of *n* or more consecutive grid values, with no data value in between, won't be
+        interpolated.
+    freq_check : {None, 'check', 'auto'}, default None
+
+        * ``None``: do not validate frequency-string passed to `freq`
+        * ``'check'``: estimate frequency and log a warning if the estimate mismatches the frequency string passed to 'freq', or
+          if no uniform sampling rate could be estimated
+        * ``'auto'``: estimate frequency and use estimate. (Ignores `freq` parameter.)
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relatively to the flagger input.
+    """
+
+    datcol = data[field]
+    datcol = datcol.copy()
+    flagscol = flagger.getFlags(field)
+    freq = evalFreqStr(freq, freq_check, datcol.index)
+    if empty_intervals_flag is None:
+        empty_intervals_flag = flagger.BAD
+
+    drop_mask = dropper(field, to_drop, flagger, flagger.BAD)
+    drop_mask |= flagscol.isna()
+    drop_mask |= datcol.isna()
+    datcol[drop_mask] = np.nan
+    datcol.dropna(inplace=True)
+    freq = evalFreqStr(freq, freq_check, datcol.index)
+    if datcol.empty:
+        data[field] = datcol
+        reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+        flagger = flagger.slice(drop=field).merge(reshaped_flagger, subset=[field], inplace=True)
+        return data, flagger
+    # account for annoying case of subsequent frequency aligned values, differing exactly by the margin
+    # 2*freq:
+    spec_case_mask = datcol.index.to_series()
+    spec_case_mask = spec_case_mask - spec_case_mask.shift(1)
+    spec_case_mask = spec_case_mask == 2 * pd.Timedelta(freq)
+    spec_case_mask = spec_case_mask[spec_case_mask]
+    spec_case_mask = spec_case_mask.resample(freq).asfreq().dropna()
+
+    if not spec_case_mask.empty:
+        spec_case_mask = spec_case_mask.tshift(-1, freq)
+
+    # prepare grid interpolation:
+    if grid_field is None:
+        grid_index = pd.date_range(start=datcol.index[0].floor(freq), end=datcol.index[-1].ceil(freq), freq=freq,
+                                   name=datcol.index.name)
+    else:
+        grid_index = data[grid_field].index
+
+
+    aligned_start = datcol.index[0] == grid_index[0]
+    aligned_end = datcol.index[-1] == grid_index[-1]
+    datcol = datcol.reindex(datcol.index.join(grid_index, how="outer",))
+
+    # do the interpolation
+    inter_data, chunk_bounds = interpolateNANs(
+        datcol, method, order=inter_order, inter_limit=inter_limit, downgrade_interpolation=downgrade_interpolation,
+        return_chunk_bounds=True
+    )
+
+    if grid_field is None:
+        # override falsely interpolated values:
+        inter_data[spec_case_mask.index] = np.nan
+
+    # store interpolated grid
+    inter_data = inter_data[grid_index]
+    data[field] = inter_data
+
+    # flags reshaping (dropping data drops):
+    flagscol.drop(flagscol[drop_mask].index, inplace=True)
+
+    if grid_field is not None:
+        # only basic flag propagation supported for custom grids (take worst from preceeding/succeeding)
+        preceeding = flagscol.reindex(grid_index, method='ffill', tolerance=freq)
+        succeeding = flagscol.reindex(grid_index, method='bfill', tolerance=freq)
+        # check for too big gaps in the source data and drop the values interpolated in those too big gaps
+        na_mask = preceeding.isna() | succeeding.isna()
+        na_mask = na_mask[na_mask]
+        preceeding.drop(na_mask.index, inplace=True)
+        succeeding.drop(na_mask.index, inplace=True)
+        inter_data.drop(na_mask.index, inplace=True)
+        data[field] = inter_data
+        mask = succeeding > preceeding
+        preceeding.loc[mask] = succeeding.loc[mask]
+        flagscol = preceeding
+        flagger_new = flagger.initFlags(inter_data).setFlags(field, flag=flagscol, force=True, **kwargs)
+        flagger = flagger.slice(drop=field).merge(flagger_new)
+        return data, flagger
+
+    # for freq defined grids, max-aggregate flags of every grid points freq-ranged surrounding
+    # hack ahead! Resampling with overlapping intervals:
+    # 1. -> no rolling over categories allowed in pandas, so we translate manually:
+    cats = pd.CategoricalIndex(flagger.dtype.categories, ordered=True)
+    cats_dict = {cats[i]: i for i in range(0, len(cats))}
+    flagscol = flagscol.replace(cats_dict)
+    # 2. -> combine resample+rolling to resample with overlapping intervals:
+    flagscol = flagscol.resample(freq).max()
+    initial = flagscol[0]
+    flagscol = flagscol.rolling(2, center=True, closed="neither").max()
+    flagscol[0] = initial
+    cats_dict = {num: key for (key, num) in cats_dict.items()}
+    flagscol = flagscol.astype(int, errors="ignore").replace(cats_dict)
+    flagscol[flagscol.isna()] = empty_intervals_flag
+    # ...hack done
+
+    # we might miss the flag for the interpolated data grid's last entry (if we miss it, the datapoint is always nan -
+    # we just settle a convention here: the resulting grid should start BEFORE the first valid data entry and range to
+    # AFTER the last valid data entry):
+    if inter_data.shape[0] > flagscol.shape[0]:
+        flagscol = flagscol.append(pd.Series(empty_intervals_flag, index=[datcol.index[-1]]))
+
+    # Additional consistency operation: we have to block the flags of the first/last interpolated data points, since
+    # they very likely represent chunk starts/ends (except if the data start and/or end timestamp were grid-aligned
+    # before the grid interpolation already.)
+    if np.isnan(inter_data[0]) and not aligned_start:
+        chunk_bounds = chunk_bounds.insert(0, inter_data.index[0])
+    if np.isnan(inter_data[-1]) and not aligned_end:
+        chunk_bounds = chunk_bounds.append(pd.DatetimeIndex([inter_data.index[-1]]))
+    chunk_bounds = chunk_bounds.unique()
+    flagger_new = flagger.initFlags(inter_data).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+
+    # block chunk ends of interpolation
+    flags_to_block = pd.Series(np.nan, index=chunk_bounds).astype(flagger_new.dtype)
+    flagger_new = flagger_new.setFlags(field, loc=chunk_bounds, flag=flags_to_block, force=True, inplace=True)
+
+    flagger = flagger.slice(drop=field).merge(flagger_new, subset=[field], inplace=True)
+    return data, flagger
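+
+
+# Usage sketch (illustrative only): "temp_raw" and the 10 minutes target grid are
+# hypothetical choices.
+def _sketch_proc_interpolateGrid(data, flagger):
+    # interpolate "temp_raw" onto an equidistant 10 minutes grid with order-2 polynomials;
+    # gaps spanning inter_limit (=3) or more grid points are left as nan
+    return proc_interpolateGrid(data, "temp_raw", flagger, freq="10min",
+                                method="polynomial", inter_order=2, inter_limit=3)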
+
+
+@register(masking='field')
+def proc_resample(
+    data,
+    field,
+    flagger,
+    freq,
+    agg_func=np.mean,
+    method="bagg",
+    max_invalid_total_d=np.inf,
+    max_invalid_consec_d=np.inf,
+    max_invalid_consec_f=np.inf,
+    max_invalid_total_f=np.inf,
+    flag_agg_func=max,
+    empty_intervals_flag=None,
+    to_drop=None,
+    all_na_2_empty=False,
+    freq_check=None,
+    **kwargs
+):
+    """
+    Function to resample the data. Afterwards the data will be sampled at regular (equidistant) timestamps
+    (or grid points). Sampling intervals therefore get aggregated with a function, specified by the 'agg_func' parameter,
+    and the result gets projected onto the new timestamps with a method, specified by "method". The following method
+    keywords are available:
+
+    * ``'nagg'``: all values in the range (+/- `freq`/2) of a grid point get aggregated with agg_func and assigned to it.
+    * ``'bagg'``: all values in a sampling interval get aggregated with agg_func and the result gets assigned to the last
+      grid point.
+    * ``'fagg'``: all values in a sampling interval get aggregated with agg_func and the result gets assigned to the next
+      grid point.
+
+
+    Note that, if possible, functions passed to agg_func will get projected internally onto pandas.resample methods,
+    which results in a reasonable performance boost - however, for this to work, you should pass functions that have
+    the ``__name__`` attribute initialised and the according method's name assigned to it.
+    Furthermore, you shouldn't pass numpy's nan-functions
+    (``nansum``, ``nanmean``, ...) because those, for example, have ``__name__ == 'nansum'`` and will thus not
+    trigger ``resample.func()``, but the slower ``resample.apply(nanfunc)``. Also, internally, no nans get passed to
+    the functions anyway, so there is no point in passing the nan functions.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-resampled.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        An Offset String, that will be interpreted as the frequency you want to resample your data with.
+    agg_func : Callable
+        The function you want to use for aggregation.
+    method: {'fagg', 'bagg', 'nagg'}, default 'bagg'
+        Specifies which intervals to be aggregated for a certain timestamp. (preceding, succeeding or
+        "surrounding" interval). See description above for more details.
+    max_invalid_total_d : {np.inf, int}, default np.inf
+        Maximum number of invalid (nan) datapoints, allowed per resampling interval. If max_invalid_total_d is
+        exceeded, the interval gets resampled to nan. By default (``np.inf``), there is no bound to the number of nan
+        values in an interval and only intervals containing ONLY nan values or those, containing no values at all,
+        get projected onto nan
+    max_invalid_consec_d : {np.inf, int}, default np.inf
+        Maximum number of consecutive invalid (nan) data points, allowed per resampling interval.
+        If max_invalid_consec_d is exceeded, the interval gets resampled to nan. By default (np.inf),
+        there is no bound to the number of consecutive nan values in an interval and only intervals
+        containing ONLY nan values, or those containing no values at all, get projected onto nan.
+    max_invalid_total_f : {np.inf, int}, default np.inf
+        Same as `max_invalid_total_d`, only applying for the flags. The flag regarded as "invalid" value,
+        is the one passed to empty_intervals_flag (default=``flagger.BAD``).
+        Also this is the flag assigned to invalid/empty intervals.
+    max_invalid_consec_f : {np.inf, int}, default np.inf
+        Same as `max_invalid_consec_d`, only applying onto the flags. The flag regarded as "invalid" value, is the one passed
+        to empty_intervals_flag (default=``flagger.BAD``). Also this is the flag assigned to invalid/empty intervals.
+    flag_agg_func : Callable, default: max
+        The function you want to aggregate the flags with. It should be capable of operating on the flags dtype
+        (usually ordered categorical).
+    empty_intervals_flag : {None, str}, default None
+        A Flag, that you want to assign to invalid intervals. Invalid are those intervals, that contain nan values only,
+        or no values at all. Furthermore the empty_intervals_flag is the flag serving as "invalid" identifier when
+        checking for the `max_invalid_total_f` and `max_invalid_consec_f` patterns. Default triggers ``flagger.BAD`` to be
+        assigned.
+    to_drop : {None, str, List[str]}, default None
+        Flags that refer to values you want to drop before resampling - effectively excluding values that are flagged
+        with a flag in to_drop from the resampling process - this means that they also will not be counted in the
+        `max_consec`/`max_total` evaluation. `to_drop` = ``None`` results in NO flags being dropped initially.
+    freq_check : {None, 'check', 'auto'}, default None
+
+        * ``None``: do not validate frequency-string passed to `freq`
+        * ``'check'``: estimate frequency and log a warning if the estimate mismatches the frequency string passed to 'freq', or
+          if no uniform sampling rate could be estimated
+        * ``'auto'``: estimate frequency and use estimate. (Ignores `freq` parameter.)
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relatively to the flagger input.
+    """
+
+    data = data.copy()
+    datcol = data[field]
+    flagscol = flagger.getFlags(field)
+    if empty_intervals_flag is None:
+        empty_intervals_flag = flagger.BAD
+
+    drop_mask = dropper(field, to_drop, flagger, [])
+    datcol.drop(datcol[drop_mask].index, inplace=True)
+    freq = evalFreqStr(freq, freq_check, datcol.index)
+    flagscol.drop(flagscol[drop_mask].index, inplace=True)
+    if all_na_2_empty:
+        if datcol.dropna().empty:
+            datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
+
+    if datcol.empty:
+        # for consistency reasons - return empty data/flags column when there is no valid data left
+        # after filtering.
+        data[field] = datcol
+        reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+        flagger = flagger.slice(drop=field).merge(reshaped_flagger, subset=[field], inplace=True)
+        return data, flagger
+
+    datcol = aggregate2Freq(
+        datcol,
+        method,
+        freq,
+        agg_func,
+        fill_value=np.nan,
+        max_invalid_total=max_invalid_total_d,
+        max_invalid_consec=max_invalid_consec_d,
+    )
+    flagscol = aggregate2Freq(
+        flagscol,
+        method,
+        freq,
+        flag_agg_func,
+        fill_value=empty_intervals_flag,
+        max_invalid_total=max_invalid_total_f,
+        max_invalid_consec=max_invalid_consec_f,
+    )
+
+    # data/flags reshaping:
+    data[field] = datcol
+    reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+    flagger = flagger.slice(drop=field).merge(reshaped_flagger, subset=[field], inplace=True)
+    return data, flagger
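+
+
+# Usage sketch (illustrative only): "temp_raw" and the hourly target frequency are
+# hypothetical choices.
+def _sketch_proc_resample(data, flagger):
+    # aggregate all values within +/- 30 minutes of every full hour ('nagg') with the
+    # arithmetic mean; flags are aggregated with max, so the worst flag wins
+    return proc_resample(data, "temp_raw", flagger, freq="1h", agg_func=np.mean,
+                         method="nagg", flag_agg_func=max)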
+
+
+@register(masking='field')
+def proc_shift(data, field, flagger, freq, method, to_drop=None, empty_intervals_flag=None, freq_check=None, **kwargs):
+    """
+    Function to shift data points to regular (equidistant) timestamps.
+    Values get shifted according to the keyword passed to the `method` parameter.
+
+    * ``'nshift'``: every grid point gets assigned the nearest value in its range. (range = +/- 0.5 * `freq`)
+    * ``'bshift'``:  every grid point gets assigned its first succeeding value - if there is one available in the
+      succeeding sampling interval.
+    * ``'fshift'``:  every grid point gets assigned its last preceding value - if there is one available in
+      the preceding sampling interval.
+
+    Note: all data nans get excluded from shifting by default. If `to_drop` is ``None``, all *BAD* flagged values get
+    excluded as well.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-shifted.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    freq : str
+        A frequency offset string that will be interpreted as the sampling rate you want the data to be shifted to.
+    method: {'fshift', 'bshift', 'nshift'}, default 'nshift'
+        Specifies if datapoints get propagated forwards, backwards or to the nearest grid timestamp. See function
+        description for more details.
+    empty_intervals_flag : {None, str}, default None
+        A Flag, that you want to assign to grid points, where no values are available to be shifted to.
+        Default triggers flagger.BAD to be assigned.
+    to_drop : {None, str, List[str]}, default None
+        Flags that refer to values you want to drop before shifting - effectively, excluding values that are flagged
+        with a flag in to_drop from the shifting process. Default - to_drop = None  - results in flagger.BAD
+        values being dropped initially.
+    freq_check : {None, 'check', 'auto'}, default None
+
+        * ``None``: do not validate frequency-string passed to `freq`
+        * ``'check'``: estimate frequency and log a warning if the estimate mismatches the frequency string passed to `freq`,
+          or if no uniform sampling rate could be estimated
+        * ``'auto'``: estimate frequency and use estimate. (Ignores `freq` parameter.)
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values and shape may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relatively to the flagger input.
+    """
+    data = data.copy()
+    datcol = data[field]
+    flagscol = flagger.getFlags(field)
+
+    if empty_intervals_flag is None:
+        empty_intervals_flag = flagger.BAD
+
+    drop_mask = dropper(field, to_drop, flagger, flagger.BAD)
+    drop_mask |= datcol.isna()
+    datcol[drop_mask] = np.nan
+    datcol.dropna(inplace=True)
+    freq = evalFreqStr(freq, freq_check, datcol.index)
+    if datcol.empty:
+        data[field] = datcol
+        reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+        flagger = flagger.slice(drop=field).merge(reshaped_flagger, subset=[field], inplace=True)
+        return data, flagger
+
+    flagscol.drop(drop_mask[drop_mask].index, inplace=True)
+
+    datcol = shift2Freq(datcol, method, freq, fill_value=np.nan)
+    flagscol = shift2Freq(flagscol, method, freq, fill_value=empty_intervals_flag)
+    data[field] = datcol
+    reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, inplace=True, **kwargs)
+    flagger = flagger.slice(drop=field).merge(reshaped_flagger, subset=[field], inplace=True)
+    return data, flagger
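+
+
+# Usage sketch (illustrative only): "temp_raw" and the 15 minutes grid are hypothetical.
+def _sketch_proc_shift(data, flagger):
+    # move every observation of "temp_raw" onto the nearest point of a regular
+    # 15 minutes grid (range +/- 7.5 minutes)
+    return proc_shift(data, "temp_raw", flagger, freq="15min", method="nshift")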
+
+
+@register(masking='field')
+def proc_transform(data, field, flagger, func, **kwargs):
+    """
+    Function to transform data columns with a transformation that maps series onto series of the same length.
+
+    Note, that flags get preserved.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-transformed.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    func : Callable
+        Function to transform data[field] with.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+    """
+    data = data.copy()
+    # NOTE: avoiding pd.Series.transform() in the line below, because transform processes columns element-wise
+    # (so interpolations wouldn't work)
+    new_col = pd.Series(func(data[field]), index=data[field].index)
+    data[field] = new_col
+    return data, flagger
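+
+
+# Usage sketch (illustrative only): "temp_raw" is a hypothetical field name.
+def _sketch_proc_transform(data, flagger):
+    # replace "temp_raw" by its elementwise natural logarithm; flags are preserved
+    return proc_transform(data, "temp_raw", flagger, func=np.log)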
+
+
+@register(masking='field')
+def proc_projectFlags(data, field, flagger, method, source, freq=None, to_drop=None, freq_check=None, **kwargs):
+
+    """
+    The function projects flags of "source" onto flags of "field". Wherever the "field" flags are "better" than the
+    source flags projected on them, they get overridden with this associated source flag value.
+
+    Which "field"-flags are to be projected on which source flags, is controlled by the "method" and "freq"
+    parameters.
+
+    method: (field_flag associated with "field", source_flags associated with "source")
+
+    'inverse_nagg' - all field_flags within the range +/- freq/2 of a source_flag, get assigned this source flags value.
+        (if source_flag > field_flag)
+    'inverse_bagg' - all field_flags succeeding a source_flag within the range of "freq", get assigned this source flags
+        value. (if source_flag > field_flag)
+    'inverse_fagg' - all field_flags preceding a source_flag within the range of "freq", get assigned this source flags
+        value. (if source_flag > field_flag)
+
+    'inverse_interpolation' - all field_flags within the range +/- freq of a source_flag, get assigned this source flags value.
+        (if source_flag > field_flag)
+
+    'inverse_nshift' - That field_flag within the range +/- freq/2, that is nearest to a source_flag, gets the source
+        flags value. (if source_flag > field_flag)
+    'inverse_bshift' - That field_flag succeeding a source flag within the range freq, that is nearest to a
+        source_flag, gets assigned this source flags value. (if source_flag > field_flag)
+    'inverse_fshift' - That field_flag preceding a source flag within the range freq, that is nearest to a
+        source_flag, gets assigned this source flags value. (if source_flag > field_flag)
+
+    'match' - any field_flag with a timestamp matching a source_flags timestamp gets this source_flags value
+    (if source_flag > field_flag)
+
+    Note, to undo or backtrack a resampling/shifting/interpolation that has been performed with a certain method,
+    you can just pass the associated "inverse" method. Also you should pass the same drop flags keyword.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to project the source-flags onto.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    method : {'inverse_fagg', 'inverse_bagg', 'inverse_nagg', 'inverse_fshift', 'inverse_bshift', 'inverse_nshift', 'inverse_interpolation', 'match'}
+        The method used for projection of source flags onto field flags. See description above for more details.
+    source : str
+        The source field of the flags projection.
+    freq : {None, str},default None
+        The freq determines the projection range for the projection method. See above description for more details.
+        By default (``None``), the sampling frequency of `source` is used.
+    to_drop : {None, str, List[str]}, default None
+        Flags referring to values that are to be dropped before the flags projection. Relevant only when projecting with an
+        inverted shift method. By default, ``flagger.BAD`` is listed.
+    freq_check : {None, 'check', 'auto'}, default None
+        - None: do not validate frequency-string passed to `freq`
+        - 'check': estimate frequency and log a warning if the estimate mismatches the frequency string passed to 'freq', or
+            if no uniform sampling rate could be estimated
+        - 'auto': estimate frequency and use estimate. (Ignores `freq` parameter.)
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values and shape may have changed relatively to the flagger input.
+    """
+    flagscol, metacols = flagger.getFlags(source, full=True)
+    if flagscol.empty:
+        return data, flagger
+    target_datcol = data[field]
+    target_flagscol, target_metacols = flagger.getFlags(field, full=True)
+
+    if (freq is None) and (method != "match"):
+        freq_check = 'auto'
+
+    freq = evalFreqStr(freq, freq_check, flagscol.index)
+
+    if method[-13:] == "interpolation":
+        backprojected = flagscol.reindex(target_flagscol.index, method="bfill", tolerance=freq)
+        fwrdprojected = flagscol.reindex(target_flagscol.index, method="ffill", tolerance=freq)
+        b_replacement_mask = (backprojected > target_flagscol) & (backprojected >= fwrdprojected)
+        f_replacement_mask = (fwrdprojected > target_flagscol) & (fwrdprojected > backprojected)
+        target_flagscol.loc[b_replacement_mask] = backprojected.loc[b_replacement_mask]
+        target_flagscol.loc[f_replacement_mask] = fwrdprojected.loc[f_replacement_mask]
+
+        backprojected_meta = {}
+        fwrdprojected_meta = {}
+        for meta_key in target_metacols.keys():
+            backprojected_meta[meta_key] = metacols[meta_key].reindex(target_metacols[meta_key].index, method='bfill',
+                                                                      tolerance=freq)
+            fwrdprojected_meta[meta_key] = metacols[meta_key].reindex(target_metacols[meta_key].index, method='ffill',
+                                                                      tolerance=freq)
+            target_metacols[meta_key].loc[b_replacement_mask] = backprojected_meta[meta_key].loc[b_replacement_mask]
+            target_metacols[meta_key].loc[f_replacement_mask] = fwrdprojected_meta[meta_key].loc[f_replacement_mask]
+
+    if method[-3:] == "agg" or method == "match":
+        # Aggregation - Inversion
+        projection_method = METHOD2ARGS[method][0]
+        tolerance = METHOD2ARGS[method][1](freq)
+        flagscol = flagscol.reindex(target_flagscol.index, method=projection_method, tolerance=tolerance)
+        replacement_mask = flagscol > target_flagscol
+        target_flagscol.loc[replacement_mask] = flagscol.loc[replacement_mask]
+        for meta_key in target_metacols.keys():
+            metacols[meta_key] = metacols[meta_key].reindex(target_metacols[meta_key].index, method=projection_method,
+                                                            tolerance=tolerance)
+            target_metacols[meta_key].loc[replacement_mask] = metacols[meta_key].loc[replacement_mask]
+
+    if method[-5:] == "shift":
+        # NOTE: although inverting a simple shift seems to be a less complex operation, it has quite some
+        # code assigned to it and appears to be more verbose than inverting aggregation -
+        # that owes itself to the problem of BAD/invalid values blocking a proper
+        # shift inversion and having to be outsorted before shift inversion and re-inserted afterwards.
+        #
+        # starting with the dropping and its memorization:
+
+        drop_mask = dropper(field, to_drop, flagger, flagger.BAD)
+        drop_mask |= target_datcol.isna()
+        target_flagscol_drops = target_flagscol[drop_mask]
+        target_flagscol.drop(drop_mask[drop_mask].index, inplace=True)
+
+        # shift inversion
+        projection_method = METHOD2ARGS[method][0]
+        tolerance = METHOD2ARGS[method][1](freq)
+        flags_merged = pd.merge_asof(
+            flagscol,
+            pd.Series(target_flagscol.index.values, index=target_flagscol.index, name="pre_index"),
+            left_index=True,
+            right_index=True,
+            tolerance=tolerance,
+            direction=projection_method,
+        )
+        flags_merged.dropna(subset=["pre_index"], inplace=True)
+        flags_merged = flags_merged.set_index(["pre_index"]).squeeze()
+
+        # write flags to target
+        replacement_mask = flags_merged > target_flagscol.loc[flags_merged.index]
+        target_flagscol.loc[replacement_mask[replacement_mask].index] = flags_merged.loc[replacement_mask]
+
+        # reinsert drops
+        target_flagscol = target_flagscol.reindex(target_flagscol.index.join(target_flagscol_drops.index, how="outer"))
+        target_flagscol.loc[target_flagscol_drops.index] = target_flagscol_drops.values
+
+        for meta_key in target_metacols.keys():
+            target_metadrops = target_metacols[meta_key][drop_mask]
+            target_metacols[meta_key].drop(drop_mask[drop_mask].index, inplace=True)
+            meta_merged = pd.merge_asof(
+                metacols[meta_key],
+                pd.Series(target_metacols[meta_key].index.values, index=target_metacols[meta_key].index,
+                          name="pre_index"),
+                left_index=True,
+                right_index=True,
+                tolerance=tolerance,
+                direction=projection_method,
+            )
+            meta_merged.dropna(subset=["pre_index"], inplace=True)
+            meta_merged = meta_merged.set_index(["pre_index"]).squeeze()
+            # write meta to target and reinsert drops
+            target_metacols[meta_key][replacement_mask[replacement_mask].index] = meta_merged[replacement_mask]
+            target_metacols[meta_key] = target_metacols[meta_key].reindex(
+                target_metacols[meta_key].index.join(target_metadrops.index, how="outer"))
+            target_metacols[meta_key].loc[target_metadrops.index] = target_metadrops.values
+
+    flagger = flagger.setFlags(field, flag=target_flagscol, with_extra=True, **target_metacols)
+    return data, flagger
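+
+
+# Usage sketch (illustrative only): assumes "temp_raw" was copied to "temp_raw_original"
+# (e.g. via proc_fork) before "temp_raw" was resampled to an hourly grid with method
+# 'nagg' - the sketch projects the flags of the resampled variable back onto the
+# original timestamps.
+def _sketch_proc_projectFlags(data, flagger):
+    return proc_projectFlags(data, "temp_raw_original", flagger, method="inverse_nagg",
+                             source="temp_raw", freq="1h")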
+
+
+@register(masking='none')
+def proc_fork(data, field, flagger, suffix=ORIGINAL_SUFFIX, **kwargs):
+    """
+    The function generates a copy of the data "field" and inserts it under the name field + suffix into the existing
+    data.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to fork (copy).
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    suffix: str
+        Substring to append to the forked data variables name.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        data shape may have changed relatively to the flagger input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags shape may have changed relatively to the flagger input.
+    """
+    return proc_copy(data, field, flagger, newfield=str(field) + suffix, **kwargs)
+
+
+@register(masking='none')
+def proc_copy(data, field, flagger, newfield, **kwargs):
+    """
+    The function generates a copy of the data "field" and inserts it under the name `newfield` into the existing
+    data.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column you want to copy.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    newfield: str
+        The fieldname the copy of `field` is inserted under.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        data shape may have changed relatively to the flagger input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags shape may have changed relatively to the flagger input.
+    """
+
+    if newfield in flagger.flags.columns.union(data.columns):
+        raise ValueError(f"{newfield}: field already exists")
+
+    flags, extras = flagger.getFlags(field, full=True)
+    newflagger = flagger.replaceField(newfield, flags=flags, **extras)
+    newdata = data.copy()
+    newdata[newfield] = data[field].copy()
+    return newdata, newflagger
+
+
+@register(masking='none')
+def proc_drop(data, field, flagger, **kwargs):
+    """
+    The function drops field from the data dios and the flagger.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to drop.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        data shape may have changed relatively to the flagger input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags shape may have changed relatively to the flagger input.
+    """
+
+    data = data.copy()
+    del data[field]
+    flagger = flagger.replaceField(field, flags=None)
+    return data, flagger
+
+
+@register(masking='none')
+def proc_rename(data, field, flagger, new_name, **kwargs):
+    """
+    The function renames field to new name (in both, the flagger and the data).
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to rename.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    new_name : str
+        The new fieldname, `field` is to be renamed to.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+    """
+    # store
+    s = data[field]
+    f, e = flagger.getFlags(field, full=True)
+
+    # delete
+    data = data.copy()
+    del data[field]
+    flagger = flagger.replaceField(field, flags=None)
+
+    # insert
+    data[new_name] = s
+    flagger = flagger.replaceField(new_name, inplace=True, flags=f, **e)
+
+    return data, flagger
+
+
+def _drift_fit(x, shift_target, cal_mean):
+    x_index = x.index - x.index[0]
+    x_data = x_index.total_seconds().values
+    x_data = x_data / x_data[-1]
+    y_data = x.values
+    origin_mean = np.mean(y_data[:cal_mean])
+    target_mean = np.mean(y_data[-cal_mean:])
+
+    def modelWrapper(x, c, a=origin_mean, target_mean=target_mean):
+        # final fitted curves val = target mean
+        b = (target_mean - a) / (np.exp(c) - 1)
+        return expModelFunc(x, a, b, c)
+
+    dataFitFunc = functools.partial(modelWrapper, a=origin_mean, target_mean=target_mean)
+
+    try:
+        fitParas, _ = curve_fit(dataFitFunc, x_data, y_data, bounds=([0], [np.inf]))
+        dataFit = dataFitFunc(x_data, fitParas[0])
+        b_val = (shift_target - origin_mean) / (np.exp(fitParas[0]) - 1)
+        dataShiftFunc = functools.partial(expModelFunc, a=origin_mean, b=b_val, c=fitParas[0])
+        dataShift = dataShiftFunc(x_data)
+    except RuntimeError:
+        dataFit = np.array([0] * len(x_data))
+        dataShift = np.array([0] * len(x_data))
+
+    return dataFit, dataShift
+
+
+@register(masking='all')
+def proc_seefoExpDriftCorrecture(data, field, flagger, maint_data_field, cal_mean=5, flag_maint_period=False,
+                                 check_maint='1h', **kwargs):
+    """
+    The function fits an exponential model to chunks of data[field].
+    It is assumed that between maintenance events, there is a drift effect shifting the measurements in a way that
+    can be described by the model M:
+
+    M(t, a, b, c) = a + b * (exp(c * t) - 1)
+
+    The values y_0 and y_1, describing the mean value directly after the last maintenance event (y_0) and
+    directly before the next maintenance event (y_1), impose the following additional conditions on the drift model:
+
+    M(0, a, b, c) = y0
+    M(1, a, b, c) = y1
+
+    Solving these equations, one obtains the one-parameter model:
+
+    M_drift(t, c) = y0 + [(y1 - y0) / (exp(c) - 1)] * (exp(c * t) - 1)
+
+    which is fitted for every data chunk in between maintenance events.
+
+    After having found the optimal parameter c*, the correction is performed by bending the fitted curve M_drift(t, c*)
+    in a way that it matches y2 at t=1 (with y2 being the mean value observed directly after the end of the next
+    maintenance event).
+    This bent curve is given by:
+
+    M_shift(t, c*) = M(t, y0, [(y2 - y0) / (exp(c*) - 1)], c*)
+
+    And the new values at t are computed via:
+
+    new_vals(t) = old_vals(t) + M_shift(t) - M_drift(t)
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to correct.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    maint_data_field : str
+        The fieldname of the datacolumn holding the maintenance information.
+        The maintenance data is expected to have the following form:
+        the series' timestamps represent the beginnings of the
+        maintenance events, whereas the values represent the endings of the maintenance intervals.
+    cal_mean : int, default 5
+        The number of values the mean is computed over, for obtaining the value level directly after and
+        directly before a maintenance event. These values are needed for shift calibration. (see above description)
+    flag_maint_period : bool, default False
+        Whether or not to flag the values obtained during maintenance as BAD.
+    check_maint : str, default '1h'
+        Whether or not to check if the reported maintenance intervals are plausible.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relatively to the data input.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relatively to the flagger input.
+    """
+
+
+    # 1: extract fit intervals:
+    if data[maint_data_field].empty:
+        return data, flagger
+    data = data.copy()
+    to_correct = data[field]
+    maint_data = data[maint_data_field]
+    drift_frame = pd.DataFrame({"drift_group": np.nan, to_correct.name: to_correct.values}, index=to_correct.index)
+
+    # group the drift frame
+    for k in range(0, maint_data.shape[0] - 1):
+        # assign group numbers for the timespans in between one maintenance ending and the beginning of the next
+        # maintenance time itself remains np.nan assigned
+        drift_frame.loc[maint_data.values[k] : pd.Timestamp(maint_data.index[k + 1]), "drift_group"] = k
+    drift_grouper = drift_frame.groupby("drift_group")
+    # define target values for correction
+    shift_targets = drift_grouper.aggregate(lambda x: x[:cal_mean].mean()).shift(-1)
+
+    for k, group in drift_grouper:
+        dataSeries = group[to_correct.name]
+        dataFit, dataShiftTarget = _drift_fit(dataSeries, shift_targets.loc[k, :][0], cal_mean)
+        dataFit = pd.Series(dataFit, index=group.index)
+        dataShiftTarget = pd.Series(dataShiftTarget, index=group.index)
+        dataShiftVektor = dataShiftTarget - dataFit
+        shiftedData = dataSeries + dataShiftVektor
+        to_correct[shiftedData.index] = shiftedData
+
+    if flag_maint_period:
+        to_flag = drift_frame["drift_group"]
+        to_flag = to_flag.drop(to_flag[: maint_data.index[0]].index)
+        to_flag = to_flag[to_flag.isna()]
+        flagger = flagger.setFlags(field, loc=to_flag, **kwargs)
+
+    data[field] = to_correct
+
+    return data, flagger
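+
+
+# Usage sketch (illustrative only): "conductivity_raw" and "maintenance" are hypothetical
+# field names; "maintenance" is assumed to hold maintenance periods in the form described
+# above (timestamps = maintenance starts, values = maintenance ends).
+def _sketch_proc_seefoExpDriftCorrecture(data, flagger):
+    # correct the exponential drift between maintenance events and flag the
+    # maintenance periods themselves
+    return proc_seefoExpDriftCorrecture(data, "conductivity_raw", flagger,
+                                        maint_data_field="maintenance", cal_mean=5,
+                                        flag_maint_period=True)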
+
+
+@register(masking='all')
+def proc_seefoLinearDriftCorrecture(data, field, flagger, x_field, y_field, **kwargs):
+    """
+    Train a linear model that predicts data[y_field] from data[x_field] via least squares fit: y = x_1 * x + x_0.
+
+    Then correct data[field] via:
+
+    data[field] = data[field] * x_1 + x_0
+
+    Note that data[x_field] and data[y_field] must be of equal length.
+    (Also, you will likely want them to be sampled at the same timestamps.)
+
+    Parameters
+    ----------
+    x_field : str
+        Field name of the x data.
+    y_field : str
+        Field name of the y data.
+
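+    Notes
+    -----
+    A minimal sketch of the fit-and-correct step on plain numpy arrays (purely illustrative;
+    the arrays below merely stand in for data[x_field], data[y_field] and data[field])::
+
+        import numpy as np
+        from sklearn.linear_model import LinearRegression
+
+        x = np.array([0.0, 1.0, 2.0, 3.0])        # stand-in for data[x_field]
+        y = np.array([0.1, 2.1, 3.9, 6.1])        # stand-in for data[y_field]
+        reg = LinearRegression().fit(x.reshape(-1, 1), y)
+        vals = np.array([1.0, 2.0, 3.0])          # stand-in for data[field]
+        corrected = vals * reg.coef_[0] + reg.intercept_
+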
+    """
+    data = data.copy()
+    datcol = data[field]
+    reg = LinearRegression()
+    reg.fit(data[x_field].values.reshape(-1,1), data[y_field].values)
+    datcol = (datcol * reg.coef_[0]) + reg.intercept_
+    data[field] = datcol
+    return data, flagger
+
+
+@register(masking='all')
+def proc_correctRegimeAnomaly(data, field, flagger, cluster_field, model, regime_transmission=None, x_date=False):
+    """
+    The function fits the passed model to the different regimes in data[field] and tries to correct
+    those values that were assigned a negative label by data[cluster_field].
+
+    Currently, the only supported correction mode is "parameter propagation".
+
+    This means that any regime :math:`z`, labeled negatively and modeled by the parameters :math:`p`, gets corrected via
+
+    :math:`z_{correct} = z + (m(p^*) - m(p))`,
+
+    where :math:`p^*` denotes the parameter set belonging to the fit of the nearest not-negatively labeled cluster.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column you want to correct.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+    cluster_field : str
+        A string denoting the field in data, holding the cluster labels for the data you want to correct.
+    model : Callable
+        The model function to be fitted to the regimes.
+        It must be a function of the form :math:`f(x, *p)`, where :math:`x` is the ``numpy.array`` holding the
+        independent variables and :math:`p` are the model parameters that are to be obtained by fitting.
+        Depending on the `x_date` parameter, the independent variable x will either be the timestamps
+        of every regime transformed to seconds from epoch, or seconds counted from the regime's start.
+        (See the sketch in the Notes section below for a model of the required form.)
+    regime_transmission : {None, str}, default None
+        If an offset string is passed, a data chunk of length `regime_transmission` right at the
+        start and right at the end of every regime is ignored when fitting the model. This is to account for the
+        unreliability of data near the changepoints of regimes.
+    x_date : bool, default False
+        If True, use "seconds from epoch" as x input to the model func, instead of "seconds from regime start".
+
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger
+        The flagger object, holding flags and additional information related to `data`.
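+
+    Notes
+    -----
+    A minimal sketch of a model of the required form :math:`f(x, *p)` and of the "parameter
+    propagation" shift (toy data, illustrative only, not the exact internals of this function)::
+
+        import numpy as np
+        from scipy.optimize import curve_fit
+
+        def model(x, a, b):                  # f(x, *p) with p = (a, b)
+            return a * x + b
+
+        x = np.arange(10, dtype=float)       # e.g. seconds from regime start
+        z = 2.0 * x + 1.0                    # negatively labeled regime
+        z_ref = 2.0 * x + 5.0                # nearest not-negatively labeled regime
+        p, _ = curve_fit(model, x, z)
+        p_star, _ = curve_fit(model, x, z_ref)
+        z_correct = z + (model(x, *p_star) - model(x, *p))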
+    """
+
+    cluster_ser = data[cluster_field]
+    unique_successive = pd.unique(cluster_ser.values)
+    data_ser = data[field]
+    regimes = data_ser.groupby(cluster_ser)
+    para_dict = {}
+    x_dict = {}
+    x_mask = {}
+    if regime_transmission is not None:
+        # get seconds
+        regime_transmission = pd.Timedelta(regime_transmission).total_seconds()
+    for label, regime in regimes:
+        if x_date is False:
+            # get seconds data:
+            xdata = (regime.index - regime.index[0]).to_numpy(dtype=float)*10**(-9)
+        else:
+            # get seconds from epoch data
+            xdata = regime.index.to_numpy(dtype=float)*10**(-9)
+        ydata = regime.values
+        valid_mask = ~np.isnan(ydata)
+        if regime_transmission is not None:
+            valid_mask &= (xdata > xdata[0] + regime_transmission)
+            valid_mask &= (xdata < xdata[-1] - regime_transmission)
+        try:
+            p, pcov = curve_fit(model, xdata[valid_mask], ydata[valid_mask])
+        except (RuntimeError, ValueError):
+            p = np.array([np.nan])
+        para_dict[label] = p
+        x_dict[label] = xdata
+        x_mask[label] = valid_mask
+
+    first_normal = unique_successive > 0
+    first_valid = np.array([~pd.isna(para_dict[unique_successive[i]]).any() for i in range(0, unique_successive.shape[0])])
+    first_valid = np.where(first_normal & first_valid)[0][0]
+    last_valid = 1
+
+    for k in range(0, unique_successive.shape[0]):
+        if (unique_successive[k] < 0) & (not pd.isna(para_dict[unique_successive[k]]).any()):
+            ydata = data_ser[regimes.groups[unique_successive[k]]].values
+            xdata = x_dict[unique_successive[k]]
+            ypara = para_dict[unique_successive[k]]
+            if k > 0:
+                target_para = para_dict[unique_successive[k-last_valid]]
+            else:
+                # first regime has no "last valid" to its left, so we use first valid to the right:
+                target_para = para_dict[unique_successive[k + first_valid]]
+            y_shifted = ydata + (model(xdata, *target_para) - model(xdata, *ypara))
+            data_ser[regimes.groups[unique_successive[k]]] = y_shifted
+            if k > 0:
+                last_valid += 1
+        elif pd.isna(para_dict[unique_successive[k]]).any() & (k > 0):
+            last_valid += 1
+        else:
+            last_valid = 1
+
+    data[field] = data_ser
+    return data, flagger
+
+
+@register(masking='all')
+def proc_offsetCorrecture(data, field, flagger, max_mean_jump, normal_spread, search_winsz, min_periods,
+                          regime_transmission=None):
+    """
+    Correct data[field] for offsets ("jumps" of the mean): changepoints in the mean are detected via a sliding
+    window search, the data is clustered into regimes accordingly, and regimes whose mean deviates from the
+    "normal group" of regimes by more than `normal_spread` get shifted onto the normal value level.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the data column, you want to correct.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+    max_mean_jump : float
+        When searching for changepoints in the mean, this is the threshold a mean difference in the
+        sliding window search must exceed to trigger changepoint detection.
+    normal_spread : float
+        Threshold denoting the maximum absolute difference in mean that regimes may have,
+        to still form the "normal group" of values.
+    search_winsz : str
+        Size of the adjacent windows that are used to search for the mean changepoints.
+    min_periods : int
+        Minimum number of periods a search window has to contain, for the result of the changepoint
+        detection to be considered valid.
+    regime_transmission : {None, str}, default None
+        If an offset string is passed, a data chunk of length `regime_transmission` right after the
+        start and right before the end of any regime is ignored when calculating the regime's mean for the data
+        correction. This is to account for the unreliability of data near the changepoints of regimes.
+
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+        Data values may have changed relative to the data input.
+    flagger : saqc.flagger
+        The flagger object, holding flags and additional information related to `data`.
+
+    """
+
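+    # processing sketch: fork the field into a temporary '_CPcluster' column, cluster it into
+    # mean regimes via changepoint detection, label regimes deviating from the "normal group"
+    # (without setting flags), shift the anomalous regimes onto the mean level of their reference
+    # regime and finally drop the temporary cluster column again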
+    data, flagger = proc_fork(data, field, flagger, '_CPcluster')
+    data, flagger = modelling_changePointCluster(data, field + '_CPcluster', flagger,
+                                                 lambda x, y: np.abs(np.mean(x) - np.mean(y)),
+                                                 lambda x, y: max_mean_jump,
+                                                 bwd_window=search_winsz,
+                                                 min_periods_bwd=min_periods)
+    data, flagger = breaks_flagRegimeAnomaly(data, field, flagger, field + '_CPcluster', normal_spread, set_flags=False)
+    data, flagger = proc_correctRegimeAnomaly(data, field, flagger, field + '_CPcluster',
+                                              lambda x, p1: np.array([p1] * x.shape[0]),
+                                              regime_transmission=regime_transmission)
+    data, flagger = proc_drop(data, field + '_CPcluster', flagger)
+
+    return data, flagger
diff --git a/saqc/funcs/register.py b/saqc/funcs/register.py
deleted file mode 100644
index b973b71b6952794e4adacdb55f1b6bb33572fd66..0000000000000000000000000000000000000000
--- a/saqc/funcs/register.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env python
-
-from functools import partial
-from inspect import signature, _VAR_KEYWORD
-
-
-class Partial(partial):
-    def __init__(self, func, *args, **kwargs):
-        self._signature = signature(func)
-
-    @property
-    def signature(self):
-        out = []
-        for k, v in self._signature.parameters.items():
-            if v.kind != _VAR_KEYWORD:
-                out.append(k)
-        return tuple(out)
-
-
-# NOTE: will be filled by calls to register
-FUNC_MAP = {}
-
-
-def register():
-    def outer(func):
-        name = func.__name__
-        func = Partial(func, func_name=name)
-        FUNC_MAP[name] = func
-
-        def inner(*args, **kwargs):
-            return func(*args, **kwargs)
-
-        return inner
-
-    return outer
diff --git a/saqc/funcs/soil_moisture_tests.py b/saqc/funcs/soil_moisture_tests.py
index 1147cd662ea1f3f7766fbf8f47405d8491b0fda8..ecbe911a6a76f47e4ab6bf41b30dd95718d4c45f 100644
--- a/saqc/funcs/soil_moisture_tests.py
+++ b/saqc/funcs/soil_moisture_tests.py
@@ -4,16 +4,17 @@
 import numpy as np
 import pandas as pd
 import joblib
+import dios
 from scipy.signal import savgol_filter
 
 from saqc.funcs.breaks_detection import breaks_flagSpektrumBased
 from saqc.funcs.spikes_detection import spikes_flagSpektrumBased
 from saqc.funcs.constants_detection import constants_flagVarianceBased
-from saqc.funcs.register import register
+from saqc.core.register import register
 from saqc.lib.tools import retrieveTrustworthyOriginal
 
 
-@register()
+@register(masking='field')
 def sm_flagSpikes(
     data,
     field,
@@ -25,14 +26,59 @@ def sm_flagSpikes(
     noise_thresh=1,
     smooth_window="3h",
     smooth_poly_deg=2,
-    **kwargs
+    **kwargs,
 ):
 
     """
-    The Function provides just a call to flagSpikes_spektrumBased, with parameter defaults, that refer to:
+    The function provides just a call to ``spikes_flagSpektrumBased``, with parameter defaults
+    that refer to reference [1].
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    raise_factor : float, default 0.15
+        Minimum relative value difference between two values to consider the latter as a spike candidate.
+        See condition (1) (or reference [2]).
+    deriv_factor : float, default 0.2
+        See condition (2) (or reference [2]).
+    noise_func : {'CoVar', 'rVar'}, default 'CoVar'
+        Function to calculate noisiness of the data surrounding potential spikes.
+        ``'CoVar'``: Coefficient of Variation
+        ``'rVar'``: Relative Variance
+    noise_window : str, default '12h'
+        An offset string that determines the range of the time window of the "surrounding" data of a potential spike.
+        See condition (3) (or reference [2]).
+    noise_thresh : float, default 1
+        Upper threshold for noisiness of data surrounding potential spikes. See condition (3) (or reference [2]).
+    smooth_window : {None, str}, default "3h"
+        Size of the smoothing window of the Savitzky-Golay filter.
+        If ``None`` is passed, the window size will be two times the sampling rate (i.e. containing three values).
+    smooth_poly_deg : int, default 2
+        Degree of the polynomial used for fitting with the Savitzky-Golay filter.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    References
+    ----------
+    This function is a generalization of the spectrum based spike flagging mechanism presented in:
+
+    [1] Dorigo, W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the International Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
+
+    [2] https://git.ufz.de/rdm-software/saqc/-/blob/testfuncDocs/docs/funcs/FormalDescriptions.md#spikes_flagspektrumbased
 
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
     """
 
     return spikes_flagSpektrumBased(
@@ -46,11 +92,11 @@ def sm_flagSpikes(
         noise_thresh=noise_thresh,
         smooth_window=smooth_window,
         smooth_poly_deg=smooth_poly_deg,
-        **kwargs
+        **kwargs,
     )
 
 
-@register()
+@register(masking='field')
 def sm_flagBreaks(
     data,
     field,
@@ -64,14 +110,61 @@ def sm_flagBreaks(
     smooth=False,
     smooth_window="3h",
     smooth_poly_deg=2,
-    **kwargs
+    **kwargs,
 ):
 
     """
-    The Function provides just a call to flagBreaks_spektrumBased, with parameter defaults that refer to:
-
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
+    The function provides just a call to ``breaks_flagSpektrumBased``, with parameter defaults that refer to
+    reference [1].
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    thresh_rel : float, default 0.1
+        Float in [0,1]. See (1) of the function description above to learn more.
+    thresh_abs : float, default 0.01
+        Float > 0. See (2) of the function description above to learn more.
+    first_der_factor : float, default 10
+        Float > 0. See (3) of the function description above to learn more.
+    first_der_window_range : str, default '12h'
+        Offset string. See (3) of the function description above to learn more.
+    scnd_der_ratio_margin_1 : float, default 0.05
+        Float in [0,1]. See (4) of the function description above to learn more.
+    scnd_der_ratio_margin_2 : float, default 10
+        See (5) of the function description above to learn more.
+    smooth : bool, default False
+        Method for obtaining the data series' derivatives.
+        * False: Just take series step differences (default)
+        * True: Smooth data with a Savitzky-Golay filter before differentiating.
+    smooth_window : {None, str}, default '3h'
+        Effective only if `smooth` = True.
+        Offset string. Size of the filter window, used to calculate the derivatives.
+    smooth_poly_deg : int, default 2
+        Effective only if `smooth` = True.
+        Polynomial order, used for smoothing with the Savitzky-Golay filter.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    References
+    ----------
+    [1] Dorigo, W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the International Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
+
+    Find a brief mathematical description of the function here:
+
+    [2] https://git.ufz.de/rdm-software/saqc/-/blob/testfuncDocs/docs/funcs/FormalDescriptions.md#breaks_flagspektrumbased
 
     """
     return breaks_flagSpektrumBased(
@@ -87,35 +180,52 @@ def sm_flagBreaks(
         smooth=smooth,
         smooth_window=smooth_window,
         smooth_poly_deg=smooth_poly_deg,
-        **kwargs
+        **kwargs,
     )
 
 
-@register()
+@register(masking='all')
 def sm_flagFrost(data, field, flagger, soil_temp_variable, window="1h", frost_thresh=0, **kwargs):
 
-    """This Function is an implementation of the soil temperature based Soil Moisture flagging, as presented in:
-
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
+    """
+    This function is an implementation of the soil temperature based soil moisture flagging, as presented in
+    reference [1].
 
     All parameters default to the values, suggested in this publication.
 
     Function flags Soil moisture measurements by evaluating the soil-frost-level in the moment of measurement.
     Soil temperatures below "frost_level" are regarded as denoting frozen soil state.
 
-    :param data:                        The pandas dataframe holding the data-to-be flagged, as well as the reference
-                                        series. Data must be indexed by a datetime series.
-    :param field:                       Fieldname of the Soil moisture measurements field in data.
-    :param flagger:                     A flagger - object.
-                                        like thingies that refer to the data(including datestrings).
-    :param tolerated_deviation:         Offset String. Denoting the maximal temporal deviation,
-                                        the soil frost states timestamp is allowed to have, relative to the
-                                        data point to-be-flagged.
-    :param soil_temp_reference:         A STRING, denoting the fields name in data,
-                                        that holds the data series of soil temperature values,
-                                        the to-be-flagged values shall be checked against.
-    :param frost_level:                 Value level, the flagger shall check against, when evaluating soil frost level.
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    soil_temp_variable : str
+        The field name in data that holds the data series of soil temperature values, which the to-be-flagged
+        values shall be checked against.
+    window : str, default '1h'
+        An offset string denoting the maximal temporal deviation the soil frost state's timestamp is allowed to have,
+        relative to the data point to-be-flagged.
+    frost_thresh : float, default 0
+        Value level the flagger checks against when evaluating the soil frost level.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    References
+    ----------
+    [1] Dorigo, W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the International Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
     """
 
     # retrieve reference series
@@ -138,7 +248,7 @@ def sm_flagFrost(data, field, flagger, soil_temp_variable, window="1h", frost_th
     return data, flagger
 
 
-@register()
+@register(masking='all')
 def sm_flagPrecipitation(
     data,
     field,
@@ -151,13 +261,12 @@ def sm_flagPrecipitation(
     std_factor=2,
     std_window="24h",
     ignore_missing=False,
-    **kwargs
+    **kwargs,
 ):
 
-    """This Function is an implementation of the precipitation based Soil Moisture flagging, as presented in:
-
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
+    """
+    This function is an implementation of the precipitation based soil moisture flagging, as presented in
+    reference [1].
 
     All parameters default to the values, suggested in this publication. (excluding porosity,sensor accuracy and
     sensor depth)
@@ -171,9 +280,9 @@ def sm_flagPrecipitation(
 
     A data point y_t is flagged an invalid soil moisture raise, if:
 
-    (1) y_t > y_(t-raise_window)
-    (2) y_t - y_(t-"std_factor_range") > "std_factor" * std(y_(t-"std_factor_range"),...,y_t)
-    (3) sum(prec(t-24h),...,prec(t)) > sensor_depth * sensor_accuracy * soil_porosity
+    (1) y_t > y_(t-`raise_window`)
+    (2) y_t - y_(t-`std_factor_range`) > `std_factor` * std(y_(t-`std_factor_range`),...,y_t)
+    (3) sum(prec(t-24h),...,prec(t)) > `sensor_depth` * `sensor_accuracy` * `soil_porosity`
 
     NOTE1: np.nan entries in the input precipitation series will be regarded as susipicious and the test will be
     omited for every 24h interval including a np.nan entrie in the original precipitation sampling rate.
@@ -182,27 +291,57 @@ def sm_flagPrecipitation(
     NOTE2: The function wont test any values that are flagged suspicious anyway - this may change in a future version.
 
 
-    :param data:                        The pandas dataframe holding the data-to-be flagged, as well as the reference
-                                        series. Data must be indexed by a datetime series and be harmonized onto a
-                                        time raster with seconds precision.
-    :param field:                       Fieldname of the Soil moisture measurements field in data.
-    :param flagger:                     A flagger - object. (saqc.flagger.X)
-    :param prec_variable:               Fieldname of the precipitation meassurements column in data.
-    :param sensor_depth:                Measurement depth of the soil moisture sensor, [m].
-    :param sensor_accuracy:             Accuracy of the soil moisture sensor, [-].
-    :param soil_porosity:               Porosity of moisture sensors surrounding soil, [-].
-    :param std_factor:                  The value determines by which rule it is decided, weather a raise in soil
-                                        moisture is significant enough to trigger the flag test or not:
-                                        Significants is assumed, if the raise is  greater then "std_factor" multiplied
-                                        with the last 24 hours standart deviation.
-    :param std_factor_range:            Offset String. Denotes the range over witch the standart deviation is obtained,
-                                        to test condition [2]. (Should be a multiple of the sampling rate)
-    :param raise_window:                Offset String. Denotes the distance to the datapoint, relatively to witch
-                                        it is decided if the current datapoint is a raise or not. Equation [1].
-                                        It defaults to None. When None is passed, raise_window is just the sample
-                                        rate of the data. Any raise reference must be a multiple of the (intended)
-                                        sample rate and below std_factor_range.
-    :param ignore_missing:
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    prec_variable : str
+        Fieldname of the precipitation measurements column in data.
+    raise_window : {None, str}, default None
+        Denotes the distance to the datapoint, relative to which
+        it is decided if the current datapoint is a raise or not. Equation [1].
+        It defaults to None. When None is passed, raise_window is just the sample
+        rate of the data. Any raise reference must be a multiple of the (intended)
+        sample rate and below std_factor_range.
+    sensor_depth : float, default 0
+        Measurement depth of the soil moisture sensor, [m].
+    sensor_accuracy : float, default 0
+        Accuracy of the soil moisture sensor, [-].
+    soil_porosity : float, default 0
+        Porosity of moisture sensors surrounding soil, [-].
+    std_factor : int, default 2
+        The value determines by which rule it is decided whether a raise in soil
+        moisture is significant enough to trigger the flag test or not:
+        Significance is assumed, if the raise is greater than "std_factor" multiplied
+        with the last 24 hours' standard deviation.
+    std_window : str, default '24h'
+        An offset string that denotes the range over which the standard deviation is obtained,
+        to test condition [2]. (Should be a multiple of the sampling rate.)
+    ignore_missing: bool, default False
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    References
+    ----------
+    [1] Dorigo, W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the International Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
     """
 
     dataseries, moist_rate = retrieveTrustworthyOriginal(data, field, flagger)
@@ -238,14 +377,12 @@ def sm_flagPrecipitation(
 
     dataseries = dataseries[raise_mask & std_mask]
     invalid_indices = prec_count[dataseries.index] <= sensor_depth * sensor_accuracy * soil_porosity
-    invalid_indices = invalid_indices[invalid_indices]
 
-    # set Flags
-    flagger = flagger.setFlags(field, loc=invalid_indices.index, **kwargs)
+    flagger = flagger.setFlags(field, loc=invalid_indices, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def sm_flagConstants(
     data,
     field,
@@ -260,20 +397,66 @@ def sm_flagConstants(
     max_consec_missing=None,
     smooth_window=None,
     smooth_poly_deg=2,
-    **kwargs
+    **kwargs,
 ):
 
     """
-
-    Note, function has to be harmonized to equidistant freq_grid
-
-    Note, in current implementation, it has to hold that: (rainfall_window_range >= plateau_window_min)
-
-    :param data:                        The pandas dataframe holding the data-to-be flagged.
-                                        Data must be indexed by a datetime series and be harmonized onto a
-                                        time raster with seconds precision (skips allowed).
-    :param field:                       Fieldname of the Soil moisture measurements field in data.
-    :param flagger:                     A flagger - object. (saqc.flagger.X)
+    This function flags plateaus/series of constant values in soil moisture data.
+
+    Mentions of "conditions" in the following explanations refer to reference [2].
+
+    The function represents a stricter version of ``constants_flagVarianceBased``.
+
+    The additional constraints (3)-(5) are designed to match the special case of constant
+    values in soil moisture measurements, and basically check for preceding precipitation events
+    (conditions (3) and (4)) and a certain plateau level (condition (5)).
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    window : str, default '12h'
+        Minimum duration during which values need to be identical to become plateau candidates. See condition (1).
+    thresh : float, default 0.0005
+        Maximum variance of a group of values to still consider them constant. See condition (2).
+    precipitation_window : str, default '12h'
+        See condition (3) and (4)
+    tolerance : float, default 0.95
+        Tolerance factor, see condition (5)
+    deriv_max : float, default 0
+        See condition (4)
+    deriv_min : float, default 0.0025
+        See condition (3)
+    max_missing : {None, int}, default None
+        Maximum number of missing values allowed in window, by default this condition is ignored
+    max_consec_missing : {None, int}, default None
+        Maximum number of consecutive missing values allowed in window, by default this condition is ignored
+    smooth_window : {None, str}, default None
+        Size of the smoothing window of the Savitzky-Golay filter. The default value None results in a window of two
+        times the sampling rate (i.e. three values).
+    smooth_poly_deg : int, default 2
+        Degree of the polynomial used for smoothing with the Savitzky-Golay filter.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    References
+    ----------
+    [1] Dorigo, W. et al.: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the International Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
+
+    [2] https://git.ufz.de/rdm-software/saqc/-/edit/testfuncDocs/docs/funcs/FormalDescriptions.md#sm_flagconstants
     """
 
     # get plateaus:
@@ -339,26 +522,49 @@ def sm_flagConstants(
     return data, flagger
 
 
-@register()
+@register(masking='all')
 def sm_flagRandomForest(data, field, flagger, references, window_values: int, window_flags: int, path: str, **kwargs):
-
-    """This Function uses pre-trained machine-learning model objects for flagging of a specific variable. The model is supposed to be trained using the script provided in "ressources/machine_learning/train_machine_learning.py".
-    For flagging, Inputs to the model are the timeseries of the respective target at one specific sensors, the automatic flags that were assigned by SaQC as well as multiple reference series.
-    Internally, context information for each point is gathered in form of moving windows to improve the flagging algorithm according to user input during model training.
-    For the model to work, the parameters 'references', 'window_values' and 'window_flags' have to be set to the same values as during training.
-    :param data:                        The pandas dataframe holding the data-to-be flagged, as well as the reference series. Data must be indexed by a datetime index.
-    :param flags:                       A dataframe holding the flags
-    :param field:                       Fieldname of the field in data that is to be flagged.
-    :param flagger:                     A flagger - object.
-    :param references:                  A string or list of strings, denoting the fieldnames of the data series that should be used as reference variables
-    :param window_values:               An integer, denoting the window size that is used to derive the gradients of both the field- and reference-series inside the moving window
-    :param window_flags:                An integer, denoting the window size that is used to count the surrounding automatic flags that have been set before
-    :param path:                        A string giving the path to the respective model object, i.e. its name and the respective value of the grouping variable. e.g. "models/model_0.2.pkl"
+    """
+    This function uses pre-trained machine-learning model objects for the flagging of a specific variable. The model
+    is supposed to be trained using the script provided in "ressources/machine_learning/train_machine_learning.py".
+    For flagging, inputs to the model are the timeseries of the respective target at one specific sensor, the
+    automatic flags that were assigned by SaQC, as well as multiple reference series. Internally, context information
+    for each point is gathered in the form of moving windows, to improve the flagging algorithm according to user
+    input during model training. For the model to work, the parameters 'references', 'window_values' and
+    'window_flags' have to be set to the same values as during training.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    references : {str, List[str]}
+        A string or list of strings, denoting the fieldnames of the data series that should be used as reference
+        variables.
+    window_values : int
+        An integer, denoting the window size that is used to derive the gradients of both the field- and
+        reference-series inside the moving window
+    window_flags : int
+        An integer, denoting the window size that is used to count the surrounding automatic flags that have been set
+        before
+    path : str
+        A string giving the path to the respective model object, i.e. its name and
+        the respective value of the grouping variable. e.g. "models/model_0.2.pkl"
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
     """
 
     def _refCalc(reference, window_values):
-        # Helper function for calculation of moving window values
-        outdata = pd.DataFrame()
+        """ Helper function for calculation of moving window values """
+        outdata = dios.DictOfSeries()
         name = reference.name
         # derive gradients from reference series
         outdata[name + "_Dt_1"] = reference - reference.shift(1)  # gradient t vs. t-1
@@ -376,26 +582,32 @@ def sm_flagRandomForest(data, field, flagger, references, window_values: int, wi
     # Create custom df for easier processing
     df = data.loc[:, [field] + references]
     # Create binary column of BAD-Flags
-    df["flag_bin"] = flagger.isFlagged(field, flag=flagger.BAD, comparator="==").astype(
-        "int"
-    )  # get "BAD"-flags and turn into binary
+    df["flag_bin"] = flagger.isFlagged(field, flag=flagger.BAD, comparator="==").astype("int")
 
     # Add context information of flags
-    df["flag_bin_t_1"] = df["flag_bin"] - df["flag_bin"].shift(1)  # Flag at t-1
-    df["flag_bin_t1"] = df["flag_bin"] - df["flag_bin"].shift(-1)  # Flag at t+1
-    df["flag_bin_t_" + str(window_flags)] = (
-        df["flag_bin"].rolling(window_flags + 1, center=False).sum()
-    )  # n Flags in interval t to t-window_flags
-    df["flag_bin_t" + str(window_flags)] = (
-        df["flag_bin"].iloc[::-1].rolling(window_flags + 1, center=False).sum()[::-1]
-    )  # n Flags in interval t to t+window_flags
+    # Flag at t +/-1
+    df["flag_bin_t_1"] = df["flag_bin"] - df["flag_bin"].shift(1)
+    df["flag_bin_t1"] = df["flag_bin"] - df["flag_bin"].shift(-1)
+    # n Flags in interval t to t-window_flags
+    df[f"flag_bin_t_{window_flags}"] = df["flag_bin"].rolling(window_flags + 1, center=False).sum()
+    # n Flags in interval t to t+window_flags
     # forward-orientation not possible, so right-orientation on reversed data an reverse result
+    df[f"flag_bin_t{window_flags}"] = df["flag_bin"].iloc[::-1].rolling(window_flags + 1, center=False).sum()[::-1]
+
+    # TODO: dios.merge() / dios.join() ...
+    # replace the following version with its DictOfSeries -> DataFrame
+    # conversions as soon as merging/joining is available in dios
 
     # Add context information for field+references
+    df = df.to_df()  # df is a dios
     for i in [field] + references:
-        df = pd.concat([df, _refCalc(reference=df[i], window_values=window_values)], axis=1)
+        ref = _refCalc(reference=df[i], window_values=window_values).to_df()
+        df = pd.concat([df, ref], axis=1)
+    # all further actions work on a pd.DataFrame. That's ok,
+    # because only the df.index is used to set the actual
+    # flags in the underlying dios.
 
-    # remove rows that contain NAs (new ones occured during predictor calculation)
+    # remove rows that contain NaNs (new ones emerge during predictor calculation)
     df = df.dropna(axis=0, how="any")
     # drop column of automatic flags at time t
     df = df.drop(columns="flag_bin")
@@ -403,8 +615,6 @@ def sm_flagRandomForest(data, field, flagger, references, window_values: int, wi
     model = joblib.load(path)
     preds = model.predict(df)
 
-    # Get indices of flagged values
     flag_indices = df[preds.astype("bool")].index
-    # set Flags
     flagger = flagger.setFlags(field, loc=flag_indices, **kwargs)
     return data, flagger
diff --git a/saqc/funcs/spikes_detection.py b/saqc/funcs/spikes_detection.py
index e178b55759c5f9dbf6c9d2c5e7140d3a9d7669ab..39e34bf1fa6a44e70c0bb395a9922090cae104c5 100644
--- a/saqc/funcs/spikes_detection.py
+++ b/saqc/funcs/spikes_detection.py
@@ -4,137 +4,511 @@
 
 import numpy as np
 import pandas as pd
-
 from scipy.signal import savgol_filter
 from scipy.stats import zscore
 from scipy.optimize import curve_fit
-from saqc.funcs.register import register
+from saqc.core.register import register
 import numpy.polynomial.polynomial as poly
 import numba
 import saqc.lib.ts_operators as ts_ops
-from saqc.lib.tools import retrieveTrustworthyOriginal, offset2seconds, slidingWindowIndices, findIndex, composeFunction
-
-
-@register()
-def spikes_flagOddWater(
-    data,
-    field,
-    flagger,
-    fields,
-    trafo="normScale",
-    alpha=0.05,
-    bin_frac=10,
-    n_neighbors=2,
-    iter_start=0.5,
+from saqc.lib.tools import (
+    retrieveTrustworthyOriginal,
+    offset2seconds,
+    slidingWindowIndices,
+    findIndex,
+    toSequence,
+    customRoller
+)
+from outliers import smirnov_grubbs
+
+def _stray(
+    val_frame,
+    partition_freq=None,
+    partition_min=11,
     scoring_method="kNNMaxGap",
-    lambda_estimator="gap_average",
-    **kwargs,
-):
-
-    trafo = composeFunction(trafo.split(","))
-    # data fransformation/extraction
-    val_frame = trafo(data[fields[0]])
-
-    for var in fields[1:]:
-        val_frame = pd.merge(val_frame, trafo(data[var]), how="outer", left_index=True, right_index=True)
+    n_neighbors=10,
+    iter_start=0.5,
+    alpha=0.05,
+    trafo=lambda x: x
 
-    data_len = val_frame.index.size
-    val_frame.dropna(inplace=True)
+):
+    """
+    Find outliers in multi dimensional observations.
+
+    The general idea is to assign scores to every observation, based on the observation's neighborhood in the space
+    of observations. Then, the gaps between the (greatest) scores are tested for being drawn from the same
+    distribution as the majority of the scores.
+
+    See the References section for a link to a detailed description of the algorithm.
+
+    Note that the flagging result depends on the size of the partition under test and the distribution of the outliers
+    in it. For "normalish" and/or slightly "erratic" datasets, a partition size of 5000 - 10000 periods turned out to
+    be a good guess.
+
+    Note that no normalizations/transformations are applied to the different components (data columns) -
+    those are expected to have been applied previously, if necessary.
+
+    Parameters
+    ----------
+    val_frame : (N,M) ndarray
+        Input NxM array of observations, where N is the number of observations and M the number of components per
+        observation.
+    partition_freq : {None, str, int}, default None
+        Determines the size of the data partitions, the data is decomposed into. Each partition is checked separately
+        for outliers. If a string is passed, it has to be an offset string and it results in partitioning the data into
+        parts of according temporal length. If an integer is passed, the data is simply split up into continuous chunks
+        of `partition_freq` periods (see the sketch in the Notes section below). If ``None`` is passed (default), all
+        the data will be tested in one run.
+    partition_min : int, default 11
+        Minimum number of periods per partition that have to be present for a valid outlier detection to be made in
+        this partition. (Only of effect, if `partition_freq` is an integer.) The value must always be
+        greater than the `n_neighbors` value.
+    scoring_method : {'kNNSum', 'kNNMaxGap'}, default 'kNNMaxGap'
+        Scoring method applied.
+        `'kNNSum'`: Assign to every point the sum of the distances to its 'n_neighbors' nearest neighbors.
+        `'kNNMaxGap'`: Assign to every point the distance to the neighbor with the "maximum gap" to its predecessor
+        in the hierarchy of the `n_neighbors` nearest neighbors. (see reference section for further descriptions)
+    n_neighbors : int, default 10
+        Number of neighbors included in the scoring process for every datapoint.
+    iter_start : float, default 0.5
+        Float in [0,1] that determines which percentage of data is considered "normal". 0.5 results in the stray
+        algorithm searching only the upper 50 % of the scores for the cut-off point. (See the reference section for
+        more information.)
+    alpha : float, default 0.05
+        Level of significance by which it is tested, if a score might be drawn from another distribution than the
+        majority of the data.
+
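+    Notes
+    -----
+    A rough sketch of the integer based partitioning (illustrative only, `frame` is a stand-in
+    for the frame of observations)::
+
+        import numpy as np
+        import pandas as pd
+
+        frame = pd.DataFrame({"x": np.random.rand(5000)},
+                             index=pd.date_range("2020-01-01", periods=5000, freq="10min"))
+        # split into consecutive chunks of 1000 periods, each checked separately for outliers
+        grouper = pd.Series(np.arange(len(frame)), index=frame.index) // 1000
+        partitions = frame.groupby(grouper)
+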
+    References
+    ----------
+    A detailed description of the STRAY algorithm can be found here:
+
+    [1] Talagala, P. D., Hyndman, R. J., & Smith-Miles, K. (2019). Anomaly detection in high dimensional data.
+        arXiv preprint arXiv:1908.04000.
+    """
 
-    # KNN calculation
     kNNfunc = getattr(ts_ops, scoring_method)
-    resids = kNNfunc(val_frame.values, n_neighbors=n_neighbors, algorithm="ball_tree")
+    # partitioning
+    if not partition_freq:
+        partition_freq = val_frame.shape[0]
 
-    # sorting
-    sorted_i = resids.argsort()
-    resids = resids[sorted_i]
-
-    # iter_start
-
-    if lambda_estimator == "gap_average":
-        sample_size = resids.shape[0]
+    if isinstance(partition_freq, str):
+        partitions = val_frame.groupby(pd.Grouper(freq=partition_freq))
+    else:
+        grouper_series = pd.Series(data=np.arange(0, val_frame.shape[0]), index=val_frame.index)
+        grouper_series = grouper_series.transform(lambda x: int(np.floor(x / partition_freq)))
+        partitions = val_frame.groupby(grouper_series)
+
+    # calculate flags for every partition
+    to_flag = []
+    for _, partition in partitions:
+        if partition.empty | (partition.shape[0] < partition_min):
+            continue
+        partition = partition.apply(trafo)
+        sample_size = partition.shape[0]
+        nn_neighbors = min(n_neighbors, max(sample_size, 2))
+        resids = kNNfunc(partition.values, n_neighbors=nn_neighbors - 1, algorithm="ball_tree")
+        sorted_i = resids.argsort()
+        resids = resids[sorted_i]
         gaps = np.append(0, np.diff(resids))
+
         tail_size = int(max(min(50, np.floor(sample_size / 4)), 2))
         tail_indices = np.arange(2, tail_size + 1)
         i_start = int(max(np.floor(sample_size * iter_start), 1) + 1)
-        sum(tail_indices / (tail_size - 1) * gaps[i_start - tail_indices + 1])
         ghat = np.array([np.nan] * sample_size)
         for i in range(i_start - 1, sample_size):
-            ghat[i] = sum(tail_indices / (tail_size - 1) * gaps[i - tail_indices + 1])
+            ghat[i] = sum((tail_indices / (tail_size - 1)) * gaps[i - tail_indices + 1])
 
         log_alpha = np.log(1 / alpha)
         for iter_index in range(i_start - 1, sample_size):
             if gaps[iter_index] > log_alpha * ghat[iter_index]:
                 break
-    else:
-        # (estimator == 'exponential_fit')
-        iter_index = int(np.floor(resids.size * iter_start))
-        # initialize condition variables:
-        crit_val = np.inf
-        test_val = 0
-        neg_log_alpha = -np.log(alpha)
-
-        # define exponential dist density function:
-        def fit_function(x, lambd):
-            return lambd * np.exp(-lambd * x)
-
-        # initialise sampling bins
+
+        to_flag = np.append(to_flag, list(partition.index[sorted_i[iter_index:]]))
+
+    return to_flag
+
+
+def _expFit(val_frame, scoring_method="kNNMaxGap", n_neighbors=10, iter_start=0.5, alpha=0.05, bin_frac=10):
+    """
+    Find outliers in multi dimensional observations.
+
+    The general idea is to assign scores to every observation, based on the observation's neighborhood in the space
+    of observations. Then, the gaps between the (greatest) scores are tested for being drawn from the same
+    distribution as the majority of the scores.
+
+    Note that no normalizations/transformations are applied to the different components (data columns) -
+    those are expected to have been applied previously, if necessary.
+
+    Parameters
+    ----------
+    val_frame : (N,M) ndarray
+        Input NxM array of observations, where N is the number of observations and M the number of components per
+        observation.
+    scoring_method : {'kNNSum', 'kNNMaxGap'}, default 'kNNMaxGap'
+        Scoring method applied.
+        `'kNNSum'`: Assign to every point the sum of the distances to its 'n_neighbors' nearest neighbors.
+        `'kNNMaxGap'`: Assign to every point the distance to the neighbor with the "maximum gap" to its predecessor
+        in the hierarchy of the `n_neighbors` nearest neighbors. (see reference section for further descriptions)
+    n_neighbors : int, default 10
+        Number of neighbors included in the scoring process for every datapoint.
+    iter_start : float, default 0.5
+        Float in [0,1] that determines which percentage of data is considered "normal". 0.5 results in the expfit
+        algorithm searching only the upper 50 % of the scores for the cut-off point. (See the reference section for
+        more information.)
+    alpha : float, default 0.05
+        Level of significance by which it is tested, if a score might be drawn from another distribution than the
+        majority of the data.
+    bin_frac : {int, str}, default 10
+        Controls the binning for the histogram in the fitting step. If an integer is passed, the residues will
+        equidistantly be covered by `bin_frac` bins, ranging from the minimum to the maximum of the residues.
+        If a string is passed, it will be passed on to the ``numpy.histogram_bin_edges`` method.
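+
+    Notes
+    -----
+    String values of `bin_frac` are simply forwarded to numpy's binning, roughly like (sketch)::
+
+        import numpy as np
+
+        resids = np.random.rand(100)                        # stand-in for the kNN scores
+        binz = np.histogram_bin_edges(resids, bins="auto")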
+    """
+
+    kNNfunc = getattr(ts_ops, scoring_method)
+    resids = kNNfunc(val_frame.values, n_neighbors=n_neighbors, algorithm="ball_tree")
+    data_len = resids.shape[0]
+
+    # sorting
+    sorted_i = resids.argsort()
+    resids = resids[sorted_i]
+    iter_index = int(np.floor(resids.size * iter_start))
+    # initialize condition variables:
+    crit_val = np.inf
+    test_val = 0
+    neg_log_alpha = -np.log(alpha)
+
+    # define exponential dist density function:
+    def fit_function(x, lambd):
+        return lambd * np.exp(-lambd * x)
+
+    # initialise sampling bins
+    if isinstance(bin_frac, int):
         binz = np.linspace(resids[0], resids[-1], 10 * int(np.ceil(data_len / bin_frac)))
-        binzenters = np.array([0.5 * (binz[i] + binz[i + 1]) for i in range(len(binz) - 1)])
-        # inititialize full histogram:
-        full_hist, binz = np.histogram(resids, bins=binz)
-        # check if start index is sufficiently high (pointing at resids value beyond histogram maximum at least):
-        hist_argmax = full_hist.argmax()
-
-        if hist_argmax >= findIndex(binz, resids[iter_index - 1], 0):
-            raise ValueError(
-                "Either the data histogram is too strangely shaped for oddWater OD detection - "
-                "or a too low value for iter_start was passed (iter_start better be greater 0.5)"
-            )
-        # GO!
-        iter_max_bin_index = findIndex(binz, resids[iter_index - 1], 0)
-        upper_tail_index = int(np.floor(0.5 * hist_argmax + 0.5 * iter_max_bin_index))
-        resids_tail_index = findIndex(resids, binz[upper_tail_index], 0)
-        upper_tail_hist, bins = np.histogram(
-            resids[resids_tail_index:iter_index], bins=binz[upper_tail_index : iter_max_bin_index + 1]
+    elif bin_frac in ["auto", "fd", "doane", "scott", "stone", "rice", "sturges", "sqrt"]:
+        binz = np.histogram_bin_edges(resids, bins=bin_frac)
+    else:
+        raise ValueError(f"Can't interpret {bin_frac} as an binning technique.")
+
+    binzenters = np.array([0.5 * (binz[i] + binz[i + 1]) for i in range(len(binz) - 1)])
+    # inititialize full histogram:
+    full_hist, binz = np.histogram(resids, bins=binz)
+    # check if start index is sufficiently high (pointing at resids value beyond histogram maximum at least):
+    hist_argmax = full_hist.argmax()
+
+    if hist_argmax >= findIndex(binz, resids[iter_index - 1], 0):
+        raise ValueError(
+            "Either the data histogram is too strangely shaped for oddWater OD detection - "
+            "or a too low value for 'iter_start' was passed "
+            "(iter_start better be much greater 0.5)"
+        )
+    # GO!
+    iter_max_bin_index = findIndex(binz, resids[iter_index - 1], 0)
+    upper_tail_index = int(np.floor(0.5 * hist_argmax + 0.5 * iter_max_bin_index))
+    resids_tail_index = findIndex(resids, binz[upper_tail_index], 0)
+    upper_tail_hist, bins = np.histogram(
+        resids[resids_tail_index:iter_index], bins=binz[upper_tail_index : iter_max_bin_index + 1]
+    )
+
+    while (test_val < crit_val) & (iter_index < resids.size - 1):
+        iter_index += 1
+        new_iter_max_bin_index = findIndex(binz, resids[iter_index - 1], 0)
+        # following if/else block "manually" expands the data histogram and circumvents calculation of the complete
+        # histogram in any new iteration.
+        if new_iter_max_bin_index == iter_max_bin_index:
+            upper_tail_hist[-1] += 1
+        else:
+            upper_tail_hist = np.append(upper_tail_hist, np.zeros([new_iter_max_bin_index - iter_max_bin_index]))
+            upper_tail_hist[-1] += 1
+            iter_max_bin_index = new_iter_max_bin_index
+            upper_tail_index_new = int(np.floor(0.5 * hist_argmax + 0.5 * iter_max_bin_index))
+            upper_tail_hist = upper_tail_hist[upper_tail_index_new - upper_tail_index :]
+            upper_tail_index = upper_tail_index_new
+
+        # fitting
+
+        lambdA, _ = curve_fit(
+            fit_function,
+            xdata=binzenters[upper_tail_index:iter_max_bin_index],
+            ydata=upper_tail_hist,
+            p0=[-np.log(alpha / resids[iter_index])],
         )
 
-        while (test_val < crit_val) & (iter_index < resids.size - 1):
-            iter_index += 1
-            new_iter_max_bin_index = findIndex(binz, resids[iter_index - 1], 0)
+        crit_val = neg_log_alpha / lambdA
+        test_val = resids[iter_index]
+
+    return val_frame.index[sorted_i[iter_index:]]
+
+
+def _reduceMVflags(
+    val_frame, fields, flagger, to_flag_frame, reduction_range, reduction_drop_flagged=False, reduction_thresh=3.5,
+        reduction_min_periods=1
+):
+    """
+    Function called by "spikes_flagMultivarScores" to reduce the number of false positives that result from
+    the algorithm's confinement to only flag complete observations (all of their variables/components).
+
+    The function "reduces" an observation's flag to single components of it, by applying a MAD test (see references)
+    onto every component's temporal surrounding.
+
+    Parameters
+    ----------
+    val_frame : (N,M) pd.DataFrame
+        Input NxM DataFrame of observations, where N is the number of observations and M the number of components per
+        observation.
+    fields : List[str]
+        Fieldnames of the components in `val_frame` that are to be tested for being outliers.
+    to_flag_frame : (K,M) pd.DataFrame
+        Input dataframe of observations to be tested, where K is the number of observations and M the number
+        of components per observation.
+    reduction_range : str
+        An offset string, denoting the range of the temporal surrounding to include into the MAD testing.
+    reduction_drop_flagged : bool, default False
+        Whether or not to drop flagged values other than the value under test, from the temporal surrounding
+        before checking the value with MAD.
+    reduction_thresh : float, default 3.5
+        The `critical` value, controlling whether the MAD score is considered as referring to an outlier or not.
+        Higher values result in less rigid flagging. The default value is widely used in the literature. See the
+        references section for more details ([1]).
+
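+    Notes
+    -----
+    The score used is the classical modified z-score, based on the median absolute deviation
+    (a sketch on toy residues, illustrative only)::
+
+        import numpy as np
+
+        resids = np.array([0.1, -0.2, 0.05, 0.0, 3.0])    # residues around a local fit
+        med = np.median(resids)
+        mad = np.median(np.abs(resids - med))
+        scores = 0.6745 * np.abs(resids - med) / mad      # flag where a score exceeds `reduction_thresh`
+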
+    References
+    ----------
+    [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
+    """
 
-            # following if/else block "manually" expands the data histogram and circumvents calculation of the complete
-            # histogram in any new iteration.
-            if new_iter_max_bin_index == iter_max_bin_index:
-                upper_tail_hist[-1] += 1
+    to_flag_frame[:] = False
+    to_flag_index = to_flag_frame.index
+    for var in fields:
+        for index in enumerate(to_flag_index):
+            index_slice = slice(index[1] - pd.Timedelta(reduction_range), index[1] + pd.Timedelta(reduction_range))
+
+            test_slice = val_frame[var][index_slice].dropna()
+            # check, wheather value under test is sufficiently centered:
+            first_valid = test_slice.first_valid_index()
+            last_valid = test_slice.last_valid_index()
+            min_range = pd.Timedelta(reduction_range)/4
+            polydeg = 2
+            if ((pd.Timedelta(index[1] - first_valid) < min_range) |
+                (pd.Timedelta(last_valid - index[1]) < min_range)):
+                polydeg = 0
+            if reduction_drop_flagged:
+                test_slice = test_slice.drop(to_flag_index, errors='ignore')
+            if test_slice.shape[0] >= reduction_min_periods:
+                x = (test_slice.index.values.astype(float))
+                x_0 = x[0]
+                x = (x - x_0)/10**12
+                polyfitted = poly.polyfit(y=test_slice.values, x=x, deg=polydeg)
+                testval = poly.polyval((float(index[1].to_numpy()) - x_0)/10**12, polyfitted)
+                testval = val_frame[var][index[1]] - testval
+                resids = test_slice.values - poly.polyval(x, polyfitted)
+                med_resids = np.median(resids)
+                MAD = np.median(np.abs(resids - med_resids))
+                crit_val = 0.6745 * (abs(med_resids - testval)) / MAD
+                if crit_val > reduction_thresh:
+                    to_flag_frame.loc[index[1], var] = True
             else:
-                upper_tail_hist = np.append(upper_tail_hist, np.zeros([new_iter_max_bin_index - iter_max_bin_index]))
-                upper_tail_hist[-1] += 1
-                iter_max_bin_index = new_iter_max_bin_index
-                upper_tail_index_new = int(np.floor(0.5 * hist_argmax + 0.5 * iter_max_bin_index))
-                upper_tail_hist = upper_tail_hist[upper_tail_index_new - upper_tail_index :]
-                upper_tail_index = upper_tail_index_new
-
-            # fitting
-            lambdA, _ = curve_fit(
-                fit_function,
-                xdata=binzenters[upper_tail_index:iter_max_bin_index],
-                ydata=upper_tail_hist,
-                p0=[-np.log(alpha / resids[iter_index])],
-            )
+                to_flag_frame.loc[index[1], var] = True
+
+    return to_flag_frame
+
+
+@register(masking='all')
+def spikes_flagMultivarScores(
+    data,
+    field,
+    flagger,
+    fields,
+    trafo=np.log,
+    alpha=0.05,
+    n_neighbors=10,
+    scoring_method="kNNMaxGap",
+    iter_start=0.5,
+    threshing="stray",
+    expfit_binning="auto",
+    stray_partition=None,
+    stray_partition_min=11,
+    post_reduction=False,
+    reduction_range=None,
+    reduction_drop_flagged=False,
+    reduction_thresh=3.5,
+    reduction_min_periods=1,
+    **kwargs,
+):
+    """
+    The algorithm implements a 3-step outlier detection procedure for simultaneous flagging of higher dimensional
+    data (dimensions > 3).
+
+    In reference [1], the procedure is introduced and exemplified with an application to hydrological data.
+
+    See the notes section for an overview of the algorithm's basic steps.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged. (Here a dummy, for structural reasons)
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    fields : List[str]
+        List of fieldnames, corresponding to the variables that are to be included into the flagging process.
+    trafo : callable, default np.log
+        Transformation to be applied onto every column before scoring. Will likely get deprecated soon. It is better
+        to transform the data in a processing step preceding the call to ``flagMultivarScores``.
+    alpha : float, default 0.05
+        Level of significance by which it is tested, if an observation's score might be drawn from another
+        distribution than the scores of the majority of the observations.
+    n_neighbors : int, default 10
+        Number of neighbors included in the scoring process for every datapoint.
+    scoring_method : {'kNNSum', 'kNNMaxGap'}, default 'kNNMaxGap'
+        Scoring method applied.
+        ``'kNNSum'``: Assign to every point the sum of the distances to its 'n_neighbors' nearest neighbors.
+        ``'kNNMaxGap'``: Assign to every point the distance to the neighbor with the "maximum gap" to its predecessor
+        in the hierarchy of the `n_neighbors` nearest neighbors. (see reference section for further descriptions)
+    iter_start : float, default 0.5
+        Float in [0, 1] that determines which percentage of data is considered "normal". 0.5 results in the threshing
+        algorithm searching only the upper 50 % of the scores for the cut-off point. (See the reference section for
+        more information.)
+    threshing : {'stray', 'expfit'}, default 'stray'
+        A string, denoting the threshing algorithm to be applied to the observations' scores.
+        See the documentation of the algorithms (``_stray``, ``_expfit``) and/or paragraph [2] of the references
+        section for more information on the algorithms.
+    expfit_binning : {int, str}, default 'auto'
+        Controls the binning for the histogram in the ``expfit`` algorithm's fitting step.
+        If an integer is passed, the residues will be covered equidistantly by `expfit_binning` bins, ranging from the
+        minimum to the maximum of the residues. If a string is passed, it will be passed on to the
+        ``numpy.histogram_bin_edges`` method.
+    stray_partition : {None, str, int}, default None
+        Only effective when `threshing` = 'stray'.
+        Determines the size of the data partitions, the data is decomposed into. Each partition is checked separately
+        for outliers. If a string is passed, it has to be an offset string, and it results in partitioning the data
+        into parts of according temporal length. If an integer is passed, the data is simply split up into continuous
+        chunks of `stray_partition` periods. If ``None`` is passed (default), all the data will be tested in one run.
+    stray_partition_min : int, default 11
+        Only effective when `threshing` = 'stray'.
+        Minimum number of periods per partition that have to be present for a valid outlier detection to be made in
+        this partition. (Only has an effect if `stray_partition` is an integer.)
+    post_reduction : bool, default False
+        Whether or not to try to reduce the flag of an observation to one or more of its components. See the
+        documentation of `_reduceMVflags` for more details.
+    reduction_range : {None, str}, default None
+        Only effective when `post_reduction` = True.
+        An offset string, denoting the range of the temporal surrounding to include into the MAD testing while trying
+        to reduce flags.
+    reduction_drop_flagged : bool, default False
+        Only effective when `post_reduction` = True.
+        Whether or not to drop flagged values other than the value under test from the temporal surrounding
+        before checking the value with MAD.
+    reduction_thresh : float, default 3.5
+        Only effective when `post_reduction` = True.
+        The `critical` value, controlling whether the MAD score is considered to refer to an outlier or not.
+        Higher values result in less rigid flagging. The default value is widely considered appropriate in the
+        literature.
+    reduction_min_periods : int, default 1
+        Only effective when `post_reduction` = True.
+        Minimum number of values that have to be present in the temporal surrounding of a value, for the MAD test to
+        be performed. If fewer values are present, the flag is not reduced for that component.
+
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    Notes
+    -----
+    The basic steps are:
+
+    1. transforming
+
+    The different data columns are transformed via timeseries transformations to
+    (a) make them comparable and
+    (b) make outliers stand out more.
+
+    This step is usually subject to a phase of research/trial and error. See [1] for more details.
+
+    Note that the data transformation as a built-in step of the algorithm will likely get deprecated soon. It is
+    better to transform the data in a processing step preceding the multivariate flagging process. Also, by doing so,
+    one gets much more control over and variety in the transformations applied, since the `trafo` parameter only
+    allows for applying the same transformation to all of the variables involved.
+
+    2. scoring
+
+    Every observation gets assigned a score depending on its k nearest neighbors. See the `scoring_method` parameter
+    description for details on the different scoring methods. Furthermore, [1] and [2] may give some insight into the
+    pros and cons of the different methods.
+
+    3. threshing
+
+    The gaps between the (greatest) scores are tested for being drawn from the same
+    distribution as the majority of the scores. If a gap is encountered that, with sufficient significance, can be
+    said to not be drawn from the same distribution as the one all the smaller gaps are drawn from, then
+    the observation belonging to this gap, and all the observations belonging to gaps larger than this gap, get
+    flagged as outliers. See the description of the `threshing` parameter for more details; [2] gives a fully
+    detailed overview of the `stray` algorithm.
+
+    References
+    ----------
+    Odd Water Algorithm:
+
+    [1] Talagala, P.D. et al (2019): A Feature-Based Procedure for Detecting Technical Outliers in Water-Quality Data
+        From In Situ Sensors. Water Resources Research, 55(11), 8547-8568.
+
+    A detailed description of the stray algorithm:
+
+    [2] Talagala, P. D., Hyndman, R. J., & Smith-Miles, K. (2019). Anomaly detection in high dimensional data.
+        arXiv preprint arXiv:1908.04000.
+
+    A detailed description of the MAD outlier scoring:
+
+    [3] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
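+
+    Examples
+    --------
+    A rough, self-contained illustration of the two scoring variants of step 2 (it uses scikit-learn for the
+    k-nearest-neighbor distances and is only meant to convey the idea, not to mirror the exact internal
+    implementation):
+
+    >>> import numpy as np
+    >>> from sklearn.neighbors import NearestNeighbors
+    >>> X = np.random.rand(200, 4)                              # 200 observations, 4 components
+    >>> dist, _ = NearestNeighbors(n_neighbors=11).fit(X).kneighbors(X)
+    >>> dist = dist[:, 1:]                                      # drop the zero distance of each point to itself
+    >>> knn_sum = dist.sum(axis=1)                              # 'kNNSum' scores
+    >>> gaps = np.diff(dist, axis=1)
+    >>> pos = gaps.argmax(axis=1) + 1
+    >>> knn_max_gap = dist[np.arange(len(X)), pos]              # 'kNNMaxGap' scores
+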
+    """
+
+    # data transformation/extraction
+    data = data.copy()
+    fields = toSequence(fields)
+    val_frame = data[fields]
+    val_frame = val_frame.loc[val_frame.index_of("shared")].to_df()
+    val_frame.dropna(inplace=True)
+    val_frame = val_frame.apply(trafo)
+
+    if val_frame.empty:
+        return data, flagger
+
+    if threshing == "stray":
+        to_flag_index = _stray(
+            val_frame,
+            partition_freq=stray_partition,
+            partition_min=stray_partition_min,
+            scoring_method=scoring_method,
+            n_neighbors=n_neighbors,
+            iter_start=iter_start,
+            alpha=alpha
+        )
+
+    else:
+        to_flag_index = _expFit(val_frame,
+                                scoring_method=scoring_method,
+                                n_neighbors=n_neighbors,
+                                iter_start=iter_start,
+                                alpha=alpha,
+                                bin_frac=expfit_binning)
+
+    to_flag_frame = pd.DataFrame({var_name: True for var_name in fields}, index=to_flag_index)
+    if post_reduction:
+        val_frame = data[toSequence(fields)].to_df()
+        to_flag_frame = _reduceMVflags(val_frame, fields, flagger, to_flag_frame, reduction_range,
+                                       reduction_drop_flagged=reduction_drop_flagged,
+                                       reduction_thresh=reduction_thresh,
+                                       reduction_min_periods=reduction_min_periods)
 
-            crit_val = neg_log_alpha / lambdA
-            test_val = resids[iter_index]
 
-    # flag them!
-    to_flag_index = val_frame.index[sorted_i[iter_index:]]
     for var in fields:
-        flagger = flagger.setFlags(var, to_flag_index, **kwargs)
+        to_flag_ind = to_flag_frame.loc[:, var]
+        to_flag_ind = to_flag_ind[to_flag_ind].index
+        flagger = flagger.setFlags(var, to_flag_ind, **kwargs)
 
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def spikes_flagRaise(
     data,
     field,
@@ -149,13 +523,73 @@ def spikes_flagRaise(
     numba_boost=True,
     **kwargs,
 ):
+    """
+    The function flags raises and drops in value courses that exceed a certain threshold
+    within a certain timespan.
+
+    The parameter variety of the function is owed to the intriguing
+    case of values that "return" from outlierish or anomalous value levels and
+    thus exceed the threshold, while actually being usual values.
+
+    NOTE: the dataset is NOT supposed to be harmonized to a time series with an
+    equidistant frequency grid.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    thresh : float
+        The threshold for the total rise (thresh > 0), or total drop (thresh < 0), that value courses must
+        not exceed within a timespan of length `raise_window`.
+    raise_window : str
+        An offset string, determining the timespan the rise/drop thresholding refers to. The window is inclusively
+        defined.
+    intended_freq : str
+        An offset string, determining the frequency the timeseries to-be-flagged is supposed to be sampled at.
+        The window is inclusively defined.
+    average_window : {None, str}, default None
+        See the second condition listed in the notes below. The window is inclusively defined and defaults to
+        1.5 times the size of `raise_window`.
+    mean_raise_factor : float, default 2
+        See second condition listed in the notes below.
+    min_slope : {None, float}, default None
+        See third condition listed in the notes below.
+    min_slope_weight : float, default 0.8
+        See third condition listed in the notes below.
+    numba_boost : bool, default True
+        Whether to speed up the rolling checks with numba (requires numba to be installed).
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    Notes
+    -----
+    The value :math:`x_{k}` of a time series :math:`x` with associated
+    timestamps :math:`t_i`, is flagged a raise, if:
+
+    * There is any value :math:`x_{s}`, preceding :math:`x_{k}` within `raise_window` range, so that:
+
+      * :math:`M = |x_k - x_s | >`  `thresh` :math:`> 0`
+
+    * The weighted average :math:`\\mu^{*}` of the values, preceding :math:`x_{k}` within `average_window`
+      range, indicates that :math:`x_{k}` does not return from an "outlierish" value course, meaning that:
+
+      * :math:`x_k > \\mu^* + ( M` / `mean_raise_factor` :math:`)`
+
+    * Additionally, if `min_slope` is not `None`, :math:`x_{k}` is checked for being sufficiently divergent from its
+      very predecessor :math:`x_{k-1}`, meaning that it is additionally checked if:
+
+      * :math:`x_k - x_{k-1} >` `min_slope`
+      * :math:`t_k - t_{k-1} >` `min_slope_weight` :math:`\\times` `intended_freq`
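+
+    Examples
+    --------
+    A rough, self-contained sketch of the first condition only (values, threshold and window are illustrative;
+    the weighted-average check and the slope check are not shown):
+
+    >>> import pandas as pd
+    >>> idx = pd.date_range("2020-01-01", periods=4, freq="10min")
+    >>> x = pd.Series([0.0, 0.1, 5.0, 0.2], index=idx)
+    >>> rise = x - x.rolling("30min", min_periods=2, closed="both").min()
+    >>> candidates = rise > 2.0     # condition (1) with thresh=2 and raise_window='30min'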
 
-    # NOTE1: this implementation accounts for the case of "pseudo" spikes that result from checking against outliers
-    # NOTE2: the test is designed to work on raw data as well as on regularized
-    #
-    # See saqc documentation at:
-    # https://git.ufz.de/rdm-software/saqc/blob/develop/docs/funcs/SpikeDetection.md
-    # for more details
+    """
 
     # prepare input args
     dataseries = data[field].dropna()
@@ -180,10 +614,10 @@ def spikes_flagRaise(
             return np.nan
 
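+    # NOTE: despite its name, this helper returns the *sum* of all but the last value in the window;
+    # the weighted rolling mean is obtained further below by dividing by the rolling sum of the weights.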
     def custom_rolling_mean(x):
-        return np.mean(x[:-1])
+        return np.sum(x[:-1])
 
     # get invalid-raise/drop mask:
-    raise_series = dataseries.rolling(raise_window, min_periods=2)
+    raise_series = dataseries.rolling(raise_window, min_periods=2, closed="both")
 
     if numba_boost:
         raise_check = numba.jit(raise_check, nopython=True)
@@ -194,7 +628,7 @@ def spikes_flagRaise(
     if raise_series.isna().all():
         return data, flagger
 
-    # "unflag" values of unsifficient deviation to theire predecessors
+    # "unflag" values of insufficient deviation to their predecessors
     if min_slope is not None:
         w_mask = (
             pd.Series(dataseries.index).diff().dt.total_seconds() / intended_freq.total_seconds()
@@ -217,16 +651,21 @@ def spikes_flagRaise(
     )
 
     weights[weights > 1.5] = 1.5
-    weighted_data = dataseries.mul(weights.values)
+    weights.index = dataseries.index
+    weighted_data = dataseries.mul(weights)
 
     # rolling weighted mean calculation
     weighted_rolling_mean = weighted_data.rolling(average_window, min_periods=2, closed="both")
+    weights_rolling_sum = weights.rolling(average_window, min_periods=2, closed="both")
     if numba_boost:
         custom_rolling_mean = numba.jit(custom_rolling_mean, nopython=True)
         weighted_rolling_mean = weighted_rolling_mean.apply(custom_rolling_mean, raw=True, engine="numba")
+        weights_rolling_sum = weights_rolling_sum.apply(custom_rolling_mean, raw=True, engine="numba")
     else:
         weighted_rolling_mean = weighted_rolling_mean.apply(custom_rolling_mean, raw=True)
+        weights_rolling_sum = weights_rolling_sum.apply(custom_rolling_mean, raw=True)
 
+    weighted_rolling_mean = weighted_rolling_mean / weights_rolling_sum
     # check means against critical raise value:
     to_flag = dataseries >= weighted_rolling_mean + (raise_series / mean_raise_factor)
     to_flag &= raise_series.notna()
@@ -235,11 +674,12 @@ def spikes_flagRaise(
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def spikes_flagSlidingZscore(
     data, field, flagger, window, offset, count=1, polydeg=1, z=3.5, method="modZ", **kwargs,
 ):
-    """ A outlier detection in a sliding window. The method for detection can be a simple Z-score or the more robust
+    """
+    An outlier detection in a sliding window. The method for detection can be a simple Z-score or the more robust
     modified Z-score, as introduced here [1].
 
     The steps are:
@@ -250,20 +690,40 @@ def spikes_flagSlidingZscore(
     5.  processing continue at 1. until end of data.
     6.  all potential outlier, that are detected `count`-many times, are promoted to real outlier and flagged by the `flagger`
 
-    :param data:        pandas dataframe. holding the data
-    :param field:       fieldname in `data`, which holds the relevant infos
-    :param flagger:     flagger.
-    :param window:      int or time-offset string (see [2]). The size of the window the outlier detection is run in. default: 1h
-    :param offset:      int or time-offset string (see [2]). Stepsize the window is set further. default: 1h
-    :param method:      str. `modZ`  or `zscore`. see [1] at section `Z-Scores and Modified Z-Scores`
-    :param count:       int. this many times, a datapoint needs to be detected in different windows, to be finally
-                        flagged as outlier
-    :param polydeg:     The degree for the polynomial fit, to calculate the residuum
-    :param z:           float. the value the (mod.) Z-score is tested against. Defaulting to 3.5 (Recommendation of [1])
-
-    Links:
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    window : {int, str}
+        Integer or offset string (see [2]). The size of the window the outlier detection is run in.
+    offset : {int, str}
+        Integer or offset string (see [2]). The step size by which the window is moved forward.
+    count : int, default 1
+        Number of times a value has to be classified as an outlier in different windows, to be finally flagged as
+        an outlier.
+    polydeg : int, default 1
+        The degree of the polynomial that is fitted to the data in order to calculate the residues.
+    z : float, default 3.5
+        The value the (mod.) Z-score is tested against. Defaulting to 3.5 (recommendation of [1]).
+    method : {'modZ', 'zscore'}, default 'modZ'
+        See section `Z-Scores and Modified Z-Scores` in [1].
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    References
+    ----------
     [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
     [2] https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
+
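+    Examples
+    --------
+    A rough, self-contained sketch of what happens inside one window (polynomial detrending followed by the
+    modified Z-score of [1]; the values are made up for illustration):
+
+    >>> import numpy as np
+    >>> y = np.array([1.0, 1.1, 0.9, 8.0, 1.05, 0.95])
+    >>> x = np.arange(len(y), dtype=float)
+    >>> resid = y - np.polyval(np.polyfit(x, y, deg=1), x)
+    >>> med = np.median(resid)
+    >>> mad = np.median(np.abs(resid - med))
+    >>> mod_z = 0.6745 * np.abs(resid - med) / mad
+    >>> candidates = mod_z > 3.5      # only the fourth value exceeds the default threshold
+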
     """
 
     use_offset = False
@@ -319,6 +779,8 @@ def spikes_flagSlidingZscore(
 
     # prepare data, work on numpy arrays for the fulfilling pleasure of performance
     d = data[field].dropna()
+    if d.empty:
+        return data, flagger
     all_indices = np.arange(len(d.index))
     x = (d.index - d.index[0]).total_seconds().values
     y = d.values
@@ -359,21 +821,42 @@ def spikes_flagSlidingZscore(
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def spikes_flagMad(data, field, flagger, window, z=3.5, **kwargs):
-    """ The function represents an implementation of the modyfied Z-score outlier detection method, as introduced here:
 
-    [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
+    """
 
-    The test needs the input data to be harmonized to an equidustant time stamp series (=have frequencie))
+    The function represents an implementation of the modified Z-score outlier detection method.
+
+    See reference [1] for more details on the algorithm.
+
+    Note that the test needs the input data to be sampled regularly (fixed sampling rate).
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    window : str
+        An offset string, denoting the size of the window the modified Z-scores are calculated in.
+    z : float, default 3.5
+        The value the Z-score is tested against. Defaulting to 3.5 (recommendation of [1]).
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    References
+    ----------
+    [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
 
-    :param data:        The pandas dataframe holding the data-to-be flagged.
-                        Data must be indexed by a datetime series and be harmonized onto a
-                        time raster with seconds precision.
-    :param field:       Fieldname of the Soil moisture measurements field in data.
-    :param flagger:     A flagger - object. (saqc.flagger.X)
-    :param winsz:      Offset String. Denoting the windows size that that th "Z-scored" values have to lie in.
-    :param z:           Float. The value the Z-score is tested against. Defaulting to 3.5 (Recommendation of [1])
     """
     d = data[field].copy().mask(flagger.isFlagged(field))
     median = d.rolling(window=window, closed="both").median()
@@ -397,93 +880,112 @@ def spikes_flagMad(data, field, flagger, window, z=3.5, **kwargs):
     return data, flagger
 
 
-@register()
-def spikes_flagBasic(data, field, flagger, thresh=7, tolerance=0, window="15min", **kwargs):
+@register(masking='field')
+def spikes_flagBasic(data, field, flagger, thresh, tolerance, window, numba_kickin=200000, **kwargs):
     """
     A basic outlier test that is designed to work for harmonized and not harmonized data.
 
-    Values x(n), x(n+1), .... , x(n+k) of a timeseries x are considered spikes, if
+    The test classifies values/value courses as outliers by detecting not only a rise in value, but also
+    checking for a return to the initial value level.
+
+    Values :math:`x_n, x_{n+1}, .... , x_{n+k}` of a timeseries :math:`x` with associated timestamps
+    :math:`t_n, t_{n+1}, .... , t_{n+k}` are considered spikes, if
 
-    (1) |x(n-1) - x(n + s)| > "thresh", for all s in [0,1,2,...,k]
+    1. :math:`|x_{n-1} - x_{n + s}| >` `thresh`, for all :math:`s \\in [0,1,2,...,k]`
 
-    (2) |x(n-1) - x(n+k+1)| < tol
+    2. :math:`|x_{n-1} - x_{n+k+1}| <` `tolerance`
 
-    (3) |x(n-1).index - x(n+k+1).index| < length
+    3. :math:`|t_{n-1} - t_{n+k+1}| <` `window`
 
     Note, that this definition of a "spike" not only includes one-value outliers, but also plateau-ish value courses.
 
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    thresh : float
+        Minimum difference between two values, to consider the latter one as a spike. See condition (1).
+    tolerance : float
+        Maximum difference between pre-spike and post-spike values. See condition (2).
+    window : str
+        Maximum length of "spiky" value courses. See condition (3).
+    numba_kickin : int, default 200000
+        When more than `numba_kickin` incidents of potential spikes are detected, the pandas.rolling part of the
+        computation gets "jitted" with numba.
+        The default value has proven to be around the break-even point between "jit-boost" and "jit-costs".
+
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    References
+    ----------
     The implementation is a time-window based version of an outlier test from the UFZ Python library,
     that can be found here:
 
     https://git.ufz.de/chs/python/blob/master/ufz/level1/spike.py
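+
+    Examples
+    --------
+    A small, self-contained numerical illustration of the three conditions (values and parameters are made up):
+
+    >>> import pandas as pd
+    >>> idx = pd.date_range("2020-01-01", periods=6, freq="4min")
+    >>> x = pd.Series([10.0, 10.1, 18.0, 18.2, 10.05, 10.1], index=idx)
+    >>> # with thresh=7, tolerance=0.2 and window='15min', the values at positions 2 and 3 form a spike plateau:
+    >>> # condition (1): |x[1] - x[2]| = 7.9 > 7  and  |x[1] - x[3]| = 8.1 > 7
+    >>> # condition (2): |x[1] - x[4]| = 0.05 < 0.2
+    >>> # condition (3): t[4] - t[1] = 12min < 15min
+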
 
-
-    :param data:    Pandas-like. The pandas dataframe holding the data-to-be flagged.
-    :param field:   String. Fieldname of the data column to be tested.
-    :param flagger: saqc.flagger. A flagger - object.
-    :param thresh:  Float. The lower bound for a value jump, to be considered as initialising a spike.
-                    (see condition (1) in function description).
-    :param tolerance: Float. Tolerance value.  (see condition (2) in function description)
-    :param window_size:  Offset String. The time span in wich the values of a spikey course have to return to the normal
-                    value course (see condition (3) in function description).
-    :return:
     """
 
     dataseries = data[field].dropna()
     # get all the entries preceding a significant jump
-    pre_jumps = dataseries.diff(periods=-1).abs() > thresh
-    pre_jumps = pre_jumps[pre_jumps]
-    if pre_jumps.empty:
+    post_jumps = dataseries.diff().abs() > thresh
+    post_jumps = post_jumps[post_jumps]
+    if post_jumps.empty:
         return data, flagger
     # get all the entries preceeding a significant jump and its successors within "length" range
-    to_roll = pre_jumps.reindex(dataseries.index, method="ffill", tolerance=window, fill_value=False).dropna()
+    to_roll = post_jumps.reindex(dataseries.index, method="bfill", tolerance=window, fill_value=False).dropna()
 
     # define spike testing function to roll with:
-    def spike_tester(chunk, pre_jumps_index, thresh, tol):
-        if not chunk.index[-1] in pre_jumps_index:
+    def spikeTester(chunk, thresh=thresh, tol=tolerance):
+        # signum change!!!
+        chunk_stair = (np.sign(chunk[-2] - chunk[-1])*(chunk - chunk[-1]) < thresh)[::-1].cumsum()
+        initial = np.searchsorted(chunk_stair, 2)
+        if initial == len(chunk):
             return 0
+        if np.abs(chunk[- initial - 1] - chunk[-1]) < tol:
+            return initial - 1
         else:
-            # signum change!!!
-            chunk_stair = (abs(chunk - chunk[-1]) < thresh)[::-1].cumsum()
-            first_return = chunk_stair[(chunk_stair == 2)]
-            if first_return.sum() == 0:
-                return 0
-            if abs(chunk[first_return.index[0]] - chunk[-1]) < tol:
-                return (chunk_stair == 1).sum() - 1
-            else:
-                return 0
+            return 0
 
-    # since .rolling does neither support windows, defined by left starting points, nor rolling over monotonically
-    # decreasing indices, we have to trick the method by inverting the timeseries and transforming the resulting index
-    # to pseudo-increase.
     to_roll = dataseries[to_roll]
-    original_index = to_roll.index
-    to_roll = to_roll[::-1]
-    pre_jump_reversed_index = to_roll.index[0] - pre_jumps.index
-    to_roll.index = to_roll.index[0] - to_roll.index
-
-    # now lets roll:
-    to_roll = (
-        to_roll.rolling(window, closed="both")
-        .apply(spike_tester, args=(pre_jump_reversed_index, thresh, tolerance), raw=False)
-        .astype(int)
-    )
-    # reconstruct original index and sequence
-    to_roll = to_roll[::-1]
-    to_roll.index = original_index
-    to_write = to_roll[to_roll != 0]
-    to_flag = pd.Index([])
-    # here comes a loop...):
-    for row in to_write.iteritems():
-        loc = to_roll.index.get_loc(row[0])
-        to_flag = to_flag.append(to_roll.iloc[loc + 1 : loc + row[1] + 1].index)
-
-    to_flag = to_flag.drop_duplicates(keep="first")
-    flagger = flagger.setFlags(field, to_flag, **kwargs)
+    roll_mask = pd.Series(False, index=to_roll.index)
+    roll_mask[post_jumps.index] = True
+
+    roller = customRoller(to_roll, window=window, mask=roll_mask, min_periods=2, closed='both')
+    engine = None if roll_mask.sum() < numba_kickin else 'numba'
+    result = roller.apply(spikeTester, raw=True, engine=engine)
+
+    # correct the result: only those values define plateaus whose left starting point
+    # does not itself belong to another plateau:
+    def calcResult(result):
+        var_num = result.shape[0]
+        flag_scopes = np.zeros(var_num, dtype=bool)
+        for k in range(var_num):
+            if result[k] > 0:
+                k_r = int(result[k])
+                # validity check: the plateau's start isn't another plateau's end:
+                if not flag_scopes[k - k_r - 1]:
+                    flag_scopes[(k - k_r):k] = True
+        return pd.Series(flag_scopes, index=result.index)
+
+    cresult = calcResult(result)
+    cresult = cresult[cresult].index
+    flagger = flagger.setFlags(field, cresult, **kwargs)
     return data, flagger
 
 
-@register()
+@register(masking='field')
 def spikes_flagSpektrumBased(
     data,
     field,
@@ -498,69 +1000,89 @@ def spikes_flagSpektrumBased(
     **kwargs,
 ):
     """
-    This Function is a generalization of the Spectrum based Spike flagging mechanism as presented in:
-
-    Dorigo,W,.... Global Automated Quality Control of In Situ Soil Moisture Data from the international
-    Soil Moisture Network. 2013. Vadoze Zone J. doi:10.2136/vzj2012.0097.
 
     Function detects and flags spikes in input data series by evaluating its derivatives and applying some
     conditions to it. A datapoint is considered a spike, if:
 
     (1) the quotient to its preceeding datapoint exceeds a certain bound
-    (controlled by param "raise_factor")
+    (controlled by param `raise_factor`)
     (2) the quotient of the datas second derivate at the preceeding and subsequent timestamps is close enough to 1.
-    (controlled by param "deriv_factor")
+    (controlled by param `deriv_factor`)
     (3) the surrounding data is not too noisy. (Coefficient of Variation[+/- noise_window] < 1)
-    (controlled by param "noise_thresh")
-
-    Some things you should be conscious about when applying this test:
-
-       NOTE1: You should run less complex tests, especially range-tests, or absolute spike tests previously to this one,
-       since the spike check for any potential, unflagged spike, is relatively costly
-       (1 x smoothing + 2 x deviating + 2 x condition application).
-
-       NOTE2: Due to inconsistency in the paper that provided the concept of this test [paper:], its not really clear
-       weather to use the coefficient of variance or the relative variance for noise testing.
-       Since the relative variance was explicitly denoted in the formulas, the function defaults to relative variance,
-       but can be switched to coefficient of variance, by assignment to parameter "noise statistic".
-
-
-       :param data:                 The pandas dataframe holding the data-to-be flagged.
-                                    Data must be indexed by a datetime series and be harmonized onto a
-                                    time raster with seconds precision.
-       :param field:                Fieldname of the Soil moisture measurements field in data.
-       :param flagger:              A flagger - object. (saqc.flagger.X)
-       :param smooth_window:        Offset string. Size of the filter window, used to calculate the derivatives.
-                                    (relevant only, if: diff_method='savgol')
-       :param smooth_poly_deg:      Integer. Polynomial order, used for smoothing with savitzk golay filter.
-                                    (relevant only, if: diff_method='savgol')
-       :param raise_factor:         A float, determinating the bound, the quotient of two consecutive values
-                                    has to exceed, to be regarded as potentially spike. A value of 0.x will
-                                    trigger the spike test for value y_t, if:
-                                    (y_t)/(y_t-1) > 1 + x or:
-                                    (y_t)/(y_t-1) < 1 - x.
-       :param deriv_factor:         A float, determining the interval, the quotient of the datas second derivate
-                                    around a potential spike has to be part of, to trigger spike flagging for
-                                    this value. A datapoint y_t will pass this spike condition if,
-                                    for deriv_factor = 0.x, and the second derivative y'' of y, the condition:
-                                    1 - x < abs((y''_t-1)/(y''_t+1)) < 1 + x
-                                    holds
-       :param noise_thresh:         A float, determining the bound, the data noisy-ness around a potential spike
-                                    must not exceed, in order to guarantee a justifyed judgement:
-                                    Therefor the coefficient selected by parameter noise_func (COVA),
-                                    of all values within t +/- param "noise_window",
-                                    but excluding the point y_t itself, is evaluated and tested
-                                    for: COVA < noise_thresh.
-       :param noise_window:         Offset string, determining the size of the window, the coefficient of
-                                    variation is calculated of, to determine data noisy-ness around a potential
-                                    spike.
-                                    The potential spike y_t will be centered in a window of expansion:
-                                    [y_t - noise_window_size, y_t + noise_window_size].
-       :param noise_func:           String. Determines, wheather to use
-                                    "relative variance" or "coefficient of variation" to check against the noise
-                                    barrier.
-                                    'CoVar' -> "Coefficient of variation"
-                                    'rVar'  -> "relative Variance"
+    (controlled by param `noise_thresh`)
+
+    Note that the data-to-be-flagged is supposed to be sampled at an equidistant frequency grid.
+
+    Note that the derivative is calculated after applying a Savitzky-Golay filter to the data.
+
+    Parameters
+    ----------
+
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    raise_factor : float, default 0.15
+        Minimum relative value difference between two values to consider the latter as a spike candidate.
+        See condition (1) (or reference [2]).
+    deriv_factor : float, default 0.2
+        See condition (2) (or reference [2]).
+    noise_func : {'CoVar', 'rVar'}, default 'CoVar'
+        Function to calculate noisiness of the data surrounding potential spikes.
+
+        * ``'CoVar'``: Coefficient of Variation
+        * ``'rVar'``: Relative Variance
+
+    noise_window : str, default '12h'
+        An offset string that determines the range of the time window of the "surrounding" data of a potential spike.
+        See condition (3) (or reference [2]).
+    noise_thresh : float, default 1
+        Upper threshold for noisiness of data surrounding potential spikes. See condition (3) (or reference [2]).
+    smooth_window : {None, str}, default None
+        Size of the smoothing window of the Savitzky-Golay filter.
+        The default value ``None`` results in a window of two times the sampling rate (i.e. containing three values).
+    smooth_poly_deg : int, default 2
+        Degree of the polynomial used for fitting with the Savitzky-Golay filter.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    References
+    ----------
+    This function is a generalization of the spectrum-based spike flagging mechanism presented in:
+
+    [1] Dorigo, W. et al: Global Automated Quality Control of In Situ Soil Moisture
+        Data from the international Soil Moisture Network. 2013. Vadose Zone J.
+        doi:10.2136/vzj2012.0097.
+
+    Notes
+    -----
+    A value is flagged a spike, if:
+
+    * The quotient to its preceding data point exceeds a certain bound:
+
+      * :math:`|\\frac{x_k}{x_{k-1}}| > 1 +` ``raise_factor``, or
+      * :math:`|\\frac{x_k}{x_{k-1}}| < 1 -` ``raise_factor``
+
+    * The quotient of the second derivative :math:`x''`, at the preceding
+      and subsequent timestamps is close enough to 1:
+
+      * :math:`|\\frac{x''_{k-1}}{x''_{k+1}} | > 1 -` ``deriv_factor``, and
+      * :math:`|\\frac{x''_{k-1}}{x''_{k+1}} | < 1 +` ``deriv_factor``
+
+    * The dataset :math:`X = x_i, ..., x_{k-1}, x_{k+1}, ..., x_j`, with
+      :math:`|t_{k-1} - t_i| = |t_j - t_{k+1}| =` ``noise_window`` fulfills the
+      following condition:
+
+      * ``noise_func``:math:`(X) <` ``noise_thresh``
+
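+    Examples
+    --------
+    A rough, self-contained sketch of the second condition, using ``scipy.signal.savgol_filter`` here to obtain the
+    second derivative (values and window are illustrative; the function below derives the smoothing window from the
+    sampling rate if `smooth_window` is ``None``):
+
+    >>> import numpy as np
+    >>> from scipy.signal import savgol_filter
+    >>> y = np.array([1.0, 1.02, 0.98, 1.6, 1.01, 0.99, 1.0])
+    >>> d2 = savgol_filter(y, window_length=3, polyorder=2, deriv=2)
+    >>> k = 3                                        # index of the suspected spike
+    >>> ratio = np.abs(d2[k - 1] / d2[k + 1])
+    >>> cond2 = 1 - 0.2 < ratio < 1 + 0.2            # True for this roughly symmetric spike (deriv_factor=0.2)
+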
     """
 
     dataseries, data_rate = retrieveTrustworthyOriginal(data, field, flagger)
@@ -619,3 +1141,91 @@ def spikes_flagSpektrumBased(
 
     flagger = flagger.setFlags(field, spikes.index, **kwargs)
     return data, flagger
+
+
+@register(masking='field')
+def spikes_flagGrubbs(data, field, flagger, winsz, alpha=0.05, min_periods=8, check_lagged=False, **kwargs):
+    """
+    The function flags values that are regarded as outliers by the Grubbs test.
+
+    See reference [1] for more information on the Grubbs test's definition.
+
+    The (two-sided) test gets applied to data chunks of size `winsz`. The test's application is
+    iterated on each data chunk under test, until no more outliers are detected in that chunk.
+
+    Note that the test performs poorly on small data chunks (resulting in heavy overflagging).
+    Therefore, you should select `winsz` so that every window contains at least 8 values and also
+    adjust the `min_periods` value accordingly.
+
+    Note that the data to be tested by the Grubbs test are expected to be distributed "normalish".
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, holding the data-to-be-flagged.
+    flagger : saqc.flagger.BaseFlagger
+        A flagger object, holding flags and additional information related to `data`.
+    winsz : {int, str}
+        The size of the window you want to use for outlier testing. If an integer is passed, the size
+        refers to the number of periods of every testing window. If a string is passed, it has to be an offset string,
+        and will denote the total temporal extension of every window.
+    alpha : float, default 0.05
+        The level of significance, the Grubbs test is to be performed at (between 0 and 1).
+    min_periods : int, default 8
+        The minimum number of values that have to be present in an interval under test, for a Grubbs test result to be
+        accepted. Only makes sense in case `winsz` is an offset string.
+    check_lagged : bool, default False
+        If True, every value gets checked twice for being an outlier: once in the initial partitioning and one more
+        time in a partitioning that is lagged by half the window size (`winsz`/2). Recommended for avoiding false
+        positives at the window edges. Only available when `winsz` is an integer.
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger.BaseFlagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flags values may have changed relative to the flagger input.
+
+    References
+    ----------
+    Introduction to the Grubbs test:
+
+    [1] https://en.wikipedia.org/wiki/Grubbs%27s_test_for_outliers
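+
+    Examples
+    --------
+    A sketch of the per-chunk test (it assumes the ``outliers`` package, which provides the ``smirnov_grubbs``
+    module used by the implementation below, is installed; the values are made up):
+
+    >>> import numpy as np
+    >>> from outliers import smirnov_grubbs
+    >>> chunk = np.array([8.1, 8.0, 7.9, 8.2, 8.05, 14.0, 7.95, 8.1])
+    >>> bad_positions = smirnov_grubbs.two_sided_test_indices(chunk, alpha=0.05)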
+
+    """
+
+    data = data.copy()
+    datcol = data[field]
+    to_group = pd.DataFrame(data={"ts": datcol.index, "data": datcol})
+    to_flag = pd.Series(False, index=datcol.index)
+    if isinstance(winsz, int):
+        # period number defined test intervals
+        grouper_series = pd.Series(data=np.arange(0, datcol.shape[0]), index=datcol.index)
+        grouper_series_lagged = grouper_series + (winsz / 2)
+        grouper_series = grouper_series.transform(lambda x: int(np.floor(x / winsz)))
+        grouper_series_lagged = grouper_series_lagged.transform(lambda x: int(np.floor(x / winsz)))
+        partitions = to_group.groupby(grouper_series)
+        partitions_lagged = to_group.groupby(grouper_series_lagged)
+    else:
+        # offset defined test intervals:
+        partitions = to_group.groupby(pd.Grouper(freq=winsz))
+    for _, partition in partitions:
+        if partition.shape[0] > min_periods:
+            detected = smirnov_grubbs.two_sided_test_indices(partition["data"].values, alpha=alpha)
+            detected = partition["ts"].iloc[detected]
+            to_flag[detected.index] = True
+
+    if check_lagged & isinstance(winsz, int):
+        to_flag_lagged = pd.Series(False, index=datcol.index)
+        for _, partition in partitions_lagged:
+            if partition.shape[0] > min_periods:
+                detected = smirnov_grubbs.two_sided_test_indices(partition["data"].values, alpha=alpha)
+                detected = partition["ts"].iloc[detected]
+                to_flag_lagged[detected.index] = True
+        to_flag = to_flag & to_flag_lagged
+
+    flagger = flagger.setFlags(field, loc=to_flag, **kwargs)
+    return data, flagger
diff --git a/saqc/lib/plotting.py b/saqc/lib/plotting.py
index ca0629b25b483cd007b6e14f5432af4bf11d0dcd..0a9ac3066895e89575a84232dbef171d85ca12f4 100644
--- a/saqc/lib/plotting.py
+++ b/saqc/lib/plotting.py
@@ -5,206 +5,745 @@ import logging
 
 import numpy as np
 import pandas as pd
-
+import dios
+import matplotlib.pyplot as plt
+from typing import List, Dict, Optional
 from saqc.flagger import BaseFlagger
 
 
-logger = logging.getLogger("SaQC")
+def __importHelper():
+    import matplotlib as mpl
+    from pandas.plotting import register_matplotlib_converters
+
+    # needed for datetime conversion
+    register_matplotlib_converters()
+
+    if not _interactive:
+        # Import plot libs without interactivity, if not needed.
+        # This ensures that we can produce a plot.png even if
+        # tkinter is not installed, e.g. if one wants to run this
+        # on machines without an X-server, i.e. without a graphical interface.
+        mpl.use("Agg")
 
-__plotvars = []
 
-_colors = {
-    "unflagged": "silver",
-    "good": "seagreen",
-    "bad": "firebrick",
-    "suspicious": "gold",
+# global switches - treat as read-only
+_interactive = True
+_figsize = (16, 9)
+_layout_data_to_table_ratio = [5, 1]
+_show_info_table = True
+
+# order is important, because
+# latter may overwrite former
+_cols = [
+    # data - not mutually distinct
+    "data",
+    "data-nans",
+    # flags - mutually distinct
+    "unflagged",
+    "good",
+    "suspicious",
+    "bad",
+    "flag-nans",  # currently ignored
+    # special flags - mutually distinct
+    "unchanged",
+    "changed",
+]
+
+_plotstyle: Dict[str, dict] = {
+    # flags
+    "unflagged": dict(marker=".", ls="none", c="silver", label="UNFLAGGED"),
+    "good": dict(marker=".", fillstyle="none", ls="none", c="seagreen", label="GOOD"),
+    "bad": dict(marker=".", fillstyle="none", ls="none", c="firebrick", label="BAD"),
+    "suspicious": dict(marker=".", fillstyle="none", ls="none", c="gold", label="SUSPICIOUS"),
+    "old-flags": dict(marker=".", fillstyle="none", ls="none", c="black", label="old-flags"),
+    # data
+    "data": dict(c="silver", ls="-", label="data"),
+    "data-nans": dict(marker=".", fillstyle="none", ls="none", c="lightsteelblue", label="NaN"),
 }
 
-_figsize = (10, 4)
+
+def _show():
+    if _interactive:
+        plt.show()
 
 
-def plotAllHook(data, flagger, plot_nans=False):
-    if __plotvars:
-        _plot(data, flagger, True, __plotvars, plot_nans=plot_nans)
+def plotAllHook(
+    data, flagger, targets=None, show_info_table: bool = True, annotations: Optional[dios.DictOfSeries] = None,
+):
+    __importHelper()
+    targets = flagger.flags.columns if targets is None else targets
+    _plotMultipleVariables(
+        data_old=None,
+        flagger_old=None,
+        data_new=data,
+        flagger_new=flagger,
+        targets=targets,
+        show_info_table=show_info_table,
+        annotations=annotations,
+    )
+    plt.tight_layout()
+    _show()
 
 
 def plotHook(
-    data: pd.DataFrame,
-    flagger_old: BaseFlagger,
+    data_old: Optional[dios.DictOfSeries],
+    data_new: dios.DictOfSeries,
+    flagger_old: Optional[BaseFlagger],
     flagger_new: BaseFlagger,
-    varname: str,
-    flag_test: str,
-    plot_nans: bool = False,
+    sources: List[str],
+    targets: List[str],
+    plot_name: str = "",
+    annotations: Optional[dios.DictOfSeries] = None,
 ):
+    assert len(targets) > 0
+    __importHelper()
+
+    args = dict(
+        data_old=data_old,
+        data_new=data_new,
+        flagger_old=flagger_old,
+        flagger_new=flagger_new,
+        targets=targets,
+        show_info_table=_show_info_table,
+        annotations=annotations,
+    )
 
-    # if data was harmonized, nans may occur in flags
-    harm_nans = flagger_new.getFlags(varname).isna() | flagger_old.getFlags(varname).isna()
-
-    # clean data from harmonisation nans
-    if harm_nans.any():
-        data = data.loc[~harm_nans, varname]
-
-    if isinstance(data, pd.Series):
-        data = data.to_frame()
-
-    # clean flags from harmonisation nans
-    try:
-        flagger_old = flagger_old.getFlagger(varname, loc=data.index)
-    except ValueError:
-        # this might fail if someone want to plot the harmonisation itself,
-        # but then we just plot the 'new' flags, ignoring the diff to the old ones
-        mask = True
+    if len(targets) == 1:
+        _plotSingleVariable(**args, sources=sources, show_reference_data=True, plot_name=plot_name)
     else:
-        flagger_new = flagger_new.getFlagger(varname, loc=data.index)
-
-        # cannot use getFlags here, because if a flag was set (e.g. with force) the
-        # flag may be the same, but any additional row (e.g. comment-field) would differ
-        flags_old = flagger_old._flags[varname]
-        flags_new = flagger_new._flags[varname]
-        if len(flags_old) != len(flags_new):
-            # NOTE:
-            # we are just getting the result of an
-            # harmonization, nothing to see here
-            return
-
-        mask = flags_old != flags_new
-        if isinstance(mask, pd.DataFrame):
-            mask = mask.any(axis=1)
+        _plotMultipleVariables(**args)
 
-    __plotvars.append(varname)
-    _plot(data, flagger_new, mask, varname, title=flag_test, plot_nans=plot_nans)
+    _show()
 
 
-def _plot(
-    data, flagger, flagmask, varname, interactive_backend=True, title="Data Plot", plot_nans=True,
+def _plotMultipleVariables(
+    data_old: Optional[dios.DictOfSeries],
+    data_new: dios.DictOfSeries,
+    flagger_old: Optional[BaseFlagger],
+    flagger_new: BaseFlagger,
+    targets: List[str],
+    show_info_table: bool = True,
+    annotations=None,
 ):
+    """
+    Plot data and flags for multiple target-variables.
+
+    For each variable specified in targets an own plot is generated.
+    If specified, a table with quantity information is shown on the
+    right of each plot. If more than 4 vars are specified, the plots
+    are combined in groups of four and shown in a single window (figure) each.
+    Nevertheless, the x-axes of all figures are joined together.
+    This still allows zooming or scrolling all plots simultaneously.
+
+    Parameters
+    ----------
+    data_old
+        data from the good old times
+    data_new
+        current state of data
+    flagger_old
+        flagger that holds flags corresponding to data_old
+    flagger_new
+        flagger that holds flags corresponding to data_new
+    targets
+        a list of strings, each indicating a column in flagger_new.flags
+    show_info_table
+        Whether to show an info-table on the right of reference-data and data
+
+    Returns
+    -------
+    None
+    """
+    show_tab = show_info_table
+    tlen = len(targets)
+    tgen = (t for t in targets)
+
+    nfig, ncols_rest = divmod(tlen, 4)
+    ncols = [4] * nfig
+    if ncols_rest:
+        nfig += 1
+        ncols += [ncols_rest]
+
+    gs_kw = dict(width_ratios=_layout_data_to_table_ratio)
+    layout = dict(
+        figsize=_figsize,
+        sharex=True,
+        tight_layout=True,
+        squeeze=False,
+        gridspec_kw=gs_kw if show_tab else {}
+    )
 
-    # todo: try catch warn (once) return
-    # only import if plotting is requested by the user
-    import matplotlib as mpl
-    import matplotlib.pyplot as plt
-    from pandas.plotting import register_matplotlib_converters
-
-    # needed for datetime conversion
-    register_matplotlib_converters()
-
-    if not interactive_backend:
-        # Import plot libs without interactivity, if not needed. This ensures that this can
-        # produce an plot.png even if tkinter is not installed. E.g. if one want to run this
-        # on machines without X-Server aka. graphic interface.
-        mpl.use("Agg")
-    else:
-        mpl.use("TkAgg")
+    # plot max. 4 plots per figure
+    allaxs = []
+    for n in range(nfig):
 
-    if not isinstance(varname, (list, set)):
-        varname = [varname]
-    varname = set(varname)
+        fig, axs = plt.subplots(nrows=ncols[n], ncols=2 if show_tab else 1, **layout)
 
-    # filter out variables to which no data is associated (e.g. freshly assigned vars)
-    tmp = []
-    for var in varname:
-        if var in data.columns:
-            tmp.append(var)
-        else:
-            logger.warning(f"Cannot plot column '{var}', because it is not present in data.")
-    if not tmp:
-        return
-    varnames = tmp
-
-    plots = len(varnames)
-    if plots > 1:
-        fig, axes = plt.subplots(plots, 1, sharex=True, figsize=_figsize)
-        axes[0].set_title(title)
-        for i, v in enumerate(varnames):
-            _plotByQualityFlag(data, v, flagger, flagmask, axes[i], plot_nans)
-    else:
-        fig, ax = plt.subplots(figsize=_figsize)
-        plt.title(title)
-        _plotByQualityFlag(data, varnames.pop(), flagger, flagmask, ax, plot_nans)
+        for ax in axs:
+            var = next(tgen)
+            tar, _ = _getDataFromVar(data_old, data_new, flagger_old, flagger_new, var)
 
-    # dummy plot for the label `missing` see _plotVline for more info
-    if plot_nans:
-        plt.plot([], [], ":", color="silver", label="missing data")
+            if show_tab:
+                plot_ax, tab_ax = ax
+                _plotInfoTable(tab_ax, tar, _plotstyle, len(tar["data"]))
+            else:
+                plot_ax = ax[0]
 
-    plt.xlabel("time")
-    plt.legend()
+            _plotFromDicts(plot_ax, tar, _plotstyle)
 
-    if interactive_backend:
-        plt.show()
+            if annotations is not None and var in annotations:
+                _annotate(plot_ax, tar, annotations[var])
 
+            plot_ax.set_title(str(var))
+            allaxs.append(plot_ax)
 
-def _plotByQualityFlag(data, varname, flagger, flagmask, ax, plot_nans):
-    ax.set_ylabel(varname)
+    # we join all x-axes together. Surprisingly,
+    # this also works between different figures :D
+    ax0 = allaxs[0]
+    for ax in allaxs:
+        ax.get_shared_x_axes().join(ax, ax0)
+        ax.autoscale()
 
-    if flagmask is True:
-        flagmask = pd.Series(data=np.ones(len(data), dtype=bool), index=data.index)
 
-    data = data[varname]
-    if not plot_nans:
-        data = data.dropna()
-        flagmask = flagmask.loc[data.index]
+def simplePlot(
+    data: dios.DictOfSeries,
+    flagger: BaseFlagger,
+    field: str,
+    plot_name=None,
+    show_info_table: bool = True,
+    annotations=None,
+):
+    __importHelper()
+    _plotSingleVariable(
+        data_old=None,
+        data_new=data,
+        flagger_old=None,
+        flagger_new=flagger,
+        sources=[],
+        targets=[field],
+        show_reference_data=False,
+        show_info_table=show_info_table,
+        plot_name=plot_name or str(field),
+        annotations=annotations,
+    )
+    _show()
 
-    flagger = flagger.getFlagger(varname, loc=data.index)
 
-    # base plot: show all(!) data
-    ax.plot(
-        data,
-        # NOTE: no lines to flagged points
-        # data.index, np.ma.array(data.values, mask=flagger.isFlagged(varname).values),
-        "-",
-        color="silver",
-        label="data",
+def _plotSingleVariable(
+    data_old: dios.DictOfSeries,
+    data_new: dios.DictOfSeries,
+    flagger_old: BaseFlagger,
+    flagger_new: BaseFlagger,
+    sources: List[str],
+    targets: List[str],
+    show_reference_data=True,
+    show_info_table: bool = True,
+    plot_name="current data",
+    annotations=None,
+):
+    """
+    Plot data and flags for a single target-variable.
+
+    The resulting plot (the whole thing) can have up to 3 areas.
+
+    - The first **optional upper area** shows up to 4 sources, if given.
+    - The **optional middle area** shows the reference plot, i.e. the
+      target variable in the state before the last test was run.
+      If specified, a table with quantity information is shown on the
+      right.
+    - The last **non-optional lower area** shows the current data with
+      its flags. If specified, a table with quantity information is shown
+      on the right.
+
+    Parameters
+    ----------
+    data_old
+        data from the good old times
+    data_new
+        current state of data
+    flagger_old
+        flagger that hold flags corresponding to data_old
+    flagger_new
+        flagger that hold flags corresponding to data_new
+    sources
+        all sources that were used to derive the new state from the old
+    targets
+        a list holding a single(!) string that indicates a column in flagger_new.flags
+    show_reference_data
+        Show reference (aka. old) data, or not
+    show_info_table
+        Whether to show an info-table on the right of reference-data and data
+    plot_name
+        The name of the data-plot
+
+    Returns
+    -------
+    None
+
+    """
+    assert len(targets) == 1
+    var = targets[0]
+    slen = len(sources)
+
+    curr, ref = _getDataFromVar(data_old, data_new, flagger_old, flagger_new, var)
+
+    show_ref = show_reference_data and ref is not None
+    show_tab = show_info_table
+    show_srces = slen > 0
+
+    nrows = 1
+    if show_ref:
+        nrows += 1
+    if show_srces:
+        nrows += 1
+        if slen > 4:
+            # possible future-fix: make own figure(s) with shared-x-axis for
+            # all sources. axis can be shared between figures !
+            logging.warning(f"plotting: only first 4 of {slen} sources are shown.")
+            slen = 4
+
+    fig = plt.figure(constrained_layout=True, figsize=_figsize,)
+    outer_gs = fig.add_gridspec(ncols=1, nrows=nrows)
+    gs_count = 0
+    allaxs = []
+
+    # plot srces
+    if show_srces:
+        srcs_gs_arr = outer_gs[gs_count].subgridspec(ncols=slen, nrows=1)
+        gs_count += 1
+        # NOTE: it is implicitly assumed that all sources are available before the test run.
+        # If this ever fails, one could use data instead of ref, but I can't imagine
+        # any case where this could happen -- bert.palm@ufz.de
+        for i, gs in enumerate(srcs_gs_arr):
+            ax = fig.add_subplot(gs)
+            v = sources[i]
+            _, src = _getDataFromVar(data_old, data_new, flagger_old, flagger_new, v)
+            _plotFromDicts(ax, src, _plotstyle)
+            ax.set_title(f"src{i + 1}: {v}")
+            allaxs.append(ax)
+
+    # plot reference data (the data as it was before the test)
+    if ref and show_ref:
+        ax = _plotDataWithTable(fig, outer_gs[gs_count], ref, show_tab=show_tab)
+        ax.set_title(f"Reference data (before the test)")
+        allaxs.append(ax)
+        gs_count += 1
+
+    # plot data
+    ax = _plotDataWithTable(fig, outer_gs[gs_count], curr, show_tab=show_tab)
+    ax.set_title(f"{plot_name}")
+    # also share y-axis with ref
+    if ref and show_ref:
+        ax.get_shared_y_axes().join(ax, allaxs[-1])
+    allaxs.append(ax)
+    gs_count += 1
+
+    if annotations is not None and var in annotations:
+        _annotate(ax, curr, annotations[var])
+
+    # share all x-axis
+    ax0 = allaxs[0]
+    for ax in allaxs:
+        ax.get_shared_x_axes().join(ax, ax0)
+        ax.autoscale()
+
+    # use all space
+    outer_gs.tight_layout(fig)
+
+
+def _getDataFromVar(
+    data_old: dios.DictOfSeries,
+    data_new: dios.DictOfSeries,
+    flagger_old: BaseFlagger,
+    flagger_new: BaseFlagger,
+    varname: str,
+):
+    """
+    Extract flag and data information and store them in separate pd.Series.
+
+    This is a helper that extracts all relevant information from the flagger
+    and the data and prepares it, so it can be plotted easily. Each piece of
+    information is stored in a separate pd.Series, whose index is always a
+    subset of the `data`-series index (which is always present). All info is
+    also projected to the y-coordinate of the data, so plotting all info in
+    the same plot results in a data plot with visible flags at the actual
+    positions.
+
+    Hard constraints:
+     0. var needs to be present in ``flagger_new.flags``
+     1. iff var is present in data_xxx, then var needs to
+        be present in flags_xxx (``flagger_xxx.flags``)
+
+    Conditions:
+     2. if var is present in flags_new, but not in data_new, dummy data is created
+     3. if var is present in data_old, (see also 1.) reference info is generated
+
+
+    Returns
+    -------
+    dict, {dict or None}
+        Returns two dictionaries. The first holds the info corresponding
+        to the actual data and flags (from flagger_new), the second holds
+        the info from the state before the last test run. The second is
+        ``None`` if condition 3. is not fulfilled.
+
+        Each dict has the following keys and holds pd.Series as values:
+
+        - 'data': all data (with nan's if present) [3]
+        - 'data-nans': nan's projected onto locations from the interpolated data
+        - 'unflagged': flags that indicate unflagged values [1][3]
+        - 'good':  flags that indicate good values [1][3]
+        - 'suspicious': flags that indicate suspicious values [1][3]
+        - 'bad': flags that indicate bad values [1][3]
+        - 'flag-nans': nan's in the flags [1][3]
+        - 'unchanged': flags that were kept unchanged during the last test [2]
+        - 'changed': flags that changed during the last test [2]
+
+        The series marked with [1] are pairwise distinct and sum up to all flags;
+        the same applies to [2]. The series marked with [3] can be empty, if the
+        info is not present. All info is projected to the data locations.
+    """
+    var = varname
+    assert var in flagger_new.flags
+    flags_new: pd.Series = flagger_new.flags[var]
+    plotdict = _getPlotdict(data_new, flags_new, flagger_new, var)
+    ref_plotdict = None
+
+    # prepare flags
+    if flagger_old is not None and var in flagger_old.flags:
+        flags_old = flagger_old.flags[var]
+        ref_plotdict = _getPlotdict(data_old, flags_old, flagger_old, var)
+
+        # check for flags-index changes:
+        # if we want to know the locations where the flags changed between old and new,
+        # the indexes must match; otherwise this could lead to wrongly placed flags,
+        # even though the calculations themselves would work.
+        if flags_old.index.equals(flags_new.index):
+            unchanged, changed = _splitOldAndNew(flags_old, flags_new)
+            unchanged, changed = _projectFlagsOntoData([unchanged, changed], plotdict["data"])
+            plotdict["unchanged"] = unchanged
+            plotdict["changed"] = changed
+
+            # calculate old flags and update flags like BADs,
+            # so that only freshly set values are shown
+            unflagged = plotdict["unflagged"]
+            diff = unchanged.index.difference(unflagged.index)
+            plotdict["old-flags"] = unchanged.loc[diff]
+            for field in ["bad", "suspicious", "good"]:
+                data = plotdict[field]
+                isect = changed.index & data.index
+                plotdict[field] = data.loc[isect]
+
+    return plotdict, ref_plotdict
+
+
+def _getPlotdict(data: dios.DictOfSeries, flags: pd.Series, flagger, var):
+    """
+    Collect info, put it into a dict and create dummy data if no data is present.
+
+    The collected info includes nan-data (projected to interpolated locations) and
+    flag info for BAD, SUSPICIOUS, GOOD, UNFLAGGED, and flag-nans. Except for the
+    flag-nans, all info is projected to the data locations. E.g. a BAD at position
+    N is projected to the data's x- and y-location at the very same position.
+
+    Parameters
+    ----------
+    data: dios.DictOfSeries
+        holds the data. If data holds a series under `var` it is used,
+        otherwise a dummy series is created from the flags.
+
+    flags: pd.Series
+        holds the flags.
+
+    flagger: saqc.Flagger
+        flagger object, used to get flag info via ``flagger.isFlagged()``
+
+    var: str
+        identifies the data series in ``data`` that corresponds to ``flags``
+
+    Returns
+    -------
+    dict
+        Returns a dictionary with the following keys, holding pd.Series as values:
+
+        - 'data': all data (with nan's if present)
+        - 'data-nans': nan's projected onto locations from the interpolated data
+        - 'unflagged': flags that indicate unflagged values [1]
+        - 'good':  flags that indicate good values [1]
+        - 'suspicious': flags that indicate suspicious values [1]
+        - 'bad': flags that indicate bad values [1]
+        - 'flag-nans': nan's in the flags [1]
+        - 'unchanged': flags that were kept unchanged during the last test [2]
+        - 'changed': flags that changed during the last test [2]
+
+        The flags marked with [1] are pairwise distinct and sum up to all flags;
+        the same applies to [2].
+
+    """
+    pdict = dios.DictOfSeries(columns=_cols)
+
+    # fill data
+    dat, nans = _getData(data, flags, var)
+    assert dat.index.equals(flags.index)
+    pdict["data"] = dat
+    pdict["data-nans"] = nans
+
+    # fill flags
+    tup = _splitByFlag(flags, flagger, var)
+    assert sum(map(len, tup)) == len(flags)
+    g, s, b, u, n = _projectFlagsOntoData(list(tup), dat)
+    pdict["good"] = g
+    pdict["suspicious"] = s
+    pdict["bad"] = b
+    pdict["unflagged"] = u
+    pdict["flag-nans"] = n
+
+    return pdict
+
+
+def _getData(data: dios.DictOfSeries, flags: pd.Series, var: str):
+    """
+    Get data from a dios or create dummy data.
+
+    A pd.Series is taken from `data` by `var`. If the
+    data does not hold such a series, a dummy series
+    without y-information is created from the flags.
+    If the series is present, the nan-locations are also
+    extracted and projected to interpolated locations
+    in the data.
+
+    Returns
+    -------
+    pd.Series, pd.Series
+        the data series and the nan-locations
+    """
+    if var in data:
+        dat = data[var]
+        nans = dat.interpolate().loc[dat.isna()]
+    else:
+        # create dummy data
+        dat = pd.Series(0, index=flags.index)
+        nans = pd.Series([], index=pd.DatetimeIndex([]))
+    return dat, nans
+
+
+def _splitOldAndNew(old: pd.Series, new: pd.Series):
+    """
+    Split `new` into two distinct series of equality and non-equality with `old`.
+
+    Returns
+    -------
+        Two distinct series: one with the locations where old and new data(!)
+        are equal (including nans at the same positions), the other with the
+        rest of the locations as seen from new. That rest marks locations that
+        are present(!) in new, but whose data differs from old.
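+
+    Examples
+    --------
+    A hypothetical illustration (all values are illustrative only, outputs omitted):
+
+    >>> import numpy as np, pandas as pd
+    >>> old = pd.Series([1, 2, np.nan])
+    >>> new = pd.Series([1, 3, np.nan, 5])
+    >>> unchanged, changed = _splitOldAndNew(old, new)
+
+    Here ``unchanged`` holds the entries of ``new`` at positions 0 and 2 (equal
+    value resp. NaN in both), while ``changed`` holds those at positions 1 and 3.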
+    """
+    idx = old.index & new.index
+    both_nan = old.loc[idx].isna() & new.loc[idx].isna()
+    mask = (new.loc[idx] == old[idx]) | both_nan
+    old_idx = mask[mask].index
+    new_idx = new.index.difference(old_idx)
+    return new.loc[old_idx], new.loc[new_idx]
+
+
+def _splitByFlag(flags: pd.Series, flagger, var: str):
+    """
+    Splits the flags into five distinct bins: GOOD, SUSPICIOUS, BAD, UNFLAGGED and NaNs.
+    """
+    n = flags.isna()
+    loc = flags.dropna().index
+    g = flagger.isFlagged(field=var, loc=loc, flag=flagger.GOOD, comparator="==")
+    b = flagger.isFlagged(field=var, loc=loc, flag=flagger.BAD, comparator="==")
+    u = flagger.isFlagged(field=var, loc=loc, flag=flagger.UNFLAGGED, comparator="==")
+    s = flagger.isFlagged(field=var, loc=loc, flag=flagger.BAD, comparator="<")
+    s = flagger.isFlagged(field=var, loc=loc, flag=flagger.GOOD, comparator=">") & s
+    return g[g], s[s], b[b], u[u], n[n]
+
+
+def _projectFlagsOntoData(idxlist: List[pd.Series], data: pd.Series):
+    """ Project flags to a xy-location, based on data. """
+    res = []
+    for item in idxlist:
+        res.append(data.loc[item.index])
+    return tuple(res)
+
+
+def _plotDataWithTable(fig, gs, pdict, show_tab=True):
+    """
+    Plot multiple series from a dict and optionally create an info table
+
+    Parameters
+    ----------
+    fig : matplotlib.figure
+        figure object to place the plot and info-table in
+
+    gs : matplotlib.GridSpec
+        gridspec object which is divided into two subgridspecs,
+        where the first holds the plot and the second the info
+        table. If `show_tab` is False, the plot is placed
+        directly in the given gridspec.
+
+    pdict: dict or dict-like
+        holds pd.Series with plotting-data.
+
+    show_tab : bool, default True
+        if True, a table with quantity information of the data is shown,
+        if False, no table is shown
+
+    Returns
+    -------
+    matplotlib.Axes
+        the axes object from the plot
+
+    See Also
+    --------
+        _plotFromDicts()
+        _plotInfoTable()
+    """
+    if show_tab:
+        plot_gs, tab_gs = gs.subgridspec(ncols=2, nrows=1, width_ratios=_layout_data_to_table_ratio)
+        ax = fig.add_subplot(tab_gs)
+        _plotInfoTable(ax, pdict, _plotstyle, len(pdict["data"]))
+        ax = fig.add_subplot(plot_gs)
+    else:
+        ax = fig.add_subplot(gs)
+    _plotFromDicts(ax, pdict, _plotstyle)
+    return ax
+
+
+def _plotFromDicts(ax, plotdict, styledict):
+    """
+    Plot multiple data series from a dict into the same plot.
+
+    Each series stored in the plot dict is added to the
+    very same axes (plot) with its own plot parameters,
+    which are taken from the styledict. If a key is not
+    present in the styledict, the corresponding data is
+    ignored.
+
+    Parameters
+    ----------
+    ax: matplotlib.Axes
+        axes object to add the plot to
+
+    plotdict: dict or dict-like
+        holds pd.Series with plotting data.
+
+    styledict: dict
+        holds dicts of kwargs that will be passed to plot.
+
+    Notes
+    -----
+     - changes the axes
+     - plotdict keys without a matching styledict entry are silently skipped
+
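+    Examples
+    --------
+    A minimal, hypothetical style mapping (keys mirror the plotdict keys,
+    values are plain ``matplotlib`` plot kwargs):
+
+    >>> styledict = {
+    ...     "data": dict(color="silver", label="data"),
+    ...     "bad": dict(color="red", marker=".", ls=""),
+    ... }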
+    """
+    for field in plotdict:
+        data = plotdict[field]
+        style = styledict.get(field, False)
+        if style and len(data) > 0:
+            ax.plot(data, **style)
+
+
+def _annotate(ax, plotdict, txtseries: pd.Series):
+    for x, txt in txtseries.iteritems():
+        try:
+            y = plotdict['data'].loc[x]
+            if np.isnan(y):
+                y = plotdict['data-nans'].loc[x]
+        except KeyError:
+            continue
+        ax.annotate(txt, xy=(x, y), rotation=45)
+
+
+def _plotInfoTable(ax, plotdict, styledict, total):
+    """
+    Make a table with information about the quantity of elements.
+
+    Makes a table from the data in plotdict, indicating how many
+    elements each series holds. The count is shown as an absolute
+    number and as a percentage of total.
+
+    Parameters
+    ----------
+    ax: matplotlib.Axes
+        axes object to add the table to
+
+    plotdict: dict or dict-like
+        holds pd.Series with plotting data. Only the length of the
+        series is evaluated.
+
+    styledict: dict
+        holds dicts of kwargs that can be passed to plot. Currently only
+        the `color` kwarg (or just `c`) is evaluated.
+
+    total: int/float
+        total count used to calculate the percentages
+
+    Returns
+    -------
+        instance of matplotlib.table
+
+    Notes
+    -----
+     changes the axes object
+
+    """
+    cols = ["color", "name", "[#]", "[%]"]
+    tab = pd.DataFrame(columns=cols)
+
+    # extract counts and color
+    for field in plotdict:
+        data = plotdict[field]
+        style = styledict.get(field, {})
+        color = style.get("color", None) or style.get("c", "white")
+        if total == 0:
+            length = percent = 0
+        else:
+            length = len(data)
+            percent = length / total * 100
+        tab.loc[len(tab), :] = [color, field, length, round(percent, 2)]
+
+    # nested list of cell-colors
+    ccs = np.full([len(tab.columns) - 1, len(tab)], fill_value="white", dtype=object)
+    ccs[0] = tab["color"]
+    del tab["color"]
+
+    # disable the plot as we just
+    # want to have the table
+    ax.axis("tight")
+    ax.axis("off")
+
+    # create and format layout
+    tab_obj = ax.table(
+        cellColours=ccs.transpose(),
+        cellText=tab.iloc[:, :].values,
+        colLabels=tab.columns[:],
+        colWidths=[0.4, 0.3, 0.3],
+        in_layout=True,
+        loc="center",
+        # make the table a bit smaller than the plot
+        bbox=[0.0, 0.1, 0.95, 0.8],
     )
 
-    # ANY OLD FLAG
-    # plot all(!) data that are already flagged in black
-    flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator=">=")
-    oldflags = flagged & ~flagmask
-
-    ax.plot(data[oldflags], ".", color="black", label="flagged by other test")
-    if plot_nans:
-        _plotNans(data[oldflags], "black", ax)
-
-    # now we just want to show data that was flagged
-    data = data.loc[flagmask[flagmask].index]
-    flagger = flagger.getFlagger(varname, loc=data.index)
-
-    if data.empty:
-        return
-
-    plots = [
-        (flagger.UNFLAGGED, _colors["unflagged"]),
-        (flagger.GOOD, _colors["good"]),
-        (flagger.BAD, _colors["bad"]),
-    ]
-
-    for flag, color in plots:
-        flagged = flagger.isFlagged(varname, flag=flag, comparator="==")
-        if not data[flagged].empty:
-            ax.plot(data[flagged], ".", color=color, label=f"flag: {flag}")
-        if plot_nans:
-            _plotNans(data[flagged], color, ax)
-
-    # plot SUSPICIOS
-    color = _colors["suspicious"]
-    flagged = flagger.isFlagged(varname, flag=flagger.GOOD, comparator=">")
-    flagged &= flagger.isFlagged(varname, flag=flagger.BAD, comparator="<")
-    if not data[flagged].empty:
-        ax.plot(
-            data[flagged], ".", color=color, label=f"{flagger.GOOD} < flag < {flagger.BAD}",
-        )
-    if plot_nans:
-        _plotNans(data[flagged], color, ax)
-
-
-def _plotNans(y, color, ax):
-    nans = y.isna()
-    _plotVline(ax, y[nans].index, color=color)
-
-
-def _plotVline(ax, points, color="blue"):
-    # workaround for ax.vlines() as this work unexpected
-    # normally this should work like so:
-    #   ax.vlines(idx, *ylim, linestyles=':', color='silver', label="missing")
-    for point in points:
-        ax.axvline(point, color=color, linestyle=":")
+    # Somehow the automatic font resizing doesn't work - the
+    # font can only shrink, not grow. There was an issue [1] in
+    # matplotlib, but it was closed in favor of a new project [2].
+    # Nevertheless I wasn't able to integrate it. Also it seems
+    # that it does **not** fix the problem, even though the
+    # README promises otherwise. See here:
+    # [1] https://github.com/matplotlib/matplotlib/pull/14344
+    # [2] https://github.com/swfiua/blume/
+    # As a suitable workaround, we use a fixed font size.
+    tab_obj.auto_set_column_width(False)
+    tab_obj.auto_set_font_size(False)
+    tab_obj.set_fontsize(10)
+
+    # color fix - use white text color if background is dark
+    # sa: https://www.w3.org/TR/WCAG20/#relativeluminancedef
+    thresh = 0.5
+    for k, cell in tab_obj.get_celld().items():
+        r, g, b, a = cell.get_facecolor()
+        if 0.2126 * r + 0.7152 * g + 0.0722 * b < thresh:
+            cell.set_text_props(c="white")
diff --git a/saqc/lib/rolling.py b/saqc/lib/rolling.py
new file mode 100644
index 0000000000000000000000000000000000000000..4af3e7cb4e4f5c5272c4ee07b0f6f27f039abaf3
--- /dev/null
+++ b/saqc/lib/rolling.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python
+
+__author__ = "Bert Palm"
+__email__ = "bert.palm@ufz.de"
+__copyright__ = "Copyright 2020, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
+
+# We need to implement
+# - the calculation/skipping of min_periods,
+# because `calculate_center_offset` ignores those and we cannot rely on rolling(min_periods), as
+# pointed out in customRoller. Also we need to implement
+# - the centering of windows for fixed windows,
+# for variable windows this is not allowed (similar to pandas).
+# The closed-param for variable windows is already implemented in `calculate_center_offset`,
+# and we don't allow it for fixed windows (similar to pandas). We also want to
+# - fix the strange ramp-up behavior,
+# which occurs if the window is shifted into the data but is not yet fully inside it. In this
+# case we want to spit out nan's instead of results calculated from fewer than window-size many values.
+# This is slightly different from the min_periods parameter, because min_periods should mainly control the
+# NaN-behavior for fixed windows and the minimum number of (non-NaN) observations needed in an offset window,
+# but should not apply if window-size many values are not even possible for technical reasons. This is mainly
+# because one cannot know the number(!) of observations that can occur in a given offset window (unless one
+# knows the exact, fixed frequency). That's why rolling should spit out NaN's as long as the window is not
+# fully shifted into the data.
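+#
+# A rough, hypothetical illustration of the ramp-up fix (1-min sampled data, window='2min'),
+# mirroring the examples given in _VariableWindowDirectionIndexer._remove_ramps below:
+#   expanding behavior (expand=True, the default): [1, 1, 1, 1, 1] -> rolling sum -> [1,   2,   3, 3, 3]
+#   with expand=False:                             [1, 1, 1, 1, 1] -> rolling sum -> [nan, nan, 3, 3, 3]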
+
+import numpy as np
+from typing import Union
+from pandas.api.types import is_integer, is_bool
+from pandas.api.indexers import BaseIndexer
+from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame
+from pandas.core.window.indexers import calculate_variable_window_bounds
+from pandas.core.window.rolling import Rolling, Window
+
+
+def is_slice(k): return isinstance(k, slice)
+
+
+class _CustomBaseIndexer(BaseIndexer):
+    is_datetimelike = None
+
+    def __init__(self, index_array, window_size, center=False, forward=False,
+                 expand=False, step=None, mask=None):
+        super().__init__()
+        self.index_array = index_array
+        self.window_size = window_size
+        self._center = center
+        self.forward = forward
+        self.expand = expand
+        self.step = step
+        self.skip = mask
+        self.validate()
+
+    def validate(self) -> None:
+        if self._center is None:
+            self._center = False
+        if not is_bool(self._center):
+            raise ValueError("center must be a boolean")
+        if not is_bool(self.forward):
+            raise ValueError("forward must be a boolean")
+        if not is_bool(self.expand):
+            raise ValueError("expand must be a boolean")
+
+        if is_integer(self.step) or self.step is None:
+            self.step = slice(None, None, self.step or None)
+        if not is_slice(self.step):
+            raise TypeError('step must be integer or slice.')
+        if self.step == slice(None):
+            self.step = None
+
+        if self.skip is not None:
+            if len(self.index_array) != len(self.skip):
+                raise ValueError('mask must have same length as data to roll over.')
+            self.skip = np.array(self.skip)
+            if self.skip.dtype != bool:
+                raise TypeError('mask must have boolean values only.')
+            self.skip = ~self.skip
+
+    def get_window_bounds(self, num_values=0, min_periods=None, center=None, closed=None):
+        if min_periods is None:
+            assert self.is_datetimelike is False
+            min_periods = 1
+
+        # If someone calls us directly, they may pass a center value we should consider.
+        # pandas instead (via customRoller) will always pass None, and the correct
+        # center value is set in __init__. This is because pandas cannot center
+        # dt-like windows and would fail before even calling us.
+        if center is None:
+            center = self._center
+
+        start, end = self._get_bounds(num_values, min_periods, center, closed)
+
+        # ensure correct length
+        start, end = start[:num_values], end[:num_values]
+
+        start, end = self._apply_skipmask(start, end)
+        start, end = self._apply_steps(start, end, num_values)
+        start, end = self._prepare_min_periods_masking(start, end, num_values)
+        return start, end
+
+    def _prepare_min_periods_masking(self, start, end, num_values):
+        # correction for min_periods calculation
+        end[end > num_values] = num_values
+
+        # this is the same as .rolling would do, so leave the work to them ;)
+        # additionally, they are able to count the nans in each window, which we couldn't.
+        # end[end - start < self.min_periods] = 0
+        return start, end
+
+    def _get_center_window_sizes(self, center, winsz):
+        ws1 = ws2 = winsz
+        if center:
+            # centering of dt-like windows is just looking left and right
+            # with half of the window size each
+            ws2, ws1 = divmod(winsz, 2)
+            ws1 += ws2
+            if self.forward:
+                ws1, ws2 = ws2, ws1
+        return ws1, ws2
+
+    def _apply_skipmask(self, start, end):
+        if self.skip is not None:
+            end[self.skip] = 0
+        return start, end
+
+    def _apply_steps(self, start, end, num_values):
+        if self.step is not None:
+            m = np.full(num_values, 1)
+            m[self.step] = 0
+            m = m.astype(bool)
+            end[m] = 0
+        return start, end
+
+    def _get_bounds(self, num_values=0, min_periods=None, center=False, closed=None):
+        raise NotImplementedError
+
+
+class _FixedWindowDirectionIndexer(_CustomBaseIndexer):
+    # automatically added in super call to init
+    index_array: np.array
+    window_size: int
+    # set here
+    is_datetimelike = False
+
+    def _get_bounds(self, num_values=0, min_periods=None, center=False, closed=None):
+        # closed is always ignored and handled as 'both'; other cases are not implemented
+        offset = 0
+        if center:
+            offset = (self.window_size - 1) // 2
+        num_values += offset
+
+        if self.forward:
+            start, end = self._fw(num_values, offset)
+        else:
+            start, end = self._bw(num_values, offset)
+
+        if center:
+            start, end = self._center_result(start, end, offset)
+            num_values -= offset
+
+        if not self.expand:
+            start, end = self._remove_ramps(start, end, center)
+
+        return start, end
+
+    def _center_result(self, start, end, offset):
+        # cut N values at the front that were inserted in _fw(),
+        # or cut N values at the end for _bw()
+        if offset > 0:
+            if self.forward:
+                start = start[:-offset]
+                end = end[:-offset]
+            else:
+                start = start[offset:]
+                end = end[offset:]
+        return start, end
+
+    def _remove_ramps(self, start, end, center):
+        fw, bw = self.forward, not self.forward
+        ramp_l, ramp_r = self._get_center_window_sizes(center, self.window_size - 1)
+        if center:
+            fw = bw = True
+
+        if bw and ramp_l > 0:
+            end[:ramp_l] = 0
+        if fw and ramp_r > 0:
+            end[-ramp_r:] = 0
+
+        return start, end
+
+    def _bw(self, num_values=0, offset=0):
+        start = np.arange(-self.window_size, num_values + offset, dtype="int64") + 1
+        end = start + self.window_size
+        start[:self.window_size] = 0
+        return start, end
+
+    def _fw(self, num_values=0, offset=0):
+        start = np.arange(-offset, num_values, dtype="int64")
+        end = start + self.window_size
+        start[:offset] = 0
+        return start, end
+
+
+class _VariableWindowDirectionIndexer(_CustomBaseIndexer):
+    # automatically added in super call to init
+    index_array: np.array
+    window_size: int
+    # set here
+    is_datetimelike = True
+
+    def _get_bounds(self, num_values=0, min_periods=None, center=False, closed=None):
+        ws_bw, ws_fw = self._get_center_window_sizes(center, self.window_size)
+        if center:
+            c1 = c2 = closed
+            if closed == 'neither':
+                c1, c2 = 'right', 'left'
+
+            start, _ = self._bw(num_values, ws_bw, c1)
+            _, end = self._fw(num_values, ws_fw, c2)
+
+        elif self.forward:
+            start, end = self._fw(num_values, ws_fw, closed)
+        else:
+            start, end = self._bw(num_values, ws_bw, closed)
+
+        if not self.expand:
+            start, end = self._remove_ramps(start, end, center)
+
+        return start, end
+
+    def _remove_ramps(self, start, end, center):
+        ws_bw, ws_fw = self._get_center_window_sizes(center, self.window_size)
+
+        if center or not self.forward:
+            # remove (up) ramp
+            # we don't want this: [1,1,1,1,1].rolling(window='2min').sum() -> [1,   2,   3, 3, 3]
+            # instead we want:   [1,1,1,1,1].rolling(window='2min').sum() -> [nan, nan, 3, 3, 3]
+            tresh = self.index_array[0] + ws_bw
+            mask = self.index_array < tresh
+            end[mask] = 0
+
+        if center or self.forward:
+            # remove (down) ramp
+            # we don't want this: [1,1,1,1,1].rolling(window='2min', forward=True).sum() -> [3, 3, 3,  2,  1  ]
+            # instead we want:   [1,1,1,1,1].rolling(window='2min', forward=True).sum() -> [3, 3, 3, nan, nan]
+            tresh = self.index_array[-1] - ws_fw
+            mask = self.index_array > tresh
+            end[mask] = 0
+
+        return start, end
+
+    def _bw(self, num_values, window_size, closed):
+        arr = self.index_array
+        start, end = calculate_variable_window_bounds(num_values, window_size, None, None, closed, arr)
+        return start, end
+
+    def _fw(self, num_values, window_size, closed):
+        arr = self.index_array[::-1]
+        s, _ = calculate_variable_window_bounds(num_values, window_size, None, None, closed, arr)
+        start = np.arange(num_values)
+        end = num_values - s[::-1]
+
+        if closed in ['left', 'neither']:
+            start += 1
+        return start, end
+
+
+def customRoller(obj, window, min_periods=None,  # aka minimum non-nan values
+                 center=False, win_type=None, on=None, axis=0, closed=None,
+                 forward=False, expand=True, step=None, mask=None) -> Union[Rolling, Window]:
+    """
+    A custom rolling implementation, using pandas as base.
+
+    Parameters
+    ----------
+    obj : pd.Series (or pd.DataFrame)
+        The object to roll over. DataFrame is currently still experimental.
+
+    window : int or offset
+        Size of the moving window. This is the number of observations used for calculating the statistic.
+        Each window will be a fixed size.
+        If it is an offset, this will be the time period of each window. Each window will be of variable size,
+        based on the observations included in the time period. This is only valid for datetime-like indexes.
+
+    min_periods : int, default None
+        Minimum number of observations in window required to have a value (otherwise result is NA).
+        For a window that is specified by an offset, min_periods will default to 1. Otherwise, min_periods
+        will default to the size of the window.
+
+    center : bool, default False
+        Set the labels at the center of the window. Also works for offset-based windows (in contrast to pandas).
+
+    win_type : str, default None
+        Not implemented. Raises a NotImplementedError if not None.
+
+    on : str, optional
+        For a DataFrame, a datetime-like column or MultiIndex level on which to calculate the rolling window,
+        rather than the DataFrame’s index. Provided integer column is ignored and excluded from result since
+        an integer index is not used to calculate the rolling window.
+    
+    axis : int or str, default 0
+
+    closed : str, default None
+        Make the interval closed on the ‘right’, ‘left’, ‘both’ or ‘neither’ endpoints. For offset-based windows,
+        it defaults to ‘right’. For fixed windows, defaults to ‘both’. Remaining cases not implemented for fixed
+        windows.
+
+    forward : bool, default False
+        By default a window is 'looking' backwards (in time). If True the window is looking forward in time.
+
+    expand : bool, default True
+        If True the window expands/shrinks up to its final window size while being shifted into the data or
+        shifted out of it, respectively.
+        For (normal) backward windows it only expands at the left border, for forward windows it shrinks at
+        the right border, and for centered windows both apply.
+
+        Also bear in mind that even if this is True, at least `min_periods` values are necessary to get a
+        valid value; see there for more info.
+
+
+    step : int, slice or None, default None
+        If given, a window is only calculated at every n'th step, starting from the very first. One can
+        give a slice to start from e.g. the second step (`slice(2, None, n)`) or similar.
+
+    mask : boolean array-like
+        Only calculate the window if the mask is True, otherwise skip it.
+
+    Returns
+    -------
+    a Window or Rolling sub-classed for the particular operation
+
+
+    Notes
+    -----
+    If for some reason the start and end numeric indices of the window are needed, one can call
+    `start, end = customRoller(obj, window).window.get_window_bounds(num_values, min_periods)`,
+    which returns two np.arrays holding the start and end indices. Fill in at least all
+    parameters shown in the example below.
+
+    See Also
+    --------
+    pandas.Series.rolling
+    pandas.DataFrame.rolling
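+
+    Examples
+    --------
+    A rough usage sketch (index, window size and parameters are illustrative only;
+    the outputs are omitted):
+
+    >>> import pandas as pd
+    >>> s = pd.Series(1., index=pd.date_range("2020-01-01", periods=10, freq="1min"))
+    >>> roller = customRoller(s, window="2min", expand=False)
+    >>> rolled = roller.sum()
+    >>> start, end = roller.window.get_window_bounds(num_values=len(s), min_periods=roller.min_periods)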
+    """
+    num_params = len(locals()) - 2  # do not count window and obj
+    if not isinstance(obj, (ABCSeries, ABCDataFrame)):
+        raise TypeError(f"invalid type: {type(obj)}")
+
+    # center is the only param from the pandas rolling implementation
+    # that we extend, namely we also allow center=True on dt-indexed data;
+    # that's why we treat it as one of ours
+    theirs = dict(min_periods=min_periods, win_type=win_type, on=on, axis=axis, closed=closed)
+    ours = dict(center=center, forward=forward, expand=expand, step=step, mask=mask)
+    assert len(theirs) + len(ours) == num_params, "not all params covered (!)"
+
+    # Use .rolling to do all the checks, like whether closed is one of [left, right, neither, both],
+    # that closed is not allowed for integer windows, that the index is monotonic (in- or decreasing), whether
+    # freq-based windows can be transformed to nanoseconds (e.g. fails for `1y` - it could have 365 or 366 days), etc.
+    # It also converts the window and the index to numpy arrays (so we don't have to do it :D).
+    x = obj.rolling(window, **theirs)
+
+    indexer = _VariableWindowDirectionIndexer if x.is_freq_type else _FixedWindowDirectionIndexer
+    indexer = indexer(index_array=x._on.asi8, window_size=x.window, **ours)
+
+    # Centering is fully done in our own indexers, so we do not pass center to rolling(), especially because
+    # we also allow centering on dt-based indexes. Also, centering would fail for forward windows, because of
+    # pandas' internal centering magic (it appends nans at the end of the array and later cuts values from the
+    # beginning of the result).
+    # min_periods is also quite tricky, especially if None is passed. For dt-based windows min_periods defaults to 1
+    # and is set during rolling setup (-> if r=obj.rolling() is called). For numeric windows instead, it stays None
+    # during setup and defaults to indexer.window_size once a rolling method is called (-> if r.sum()). That's a bit
+    # odd and quite hard to find. So we are fine if we pass the already calculated x.min_periods, as this will just
+    # hold the correctly initialised or not initialised value. (It gets even trickier if one evaluates which value is
+    # actually passed to the function that actually throws the values out; I leave that to the reader to find out,
+    # starting at pandas.core.window.rolling:_Window._apply.)
+    # Lastly, it is necessary to pass min_periods at all (!) and not to set it to a fixed value (1, 0, None, ...).
+    # This is because we cannot throw out values ourselves in the indexer: min_periods also counts NA values,
+    # and we have no control over the actual values, just their indexes.
+    theirs.update(min_periods=x.min_periods)
+    roller = obj.rolling(indexer, center=None, **theirs)
+
+    # ----- count hack -------
+    # Unfortunately pandas implements count differently if a BaseIndexer
+    # instance is given. IMO, the intention behind this is to treat count
+    # differently for dt-like windows, but if a user passes their own
+    # indexer we also end up in this case /:
+    # The only possibility is to monkey-patch pandas...
+    def new_count():
+        self = roller
+        if not x.is_freq_type:
+            obj_new = obj.notna().astype(int)
+            if min_periods is None:
+                theirs.update(min_periods=0)
+            return obj_new.rolling(indexer, center=None, **theirs).sum()
+        return self._old_count()
+
+    roller._old_count = roller.count
+    roller.count = new_count
+    # ----- count hack -------
+
+    return roller
diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py
index d0d81bb4cf2f5f7f25f15f56540eeb4be65439ba..3cbe5ab766a7bdb0f58324307d36a9981d7f98a6 100644
--- a/saqc/lib/tools.py
+++ b/saqc/lib/tools.py
@@ -1,81 +1,25 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import re
 from typing import Sequence, Union, Any, Iterator
 
+import itertools
 import numpy as np
-import pandas as pd
 import numba as nb
-import saqc.lib.ts_operators as ts_ops
-import scipy
-from functools import reduce, partial
-from saqc.lib.types import T, PandasLike
-
-SAQC_OPERATORS = {
-    "exp": np.exp,
-    "log": np.log,
-    "sum": np.sum,
-    "var": np.var,
-    "std": np.std,
-    "mean": np.mean,
-    "median": np.median,
-    "min": np.min,
-    "max": np.max,
-    "first": pd.Series(np.nan, index=pd.DatetimeIndex([])).resample("0min").first,
-    "last": pd.Series(np.nan, index=pd.DatetimeIndex([])).resample("0min").last,
-    "delta_t": ts_ops.deltaT,
-    "id": ts_ops.identity,
-    "diff": ts_ops.difference,
-    "relDiff": ts_ops.relativeDifference,
-    "deriv": ts_ops.derivative,
-    "roc": ts_ops.rateOfChange,
-    "scale": ts_ops.scale,
-    "normScale": ts_ops.normScale,
-}
-
-
-OP_MODULES = {"pd": pd, "np": np, "scipy": scipy}
-
-
-def evalFuncString(func_string):
-    if not isinstance(func_string, str):
-        return func_string
-    module_dot = func_string.find(".")
-    first, *rest = func_string.split(".")
-    if rest:
-        module = func_string[:module_dot]
-        try:
-            return reduce(lambda m, f: getattr(m, f), rest, OP_MODULES[first])
-        except KeyError:
-            availability_list = [f"'{k}' (= {s.__name__})" for k, s in OP_MODULES.items()]
-            availability_list = " \n".join(availability_list)
-            raise ValueError(
-                f'The external-module alias "{module}" is not known to the internal operators dispatcher. '
-                f"\n Please select from: \n{availability_list}"
-            )
-
-    else:
-        if func_string in SAQC_OPERATORS:
-            return SAQC_OPERATORS[func_string]
-        else:
-            availability_list = [f"'{k}' (= {s.__name__})" for k, s in SAQC_OPERATORS.items()]
-            availability_list = " \n".join(availability_list)
-            raise ValueError(
-                f'The external-module alias "{func_string}" is not known to the internal operators '
-                f"dispatcher. \n Please select from: \n{availability_list}"
-            )
-
+import pandas as pd
+from scipy import fft
+import logging
+import dios
 
-def composeFunction(functions):
-    if callable(functions):
-        return functions
-    functions = toSequence(functions)
-    functions = [evalFuncString(f) for f in functions]
+import collections
+from scipy.cluster.hierarchy import linkage, fcluster
+from saqc.lib.types import T
 
-    def composed(ts, funcs=functions):
-        return reduce(lambda x, f: f(x), reversed(funcs), ts)
+# keep this for external imports
+from saqc.lib.rolling import customRoller
 
-    return partial(composed, funcs=functions)
+logger = logging.getLogger("SaQC")
 
 
 def assertScalar(name, value, optional=False):
@@ -134,9 +78,10 @@ def slidingWindowIndices(dates, window_size, iter_delta=None):
       relying on the size of the window (sum, mean, median)
     """
 
+    if not isinstance(dates, pd.DatetimeIndex):
+        raise TypeError("Must pass pd.DatetimeIndex")
+
     # lets work on numpy data structures for performance reasons
-    if isinstance(dates, (pd.DataFrame, pd.Series)):
-        dates = dates.index
     dates = np.array(dates, dtype=np.int64)
 
     if np.any(np.diff(dates) <= 0):
@@ -167,29 +112,13 @@ def slidingWindowIndices(dates, window_size, iter_delta=None):
         start_date = dates[start_idx]
 
 
-def inferFrequency(data: PandasLike) -> pd.DateOffset:
+def inferFrequency(data: pd.Series) -> pd.DateOffset:
     return pd.tseries.frequencies.to_offset(pd.infer_freq(data.index))
 
 
-def combineDataFrames(left: pd.DataFrame, right: pd.DataFrame, fill_value: float = np.nan) -> pd.DataFrame:
-    """
-    Combine the given DataFrames 'left' and 'right' such that, the
-    output is union of the indices and the columns of both. In case
-    of duplicated values, 'left' is overwritten by 'right'
-    """
-    combined = left.reindex(
-        index=left.index.union(right.index),
-        columns=left.columns.union(right.columns, sort=False),
-        fill_value=fill_value,
-    )
-
-    for key, values in right.iteritems():
-        combined.loc[right.index, key] = values
-
-    return combined
-
-
-def retrieveTrustworthyOriginal(data: pd.DataFrame, field: str, flagger=None, level: Any = None) -> pd.DataFrame:
+def retrieveTrustworthyOriginal(
+    data: dios.DictOfSeries, field: str, flagger=None, level: Any = None
+) -> dios.DictOfSeries:
     """Columns of data passed to the saqc runner may not be sampled to its original sampling rate - thus
     differenciating between missng value - nans und fillvalue nans is impossible.
 
@@ -247,108 +176,108 @@ def offset2seconds(offset):
     return pd.Timedelta.total_seconds(pd.Timedelta(offset))
 
 
-def flagWindow(flagger_old, flagger_new, field, direction="fw", window=0, **kwargs) -> pd.Series:
-
-    if window == 0 or window == "":
-        return flagger_new
-
-    fw, bw = False, False
-    mask = flagger_old.getFlags(field) != flagger_new.getFlags(field)
-    f = flagger_new.isFlagged(field) & mask
-
-    if not mask.any():
-        # nothing was flagged, so nothing need to be flagged additional
-        return flagger_new
-
-    if isinstance(window, int):
-        x = f.rolling(window=window + 1).sum()
-        if direction in ["fw", "both"]:
-            fw = x.fillna(method="bfill").astype(bool)
-        if direction in ["bw", "both"]:
-            bw = x.shift(-window).fillna(method="bfill").astype(bool)
-    else:
-        # time-based windows
-        if direction in ["bw", "both"]:
-            # todo: implement time-based backward rolling
-            raise NotImplementedError
-        fw = f.rolling(window=window, closed="both").sum().astype(bool)
-
-    fmask = bw | fw
-    return flagger_new.setFlags(field, fmask, **kwargs)
-
-
-def sesonalMask(dtindex, month0=1, day0=1, month1=12, day1=None):
+def seasonalMask(dtindex, season_start, season_end, include_bounds):
+    """
+    This function generates date-periodic/seasonal masks from a passed index.
+
+    For example you could mask all the values of an index that are sampled in winter, or between 6 and 9 o'clock.
+    See the examples section for more details.
+
+    Parameters
+    ----------
+    dtindex : pandas.DatetimeIndex
+        The index according to which you want to generate a mask.
+        (=resulting mask will be indexed with 'dtindex')
+    season_start : str
+        String denoting the starting point of every period. Formally, it has to be a truncated instance of
+        "mm-ddTHH:MM:SS". Has to be of the same length as the `season_end` parameter.
+        See the examples section below for some examples.
+    season_end : str
+        String denoting the end point of every period. Formally, it has to be a truncated instance of
+        "mm-ddTHH:MM:SS". Has to be of the same length as the `season_start` parameter.
+        See the examples section below for some examples.
+    include_bounds : boolean
+        Whether or not to include the period-defining bounds in the mask.
+
+    Returns
+    -------
+    to_mask : pandas.Series[bool]
+        A series, indexed with the input index and having value `True` for all the values that are to be masked.
+
+    Examples
+    --------
+    The `season_start` and `season_end` parameters provide a convenient way to generate seasonal / date-periodic
+    masks. They have to be strings of the forms "mm-ddTHH:MM:SS", "ddTHH:MM:SS", "HH:MM:SS", "MM:SS" or "SS"
+    (mm=month, dd=day, HH=hour, MM=minute, SS=second).
+    Single digit specifications have to be given with leading zeros.
+    The `season_start` and `season_end` strings have to be of the same length (refer to the same periodicity).
+    The highest date unit gives the period.
+    For example:
+
+    >>> season_start = "01T15:00:00"
+    >>> season_end = "13T17:30:00"
+
+    Will result in all values sampled between 15:00 on the 1st and 17:30 on the 13th of every month getting masked.
+
+    >>> season_start = "01:00"
+    >>> season_end = "04:00"
+
+    All the values between the first and 4th minute of every hour get masked.
+
+    >>> season_start = "01-01T00:00:00"
+    >>> season_end = "01-03T00:00:00"
+
+    Mask january and february of evcomprosed in theery year. masking is inclusive always, so in this case the mask will
+    include 00:00:00 at the first of march. To exclude this one, pass:
+
+    >>> season_start = "01-01T00:00:00"
+    >>> season_end = "02-28T23:59:59"
+
+    To mask intervals that reach over the end of a period, like nights or winter, exchange the sequence of season
+    start and season end. For example, to mask the night hours between 22:00:00 in the evening and 06:00:00 in the
+    morning, pass:
+
+    >>> season_start = "22:00:00"
+    >>> season_end = "06:00:00"
+
+    When inclusive_selection="season", all the examples above work the same way, only that you now
+    determine which values NOT to mask (=which values are to constitute the "seasons").
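+
+    A minimal call sketch (index and bounds are illustrative only):
+
+    >>> import pandas as pd
+    >>> dtindex = pd.date_range("2020-01-01", "2020-01-10", freq="1H")
+    >>> night_mask = seasonalMask(dtindex, "22:00:00", "06:00:00", False)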
     """
-    This function provide a mask for a sesonal time range in the given dtindex.
-    This means the interval is applied again on every year and even over the change of a year.
-    Note that both edges are inclusive.
+    def _replaceBuilder(stamp):
+        keys = ("second", "minute", "hour", "day", "month", "year")
+        stamp_list = map(int, re.split(r"[-T:]", stamp)[::-1])
+        stamp_kwargs = dict(zip(keys, stamp_list))
 
-    Examples:
-        sesonal(dtindex, 1, 1, 3, 1)  -> [jan-mar]
-        sesonal(dtindex, 8, 1, 8, 15) -> [1.aug-15.aug]
+        def _replace(index):
+            if "day" in stamp_kwargs:
+                stamp_kwargs["day"] = min(stamp_kwargs["day"], index[0].daysinmonth)
 
+            out = index[0].replace(**stamp_kwargs)
+            return out.strftime("%Y-%m-%dT%H:%M:%S")
 
-    This also works, if the second border is smaller then the first
+        return _replace
 
-    Examples:
-        sesonal(dtindex, 10, 1, 2, 1) -> [1.nov-1.feb (following year)]
-        sesonal(dtindex, 1, 10, 1, 1)  -> [10.jan-1.jan(following year)] like everything except ]1.jan-10.jan[
+    mask = pd.Series(include_bounds, index=dtindex)
 
-    """
-    if day1 is None:
-        day1 = 31 if month1 in [1, 3, 5, 7, 8, 10, 12] else 29 if month1 == 2 else 30
-
-    # test plausibility of date
-    try:
-        f = "%Y-%m-%d"
-        t0 = pd.to_datetime(f"2001-{month0}-{day0}", format=f)
-        t1 = pd.to_datetime(f"2001-{month1}-{day1}", format=f)
-    except ValueError:
-        raise ValueError("Given datelike parameter not logical")
-
-    # swap
-    if t1 < t0:
-        # we create the same mask as we would do if not inverted
-        # but the borders need special treatment..
-        # ===end]....................[start====
-        # ======]end+1........start-1[=========
-        # ......[end+1========start-1]......... + invert
-        # ......[start`========= end`]......... + invert
-        t0 -= pd.to_timedelta("1d")
-        t1 += pd.to_timedelta("1d")
-        invert = True
-        # only swap id condition is still true
-        t0, t1 = t1, t0 if t1 < t0 else (t0, t1)
-
-        month0, day0 = t0.month, t0.day
-        month1, day1 = t1.month, t1.day
-    else:
-        invert = False
+    start_replacer = _replaceBuilder(season_start)
+    end_replacer = _replaceBuilder(season_end)
 
-    month = [m for m in range(month0, month1 + 1)]
-
-    # make a mask for [start:end]
-    mask = dtindex.month.isin(month)
-    if day0 > 1:
-        exclude = [d for d in range(1, day0)]
-        mask &= ~(dtindex.month.isin([month0]) & dtindex.day.isin(exclude))
-    if day1 < 31:
-        exclude = [d for d in range(day1 + 1, 31 + 1)]
-        mask &= ~(dtindex.month.isin([month1]) & dtindex.day.isin(exclude))
-
-    if invert:
-        return ~mask
+    if pd.Timestamp(start_replacer(dtindex)) <= pd.Timestamp(end_replacer(dtindex)):
+        def _selector(x, base_bool=include_bounds):
+            x[start_replacer(x.index):end_replacer(x.index)] = not base_bool
+            return x
     else:
-        return mask
+        def _selector(x, base_bool=include_bounds):
+            x[:end_replacer(x.index)] = not base_bool
+            x[start_replacer(x.index):] = not base_bool
+            return x
 
+    freq = '1' + 'mmmhhhdddMMMYYY'[len(season_start)]
+    return mask.groupby(pd.Grouper(freq=freq)).transform(_selector)
 
-def assertDataFrame(df: Any, argname: str = "arg", allow_multiindex: bool = True) -> None:
-    if not isinstance(df, pd.DataFrame):
-        raise TypeError(f"{argname} must be of type pd.DataFrame, {type(df)} was given")
-    if not allow_multiindex:
-        assertSingleColumns(df, argname)
-    if not df.columns.is_unique:
-        raise TypeError(f"{argname} must have unique columns")
+
+def assertDictOfSeries(df: Any, argname: str = "arg") -> None:
+    if not isinstance(df, dios.DictOfSeries):
+        raise TypeError(f"{argname} must be of type dios.DictOfSeries, {type(df)} was given")
 
 
 def assertSeries(srs: Any, argname: str = "arg") -> None:
@@ -356,45 +285,6 @@ def assertSeries(srs: Any, argname: str = "arg") -> None:
         raise TypeError(f"{argname} must be of type pd.Series, {type(srs)} was given")
 
 
-def assertPandas(pdlike: PandasLike, argname: str = "arg", allow_multiindex: bool = True) -> None:
-    if not isinstance(pdlike, pd.Series) and not isinstance(pdlike, pd.DataFrame):
-        raise TypeError(f"{argname} must be of type pd.DataFrame or pd.Series, {type(pdlike)} was given")
-    if not allow_multiindex:
-        assertSingleColumns(pdlike, argname)
-
-
-def assertMultiColumns(dfmi: pd.DataFrame, argname: str = "") -> None:
-    assertDataFrame(dfmi, argname, allow_multiindex=True)
-    if not isinstance(dfmi.columns, pd.MultiIndex):
-        raise TypeError(
-            f"given pd.DataFrame ({argname}) need to have a muliindex on columns, "
-            f"instead it has a {type(dfmi.columns)}"
-        )
-
-
-def assertSingleColumns(df: PandasLike, argname: str = "") -> None:
-    if isinstance(df, pd.DataFrame) and isinstance(df.columns, pd.MultiIndex):
-        raise TypeError(f"given pd.DataFrame {argname} is not allowed to have a muliindex on columns")
-
-
-def getFuncFromInput(func):
-    """
-    Aggregation functions passed by the user, are selected by looking them up in the STRING_2_DICT dictionary -
-    But since there are wrappers, that dynamically generate aggregation functions and pass those on ,the parameter
-    interfaces must as well be capable of processing real functions passed. This function does that.
-
-    :param func: A key to the STRING_2_FUNC dict, or an actual function
-    """
-    # if input is a callable - than just pass it:
-    if hasattr(func, "__call__"):
-        if (func.__name__ == "aggregator") & (func.__module__ == "saqc.funcs.harm_functions"):
-            return func
-        else:
-            raise ValueError("The function you passed is suspicious!")
-    else:
-        return evalFuncString(func)
-
-
 @nb.jit(nopython=True, cache=True)
 def otherIndex(values: np.ndarray, start: int = 0) -> int:
     """
@@ -409,13 +299,11 @@ def otherIndex(values: np.ndarray, start: int = 0) -> int:
 
 
 def groupConsecutives(series: pd.Series) -> Iterator[pd.Series]:
-
     """
     group consecutive values into distinct pd.Series
     """
     index = series.index
     values = series.values
-    target = values[0]
 
     start = 0
     while True:
@@ -424,3 +312,259 @@ def groupConsecutives(series: pd.Series) -> Iterator[pd.Series]:
             break
         yield pd.Series(data=values[start:stop], index=index[start:stop])
         start = stop
+
+
+def mergeDios(left, right, subset=None, join="merge"):
+    # use dios.merge() as soon as it implemented
+    # see https://git.ufz.de/rdm/dios/issues/15
+
+    merged = left.copy()
+    if subset is not None:
+        right_subset_cols = right.columns.intersection(subset)
+    else:
+        right_subset_cols = right.columns
+
+    shared_cols = left.columns.intersection(right_subset_cols)
+
+    for c in shared_cols:
+        l, r = left[c], right[c]
+        if join == "merge":
+            # NOTE:
+            # our merge behavior is nothing more than an
+            # outer join, where the right join argument
+            # overwrites the left at the shared indices,
+            # while on a normal outer join common indices
+            # hold the values from the left join argument
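+            # A hypothetical illustration: with l = [1, 2, NaN] on index [0, 1, 2]
+            # and r = [9, 9] on index [1, 3], the merged column becomes
+            # [1, 9, NaN, 9] on index [0, 1, 2, 3].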
+            r, l = l.align(r, join="outer")
+        else:
+            l, r = l.align(r, join=join)
+        merged[c] = l.combine_first(r)
+
+    newcols = right_subset_cols.difference(left.columns)
+    for c in newcols:
+        merged[c] = right[c].copy()
+
+    return merged
+
+
+def isQuoted(string):
+    return bool(re.search(r"'.*'|\".*\"", string))
+
+
+def dropper(field, to_drop, flagger, default):
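+    # Build a boolean mask marking the values that are already flagged with one of the
+    # flags in `to_drop` (which falls back to `default` if `to_drop` is None).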
+    drop_mask = pd.Series(False, flagger.getFlags(field).index)
+    if to_drop is None:
+        to_drop = default
+    to_drop = toSequence(to_drop)
+    if len(to_drop) > 0:
+        drop_mask |= flagger.isFlagged(field, flag=to_drop)
+    return drop_mask
+
+
+def mutateIndex(index, old_name, new_name):
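+    # Replace `old_name` with `new_name` at its original position in the index,
+    # e.g. (illustrative): mutateIndex(pd.Index(["a", "b", "c"]), "b", "x") -> Index(["a", "x", "c"])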
+    pos = index.get_loc(old_name)
+    index = index.drop(index[pos])
+    index = index.insert(pos, new_name)
+    return index
+
+
+def estimateFrequency(index, delta_precision=-1, max_rate="10s", min_rate="1D", optimize=True,
+                      min_energy=0.2, max_freqs=10, bins=None):
+
+    """
+    Function to estimate the sampling rate of an index.
+
+    The function comes with some optional overhead.
+    The default options detect sampling rates from 10 seconds to 1 day with a 10 seconds precision
+    for sampling rates below one minute, and a one minute precision for rates between 1 minute and
+    one day.
+
+    The function is designed to detect mixed sampling rates as well as rate changes.
+    In both situations, all the detected sampling rates are returned, together with their
+    greatest common rate.
+
+    Note that there is a certain lower bound on the index length,
+    beneath which frequency leakage and the Nyquist limit take over and mess up the Fourier
+    transform.
+
+    Parameters
+    ----------
+    index : {pandas.DateTimeIndex}
+        The index of which the sampling rates shall be estimated
+    delta_precision : int, default -1
+        Determines the detection precision. Precision equals: seconds*10**(-1-delta_precision).
+        A too high precision attempt can lead to performance loss and doesn't necessarily result in
+        a more precise result, especially when the samples' deviation from their mean rate
+        is high compared to the delta_precision.
+    max_rate : str, default "10s"
+        Maximum rate that can be detected.
+    min_rate : str, default "1D"
+        Minimum detectable sampling rate.
+    optimize : bool, default True
+        Whether or not to speed up the fft application by zero padding the derived response series to
+        an optimal length. (Length = 2**N)
+    min_energy : float, default 0.2
+        The percentage of energy a sampling rate must at least represent in order to be detected. Lower values
+        result in higher sensitivity, but also increase the detection rate of mixing products. The default
+        proved to be stable.
+    max_freqs : int, default 10
+        Maximum number of frequencies collected from the index. Mainly a value to prevent the frequency
+        collection loop from collecting noise and running endlessly.
+    bins : {None, List[float]}, default None
+        Custom histogram bins (in minutes) used to consolidate the detected frequencies. If None, a default
+        binning derived from `min_rate` is used.
+
+    Returns
+    -------
+        freq : {None, str}
+            Either the sampling rate that was detected in the index (if uniform), or
+            the greatest common rate of all the detected sampling rates. Equals `None` if
+            detection failed and `"empty"` if the input index was empty.
+        freqs : List[str]
+            List of detected sampling rates.
+
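+    Examples
+    --------
+    A rough usage sketch (the index is illustrative only, outputs are omitted):
+
+    >>> import pandas as pd
+    >>> idx = pd.date_range("2020-01-01", periods=500, freq="10min")
+    >>> freq, freqs = estimateFrequency(idx)
+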
+    """
+    index_n = index.to_numpy(float)
+    if index.empty:
+        return 'empty', []
+
+    index_n = (index_n - index_n[0])*10**(-9 + delta_precision)
+    delta = np.zeros(int(index_n[-1])+1)
+    delta[index_n.astype(int)] = 1
+    if optimize:
+        delta_f = np.abs(fft.rfft(delta, fft.next_fast_len(len(delta))))
+    else:
+        delta_f = np.abs(fft.rfft(delta))
+
+    len_f = len(delta_f)*2
+    min_energy = delta_f[0]*min_energy
+    # calc/assign low/high freq cut offs (makes life easier):
+    min_rate_i = int(len_f/(pd.Timedelta(min_rate).total_seconds()*(10**delta_precision)))
+    delta_f[:min_rate_i] = 0
+    max_rate_i = int(len_f/(pd.Timedelta(max_rate).total_seconds()*(10**delta_precision)))
+    hf_cutoff = min(max_rate_i, len_f//2)
+    delta_f[hf_cutoff:] = 0
+    delta_f[delta_f < min_energy] = 0
+
+    # find frequencies present:
+    freqs = []
+    f_i = np.argmax(delta_f)
+    while (f_i > 0) & (len(freqs) < max_freqs):
+        f = (len_f / f_i)/(60*10**(delta_precision))
+        freqs.append(f)
+        for i in range(1, hf_cutoff//f_i + 1):
+            delta_f[(i*f_i) - min_rate_i:(i*f_i) + min_rate_i] = 0
+        f_i = np.argmax(delta_f)
+
+    if len(freqs) == 0:
+        return None, []
+
+    if bins is None:
+        r = range(0, int(pd.Timedelta(min_rate).total_seconds()/60))
+        bins = [0, 0.1, 0.2, 0.3, 0.4] + [i + 0.5 for i in r]
+
+    f_hist, bins = np.histogram(freqs, bins=bins)
+    freqs = np.ceil(bins[:-1][f_hist >= 1])
+    gcd_freq = np.gcd.reduce((10*freqs).astype(int))/10
+
+    return str(int(gcd_freq)) + 'min', [str(int(i)) + 'min' for i in freqs]
+
+
+def evalFreqStr(freq, check, index):
+    if check in ['check', 'auto']:
+        f_passed = freq
+        freq = index.inferred_freq
+        freqs = [freq]
+        if freq is None:
+            freq, freqs = estimateFrequency(index)
+        if freq is None:
+            logging.warning('Sampling rate could not be estimated.')
+        if len(freqs) > 1:
+            logging.warning(f"Sampling rate seems to be not uniform!."
+                            f"Detected: {freqs}")
+
+        if check == 'check':
+            f_passed_seconds = pd.Timedelta(f_passed).total_seconds()
+            freq_seconds = pd.Timedelta(freq).total_seconds()
+            if (f_passed_seconds != freq_seconds):
+                logging.warning(f"Sampling rate estimate ({freq}) missmatches passed frequency ({f_passed}).")
+        elif check == 'auto':
+            if freq is None:
+                raise ValueError('Frequency estimation for non-empty series failed and no fallback frequency was passed.')
+            f_passed = freq
+    else:
+        f_passed = freq
+    return f_passed
+
+
+def detectDeviants(data, metric, norm_spread, norm_frac, linkage_method='single', population='variables'):
+    """
+    Helper function for the recurring task of detecting deviant variables
+    within a group of variables.
+
+    "Normality" is determined in terms of a maximum spreading distance that members of a normal group must not exceed
+    with respect to a certain metric and linkage method.
+
+    In addition, a group is only considered "normal" if it comprises more than `norm_frac` percent of the
+    variables in the input frame/dios.
+
+    Note that the function can also be used to detect abnormal regimes in a variable, by assigning the different
+    regimes to dios.DictOfSeries columns and passing this dios.
+
+    Parameters
+    ----------
+    data : {pandas.DataFrame, dios.DictOfSeries}
+        Input data
+    metric : Callable[[numpy.array, numpy.array], float]
+        A metric function for calculating the dissimilarity between two variables.
+    norm_spread : float
+        A threshold denoting the distance members of the "normal" group must not exceed to each other (in terms of the
+        metric passed) to qualify their group as the "normal" group.
+    norm_frac : float, default 0.5
+        Has to be in [0, 1]. Determines the minimum percentage of variables or samples
+        the "normal" group has to comprise in order to actually be the normal group.
+    linkage_method : {"single", "complete", "average", "weighted", "centroid", "median", "ward"}, default "single"
+        The linkage method used for hierarchical (agglomerative) clustering of the variables.
+    population : {"variables", "samples"}
+        Whether to relate the minimum percentage of values needed to form a normal group to the total number of
+        variables or to the total number of samples.
+
+    Returns
+    -------
+    deviants : List
+        A list containing the column positions of deviant variables in the input frame/dios.
+
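+    Examples
+    --------
+    A minimal sketch with a hypothetical mean-difference metric (the printed result
+    is an assumption; the actual outcome depends on metric and thresholds):
+
+    >>> df = pd.DataFrame({"a": [1.0, 1.0, 1.0], "b": [1.1, 1.0, 0.9], "c": [10.0, 11.0, 12.0]})
+    >>> metric = lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y))
+    >>> detectDeviants(df, metric, norm_spread=0.5, norm_frac=0.5)  # doctest: +SKIP
+    [2]
+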
+    """
+    var_num = len(data.columns)
+    if var_num <= 1:
+        return []
+    dist_mat = np.zeros((var_num, var_num))
+    combs = list(itertools.combinations(range(0, var_num), 2))
+    for i, j in combs:
+        dist = metric(data.iloc[:, i].values, data.iloc[:, j].values)
+        dist_mat[i, j] = dist
+
+    condensed = np.abs(dist_mat[tuple(zip(*combs))])
+    Z = linkage(condensed, method=linkage_method)
+    cluster = fcluster(Z, norm_spread, criterion='distance')
+    if population == 'variables':
+        counts = collections.Counter(cluster)
+        pop_num = var_num
+    elif population == 'samples':
+        counts = {cluster[j]: 0 for j in range(0,var_num)}
+        for c in range(var_num):
+            counts[cluster[c]] += data.iloc[:, c].dropna().shape[0]
+        pop_num = np.sum(list(counts.values()))
+    else:
+        raise ValueError("Not a valid normality criteria keyword passed. Pass either 'variables' or 'population'.")
+    norm_cluster = -1
+
+    for item in counts.items():
+        if item[1] > norm_frac * pop_num:
+            norm_cluster = item[0]
+            break
+
+    if norm_cluster == -1 or counts[norm_cluster] == pop_num:
+        return []
+    else:
+        return [i for i, x in enumerate(cluster) if x != norm_cluster]
+
+
diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py
index acec38e95473e896053a1c3e01f0935eb889da65..30ce15899d4d5c21999f1e686f2ec2bf83598bbb 100644
--- a/saqc/lib/ts_operators.py
+++ b/saqc/lib/ts_operators.py
@@ -1,90 +1,124 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+"""
+The module gathers all kinds of timeseries transformations.
+"""
+import logging
+
+import re
+
 import pandas as pd
 import numpy as np
-from sklearn.neighbors import NearestNeighbors
-
+import numba as nb
 
-def _isValid(data, max_nan_total, max_nan_consec):
-    if (max_nan_total is np.inf) & (max_nan_consec is np.inf):
-        return True
+from sklearn.neighbors import NearestNeighbors
+from scipy.stats import iqr
+import numpy.polynomial.polynomial as poly
 
-    nan_mask = data.isna()
 
-    if nan_mask.sum() <= max_nan_total:
-        if max_nan_consec is np.inf:
-            return True
-        elif nan_mask.rolling(window=max_nan_consec + 1).sum().max() <= max_nan_consec:
-            return True
-        else:
-            return False
-    else:
-        return False
+logger = logging.getLogger("SaQC")
 
 
-# ts_transformations
 def identity(ts):
+    # identity function
     return ts
 
 
-def difference(ts):
-    return pd.Series.diff(ts)
+def count(ts):
+    # count is a dummy to trigger the corresponding built-in count method of
+    # resamplers when passed to aggregate2Freq. For consistency reasons, it works accordingly when
+    # applied directly:
+    return ts.count()
+
+
+def first(ts):
+    # first is a dummy to trigger the corresponding built-in first method of
+    # resamplers when passed to aggregate2Freq. For consistency reasons, it works accordingly when
+    # applied directly:
+    return ts.first()
+
+
+def last(ts):
+    # last is a dummy to trigger the corresponding built-in last method of
+    # resamplers when passed to aggregate2Freq. For consistency reasons, it works accordingly when
+    # applied directly:
+    return ts.last()
+
+
+def zeroLog(ts):
+    # zeroLog returns np.nan instead of -np.inf when passed 0. Useful, because
+    # in internal processing you then only have to check for nan values if you need to
+    # remove "invalid" values from the data.
+    log_ts = np.log(ts)
+    log_ts[log_ts == -np.inf] = np.nan
+    return log_ts
 
 
 def derivative(ts, unit="1min"):
+    # calculates the derivative of a timeseries, expressed as slope per "unit"
     return ts / (deltaT(ts, unit=unit))
 
 
 def deltaT(ts, unit="1min"):
+    # calculates series of time gaps in ts
     return ts.index.to_series().diff().dt.total_seconds() / pd.Timedelta(unit).total_seconds()
 
 
+def difference(ts):
+    # NOTE: index of input series gets lost!
+    return np.diff(ts, prepend=np.nan)
+
+
 def rateOfChange(ts):
-    return ts.diff / ts
+    return difference(ts) / ts
 
 
 def relativeDifference(ts):
-    return ts - 0.5 * (ts.shift(+1) + ts.shift(-1))
+    res = ts - 0.5 * (np.roll(ts, +1) + np.roll(ts, -1))
+    res[0] = np.nan
+    res[-1] = np.nan
+    return res
 
 
 def scale(ts, target_range=1, projection_point=None):
+    # scales the input series to have values ranging from -target_range to +target_range
     if not projection_point:
-        projection_point = ts.abs().max()
+        projection_point = np.max(np.abs(ts))
     return (ts / projection_point) * target_range
 
 
 def normScale(ts):
+    # scales the series to the [0, 1] interval and projects constant series onto 0.5
     ts_min = ts.min()
-    return (ts - ts_min) / (ts.max() - ts_min)
-
-
-def nBallClustering(in_arr, ball_radius=None):
-    x_len = in_arr.shape[0]
-    x_cols = in_arr.shape[1]
-
-    if not ball_radius:
-        ball_radius = 0.1 / np.log(x_len) ** (1 / x_cols)
-    exemplars = [in_arr[0, :]]
-    members = [[]]
-    for index, point in in_arr:
-        dists = np.linalg.norm(point - np.array(exemplars), axis=1)
-        min_index = dists.argmin()
-        if dists[min_index] < ball_radius:
-            members[min_index].append(index)
-        else:
-            exemplars.append(in_arr[index])
-            members.append([index])
-    ex_indices = [x[0] for x in members]
-    return exemplars, members, ex_indices
+    ts_max = ts.max()
+    if ts_min == ts_max:
+        return pd.Series(data=0.5, index=ts.index)
+    else:
+        return (ts - ts_min) / (ts.max() - ts_min)
+
+
+def standardizeByMean(ts):
+    # standardization with mean and sample standard deviation
+    return (ts - np.mean(ts)) / np.std(ts, ddof=1)
+
+
+def standardizeByMedian(ts):
+    # standardization with median and interquartile range
+    return (ts - np.median(ts)) / iqr(ts, nan_policy="omit")
 
 
 def kNN(in_arr, n_neighbors, algorithm="ball_tree"):
-    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm=algorithm).fit(in_arr)
+    # k-nearest-neighbor search
+    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm=algorithm).fit(in_arr.reshape(in_arr.shape[0], -1))
     return nbrs.kneighbors()
 
 
-def kNNMaxGap(in_arr, n_neighbors, algorithm="ball_tree"):
+def kNNMaxGap(in_arr, n_neighbors=10, algorithm="ball_tree"):
+    # searches for the "n_neighbors" nearest neighbors of every value in "in_arr"
+    # and then returns the distance to the neighbor with the maximum gap to its
+    # predecessor in the neighbor hierarchy
+    in_arr = np.asarray(in_arr)
     dist, *_ = kNN(in_arr, n_neighbors, algorithm=algorithm)
     sample_size = dist.shape[0]
     to_gap = np.append(np.array([[0] * sample_size]).T, dist, axis=1)
@@ -92,48 +126,315 @@ def kNNMaxGap(in_arr, n_neighbors, algorithm="ball_tree"):
     return dist[range(0, sample_size), max_gap_ind]
 
 
-def kNNSum(in_arr, n_neighbors, algorithm="ball_tree"):
+def kNNSum(in_arr, n_neighbors=10, algorithm="ball_tree"):
+    # searches for the "n_neighbors" nearest neighbors of every value in "in_arr"
+    # and assigns that value the summed-up distances to these neighbors
+    in_arr = np.asarray(in_arr)
     dist, *_ = kNN(in_arr, n_neighbors, algorithm=algorithm)
     return dist.sum(axis=1)
 
 
-def stdQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
-    """Pandas built in function for statistical moments have quite poor nan- control, so here comes a wrapper that
-    will return the standart deviation for a given series input, if the total number of nans in the series does
-    not exceed "max_nan_total" and the number of consecutive nans does not exceed max_nan_consec.
+@nb.njit
+def _maxConsecutiveNan(arr, max_consec):
+    # checks if arr (boolean array) has no more than "max_consec" consecutive True values
+    current = 0
+    idx = 0
+    while idx < arr.size:
+        while idx < arr.size and arr[idx]:
+            current += 1
+            idx += 1
+        if current > max_consec:
+            return False
+        current = 0
+        idx += 1
+    return True
 
-    :param data             Pandas Series. The data series, the standart deviation shall be calculated of.
-    :param max_nan_total    Integer. Number of np.nan entries allowed to be contained in the series
-    :param max_nan_consec   Integer. Maximal number of consecutive nan entries allowed to occure in data.
-    """
-    if _isValid(data, max_nan_total, max_nan_consec):
-        return data.std()
-    return np.nan
 
+def validationTrafo(data, max_nan_total, max_nan_consec):
+    # data has to be boolean: False = valid value, True = invalid value.
+    # The function returns an all-True array of the input's size for invalid input arrays
+    # and an all-False array for valid ones.
+    data = data.copy()
+    if (max_nan_total is np.inf) & (max_nan_consec is np.inf):
+        return data
 
-def varQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
-    """Pandas built in function for statistical moments have quite poor nan- control, so here comes a wrapper that
-    will return the variance for a given series input, if the total number of nans in the series does
-    not exceed "max_nan_total" and the number of consecutive nans does not exceed max_nan_consec.
+    # nan_mask = np.isnan(data)
 
-    :param data             Pandas Series. The data series, the variance shall be calculated of.
-    :param max_nan_total    Integer. Number of np.nan entries allowed to be contained in the series
-    :param max_nan_consec   Integer. Maximal number of consecutive nan entries allowed to occure in data.
-    """
-    if _isValid(data, max_nan_total, max_nan_consec):
-        return data.var()
-    return np.nan
+    if data.sum() <= max_nan_total:
+        if max_nan_consec is np.inf:
+            data[:] = False
+            return data
+        elif _maxConsecutiveNan(np.asarray(data), max_nan_consec):
+            data[:] = False
+        else:
+            data[:] = True
+    else:
+        data[:] = True
+
+    return data
+
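+# Usage sketch for validationTrafo (assumed values): an isna-mask with at most 2 nans in
+# total and at most 1 consecutive nan passes both limits, so an all-False mask is returned;
+# otherwise an all-True mask comes back:
+#
+#   mask = validationTrafo(series.isna(), max_nan_total=2, max_nan_consec=1)
+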
+
+def stdQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
+    return np.nanstd(data[~validationTrafo(data.isna(), max_nan_total, max_nan_consec)], ddof=1)
+
+
+def varQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
+    return np.nanvar(data[~validationTrafo(data.isna(), max_nan_total, max_nan_consec)], ddof=1)
 
 
 def meanQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
-    """Pandas built in function for statistical moments have quite poor nan- control, so here comes a wrapper that
-    will return the mean for a given series input, if the total number of nans in the series does
-    not exceed "max_nan_total" and the number of consecutive nans does not exceed max_nan_consec.
+    return np.nanmean(data[~validationTrafo(data.isna(), max_nan_total, max_nan_consec)])
+
 
-    :param data             Pandas Series. The data series, the mean shall be calculated of.
-    :param max_nan_total    Integer. Number of np.nan entries allowed to be contained in the series
-    :param max_nan_consec   Integer. Maximal number of consecutive nan entries allowed to occure in data.
+def interpolateNANs(data, method, order=2, inter_limit=2, downgrade_interpolation=False, return_chunk_bounds=False):
     """
-    if _isValid(data, max_nan_total, max_nan_consec):
-        return data.mean()
-    return np.nan
+    The function interpolates nan-values (and nan-grids) in timeseries data. It can be passed all the method keywords
+    from the pd.Series.interpolate method and will then apply this very method. Note that the inter_limit keyword
+    really restricts the interpolation to chunks not containing more than "inter_limit" nan entries
+    (thereby not being identical to the "limit" keyword of pd.Series.interpolate).
+
+    :param data:                    pd.Series or np.array. The data series to be interpolated
+    :param method:                  String. Method keyword designating interpolation method to use.
+    :param order:                   Integer. If your desired interpolation method needs an order to be passed -
+                                    here you pass it.
+    :param inter_limit:             Integer. Default = 2. Limit up to which consecutive nan - values in the data get
+                                    replaced by interpolation.
+                                    Its default value suits an interpolation that only will apply to points of an
+                                    inserted frequency grid. (regularization by interpolation)
+                                    Gaps wider than "inter_limit" will NOT be interpolated at all.
+    :param downgrade_interpolation: Boolean. Default False. If True:
+                                    If a data chunk does not contain enough values for an interpolation of order
+                                    "order", the highest possible order will be selected for that chunk's
+                                    interpolation.
+    :param return_chunk_bounds:     Boolean. Default False. If True:
+                                    In addition to the interpolated data, the start and end points of data chunks
+                                    not containing any run of more than "inter_limit" consecutive nan values
+                                    are calculated and returned.
+                                    (This option mainly serves the use of "interpolateNANs" in the
+                                    context of saqc harmonization.)
+
+    :return:                        The interpolated data. If "return_chunk_bounds" is True, a tuple of the
+                                    interpolated data and the chunk boundary timestamps.
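+
+    A usage sketch (assumed, illustrative values): with inter_limit=2 the isolated nan
+    is interpolated, while the wider (three value) gap is left untouched.
+
+    >>> s = pd.Series([1.0, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0],
+    ...               index=pd.date_range("2020-01-01", periods=7, freq="10min"))
+    >>> interpolateNANs(s, "time", inter_limit=2)  # doctest: +SKIP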
+    """
+    inter_limit = int(inter_limit)
+    data = pd.Series(data).copy()
+    gap_mask = (data.rolling(inter_limit, min_periods=0).apply(lambda x: np.sum(np.isnan(x)), raw=True)) != inter_limit
+
+    if inter_limit == 2:
+        gap_mask = gap_mask & gap_mask.shift(-1, fill_value=True)
+    else:
+        gap_mask = (
+            gap_mask.replace(True, np.nan).fillna(method="bfill", limit=inter_limit).replace(np.nan, True).astype(bool)
+        )
+
+    if return_chunk_bounds:
+        # start and end points of interpolation chunks have to be memorized to block their flagging:
+        chunk_switches = gap_mask.astype(int).diff()
+        chunk_starts = chunk_switches[chunk_switches == -1].index
+        chunk_ends = chunk_switches[(chunk_switches.shift(-1) == 1)].index
+        chunk_bounds = chunk_starts.join(chunk_ends, how="outer", sort=True)
+
+    pre_index = data.index
+    data = data[gap_mask]
+
+    if method in ["linear", "time"]:
+
+        data.interpolate(method=method, inplace=True, limit=inter_limit - 1, limit_area="inside")
+
+    else:
+        dat_name = data.name
+        gap_mask = (~gap_mask).cumsum()
+        data = pd.merge(gap_mask, data, how="inner", left_index=True, right_index=True)
+
+        def _interpolWrapper(x, wrap_order=order, wrap_method=method):
+            if x.count() > wrap_order:
+                try:
+                    return x.interpolate(method=wrap_method, order=int(wrap_order))
+                except (NotImplementedError, ValueError):
+                    logger.warning(
+                        f"Interpolation with method {method} is not supported at order {wrap_order}. "
+                        f"and will be performed at order {wrap_order-1}"
+                    )
+                    return _interpolWrapper(x, int(wrap_order - 1), wrap_method)
+            elif x.size < 3:
+                return x
+            else:
+                if downgrade_interpolation:
+                    return _interpolWrapper(x, int(x.count() - 1), wrap_method)
+                else:
+                    return x
+
+        data = data.groupby(data.columns[0]).transform(_interpolWrapper)
+        # squeezing the 1-dimensional frame resulting from groupby for consistency reasons
+        data = data.squeeze(axis=1)
+        data.name = dat_name
+    data = data.reindex(pre_index)
+    if return_chunk_bounds:
+        return data, chunk_bounds
+    else:
+        return data
+
+
+def aggregate2Freq(
+    data, method, freq, agg_func, fill_value=np.nan, max_invalid_total=np.inf, max_invalid_consec=np.inf
+):
+    # The function aggregates values to an equidistant frequency grid with agg_func.
+    # Timestamps that have no values projected onto them get "fill_value" assigned. Also,
+    # "fill_value" serves as replacement for "invalid" intervals.
+
+    methods = {
+        "nagg": lambda seconds_total: (seconds_total/2, "left", "left"),
+        "bagg": lambda _: (0, "left", "left"),
+        "fagg": lambda _: (0, "right", "right"),
+    }
+
+    # filter data for invalid patterns (since filtering is expensive we pre-check if it is demanded)
+    if (max_invalid_total is not np.inf) | (max_invalid_consec is not np.inf):
+        if pd.isnull(fill_value):
+            temp_mask = data.isna()
+        else:
+            temp_mask = data == fill_value
+
+        temp_mask = temp_mask.groupby(pd.Grouper(freq=freq)).transform(
+            validationTrafo, max_nan_total=max_invalid_total, max_nan_consec=max_invalid_consec
+        )
+        data[temp_mask] = fill_value
+
+    seconds_total = pd.Timedelta(freq).total_seconds()
+    base, label, closed = methods[method](seconds_total)
+
+    # In the following, we check for empty intervals outside resample.apply, because:
+    # - resample AND groupby insert the value zero for empty intervals if resampling with any kind of "sum"
+    #   application, whereas we want "fill_value" to be inserted
+    # - we are aggregating data and flags with this function, and empty intervals usually would get assigned the
+    #   flagger.BAD flag (where resample inserts np.nan or 0)
+
+    data_resampler = data.resample(f"{seconds_total:.0f}s", base=base, closed=closed, label=label)
+
+    empty_intervals = data_resampler.count() == 0
+    # a great performance gain can be achieved by avoiding .apply and using the built-in resampler
+    # methods instead (this covers all the basic aggregations, such as median, mean, sum, count, ...)
+    try:
+        check_name = re.sub("^nan", "", agg_func.__name__)
+        # a nasty special case: if the function "count" was passed, we do not want empty intervals to be replaced by nan:
+        if check_name == 'count':
+            empty_intervals[:] = False
+        data = getattr(data_resampler, check_name)()
+    except AttributeError:
+        data = data_resampler.apply(agg_func)
+
+    # since the loffset keyword of pandas.resample "discharges" after one use of the resampler (pandas logic), we
+    # correct the resampled labels' offset manually, if necessary.
+    if method == "nagg":
+        data.index = data.index.shift(freq=pd.Timedelta(freq) / 2)
+        empty_intervals.index = empty_intervals.index.shift(freq=pd.Timedelta(freq) / 2)
+    data[empty_intervals] = fill_value
+
+    return data
+
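+# Usage sketch for aggregate2Freq (assumed, illustrative parameters): aggregate a series
+# onto a 15 minute grid with a nan-aware mean, marking empty intervals with nan:
+#
+#   out = aggregate2Freq(series, method="bagg", freq="15min", agg_func=np.nanmean,
+#                        fill_value=np.nan)
+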
+
+def shift2Freq(data, method, freq, fill_value=np.nan):
+    # shift timestamps backwards/forwards in order to align them with an equidistant
+    # frequency grid.
+
+    methods = {
+        "fshift": lambda freq: ("ffill", pd.Timedelta(freq)),
+        "bshift": lambda freq: ("bfill", pd.Timedelta(freq)),
+        "nshift": lambda freq: ("nearest", pd.Timedelta(freq)/2),
+    }
+    direction, tolerance = methods[method](freq)
+    target_ind = pd.date_range(
+        start=data.index[0].floor(freq), end=data.index[-1].ceil(freq),
+        freq=freq,
+        name=data.index.name
+    )
+    return data.reindex(target_ind, method=direction, tolerance=tolerance, fill_value=fill_value)
+
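+# Usage sketch for shift2Freq (assumed parameters): snap the timestamps of a series onto
+# a 15 minute grid, taking the nearest value within a tolerance of freq/2:
+#
+#   out = shift2Freq(series, method="nshift", freq="15min")
+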
+
+@nb.njit
+def _coeffMat(x, deg):
+    # helper function to construct numba-compatible polynomial fit function
+    mat_ = np.zeros(shape=(x.shape[0], deg + 1))
+    const = np.ones_like(x)
+    mat_[:, 0] = const
+    mat_[:, 1] = x
+    if deg > 1:
+        for n in range(2, deg + 1):
+            mat_[:, n] = x ** n
+    return mat_
+
+
+@nb.jit
+def _fitX(a, b):
+    # helper function to construct numba-compatible polynomial fit function
+    # linalg solves ax = b
+    det_ = np.linalg.lstsq(a, b)[0]
+    return det_
+
+
+@nb.jit
+def _fitPoly(x, y, deg):
+    # a numba compatible polynomial fit function
+    a = _coeffMat(x, deg)
+    p = _fitX(a, y)
+    # Reverse order so p[0] is coefficient of highest order
+    return p[::-1]
+
+
+@nb.jit
+def evalPolynomial(P, x):
+    # a numba compatible polynomial evaluator
+    result = 0
+    for coeff in P:
+        result = x * result + coeff
+    return result
+
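+# Sketch of the numba polynomial helpers (assumed sample values): fit a degree-2
+# polynomial to a quadratic sample and evaluate it with Horner's scheme:
+#
+#   coeffs = _fitPoly(np.array([0., 1., 2., 3.]), np.array([0., 1., 4., 9.]), deg=2)
+#   y = evalPolynomial(coeffs, 1.5)   # approximately 2.25 for this sample
+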
+
+def polyRollerNumba(in_slice, miss_marker, val_range, center_index, poly_deg):
+    # numba compatible function to roll with when modelling data with polynomial model
+    miss_mask = in_slice == miss_marker
+    x_data = val_range[~miss_mask]
+    y_data = in_slice[~miss_mask]
+    fitted = _fitPoly(x_data, y_data, deg=poly_deg)
+    return evalPolynomial(fitted, center_index)
+
+
+def polyRollerNoMissingNumba(in_slice, val_range, center_index, poly_deg):
+    # numba compatible function to roll with when modelling data with a polynomial model -
+    # it is assumed that in_slice is an equidistant sample
+    fitted = _fitPoly(val_range, in_slice, deg=poly_deg)
+    return evalPolynomial(fitted, center_index)
+
+
+def polyRoller(in_slice, miss_marker, val_range, center_index, poly_deg):
+    # function to roll with when modelling data with polynomial model
+    miss_mask = in_slice == miss_marker
+    x_data = val_range[~miss_mask]
+    y_data = in_slice[~miss_mask]
+    fitted = poly.polyfit(x=x_data, y=y_data, deg=poly_deg)
+    return poly.polyval(center_index, fitted)
+
+
+def polyRollerNoMissing(in_slice, val_range, center_index, poly_deg):
+    # function to roll with when modelling data with a polynomial model -
+    # it is assumed that in_slice is an equidistant sample
+    fitted = poly.polyfit(x=val_range, y=in_slice, deg=poly_deg)
+    return poly.polyval(center_index, fitted)
+
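+# Usage sketch (assumptions: an equidistant series without missing values, a centered
+# window of length 5 - hence center_index=2 - and a polynomial of degree 2):
+#
+#   fitted = series.rolling(5, center=True).apply(
+#       polyRollerNoMissing, args=(np.arange(5), 2, 2), raw=True)
+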
+
+def polyRollerIrregular(in_slice, center_index_ser, poly_deg):
+    # a function to roll with, for polynomial fitting of data not having an equidistant frequency grid.
+    # (expects to get passed pandas timeseries), so raw parameter of rolling.apply should be set to False.
+    x_data = ((in_slice.index - in_slice.index[0]).total_seconds()) / 60
+    fitted = poly.polyfit(x_data, in_slice.values, poly_deg)
+    center_pos = int(len(in_slice) - center_index_ser[in_slice.index[-1]])
+    return poly.polyval(x_data[center_pos], fitted)
+
+
+def expModelFunc(x, a=0, b=0, c=0):
+    # exponential model function, used in optimization contexts (drift correction)
+    return a + b * (np.exp(c * x) - 1)
+
+
+def linearInterpolation(data, inter_limit=2):
+    return interpolateNANs(data, "time", inter_limit=inter_limit)
+
+
+def polynomialInterpolation(data, inter_limit=2, inter_order=2):
+    return interpolateNANs(data, "polynomial", inter_limit=inter_limit, order=inter_order)
diff --git a/saqc/lib/types.py b/saqc/lib/types.py
index 7e27f6d48bc39d702cc40a788b1a2ef3e7f87859..facebe59987a4d3a74352b7146a0ab2320f8a73f 100644
--- a/saqc/lib/types.py
+++ b/saqc/lib/types.py
@@ -1,11 +1,13 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from typing import TypeVar
+from typing import TypeVar, Union
 
 import numpy as np
 import pandas as pd
+import dios
 
 T = TypeVar("T")
 ArrayLike = TypeVar("ArrayLike", np.ndarray, pd.Series, pd.DataFrame)
-PandasLike = TypeVar("PandasLike", pd.Series, pd.DataFrame)
+PandasLike = TypeVar("PandasLike", pd.Series, pd.DataFrame, dios.DictOfSeries)
+DiosLikeT = Union[dios.DictOfSeries, pd.DataFrame]
diff --git a/setup.py b/setup.py
index 28dc64c8ba650fb232fca9cd5eff288ca746dc13..0048952077db64fb09076749b32a99b8894e10e4 100644
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,7 @@ setup(
     long_description_content_type="text/markdown",
     url="https://git.ufz.de/rdm-software/saqc",
     packages=find_packages(),
+    python_requires='>=3.7',
     install_requires=[
         "numpy",
         "pandas",
@@ -25,6 +26,7 @@ setup(
         "pyarrow",
         "python-intervals",
         "astor",
+        "dios"
     ],
     license="GPLv3",
     entry_points={"console_scripts": ["saqc=saqc.__main__:main"],},
diff --git a/sphinx-doc/.gitignore b/sphinx-doc/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..16f05fe5bcb986845d46c131c0e10447581670b2
--- /dev/null
+++ b/sphinx-doc/.gitignore
@@ -0,0 +1,6 @@
+
+_api/
+_build/
+_static/
+*.automodsumm
+_static/*
\ No newline at end of file
diff --git a/sphinx-doc/FlagFunctions.rst b/sphinx-doc/FlagFunctions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..584d0dc5f48d0434a02d86ecbf884e47188d7236
--- /dev/null
+++ b/sphinx-doc/FlagFunctions.rst
@@ -0,0 +1,7 @@
+
+Functions
+=========
+
+.. automodapi:: saqc.funcs
+   :skip: register
+
diff --git a/sphinx-doc/Makefile b/sphinx-doc/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..efdfe91d931f29bf94f8b1f08c1b6f3d0661c8ab
--- /dev/null
+++ b/sphinx-doc/Makefile
@@ -0,0 +1,27 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile clean
+
+# clean sphinx generated stuff
+clean:
+	rm -rf _build _static _api
+	rm -f *.automodsumm
+	mkdir _static
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
diff --git a/sphinx-doc/conf.py b/sphinx-doc/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..77bdd67bc2a821b18f35e06557be2ea665b6a3c2
--- /dev/null
+++ b/sphinx-doc/conf.py
@@ -0,0 +1,111 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('..'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'SaQC'
+copyright = '2020, Bert Palm, David Schäfer, Peter Lünenschloß, Lennart Schmidt, Juliane Geller'
+author = 'Bert Palm, David Schäfer, Peter Lünenschloß, Lennart Schmidt, Juliane Geller'
+
+# The full version, including alpha/beta/rc tags
+release = 'develop'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    # "sphinx.ext.doctest",
+    # "sphinx.ext.extlinks",
+    # "sphinx.ext.todo",
+    # "sphinx.ext.intersphinx",
+    # "sphinx.ext.coverage",
+    # "sphinx.ext.mathjax",
+    # "sphinx.ext.ifconfig",
+    "sphinx.ext.autosectionlabel",
+
+    # link source code
+    "sphinx.ext.viewcode",
+
+    # add support for NumPy style docstrings
+    "sphinx.ext.napoleon",
+
+    # Doc a whole module
+    # see https://sphinx-automodapi.readthedocs.io/en/latest/
+    'sphinx_automodapi.automodapi',
+    # 'sphinx_automodapi.smart_resolver',
+    # see https://sphinxcontrib-fulltoc.readthedocs.io/en/latest/
+    'sphinxcontrib.fulltoc',
+
+    # Markdown sources support
+    # https://recommonmark.readthedocs.io/en/latest/
+    'recommonmark',
+    # https://github.com/ryanfox/sphinx-markdown-tables
+    'sphinx_markdown_tables',
+]
+
+
+# -- Params of the extensions ------------------------------------------------
+
+numpydoc_show_class_members = False
+
+automodsumm_inherited_members = True
+# write out the files generated by automodapi, mainly for debugging
+automodsumm_writereprocessed = True
+
+automodapi_inheritance_diagram = False
+automodapi_toctreedirnm = '_api'
+autosectionlabel_prefix_document = True
+
+
+# -- Other options -----------------------------------------------------------
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+source_suffix = ['.rst', '.md']
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = "nature"
+
+# use pandas theme
+# html_theme = "pydata_sphinx_theme"
+
+
+# html_theme_options = {
+# }
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
\ No newline at end of file
diff --git a/sphinx-doc/flagger.rst b/sphinx-doc/flagger.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d8536aa3e39c53d92aa11b9057b92ceef84b7535
--- /dev/null
+++ b/sphinx-doc/flagger.rst
@@ -0,0 +1,11 @@
+
+Flagger
+=======
+
+.. automodapi:: saqc.flagger
+   :include-all-objects:
+   :no-heading:
+
+
+
+
diff --git a/sphinx-doc/index.rst b/sphinx-doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c8d40fbd90e200486bdc243c67445ccd0e690015
--- /dev/null
+++ b/sphinx-doc/index.rst
@@ -0,0 +1,31 @@
+.. SaQC documentation master file, created by
+   sphinx-quickstart on Mon Aug 17 12:11:29 2020.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to SaQC's documentation!
+================================
+
+SaQC is a tool for the automated quality control of timeseries data.
+
+.. toctree::
+   :hidden:
+
+   Repository <https://git.ufz.de/rdm-software/saqc>
+
+.. toctree::
+   :maxdepth: 2
+
+   flagger
+
+.. toctree::
+   :maxdepth: 2
+
+   FlagFunctions
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/sphinx-doc/make.bat b/sphinx-doc/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..6247f7e231716482115f34084ac61030743e0715
--- /dev/null
+++ b/sphinx-doc/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/sphinx-doc/requirements_sphinx.txt b/sphinx-doc/requirements_sphinx.txt
new file mode 100644
index 0000000000000000000000000000000000000000..511d1671395df732730a0d0807e394fea54e0569
--- /dev/null
+++ b/sphinx-doc/requirements_sphinx.txt
@@ -0,0 +1,5 @@
+recommonmark
+sphinx
+sphinx-automodapi
+sphinxcontrib-fulltoc
+sphinx-markdown-tables
diff --git a/test/common.py b/test/common.py
index bc0440d16f1133aeca87c714598b004e6bf1c0b1..d5867e94476b9e7744826fd6f7d40f88770a0fbd 100644
--- a/test/common.py
+++ b/test/common.py
@@ -2,14 +2,13 @@
 # -*- coding: utf-8 -*-
 
 import io
-import re
 
 import numpy as np
 import pandas as pd
+import dios
 
-from saqc.core.core import readConfig
 from saqc.flagger import (
-    ContinuousFlagger,
+    PositionalFlagger,
     CategoricalFlagger,
     SimpleFlagger,
     DmpFlagger,
@@ -23,34 +22,25 @@ TESTFLAGGER = (
     CategoricalFlagger(["NIL", "GOOD", "BAD"]),
     SimpleFlagger(),
     DmpFlagger(),
-    ContinuousFlagger(),
 )
 
+def flagAll(data, field, flagger, **kwargs):
+    # NOTE: remember to rename flag -> flag_values
+    return data, flagger.setFlags(field=field, flag=flagger.BAD)
 
-def initData(cols=2, start_date="2017-01-01", end_date="2017-12-31", freq="1h"):
-    dates = pd.date_range(start=start_date, end=end_date, freq=freq)
-    data = {}
-    dummy = np.arange(len(dates))
-    for col in range(1, cols + 1):
-        data[f"var{col}"] = dummy * (col)
-    return pd.DataFrame(data, index=dates)
 
+def initData(cols=2, start_date="2017-01-01", end_date="2017-12-31", freq=None, rows=None):
+    if rows is None:
+        freq = freq or "1h"
 
-def initMetaString(metastring, data):
-    cleaned = re.sub(r"\s*,\s*", r",", re.sub(r"\|", r";", re.sub(r"\n[ \t]+", r"\n", metastring)))
-    fobj = io.StringIO(cleaned.strip())
-    config = readConfig(fobj, data)
-    fobj.seek(0)
-    return fobj, config
+    di = dios.DictOfSeries(itype=dios.DtItype)
+    dates = pd.date_range(start=start_date, end=end_date, freq=freq, periods=rows)
+    dummy = np.arange(len(dates))
 
+    for col in range(1, cols + 1):
+        di[f"var{col}"] = pd.Series(data=dummy * col, index=dates)
 
-def _getKeys(metadict):
-    keys = list(metadict[0].keys())
-    for row in metadict[1:]:
-        for k in row.keys():
-            if k not in keys:
-                keys.append(k)
-    return keys
+    return di
 
 
 def writeIO(content):
@@ -58,13 +48,3 @@ def writeIO(content):
     f.write(content)
     f.seek(0)
     return f
-
-
-def initMetaDict(config_dict, data):
-    df = pd.DataFrame(config_dict)[_getKeys(config_dict)]
-    fobj = io.StringIO()
-    df.fillna("").to_csv(fobj, index=False, sep=";")
-    fobj.seek(0)
-    config = readConfig(fobj, data)
-    fobj.seek(0)
-    return fobj, config
diff --git a/test/core/__init__.py b/test/core/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e1f7e6e8bf04d3d59cff68b83d91fbf791f0faa2 100644
--- a/test/core/__init__.py
+++ b/test/core/__init__.py
@@ -0,0 +1,2 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
diff --git a/test/core/test_core.py b/test/core/test_core.py
index 81e79fc9178f8698d6dea67e76292d08c07f08ec..55a75a227bf880d30084ad7a0e6956d1611d9d93 100644
--- a/test/core/test_core.py
+++ b/test/core/test_core.py
@@ -7,11 +7,10 @@ import pytest
 import numpy as np
 import pandas as pd
 
-from saqc.funcs import register, flagRange
-from saqc.core.core import run
-from saqc.core.config import Fields as F
-from saqc.lib.plotting import _plot
-from test.common import initData, initMetaDict, TESTFLAGGER
+from saqc import SaQC, register
+from saqc.funcs import flagRange
+from saqc.lib import plotting as splot
+from test.common import initData, TESTFLAGGER, flagAll
 
 
 # no logging output needed here
@@ -22,10 +21,7 @@ logging.disable(logging.CRITICAL)
 OPTIONAL = [False, True]
 
 
-@register()
-def flagAll(data, field, flagger, **kwargs):
-    # NOTE: remember to rename flag -> flag_values
-    return data, flagger.setFlags(field=field, flag=flagger.BAD)
+register(masking='field')(flagAll)
 
 
 @pytest.fixture
@@ -33,109 +29,35 @@ def data():
     return initData(3)
 
 
-def _initFlags(flagger, data, optional):
-    return None
-    if optional:
-        return flagger.initFlags(data[data.columns[::2]])._flags
-
-
 @pytest.fixture
 def flags(flagger, data, optional):
     if not optional:
         return flagger.initFlags(data[data.columns[::2]])._flags
 
 
-# NOTE: there is a lot of pytest magic involved:
-#       the parametrize parameters are implicitly available
-#       within the used fixtures, that is why we need the optional
-#       parametrization without actually using it in the
-#       function
-@pytest.mark.skip(reason="test slicing support is currently disabled")
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("optional", OPTIONAL)
-def test_temporalPartitioning(data, flagger, flags):
-    """
-    Check if the time span in meta is respected
-    """
-    var1, var2, var3, *_ = data.columns
-    split_date = data.index[len(data.index) // 2]
-
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-        {F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_date},
-        {F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_date},
-    ]
-    meta_file, meta_frame = initMetaDict(metadict, data)
-    pdata, pflagger = run(meta_file, flagger, data, flags=flags)
-
-    fields = [F.VARNAME, F.START, F.END]
-    for _, row in meta_frame.iterrows():
-        vname, start_date, end_date = row[fields]
-        fchunk = pflagger.getFlags(field=vname, loc=pflagger.isFlagged(vname))
-        assert fchunk.index.min() == start_date, "different start dates"
-        assert fchunk.index.max() == end_date, "different end dates"
-
-
-@pytest.mark.skip(reason="test slicing support is currently disabled")
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("optional", OPTIONAL)
-def test_positionalPartitioning(data, flagger, flags):
-    data = data.reset_index(drop=True)
-    if flags is not None:
-        flags = flags.reset_index(drop=True)
-    var1, var2, var3, *_ = data.columns
-    split_index = int(len(data.index) // 2)
-
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-        {F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_index},
-        {F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_index},
-    ]
-    meta_file, meta_frame = initMetaDict(metadict, data)
-
-    pdata, pflagger = run(meta_file, flagger, data, flags=flags)
-
-    fields = [F.VARNAME, F.START, F.END]
-    for _, row in meta_frame.iterrows():
-        vname, start_index, end_index = row[fields]
-        fchunk = pflagger.getFlags(field=vname, loc=pflagger.isFlagged(vname))
-        assert fchunk.index.min() == start_index, "different start indices"
-        assert fchunk.index.max() == end_index, f"different end indices: {fchunk.index.max()} vs. {end_index}"
-
-
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_errorHandling(data, flagger):
-    @register()
-    def raisingFunc(data, fielf, flagger, **kwargs):
-        raise TypeError
 
-    var1, *_ = data.columns
-
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "raisingFunc()"},
-    ]
+    @register(masking='field')
+    def raisingFunc(data, field, flagger, **kwargs):
+        raise TypeError
 
-    tests = ["ignore", "warn"]
+    var1 = data.columns[0]
 
-    for policy in tests:
+    for policy in ["ignore", "warn"]:
         # NOTE: should not fail, that's all we are testing here
-        metafobj, _ = initMetaDict(metadict, data)
-        run(metafobj, flagger, data, error_policy=policy)
+        SaQC(flagger, data, error_policy=policy).raisingFunc(var1).getResult()
+
+    with pytest.raises(TypeError):
+        SaQC(flagger, data, error_policy='raise').raisingFunc(var1).getResult()
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_duplicatedVariable(flagger):
     data = initData(1)
-    var1, *_ = data.columns
+    var1 = data.columns[0]
 
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-    ]
-    metafobj, meta = initMetaDict(metadict, data)
-
-    pdata, pflagger = run(metafobj, flagger, data)
-    pflags = pflagger.getFlags()
+    pdata, pflags = SaQC(flagger, data).flagDummy(var1).flagDummy(var1).getResult()
 
     if isinstance(pflags.columns, pd.MultiIndex):
         cols = pflags.columns.get_level_values(0).drop_duplicates()
@@ -145,30 +67,20 @@ def test_duplicatedVariable(flagger):
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_assignVariable(flagger):
+def test_sourceTarget(flagger):
     """
     test implicit assignments
     """
     data = initData(1)
-    var1, *_ = data.columns
-    var2 = "empty"
-
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-        {F.VARNAME: var2, F.TESTS: "flagAll()"},
-    ]
-    metafobj, meta = initMetaDict(metadict, data)
+    var1 = data.columns[0]
+    target = "new"
 
-    pdata, pflagger = run(metafobj, flagger, data)
-    pflags = pflagger.getFlags()
+    pdata, pflagger = SaQC(flagger, data).flagAll(field=var1, target=target).getResult(raw=True)
+    pflags = pflagger.isFlagged()
 
-    if isinstance(pflags.columns, pd.MultiIndex):
-        cols = pflags.columns.get_level_values(0).drop_duplicates()
-        assert (cols == [var1, var2]).all()
-        assert pflagger.isFlagged(var2).any()
-    else:
-        assert (pflags.columns == [var1, var2]).all()
-        assert pflagger.isFlagged(var2).any()
+    assert (pdata[var1] == pdata[target]).all(axis=None)
+    assert (pflags[var1] == False).all(axis=None)
+    assert (pflags[target] == True).all(axis=None)
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
@@ -179,14 +91,10 @@ def test_dtypes(data, flagger, flags):
     """
     flagger = flagger.initFlags(data)
     flags = flagger.getFlags()
-    var1, var2, *_ = data.columns
-
-    metadict = [
-        {F.VARNAME: var1, F.TESTS: "flagAll()"},
-        {F.VARNAME: var2, F.TESTS: "flagAll()"},
-    ]
-    metafobj, meta = initMetaDict(metadict, data)
-    pdata, pflagger = run(metafobj, flagger, data, flags=flags)
+    var1, var2 = data.columns[:2]
+
+    pdata, pflagger = SaQC(flagger, data, flags=flags).flagAll(var1).flagAll(var2).getResult(raw=True)
+
     pflags = pflagger.getFlags()
     assert dict(flags.dtypes) == dict(pflags.dtypes)
 
@@ -203,6 +111,8 @@ def test_plotting(data, flagger):
     field, *_ = data.columns
     flagger = flagger.initFlags(data)
     _, flagger_range = flagRange(data, field, flagger, min=10, max=90, flag=flagger.BAD)
-    _, flagger_range = flagRange(data, field, flagger_range, min=40, max=60, flag=flagger.GOOD)
-    mask = flagger.getFlags(field) != flagger_range.getFlags(field)
-    _plot(data, mask, field, flagger, interactive_backend=False)
+    data_new, flagger_range = flagRange(data, field, flagger_range, min=40, max=60, flag=flagger.GOOD)
+    splot._interactive = False
+    splot._plotSingleVariable(data, data_new, flagger, flagger_range, sources=[], targets=[data_new.columns[0]])
+    splot._plotMultipleVariables(data, data_new, flagger, flagger_range, targets=data_new.columns)
+    splot._interactive = True
diff --git a/test/core/test_evaluator.py b/test/core/test_evaluator.py
deleted file mode 100644
index 1e9a63419916e19a7869622ad87e75710d7b5250..0000000000000000000000000000000000000000
--- a/test/core/test_evaluator.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import pytest
-import numpy as np
-
-from saqc.funcs import register
-from saqc.core.evaluator import (
-    compileTree,
-    parseExpression,
-    initLocalEnv,
-    ConfigChecker,
-    ConfigTransformer,
-)
-
-from test.common import TESTFLAGGER, initData
-
-
-def compileExpression(expr, flagger, nodata=np.nan):
-    data = initData()
-    field = data.columns[0]
-    tree = parseExpression(expr)
-    env = initLocalEnv(data, field, flagger.initFlags(data), nodata)
-    ConfigChecker(env, flagger.signature).visit(tree)
-    transformed_tree = ConfigTransformer(env).visit(tree)
-    code = compileTree(transformed_tree)
-    return code
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_syntaxError(flagger):
-    exprs = [
-        "range(x=5",
-        "rangex=5)",
-        "range[x=5]" "range{x=5}" "int->float(x=4)" "int*float(x=4)",
-    ]
-
-    for expr in exprs:
-        with pytest.raises(SyntaxError):
-            compileExpression(expr, flagger)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_typeError(flagger):
-
-    exprs = [
-        # "func",
-        "flagDummy(kwarg=[1, 2, 3])",
-        "flagDummy(x=5)",
-        "flagDummy(dummy())",
-        "flagDummy(kwarg=dummy(this))",
-    ]
-
-    for expr in exprs:
-        with pytest.raises(TypeError):
-            compileExpression(expr, flagger)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_supportedArguments(flagger):
-    @register()
-    def func(data, field, flagger, kwarg, **kwargs):
-        return data, flagger
-
-    exprs = [
-        "func(kwarg='str')",
-        "func(kwarg=5)",
-        "func(kwarg=5.5)",
-        "func(kwarg=-5)",
-        "func(kwarg=True)",
-        "func(kwarg=func())",
-    ]
-    for expr in exprs:
-        compileExpression(expr, flagger)
diff --git a/test/core/test_masking.py b/test/core/test_masking.py
new file mode 100644
index 0000000000000000000000000000000000000000..b41eebbcb2bc6ba6b3d9a1594fed5282e23ac5fc
--- /dev/null
+++ b/test/core/test_masking.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import logging
+
+import pytest
+import pandas as pd
+
+from saqc import SaQC, register
+from test.common import initData, TESTFLAGGER
+
+
+logging.disable(logging.CRITICAL)
+
+
+@pytest.fixture
+def data():
+    return initData(3)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_masking(data, flagger):
+    """
+    test if flagged values are excluded from the subsequent tests
+    """
+    flagger = flagger.initFlags(data)
+    var1 = 'var1'
+    mn = min(data[var1])
+    mx = max(data[var1]) / 2
+
+    qc = SaQC(flagger, data)
+    qc = qc.flagRange(var1, mn, mx)
+    # min is not considered because it's the smallest possible value.
+    # if masking works, `data > max` will be masked,
+    # so the following will deliver True for values in range (data < max),
+    # otherwise False, like an inverse range-test
+    qc = qc.procGeneric("dummy", func=lambda var1: var1 >= mn)
+
+    pdata, pflagger = qc.getResult(raw=True)
+    out_of_range = pflagger.isFlagged(var1)
+    in_range = ~out_of_range
+
+    assert (pdata.loc[out_of_range, "dummy"] == False).all()
+    assert (pdata.loc[in_range, "dummy"] == True).all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_masking_UnmaskingOnDataChange(data, flagger):
+    """ test if (un)masking works as expected on data-change.
+
+    If the data change in the func, unmasking should respect these changes and
+    should not reapply the original data, but instead take the new data (and flags) as is.
+    Also, if flags change, the data should be taken as is.
+    """
+    FILLER = -9999
+
+    @register(masking='all')
+    def changeData(data, field, flagger, **kwargs):
+        mask = data.isna()
+        data.aloc[mask] = FILLER
+        return data, flagger
+
+    @register(masking='all')
+    def changeFlags(data, field, flagger, **kwargs):
+        mask = data.isna()
+        flagger = flagger.setFlags(field, loc=mask[field], flag=flagger.UNFLAGGED, force=True)
+        return data, flagger
+
+    var = data.columns[0]
+    var_data = data[var]
+    mn, mx = var_data.max() * .25, var_data.max() * .75
+    range_mask = (var_data < mn) | (var_data > mx)
+
+    qc = SaQC(flagger, data)
+    qc = qc.flagRange(var, mn, mx)
+    qcD = qc.changeData(var)
+    qcF = qc.changeFlags(var)
+
+    data, flagger = qcD.getResult()
+    assert (data[var][range_mask] == FILLER).all(axis=None)
+    # only flags change, so the data should still be NaN, because
+    # the unmasking was disabled, but the masking indeed happened
+    data, flagger = qcF.getResult()
+    assert data[var][range_mask].isna().all(axis=None)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_shapeDiffUnmasking(data, flagger):
+    """ test if (un)masking works as expected on index-change.
+
+    If the index of data (and flags) changes in the func, the unmasking
+    should not reapply the original data, but instead take the new data (and flags) as is.
+    """
+
+    FILLER = -1111
+
+    @register(masking='none')
+    def pseudoHarmo(data, field, flagger, **kwargs):
+        index = data[field].index.to_series()
+        index.iloc[-len(data[field])//2:] += pd.Timedelta("7.5Min")
+
+        data[field] = pd.Series(data=FILLER, index=index)
+
+        flags = flagger.getFlags()
+        flags[field] = pd.Series(data=flags[field].values, index=index)
+
+        flagger = flagger.initFlags(flags=flags)
+        return data, flagger
+
+    var = data.columns[0]
+    var_data = data[var]
+    mn, mx = var_data.max() * .25, var_data.max() * .75
+
+    qc = SaQC(flagger, data)
+    qc = qc.flagRange(var, mn, mx)
+    qc = qc.pseudoHarmo(var)
+
+    data, flagger = qc.getResult(raw=True)
+    assert (data[var] == FILLER).all(axis=None)
diff --git a/test/core/test_reader.py b/test/core/test_reader.py
index 1fb497ad9f4ac230a97c905f9329973745072510..d3733a64ca4420dc378bb5c63e272d18171df7b7 100644
--- a/test/core/test_reader.py
+++ b/test/core/test_reader.py
@@ -1,60 +1,55 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+from pathlib import Path
+
 import pytest
 import numpy as np
+import pandas as pd
+import dios
 
-import saqc
-from saqc.core.reader import checkConfig
 from saqc.core.config import Fields as F
-from test.common import initData, initMetaDict, initMetaString, TESTFLAGGER, TESTNODATA, writeIO
+from test.common import initData, writeIO
+
+from saqc.core.core import SaQC
+from saqc.flagger import SimpleFlagger
+from saqc.core.register import FUNC_MAP, register
 
 
 @pytest.fixture
-def data():
+def data() -> dios.DictOfSeries:
     return initData(3)
 
 
-def test_configPreparation(data):
-    var1, var2, var3, *_ = data.columns
-    date = data.index[len(data.index) // 2]
+def test_packagedConfig():
 
-    # NOTE:
-    # time slicing support is currently disabled
-    tests = [
-        # {F.VARNAME: var1, F.START: date, F.TESTS: "flagAll()", F.PLOT: True},
-        {F.VARNAME: var2, F.TESTS: "flagAll()", F.PLOT: False},
-        # {F.VARNAME: var3, F.END: date, F.TESTS: "flagAll()"},
-        {F.VARNAME: var3, F.TESTS: "flagAll()",},
-    ]
+    path = Path(__file__).parents[2] / "ressources/data"
 
-    defaults = {
-        F.START: data.index.min(),
-        F.END: data.index.max(),
-        F.PLOT: False,
-        F.LINENUMBER: 2,
-    }
+    config_path = path / "config_ci.csv"
+    data_path = path / "data.csv"
 
-    for i, test in enumerate(tests):
-        _, meta_frame = initMetaDict([test], data)
-        result = dict(zip(meta_frame.columns, meta_frame.iloc[0]))
-        expected = {**defaults, **test}
-        assert result == expected
+    data = pd.read_csv(data_path, index_col=0, parse_dates=True,)
+    saqc = SaQC(SimpleFlagger(), dios.DictOfSeries(data)).readConfig(config_path)
+    data, flagger = saqc.getResult()
 
 
 def test_variableRegex(data):
 
+    header = f"{F.VARNAME};{F.TEST};{F.PLOT}"
     tests = [
         ("'.*'", data.columns),
         ("'var(1|2)'", [c for c in data.columns if c[-1] in ("1", "2")]),
         ("'var[12]'", [c for c in data.columns if c[-1] in ("1", "2")]),
         ("var[12]", ["var[12]"]),  # not quoted -> not a regex
-        ('"(.*3)"', [c for c in data.columns if c[-1] == "3"]),
+        ('".*3"', [c for c in data.columns if c[-1] == "3"]),
     ]
 
-    for config_wc, expected in tests:
-        _, config = initMetaDict([{F.VARNAME: config_wc, F.TESTS: "flagAll()"}], data)
-        assert np.all(config[F.VARNAME] == expected)
+    for regex, expected in tests:
+        fobj = writeIO(header + "\n" + f"{regex} ; flagDummy()")
+        saqc = SaQC(SimpleFlagger(), data).readConfig(fobj)
+        expansion = saqc._expandFields(saqc._to_call[0], data.columns)
+        result = [f.field for f in expansion]
+        assert np.all(result == expected)
 
 
 def test_inlineComments(data):
@@ -62,65 +57,40 @@ def test_inlineComments(data):
     adresses issue #3
     """
     config = f"""
-    {F.VARNAME}|{F.TESTS}|{F.PLOT}
-    pre2|flagAll() # test|False # test
+    {F.VARNAME} ; {F.TEST}       ; {F.PLOT}
+    pre2        ; flagDummy() # test ; False # test
     """
-    _, meta_frame = initMetaString(config, data)
-    assert meta_frame.loc[0, F.PLOT] == False
-    assert meta_frame.loc[0, F.TESTS] == "flagAll()"
+    saqc = SaQC(SimpleFlagger(), data).readConfig(writeIO(config))
+    func_dump = saqc._to_call[0]
+    assert func_dump.ctrl.plot is False
+    assert func_dump.func == FUNC_MAP["flagDummy"]["func"]
 
 
 def test_configReaderLineNumbers(data):
     config = f"""
-    {F.VARNAME}|{F.TESTS}
-    #temp1|dummy()
-    pre1|dummy()
-    pre2|dummy()
-    SM|dummy()
-    #SM|dummy()
-    # SM1|dummy()
-
-    SM1|dummy()
+    {F.VARNAME} ; {F.TEST}
+    #temp1      ; flagDummy()
+    pre1        ; flagDummy()
+    pre2        ; flagDummy()
+    SM          ; flagDummy()
+    #SM         ; flagDummy()
+    # SM1       ; flagDummy()
+
+    SM1         ; flagDummy()
     """
-    meta_fname, meta_frame = initMetaString(config, data)
-    result = meta_frame[F.LINENUMBER].tolist()
+    saqc = SaQC(SimpleFlagger(), data).readConfig(writeIO(config))
+    result = [f.ctrl.lineno for f in saqc._to_call]
     expected = [3, 4, 5, 9]
     assert result == expected
 
 
-def test_configMultipleTests(data):
-
-    var = data.columns[0]
-
-    config = f"""
-    {F.VARNAME} ; test_1        ; test_2
-    #-----------;---------------;--------------------------
-    {var}       ; flagMissing() ; flagRange(min=10, max=60)
-    """
-
-    from saqc.flagger import SimpleFlagger
-    from saqc.core.core import run
-    from saqc.core.reader import readConfig, checkConfig
-    from saqc.funcs.functions import flagMissing, flagRange
-
-    flagger = SimpleFlagger().initFlags(data)
-    df = checkConfig(readConfig(writeIO(config), data), data, flagger, np.nan)
-    assert {"test_1", "test_2"} - set(df.columns) == set([])
-
-    flagger_expected = SimpleFlagger().initFlags(data)
-    for func, kwargs in [(flagMissing, {}), (flagRange, {"min": 10, "max": 60})]:
-        data, flagger_expected = func(data, var, flagger_expected, **kwargs)
-    _, flagger_result = run(writeIO(config), SimpleFlagger(), data)
-
-    assert np.all(flagger_result.getFlags() == flagger_expected.getFlags())
-
-
 def test_configFile(data):
 
     # check that the reader accepts different whitespace patterns
 
     config = f"""
-    {F.VARNAME} ; {F.TESTS}
+    {F.VARNAME} ; {F.TEST}
+
     #temp1      ; flagDummy()
     pre1; flagDummy()
     pre2        ;flagDummy()
@@ -130,25 +100,55 @@ def test_configFile(data):
 
     SM1;flagDummy()
     """
-    saqc.run(writeIO(config), TESTFLAGGER[0], data)
+    SaQC(SimpleFlagger(), data).readConfig(writeIO(config))
 
 
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("nodata", TESTNODATA)
-def test_configChecks(data, flagger, nodata, caplog):
+def test_configChecks(data):
 
-    flagger = flagger.initFlags(data)
-    var1, var2, var3, *_ = data.columns
+    var1, _, var3, *_ = data.columns
 
+    @register(masking="none")
+    def flagFunc(data, field, flagger, arg, opt_arg=None, **kwargs):
+        return data, flagger
+
+    header = f"{F.VARNAME};{F.TEST}"
     tests = [
-        ({F.VARNAME: var1, F.TESTS: "flagRange(mn=0)"}, TypeError),
-        ({F.VARNAME: var3, F.TESTS: "flagNothing()"}, NameError),
-        ({F.VARNAME: "", F.TESTS: "flagRange(min=3)"}, SyntaxError),
-        ({F.VARNAME: var1, F.TESTS: ""}, SyntaxError),
-        ({F.TESTS: "flagRange(min=3)"}, SyntaxError),
+        (f"{var1};flagFunc(mn=0)", TypeError),  # bad argument name
+        (f"{var1};flagFunc()", TypeError),  # not enough arguments
+        (f"{var3};flagNothing()", NameError),  # unknown function
+        (f"{var1}; min", TypeError),  # not a function call
     ]
 
-    for config_dict, expected in tests:
-        _, config_df = initMetaDict([config_dict], data)
+    for test, expected in tests:
+        fobj = writeIO(header + "\n" + test)
         with pytest.raises(expected):
-            checkConfig(config_df, data, flagger, nodata)
+            SaQC(SimpleFlagger(), data).readConfig(fobj).getResult()
+
+
+def test_supportedArguments(data):
+
+    # test that the following argument values are supported
+    # (i.e. the config parser does not fail on them)
+
+    # TODO: necessary?
+
+    @register(masking='field')
+    def func(data, field, flagger, kwarg, **kwargs):
+        return data, flagger
+
+    var1 = data.columns[0]
+
+    header = f"{F.VARNAME};{F.TEST}"
+    tests = [
+        f"{var1};func(kwarg=NAN)",
+        f"{var1};func(kwarg='str')",
+        f"{var1};func(kwarg=5)",
+        f"{var1};func(kwarg=5.5)",
+        f"{var1};func(kwarg=-5)",
+        f"{var1};func(kwarg=True)",
+        f"{var1};func(kwarg=sum([1, 2, 3]))",
+    ]
+
+    for test in tests:
+        fobj = writeIO(header + "\n" + test)
+        SaQC(SimpleFlagger(), data).readConfig(fobj)
diff --git a/test/flagger/test_dmpflagger.py b/test/flagger/test_dmpflagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1a9c1b73df1d2a58866291119e5c709bfc06f90
--- /dev/null
+++ b/test/flagger/test_dmpflagger.py
@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import json
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from test.common import initData
+from saqc.flagger import DmpFlagger
+
+
+@pytest.fixture
+def data():
+    return initData(cols=1)
+
+
+@pytest.fixture
+def data_4cols():
+    return initData(cols=4)
+
+
+def parseComments(data):
+    return np.array([json.loads(v)["comment"] for v in data.to_df().values.flatten()])
+
+
+def test_initFlags(data):
+    flagger = DmpFlagger().initFlags(data=data)
+    assert (flagger._flags == flagger.UNFLAGGED).all(axis=None)
+    assert (flagger._causes == "").all(axis=None)
+    assert (flagger._comments == "").all(axis=None)
+
+
+def test_mergeFlaggerOuter(data):
+
+    flagger = DmpFlagger()
+
+    field = data.columns[0]
+
+    data_left = data
+
+    data_right = data.to_df()
+    dates = data_right.index.to_series()
+    dates[len(dates) // 2 :] += pd.Timedelta("1Min")
+    data_right.index = dates
+    data_right = data_right.to_dios()
+
+    left = flagger.initFlags(data=data_left).setFlags(
+        field=field, flag=flagger.BAD, cause="SaQCLeft", comment="testLeft"
+    )
+
+    right = flagger.initFlags(data=data_right).setFlags(
+        field=field, flag=flagger.GOOD, cause="SaQCRight", comment="testRight"
+    )
+
+    merged = left.merge(right, join="outer")
+
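+    # rows that exist only in right take right's flags, causes and comments,
+    # while every row present in left keeps left's values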
+    right_index = data_right[field].index.difference(data_left[field].index)
+    assert (merged._flags.loc[right_index] == flagger.GOOD).all(axis=None)
+    assert (merged._causes.loc[right_index] == "SaQCRight").all(axis=None)
+    assert np.all(parseComments(merged._comments.loc[right_index]) == "testRight")
+
+    left_index = data_left[field].index
+    assert (merged._flags.loc[left_index] == flagger.BAD).all(axis=None)
+    assert (merged._causes.loc[left_index] == "SaQCLeft").all(axis=None)
+    assert np.all(parseComments(merged._comments.loc[left_index]) == "testLeft")
+
+
+def test_mergeFlaggerInner(data):
+
+    flagger = DmpFlagger()
+
+    field = data.columns[0]
+
+    data_left = data
+    data_right = data.iloc[::2]
+
+    left = flagger.initFlags(data=data_left).setFlags(
+        field=field, flag=flagger.BAD, cause="SaQCLeft", comment="testLeft"
+    )
+
+    right = flagger.initFlags(data=data_right).setFlags(
+        field=field, flag=flagger.GOOD, cause="SaQCRight", comment="testRight"
+    )
+
+    merged = left.merge(right, join="inner")
+
+    assert (merged._flags[field].index == data_right[field].index).all()
+    assert (merged._causes[field].index == data_right[field].index).all()
+    assert (merged._comments[field].index == data_right[field].index).all()
+
+    assert (merged._flags[field] == flagger.BAD).all()
+    assert (merged._causes[field] == "SaQCLeft").all(axis=None)
+    assert np.all(parseComments(merged._comments) == "testLeft")
+
+
+def test_sliceFlaggerDrop(data):
+    flagger = DmpFlagger().initFlags(data)
+    with pytest.raises(TypeError):
+        flagger.getFlags(field=data.columns, drop="var")
+
+    field = data.columns[0]
+    expected = data[data.columns.drop(field)].to_df()
+
+    filtered = flagger.slice(drop=field)
+
+    assert (filtered._flags.columns == expected.columns).all(axis=None)
+    assert (filtered._comments.columns == expected.columns).all(axis=None)
+    assert (filtered._causes.columns == expected.columns).all(axis=None)
+
+    assert (filtered._flags.to_df().index == expected.index).all(axis=None)
+    assert (filtered._comments.to_df().index == expected.index).all(axis=None)
+    assert (filtered._causes.to_df().index == expected.index).all(axis=None)
+
diff --git a/test/flagger/test_flagger.py b/test/flagger/test_flagger.py
index c5032930ddbe3d6a6b30aaaa9b59edbb1d556391..77f835b17e577ea9998b513a5bab0df0d13d28af 100644
--- a/test/flagger/test_flagger.py
+++ b/test/flagger/test_flagger.py
@@ -1,135 +1,387 @@
 #!/usr/bin/env python
 
-__author__ = "Bert Palm"
-__email__ = "bert.palm@ufz.de"
-__copyright__ = "Copyright 2018, Helmholtz-Zentrum für Umweltforschung GmbH - UFZ"
-
 import pytest
 import numpy as np
 import pandas as pd
 from pandas.api.types import is_bool_dtype
 
-from test.common import TESTFLAGGER
+import dios
+
+from test.common import TESTFLAGGER, initData
 
 
 def _getDataset(rows, cols):
-    df = pd.DataFrame()
-    for c in range(cols):
-        df[f"var{c}"] = np.linspace(0 + 100 * c, rows, rows)
-    vals = pd.date_range(start="2011-01-01", end="2011-01-10", periods=rows)
-    df.index = pd.DatetimeIndex(data=vals)
-    return df
+    return initData(cols=cols, rows=rows, start_date="2011-01-01", end_date="2011-01-10")
 
 
 DATASETS = [
-    # _getDataset(0, 1),
-    # _getDataset(1, 1),
+    _getDataset(0, 1),
+    _getDataset(1, 1),
     _getDataset(100, 1),
     # _getDataset(1000, 1),
-    # _getDataset(0, 4),
-    # _getDataset(1, 4),
-    _getDataset(100, 4),
+    _getDataset(0, 4),
+    _getDataset(1, 4),
+    # _getDataset(100, 4),
     # _getDataset(1000, 4),
     # _getDataset(10000, 40),
-    # _getDataset(20, 4),
+    _getDataset(20, 4),
 ]
 
 
+def check_all_dios_index_length(tocheck, expected):
+    for c in tocheck:
+        if len(tocheck[c]) != len(expected[c]):
+            return False
+    return True
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_initFlags(data, flagger):
+    """
+    test before:
+    - None
+    """
+
+    newflagger = flagger.initFlags(data)
+    assert isinstance(newflagger, type(flagger))
+    assert newflagger is not flagger
+
+    flags = newflagger.getFlags()
+    assert isinstance(flags, dios.DictOfSeries)
+
+    assert len(flags.columns) >= len(data.columns)
+    assert check_all_dios_index_length(flags, data)
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_initFlagsWithFlags(data, flagger):
+    flags = dios.DictOfSeries(pd.Series(data=flagger.BAD))
+    flagger = flagger.initFlags(flags=flags)
+    assert (flagger.flags == flags).all(axis=None)
+
+
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_setFlagger(data, flagger):
+def test_getFlags(data, flagger):
+    """
+    test before:
+    - initFlags()
+
+    we need to check:
+    - access all flags -> get a dios
+    - access some columns of flags -> get a dios
+    - access one column of flags -> get a series
+    """
 
+    flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
+    # all - dios
+    flags0 = flagger.getFlags()
+    assert isinstance(flags0, dios.DictOfSeries)
+    assert (flags0.columns == data.columns).all()
+    assert check_all_dios_index_length(flags0, data)
+    for dt in flags0.dtypes:
+        assert dt == flagger.dtype
+
+    # some - dios
+    if len(data.columns) >= 2:
+        cols = data.columns[:2].to_list()
+        flags1 = flagger.getFlags(cols)
+        assert isinstance(flags1, dios.DictOfSeries)
+        assert (flags1.columns == data.columns[:2]).all()
+        assert check_all_dios_index_length(flags1, data[cols])
+        for dt in flags1.dtypes:
+            assert dt == flagger.dtype
+
+    # series
+    flags2 = flagger.getFlags(field)
+    assert isinstance(flags2, pd.Series)
+    assert flags2.dtype == flagger.dtype
+    assert flags2.shape[0] == data[field].shape[0]
+    # NOTE: needs a fix in dios, see issue #16 (very low priority)
+    # assert flags2.name in data.columns
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_setFlags(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags()
+    """
+    flagger = flagger.initFlags(data)
+    sl = slice("2011-01-02", "2011-01-05")
+    field, *_ = data.columns
+
+    base = flagger.getFlags()
+
+    flagger_good = flagger.setFlags(field, flag=flagger.GOOD, loc=sl)
+    assert isinstance(flagger_good, type(flagger))
+    assert flagger_good is not flagger
+
+    flags_good = flagger_good.getFlags()
+    assert len(flags_good[field]) <= len(base[field])
+    assert (flags_good.columns == base.columns).all()
+    assert (flags_good.loc[sl, field] == flagger.GOOD).all()
+
+    # overflag works BAD > GOOD
+    flagger_bad = flagger_good.setFlags(field, flag=flagger.BAD)
+    assert (flagger_bad.getFlags(field) == flagger.BAD).all()
+
+    # overflag doesn't work GOOD < BAD
+    flagger_still_bad = flagger_bad.setFlags(field, flag=flagger.GOOD)
+    assert (flagger_still_bad.getFlags(field) == flagger.BAD).all()
+
+    # overflag does work with force
+    flagger_forced_good = flagger_bad.setFlags(field, flag=flagger.GOOD, force=True)
+    assert (flagger_forced_good.getFlags(field) == flagger.GOOD).all()
+
+    with pytest.raises(ValueError):
+        flagger.setFlags(field=None, flag=flagger.BAD)
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_sliceFlagger(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags() inside slice()
+    """
+    sl = slice(None, None, 3)
+
+    flagger = flagger.initFlags(data)
+    newflagger = flagger.slice(loc=sl)
+    assert isinstance(newflagger, type(flagger))
+
+    newflags = newflagger.getFlags()
+    assert (newflags.columns == data.columns).all()
+    assert check_all_dios_index_length(newflags, data[sl])
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_sliceFlaggerDrop(data, flagger):
+    flagger = flagger.initFlags(data)
+    with pytest.raises(TypeError):
+        flagger.getFlags(field=data.columns, drop="var")
+
+    field = data.columns[0]
+    expected = data.columns.drop(field)
+
+    filtered = flagger.slice(drop=field)
+    assert (filtered.getFlags().columns == expected).all(axis=None)
+    assert (filtered.getFlags().to_df().index == data[expected].to_df().index).all(axis=None)
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_mergeFlagger(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags()
+    - setFlags()
+    - slice()
+    """
+    field, *_ = data.columns
+    sl = slice(None, None, 3)
+
     this_flagger = flagger.initFlags(data)
-    other_flagger = this_flagger.getFlagger(iloc=slice(None, None, 3)).setFlags(field)
-    result_flagger = this_flagger.setFlagger(other_flagger)
+    other_flagger = this_flagger.slice(loc=sl).setFlags(field)
+    result_flagger = this_flagger.merge(other_flagger)
 
+    result_flags = result_flagger.getFlags()
     other_flags = other_flagger.getFlags()
-    result_flags = result_flagger.getFlags(field)
 
-    assert np.all(result_flagger.getFlags(loc=other_flagger.getFlags().index) == other_flags)
+    # check the flags that were set
+    check = result_flags.loc[sl, field] == other_flags[field]
+    assert check.all(None)
+    # check the flags that were not set
+    mask = ~result_flags[field].index.isin(other_flags[field].index)
+    check = result_flags.loc[mask, field] == result_flagger.UNFLAGGED
+    assert check.all(None)
 
-    assert np.all(result_flags[~result_flags.index.isin(other_flags.index)] == flagger.UNFLAGGED)
+    # check unchanged columns
+    cols = data.columns.to_list()
+    cols.remove(field)
+    check = result_flags[cols] == result_flagger.UNFLAGGED
+    assert check.all(None)
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_setFlaggerColumnsDiff(data, flagger):
-
+def test_mergeFlaggerColumnsDiff(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags()
+    - setFlags()
+    - slice()
+    - merge()
+    """
     field, *_ = data.columns
     new_field = field + "_new"
-    iloc = slice(None, None, 2)
+    sl = slice(None, None, 2)
 
-    other_data = data.iloc[iloc]
+    other_data = data.loc[sl]
     other_data.columns = [new_field] + data.columns[1:].to_list()
+    other_flagger = flagger.initFlags(other_data)
 
     this_flagger = flagger.initFlags(data).setFlags(field, flag=flagger.BAD)
-    other_flagger = flagger.initFlags(other_data)
-    result_flagger = this_flagger.setFlagger(other_flagger)
+    result_flagger = this_flagger.merge(other_flagger)
 
-    assert np.all(result_flagger.getFlags(new_field, loc=other_data.index) == other_flagger.getFlags(new_field))
-    assert np.all(result_flagger.getFlags(new_field, loc=data.index) == flagger.UNFLAGGED)
+    result_flags = result_flagger.getFlags()
+    other_flags = other_flagger.getFlags()
+
+    # we need to check that
+    # - the new column is present
+    # - the new column is identical to the original
+    # - the other columns are unchanged:
+    #   - the field column is BAD
+    #   - all other columns are UNFLAGGED
+
+    assert new_field in result_flags
+
+    check = result_flags[new_field] == other_flags[new_field]
+    assert check.all(None)
+
+    check = result_flags[field] == result_flagger.BAD
+    assert check.all(None)
+
+    cols = data.columns.to_list()
+    cols.remove(field)
+    check = result_flags[cols] == result_flagger.UNFLAGGED
+    assert check.all(None)
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_setFlaggerIndexDiff(data, flagger):
-
+def test_mergeFlaggerIndexDiff(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags()
+    - setFlags()
+    - slice()
+    - merge()
+
+    we need to check:
+    - the result index is the union of this and other's index
+    - indices and values present only in this are kept
+    - indices and values present only in other are kept
+    - indices present in both take their values from other
+    """
     field, *_ = data.columns
-    iloc = slice(None, None, 2)
+    sl = slice(None, None, 2)
 
-    other_data = data.iloc[iloc]
-    other_data.index = other_data.index + pd.Timedelta(minutes=2, seconds=25)
+    def shiftindex(s):
+        s.index = s.index + pd.Timedelta(minutes=2, seconds=25)
+        return s
+
+    # create a sliced time-shifted version of data
+    other_data = data.loc[sl].apply(shiftindex)
+    if isinstance(other_data, pd.Series):
+        pass
 
     this_flagger = flagger.initFlags(data).setFlags(field, flag=flagger.BAD)
     other_flagger = flagger.initFlags(other_data)
-    result_flagger = this_flagger.setFlagger(other_flagger)
+    result_flagger = this_flagger.merge(other_flagger)
+
+    result_flags = result_flagger.getFlags()
+    this_flags = this_flagger.getFlags()
+    other_flags = other_flagger.getFlags()
+
+    for c in result_flags:
+        t, o, r = this_flags[c], other_flags[c], result_flags[c]
+        assert (r.index == t.index.union(o.index)).all()
 
-    assert np.all(result_flagger.getFlags(field, loc=other_data.index) == other_flagger.getFlags(field))
-    assert np.all(result_flagger.getFlags(field, loc=data.index) == this_flagger.getFlags(field))
+        only_this = t.index.difference(o.index)
+        only_other = o.index.difference(t.index)
+        both = t.index.intersection(o.index)
+
+        # nothing is missing
+        assert (r.index == only_this.union(only_other).union(both)).all()
+
+        assert (r[only_this] == t[only_this]).all()
+        assert (r[only_other] == o[only_other]).all()
+        assert (r[both] == o[both]).all()
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_initFlags(data, flagger):
-    flags = flagger.initFlags(data).getFlags()
-    assert isinstance(flags, pd.DataFrame)
-    assert len(flags.index) == len(data.index)
-    assert len(flags.columns) >= len(data.columns)
+def test_mergeFlaggerOuter(data, flagger):
+
+    field = data.columns[0]
+
+    data_left = data
+    data_right = data.iloc[::2]
+
+    left = flagger.initFlags(data=data_left).setFlags(field=field, flag=flagger.BAD)
+
+    right = flagger.initFlags(data=data_right).setFlags(field, flag=flagger.GOOD)
+
+    merged = left.merge(right, join="outer")
+
+    loc = data_right[field].index.difference(data_left[field].index)
+    assert (merged.getFlags(field, loc=loc) == flagger.GOOD).all(axis=None)
+    assert (merged.getFlags(field, loc=data_left[field].index) == flagger.BAD).all(axis=None)
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_getFlags(data, flagger):
-    flagger = flagger.initFlags(data)
-    field, *_ = data.columns
+def test_mergeFlaggerInner(data, flagger):
 
-    # df
-    flags0 = flagger.getFlags()
-    assert isinstance(flags0, pd.DataFrame)
-    assert flags0.shape == data.shape
-    assert (flags0.columns == data.columns).all()
+    field = data.columns[0]
 
-    for dt in flags0.dtypes:
-        assert dt == flagger.dtype
+    data_left = data
+    data_right = data.iloc[::2]
 
-    # series
-    flags1 = flagger.getFlags(field)
-    assert isinstance(flags1, pd.Series)
-    assert flags1.dtype == flagger.dtype
-    assert flags1.shape[0] == data.shape[0]
-    assert flags1.name in data.columns
+    left = flagger.initFlags(data=data_left).setFlags(field=field, flag=flagger.BAD)
+
+    right = flagger.initFlags(data=data_right).setFlags(field, flag=flagger.GOOD)
+
+    merged = left.merge(right, join="inner")
+
+    assert (merged.getFlags(field).index == data_right[field].index).all()
+    assert (merged.getFlags(field) == flagger.BAD).all()
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_isFlaggedDataFrame(data, flagger):
+def test_mergeFlaggerMerge(data, flagger):
+
+    field = data.columns[0]
+    data_left = data
+    data_right = data.iloc[::2]
+
+    left = flagger.initFlags(data=data_left).setFlags(field=field, flag=flagger.BAD)
+
+    right = flagger.initFlags(data=data_right).setFlags(field, flag=flagger.GOOD)
+
+    merged = left.merge(right, join="merge")
 
+    loc = data_left[field].index.difference(data_right[field].index)
+    assert (merged.getFlags(field, loc=data_right[field].index) == flagger.GOOD).all(axis=None)
+    assert (merged.getFlags(field, loc=loc) == flagger.BAD).all(axis=None)
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_isFlaggedDios(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - setFlags()
+    """
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
-    mask = np.zeros(len(data), dtype=bool)
+    mask = np.zeros(len(data[field]), dtype=bool)
 
     df_tests = [
         (flagger.isFlagged(), mask),
@@ -140,8 +392,8 @@ def test_isFlaggedDataFrame(data, flagger):
     ]
     for flags, expected in df_tests:
         assert np.all(flags[field] == expected)
-        assert isinstance(flags, pd.DataFrame)
-        assert flags.shape == data.shape
+        assert isinstance(flags, dios.DictOfSeries)
+        assert check_all_dios_index_length(flags, data)
         assert (flags.columns == data.columns).all()
         for dt in flags.dtypes:
             assert is_bool_dtype(dt)
@@ -150,11 +402,15 @@ def test_isFlaggedDataFrame(data, flagger):
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_isFlaggedSeries(data, flagger):
-
+    """
+    test before:
+    - initFlags()
+    - setFlags()
+    """
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
-    mask = np.zeros(len(data), dtype=bool)
+    mask = np.zeros(len(data[field]), dtype=bool)
 
     series_tests = [
         (flagger.isFlagged(field), mask),
@@ -167,85 +423,73 @@ def test_isFlaggedSeries(data, flagger):
         assert np.all(flags == expected)
         assert isinstance(flags, pd.Series)
         assert flags.dtype == bool
-        assert flags.shape[0] == data.shape[0]
-        assert flags.name in data.columns
+        assert flags.shape[0] == data[field].shape[0]
+    # NOTE: needs a fix in dios, see issue #16 (very low priority)
+        # assert flags.name in data.columns
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_isFlaggedSeries(data, flagger):
-
+def test_isFlaggedSeries_fail(data, flagger):
+    """
+    test before:
+    - initFlags()
+    """
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
     fail_tests = [
-        {"flag": pd.Series(index=data.index, data=flagger.BAD).astype(flagger.dtype)},
-        {"field": ["var1", "var2"]},
+        {"flag": pd.Series(index=data[field].index, data=flagger.BAD).astype(flagger.dtype)},
+        # NOTE: allowed since use of dios
+        # {"field": ["var1", "var2"]},
     ]
     for args in fail_tests:
-        with pytest.raises(ValueError):
+        with pytest.raises(TypeError):
             flagger.isFlagged(**args)
 
 
-@pytest.mark.parametrize("data", DATASETS)
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_setFlags(data, flagger):
-    flagger = flagger.initFlags(data)
-    sl = slice("2011-01-02", "2011-01-05")
-    field, *_ = data.columns
-
-    base = flagger.getFlags()
-
-    flagger_good = flagger.setFlags(field, flag=flagger.GOOD, loc=sl)
-    flags_good = flagger_good.getFlags()
-    assert flags_good.shape == base.shape
-    assert (flags_good.columns == base.columns).all()
-    assert (flags_good.loc[sl, field] == flagger.GOOD).all()
-
-    # overflag works BAD > GOOD
-    flagger_bad = flagger_good.setFlags(field, flag=flagger.BAD)
-    assert (flagger_bad.getFlags(field) == flagger.BAD).all()
-
-    # overflag doesn't work GOOD < BAD
-    flagger_still_bad = flagger_bad.setFlags(field, flag=flagger.GOOD)
-    assert (flagger_still_bad.getFlags(field) == flagger.BAD).all()
-
-    # overflag does work with force
-    flagger_forced_good = flagger_bad.setFlags(field, flag=flagger.GOOD, force=True)
-    assert (flagger_forced_good.getFlags(field) == flagger.GOOD).all()
-
-    with pytest.raises(ValueError):
-        flagger.setFlags(field=None, flag=flagger.BAD)
-
-
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_clearFlags(data, flagger):
+    """
+    test before:
+    - initFlags()
+    - getFlags()
+    - setFlags()
+    - isFlagged()
+    """
     flagger = flagger.initFlags(data)
     sl = slice("2011-01-02", "2011-01-05")
     field, *_ = data.columns
 
-    base = flagger.getFlags()
+    base = flagger.getFlags(field)
 
     flagger = flagger.setFlags(field=field, flag=flagger.BAD)
     assert np.sum(flagger.isFlagged(field)) == len(base)
 
+    flaggernew = flagger.clearFlags(field)
+    assert isinstance(flaggernew, type(flagger))
+    assert flaggernew is not flagger
+    assert len(flagger.getFlags(field)) == len(data[field])
+
     flagger = flagger.clearFlags(field)
     assert np.sum(flagger.isFlagged(field)) == 0
+    assert len(flagger.getFlags(field)) == len(data[field])
 
     flagger = flagger.setFlags(field=field, flag=flagger.BAD)
     assert np.sum(flagger.isFlagged(field)) == len(base)
+    assert len(flagger.getFlags(field)) == len(data[field])
 
     flagger = flagger.clearFlags(field, loc=sl)
+    assert len(flagger.getFlags(field)) == len(data[field])
     unflagged = flagger.isFlagged(field, loc=sl)
     assert np.sum(unflagged) == 0
-    assert np.sum(flagger.isFlagged(field)) == len(data) - len(unflagged)
+    assert np.sum(flagger.isFlagged(field)) == len(data[field]) - len(unflagged)
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_dtype(data, flagger):
-
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
@@ -264,7 +508,6 @@ def test_dtype(data, flagger):
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER[-1:])
 def test_returnCopy(data, flagger):
-
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
@@ -283,7 +526,6 @@ LOC_ILOC_FUNCS = ["isFlagged", "getFlags"]
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("flaggerfunc", LOC_ILOC_FUNCS)
 def test_loc(data, flagger, flaggerfunc):
-
     flagger = flagger.initFlags(data)
     sl = slice("2011-01-02", "2011-01-05")
     field, *_ = data.columns
@@ -293,9 +535,9 @@ def test_loc(data, flagger, flaggerfunc):
     if d.empty:
         mask = []
     else:
-        m = data.index.get_loc(d.index[0])
-        M = data.index.get_loc(d.index[-1])
-        mask = np.full(len(data), False)
+        m = data[field].index.get_loc(d[field].index[0])
+        M = data[field].index.get_loc(d[field].index[-1])
+        mask = np.full(len(data[field]), False)
         mask[m:M] = True
 
     flagger_func = getattr(flagger, flaggerfunc)
@@ -332,70 +574,179 @@ def test_loc(data, flagger, flaggerfunc):
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("flaggerfunc", LOC_ILOC_FUNCS)
-def test_iloc(data, flagger, flaggerfunc):
+def test_classicUseCases(data, flagger):
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
 
-    M = len(data.index) - 1 if len(data.index) > 0 else 0
-    m = M // 3
-    M = m * 2
+    flagger = flagger.clearFlags(field)
 
-    array = data.reset_index(drop=True).index.values[m:M]
-    sl = slice(m, M)
-    mask = np.full(len(data), False)
-    mask[sl] = True
+    # data-mask, same length as the flags
+    d = data[field]
+    mask = d < (d.max() - d.min()) // 2
+    flagged = flagger.setFlags(field, loc=mask, flag=flagger.BAD).isFlagged(field)
+    assert (flagged == mask).all()
 
-    flagger_func = getattr(flagger, flaggerfunc)
+    flagger = flagger.clearFlags(field)
 
-    # masked
-    mflags0 = flagger_func(field, iloc=mask)
-    mflags1 = flagger_func().iloc[mask, 0]
-    mflags2 = flagger_func(field).iloc[mask]
-    mflags3 = flagger_func(iloc=mask)[field]
-    assert (mflags0 == mflags1).all()
-    assert (mflags0 == mflags2).all()
-    assert (mflags0 == mflags3).all()
+    indices = np.arange(0, len(data[field]))
+    mask = indices % 3 == 0
+    indices = indices[mask]
+    # after some numpy work we end up with positional (numpy) indices of a
+    # different length than the flags: build a datetime index via iloc first,
+    # then pass that to loc
+    dt_idx = data[field].iloc[indices].index
+    flagged = flagger.setFlags(field, loc=dt_idx, flag=flagger.BAD).isFlagged(field)
+    assert (flagged.iloc[indices] == flagged[flagged]).all()
 
-    # indexed
-    iflags0 = flagger_func(field, iloc=array)
-    iflags1 = flagger_func().iloc[array, 0]
-    iflags2 = flagger_func(field).iloc[array]
-    iflags3 = flagger_func(iloc=array)[field]
-    assert (iflags0 == iflags1).all()
-    assert (iflags0 == iflags2).all()
-    assert (iflags0 == iflags3).all()
 
-    # sliced
-    sflags0 = flagger_func(field, iloc=sl)
-    sflags1 = flagger_func().iloc[sl, 0]
-    sflags2 = flagger_func(field).iloc[sl]
-    sflags3 = flagger_func(iloc=sl)[field]
-    assert (sflags0 == sflags1).all()
-    assert (sflags0 == sflags2).all()
-    assert (sflags0 == sflags3).all()
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_getFlagsWithExtras(data, flagger):
+    flagger = flagger.initFlags(data)
+    field, *_ = data.columns
 
-    assert (sflags0 == iflags0).all()
-    assert (sflags0 == mflags0).all()
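+    # full=True returns the flags together with a dict of extra flagger columns,
+    # each of them index-aligned with the flags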
+    flags, extra = flagger.getFlags(field, full=True)
+    assert isinstance(flags, pd.Series)
+    assert isinstance(extra, dict)
+    for k, v in extra.items():
+        assert isinstance(v, pd.Series)
+        assert flags.index.equals(v.index)
+
+    flags, extra = flagger.getFlags(full=True)
+    assert isinstance(flags, dios.DictOfSeries)
+    assert isinstance(extra, dict)
+    for k, v in extra.items():
+        assert isinstance(v, dios.DictOfSeries)
+        assert flags.columns.equals(v.columns)
+        for c in flags:
+            assert flags[c].index.equals(v[c].index)
 
 
 @pytest.mark.parametrize("data", DATASETS)
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_classicUseCases(data, flagger):
+def test_replace_delete(data, flagger):
     flagger = flagger.initFlags(data)
     field, *_ = data.columns
+    newflagger = flagger.replaceField(field=field, flags=None)
 
-    # data-mask, same length than flags
-    d = data[field]
-    mask = d < (d.max() - d.min()) // 2
-    flagger = flagger.clearFlags(field)
-    flagged = flagger.setFlags(field, loc=mask, flag=flagger.BAD).isFlagged(field)
-    assert (flagged == mask).all()
+    new, newextra = newflagger.getFlags(full=True)
+    assert field not in newflagger.flags
+    for k in newextra:
+        assert field not in newextra[k]
 
-    # some fun with numpy but not same dimensions.. pass indices to iloc
-    indices = np.arange(0, len(data))
-    mask = indices % 3 == 0
-    indices = indices[mask]
-    flagger.clearFlags(field)
-    flagged = flagger.setFlags(field, iloc=indices, flag=flagger.BAD).isFlagged(field)
-    assert (flagged.iloc[indices] == flagged[flagged]).all()
+    with pytest.raises(ValueError):
+        flagger.replaceField(field="i_dont_exist", flags=None)
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_replace_insert(data, flagger):
+    flagger = flagger.initFlags(data)
+    field, *_ = data.columns
+    newfield = 'fooo'
+    flags, extra = flagger.getFlags(field, full=True)
+    newflagger = flagger.replaceField(field=newfield, flags=flags, **extra)
+    old, oldextra = flagger.getFlags(full=True)
+    new, newextra = newflagger.getFlags(full=True)
+    assert newfield in newflagger.flags
+    assert (newflagger._flags[newfield] == flagger._flags[field]).all()
+    assert newflagger._flags[newfield] is not flagger._flags[field]  # not a copy
+    for k in newextra:
+        assert newfield in newextra[k]
+        assert (newextra[k][newfield] == oldextra[k][field]).all()
+
+
+@pytest.mark.parametrize("data", DATASETS)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_replace_replace(data, flagger):
+    flagger = flagger.initFlags(data)
+    field, *_ = data.columns
+    flags, extra = flagger.getFlags(field, full=True)
+
+    # set everything to BAD
+    flags[:] = flagger.BAD
+    for k, v in extra.items():
+        v[:] = flagger.BAD
+        extra[k] = v
+
+    newflagger = flagger.replaceField(field=field, flags=flags, **extra)
+
+    old, oldextra = flagger.getFlags(full=True)
+    new, newextra = newflagger.getFlags(full=True)
+    assert old.columns.equals(new.columns)
+    assert (new[field] == flagger.BAD).all()
+
+    assert oldextra.keys() == newextra.keys()
+    for k in newextra:
+        o, n = oldextra[k], newextra[k]
+        assert n.columns.equals(o.columns)
+        assert (n[field] == flagger.BAD).all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_flagAfter(flagger):
+    idx = pd.date_range("2000", "2001", freq='1M')
+    s = pd.Series(0, index=idx)
+    data = dios.DictOfSeries(s, columns=['a'])
+    exp_base = pd.Series(flagger.UNFLAGGED, index=idx)
+
+    flagger = flagger.initFlags(data)
+    field, *_ = data.columns
+
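+    # an integer flag_after additionally flags that many points following loc,
+    # an offset string flags every point that falls within that time window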
+    flags = flagger.setFlags(field, loc=s.index[3], flag_after=5).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[3: 3+5+1] = flagger.BAD
+    assert (flags == exp).all()
+
+    flags = flagger.setFlags(field, loc=s.index[3], flag_after=5, win_flag=flagger.GOOD).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[3: 3+5+1] = flagger.GOOD
+    exp[3] = flagger.BAD
+    assert (flags == exp).all()
+
+    # 3 months < 99 days < 4 months
+    flags = flagger.setFlags(field, loc=s.index[3], flag_after="99d").getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[3: 3+3+1] = flagger.BAD
+    assert (flags == exp).all()
+
+    # 3 months < 99 days < 4 months
+    flags = flagger.setFlags(field, loc=s.index[3], flag_after="99d", win_flag=flagger.GOOD).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[3: 3+3+1] = flagger.GOOD
+    exp[3] = flagger.BAD
+    assert (flags == exp).all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_flagBefore(flagger):
+    idx = pd.date_range("2000", "2001", freq='1M')
+    s = pd.Series(0, index=idx)
+    data = dios.DictOfSeries(s, columns=['a'])
+    exp_base = pd.Series(flagger.UNFLAGGED, index=idx)
+
+    flagger = flagger.initFlags(data)
+    field, *_ = data.columns
+
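+    # flag_before mirrors flag_after: it additionally flags the points (or the
+    # time window) preceding loc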
+    flags = flagger.setFlags(field, loc=s.index[8], flag_before=5).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[8-5: 8+1] = flagger.BAD
+    assert (flags == exp).all()
+
+    flags = flagger.setFlags(field, loc=s.index[8], flag_before=5, win_flag=flagger.GOOD).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[8-5: 8+1] = flagger.GOOD
+    exp[8] = flagger.BAD
+    assert (flags == exp).all()
+
+    # 3 months < 99 days < 4 months
+    flags = flagger.setFlags(field, loc=s.index[8], flag_before="99d").getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[8-3: 8+1] = flagger.BAD
+    assert (flags == exp).all()
+
+    # 3 months < 99 days < 4 months
+    flags = flagger.setFlags(field, loc=s.index[8], flag_before="99d", win_flag=flagger.GOOD).getFlags(field)
+    exp = exp_base.copy()
+    exp.iloc[8-3: 8+1] = flagger.GOOD
+    exp[8] = flagger.BAD
+    assert (flags == exp).all()
diff --git a/test/flagger/test_positionalflagger.py b/test/flagger/test_positionalflagger.py
new file mode 100644
index 0000000000000000000000000000000000000000..9875a7c74ab09aa0f120c09a802970adc870f602
--- /dev/null
+++ b/test/flagger/test_positionalflagger.py
@@ -0,0 +1,56 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import numpy as np
+
+from test.common import initData
+from saqc.flagger import PositionalFlagger
+
+
+@pytest.fixture
+def data():
+    return initData(cols=2)
+
+
+def test_initFlags(data):
+    flagger = PositionalFlagger().initFlags(data=data)
+    assert (flagger.isFlagged() == False).all(axis=None)
+    assert (flagger.flags == flagger.UNFLAGGED).all(axis=None)
+
+
+def test_setFlags(data):
+    flagger = PositionalFlagger().initFlags(data=data)
+
+    field = data.columns[0]
+    mask = np.zeros(len(data[field]), dtype=bool)
+    mask[1:10:2] = True
+
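+    # as the assertions below show, the flags of a touched column start with "9"
+    # and grow by one digit per setFlags call ("1" for SUSPICIOUS, "2" for BAD,
+    # "0" for positions not selected), while never-touched columns stay at "-1"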
+    flagger = flagger.setFlags(field=field, loc=mask, flag=flagger.SUSPICIOUS)
+    assert (flagger.flags.loc[mask, field] == "91").all(axis=None)
+    assert (flagger.flags.loc[~mask, field] == "90").all(axis=None)
+
+    flagger = flagger.setFlags(field=field, loc=~mask, flag=flagger.BAD)
+    assert (flagger.flags.loc[~mask, field] == "902").all(axis=None)
+    assert (flagger.flags.loc[mask, field] == "910").all(axis=None)
+
+    assert (flagger.flags[data.columns[1]] == "-1").all(axis=None)
+
+
+def test_isFlagged(data):
+    flagger = PositionalFlagger().initFlags(data=data)
+    field = data.columns[0]
+
+    mask_sus = np.zeros(len(data[field]), dtype=bool)
+    mask_sus[1:20:2] = True
+    flagger = flagger.setFlags(field=field, loc=mask_sus, flag=flagger.SUSPICIOUS)
+    assert (flagger.isFlagged(field=field, comparator=">=", flag=flagger.SUSPICIOUS)[mask_sus] == True).all(axis=None)
+    assert (flagger.isFlagged(field=field, comparator=">", flag=flagger.SUSPICIOUS) == False).all(axis=None)
+
+    mask_bad = np.zeros(len(data[field]), dtype=bool)
+    mask_bad[1:10:2] = True
+    flagger = flagger.setFlags(field=field, loc=mask_bad, flag=flagger.BAD)
+    assert (flagger.isFlagged(field=field, comparator=">")[mask_sus] == True).all(axis=None)
+    assert (flagger.isFlagged(field=field, comparator=">=", flag=flagger.BAD)[mask_bad] == True).all(axis=None)
+    assert (flagger.isFlagged(field=field, comparator=">", flag=flagger.BAD) == False).all(axis=None)
diff --git a/test/funcs/conftest.py b/test/funcs/conftest.py
index f0f1d142255c173d4b5bc169c9b98b23c2a87903..1fd4685e6c0aca0015b8f2cbcb4cf67be9a4ec75 100644
--- a/test/funcs/conftest.py
+++ b/test/funcs/conftest.py
@@ -2,6 +2,8 @@ import pytest
 import numpy as np
 import pandas as pd
 
+from dios import DictOfSeries
+
 
 @pytest.fixture
 def char_dict():
@@ -10,9 +12,11 @@ def char_dict():
         "drop": pd.DatetimeIndex([]),
         "peak": pd.DatetimeIndex([]),
         "return": pd.DatetimeIndex([]),
+        "missing": pd.DatetimeIndex([]),
     }
 
 
 @pytest.fixture
 def course_1(char_dict):
     # MONOTONOUSLY ASCENDING/DESCENDING
@@ -26,17 +30,19 @@ def course_1(char_dict):
         peak_level=10,
         initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0),
         char_dict=char_dict,
+        name='data'
     ):
 
         t_index = pd.date_range(initial_index, freq=freq, periods=periods)
-        data = np.append(
-            np.linspace(initial_level, peak_level, int(np.floor(len(t_index) / 2))),
-            np.linspace(peak_level, initial_level, int(np.ceil(len(t_index) / 2))),
-        )
-        data = pd.DataFrame(data=data, index=t_index, columns=["data"])
-        char_dict["raise"] = data.index[1 : int(np.floor(len(t_index) / 2))]
-        char_dict["drop"] = data.index[int(np.floor(len(t_index) / 2) + 1) :]
-        char_dict["peak"] = data.index[int(np.floor(len(t_index) / 2)) - 1 : int(np.floor(len(t_index) / 2)) + 1]
+        left = np.linspace(initial_level, peak_level, int(np.floor(len(t_index) / 2)))
+        right = np.linspace(peak_level, initial_level, int(np.ceil(len(t_index) / 2)))
+        s = pd.Series(np.append(left, right), index=t_index)
+
+        char_dict["raise"] = s.index[1 : int(np.floor(len(t_index) / 2))]
+        char_dict["drop"] = s.index[int(np.floor(len(t_index) / 2) + 1) :]
+        char_dict["peak"] = s.index[int(np.floor(len(t_index) / 2)) - 1 : int(np.floor(len(t_index) / 2)) + 1]
+
+        data = DictOfSeries(data=s, columns=[name])
         return data, char_dict
 
     return fix_funk
@@ -57,7 +63,6 @@ def course_2(char_dict):
         initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0),
         char_dict=char_dict,
     ):
-
         t_index = pd.date_range(initial_index, freq=freq, periods=periods)
         data = np.linspace(initial_level, final_level, int(np.floor(len(t_index))))
 
@@ -66,17 +71,38 @@ def course_2(char_dict):
 
         if out_val > data.iloc[int(np.floor(periods / 2) - 1)]:
             kind = "raise"
-
         else:
             kind = "drop"
 
         char_dict[kind] = data.index[int(np.floor(periods / 2))]
         char_dict["return"] = data.index[int(np.floor(len(t_index) / 2)) + 1]
-        return data.to_frame("data"), char_dict
+
+        data = DictOfSeries(data=data, columns=["data"])
+        return data, char_dict
+
+    return fix_funk
+
+
+@pytest.fixture
+def course_test(char_dict):
+    # Test function for pattern detection - same as the test pattern for the first three values, then a constant function
+    def fix_funk(freq='1 D',
+                 initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), out_val=5, char_dict=char_dict):
+
+        t_index = pd.date_range(initial_index, freq=freq, periods=100)
+
+        data = pd.Series(data=0, index=t_index)
+        data.iloc[2] = out_val
+        data.iloc[3] = out_val
+
+
+        data = DictOfSeries(data=data, columns=['data'])
+        return data, char_dict
 
     return fix_funk
 
 
 @pytest.fixture
 def course_3(char_dict):
     # CROWD IN A PIT/CROWD ON A SUMMIT
@@ -101,31 +127,36 @@ def course_3(char_dict):
         t_index = pd.date_range(initial_index, freq=freq, periods=periods)
         data = np.linspace(initial_level, final_level, int(np.floor(len(t_index))))
         data = pd.Series(data=data, index=t_index)
+
         ind1 = data.index[int(np.floor(periods / 2))]
-        insertion_index = pd.DatetimeIndex(
-            [ind1 + crowd_spacing * pd.Timedelta(str(k) + "min") for k in range(1, crowd_size + 1)]
-        )
+        dates = [ind1 + crowd_spacing * pd.Timedelta(f"{k}min") for k in range(1, crowd_size + 1)]
+        insertion_index = pd.DatetimeIndex(dates)
+
         data.iloc[int(np.floor(periods / 2))] = out_val
         data = data.append(pd.Series(data=out_val, index=insertion_index))
         data.sort_index(inplace=True)
         anomaly_index = insertion_index.insert(0, data.index[int(np.floor(periods / 2))])
+
         if out_val > data.iloc[int(np.floor(periods / 2) - 1)]:
             kind = "raise"
         else:
             kind = "drop"
+
         char_dict[kind] = anomaly_index
         char_dict["return"] = t_index[int(len(t_index) / 2) + 1]
-        return data.to_frame("data"), char_dict
+
+        data = DictOfSeries(data=data, columns=["data"])
+        return data, char_dict
 
     return fix_funk
 
 
 @pytest.fixture
 def course_4(char_dict):
-    # TEETH (ROW OF SPIKES)
-    # values , that remain on value level "base_level" and than begin exposing an outlierish or spikey value of magnitude
-    # "out_val" every second timestep, starting at periods/2, with the first spike.
-    # number of periods better be even!
+    # TEETH (ROW OF SPIKES): values that remain at level "base_level" and then start exposing an outlierish or
+    # spikey value of magnitude "out_val" every second timestep, starting at periods/2 with the first spike. The
+    # number of periods should be even!
+
     def fix_funk(
         freq="10min",
         periods=10,
@@ -136,11 +167,43 @@ def course_4(char_dict):
     ):
 
         t_index = pd.date_range(initial_index, freq=freq, periods=periods)
-
         data = pd.Series(data=base_level, index=t_index)
         data[int(len(t_index) / 2) :: 2] = out_val
         char_dict["raise"] = t_index[int(len(t_index) / 2) :: 2]
         char_dict["return"] = t_index[int((len(t_index) / 2) + 1) :: 2]
-        return data.to_frame("data"), char_dict
+
+        data = DictOfSeries(data=data, columns=["data"])
+        return data, char_dict
+
+    return fix_funk
+
+
+@pytest.fixture
+def course_5(char_dict):
+    # NAN_holes
+    # values that ascend linearly from initial_level to final_level and have missing data (=nan)
+    # at the positions given by "nan_slice" (a slice or a list, used for iloc indexing)
+    # periods should be even and greater than 5
+
+    def fix_funk(
+        freq="10min",
+        periods=10,
+        nan_slice=slice(0, None, 5),
+        initial_level=0,
+        final_level=10,
+        initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0),
+        char_dict=char_dict,
+    ):
+        t_index = pd.date_range(initial_index, freq=freq, periods=periods)
+        values = np.linspace(initial_level, final_level, periods)
+        s = pd.Series(values, index=t_index)
+        s.iloc[nan_slice] = np.nan
+        char_dict["missing"] = s.iloc[nan_slice].index
+
+        data = DictOfSeries(data=s, columns=["data"])
+        return data, char_dict
 
     return fix_funk
diff --git a/test/funcs/test_breaks_detection.py b/test/funcs/test_breaks_detection.py
index 2f8f0961d4dba49eb9532d4c201f11e04bbc57bd..f07e949b2278a3d273b101fdbff9e2e6ba5b0a96 100644
--- a/test/funcs/test_breaks_detection.py
+++ b/test/funcs/test_breaks_detection.py
@@ -9,7 +9,7 @@ from test.common import TESTFLAGGER, initData
 
 @pytest.fixture
 def data():
-    return initData(1, start_date="2011-01-01 00:00:00", end_date="2011-01-02 03:00:00", freq="5min")
+    return initData(cols=1, start_date="2011-01-01 00:00:00", end_date="2011-01-02 03:00:00", freq="5min")
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
diff --git a/test/funcs/test_constants_detection.py b/test/funcs/test_constants_detection.py
index 74e066de8c2c2f1d5d7eb1d4859491d1945d985b..52e2f6d9e50fab7d0ecda01adea82d60ff3614ea 100644
--- a/test/funcs/test_constants_detection.py
+++ b/test/funcs/test_constants_detection.py
@@ -12,14 +12,12 @@ from test.common import TESTFLAGGER, initData
 @pytest.fixture
 def data():
     constants_data = initData(1, start_date="2011-01-01 00:00:00", end_date="2011-01-01 03:00:00", freq="5min")
-    constants_data.iloc[5:25] = 0
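+    # create a plateau of 20 identical values for the constants detection tests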
+    constants_data.iloc[5:25] = 200
     return constants_data
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_constants_flagBasic(data, flagger):
-    idx = np.array([5, 6, 7, 8, 9, 10, 18, 19, 20, 21])
-    data.iloc[idx] = 200
     expected = np.arange(5, 22)
     field, *_ = data.columns
     flagger = flagger.initFlags(data)
@@ -30,7 +28,6 @@ def test_constants_flagBasic(data, flagger):
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_constants_flagVarianceBased(data, flagger):
-    data.iloc[5:25] = 200
     expected = np.arange(5, 25)
     field, *_ = data.columns
     flagger = flagger.initFlags(data)
diff --git a/test/funcs/test_functions.py b/test/funcs/test_functions.py
index 75e19bfaa701a2ea46a035ff0f3c4c6b3c035161..8670e09a2e675c7fa5b9338916e4c62104948090 100644
--- a/test/funcs/test_functions.py
+++ b/test/funcs/test_functions.py
@@ -3,17 +3,15 @@
 
 import pytest
 import numpy as np
+import pandas as pd
+import dios
 
-from saqc.funcs.functions import (
-    flagRange,
-    flagSesonalRange,
-    forceFlags,
-    clearFlags,
-    flagIsolated,
-)
+from saqc.funcs.functions import *
 from test.common import initData, TESTFLAGGER
 
 
+
+
 @pytest.fixture
 def data():
     return initData(cols=1, start_date="2016-01-01", end_date="2018-12-31", freq="1D")
@@ -31,15 +29,15 @@ def test_flagRange(data, field, flagger):
     data, flagger = flagRange(data, field, flagger, min=min, max=max)
     flagged = flagger.isFlagged(field)
     expected = (data[field] < min) | (data[field] > max)
-    assert np.all(flagged == expected)
+    assert (flagged == expected).all()
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_flagSesonalRange(data, field, flagger):
     # prepare
-    data.loc[::2] = 0
-    data.loc[1::2] = 50
-    nyears = len(data.index.year.unique())
+    data.iloc[::2] = 0
+    data.iloc[1::2] = 50
+    nyears = len(data[field].index.year.unique())
 
     tests = [
         ({"min": 1, "max": 100, "startmonth": 7, "startday": 1, "endmonth": 8, "endday": 31,}, 31 * 2 * nyears // 2,),
@@ -60,8 +58,8 @@ def test_clearFlags(data, field, flagger):
     flags_set = flagger.setFlags(field, flag=flagger.BAD).getFlags()
     _, flagger = clearFlags(data, field, flagger)
     flags_cleared = flagger.getFlags()
-    assert np.all(flags_orig != flags_set)
-    assert np.all(flags_orig == flags_cleared)
+    assert (flags_orig != flags_set).all(None)
+    assert (flags_orig == flags_cleared).all(None)
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
@@ -82,14 +80,136 @@ def test_flagIsolated(data, flagger):
     data.iloc[11:13, 0] = np.nan
     data.iloc[15:17, 0] = np.nan
     flagger = flagger.initFlags(data)
-    flagger = flagger.setFlags(field, iloc=slice(5, 6))
+    s = data[field].iloc[5:6]
+    flagger = flagger.setFlags(field, loc=s)
 
     _, flagger_result = flagIsolated(data, field, flagger, group_window="1D", gap_window="2.1D")
 
     assert flagger_result.isFlagged(field)[slice(3, 6, 2)].all()
 
-    flagger = flagger.setFlags(field, iloc=slice(3, 4), flag=flagger.UNFLAGGED, force=True)
     data, flagger_result = flagIsolated(
         data, field, flagger_result, group_window="2D", gap_window="2.1D", continuation_range="1.1D",
     )
     assert flagger_result.isFlagged(field)[[3, 5, 13, 14]].all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
+def test_flagCrossScoring(dat, flagger):
+    data1, characteristics = dat(initial_level=0, final_level=0, out_val=0)
+    data2, characteristics = dat(initial_level=0, final_level=0, out_val=10)
+    field = "dummy"
+    fields = ["data1", "data2"]
+    s1, s2 = data1.squeeze(), data2.squeeze()
+    s1 = pd.Series(data=s1.values, index=s1.index)
+    s2 = pd.Series(data=s2.values, index=s1.index)
+    data = dios.DictOfSeries([s1, s2], columns=["data1", "data2"])
+    flagger = flagger.initFlags(data)
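+    # the spike injected into data2 is expected to be flagged in both variables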
+    _, flagger_result = flagCrossScoring(data, field, flagger, fields=fields, thresh=3, cross_stat=np.mean)
+    for field in fields:
+        isflagged = flagger_result.isFlagged(field)
+        assert isflagged[characteristics["raise"]].all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_flagManual(data, flagger):
+    field = data.columns[0]
+    flagger = flagger.initFlags(data)
+    args = data, field, flagger
+    dat = data[field]
+
+    mdata = pd.Series("lala", index=dat.index)
+    index_exp = mdata.iloc[[10, 33, 200, 500]].index
+    mdata.iloc[[101, 133, 220, 506]] = "b"
+    mdata.loc[index_exp] = "a"
+    shrinked = mdata.loc[index_exp.union(mdata.iloc[[1, 2, 3, 4, 600, 601]].index)]
+
+    kwargs_list = [
+        dict(mdata=mdata, mflag="a", method="plain"),
+        dict(mdata=mdata.to_list(), mflag="a", method="plain"),
+        dict(mdata=mdata, mflag="a", method="ontime"),
+        dict(mdata=shrinked, mflag="a", method="ontime"),
+    ]
+
+    for kw in kwargs_list:
+        _, fl = flagManual(*args, **kw)
+        isflagged = fl.isFlagged(field)
+        assert isflagged[isflagged].index.equals(index_exp)
+
+    # flag not exist in mdata
+    _, fl = flagManual(*args, mdata=mdata, mflag="i do not exist", method="ontime")
+    isflagged = fl.isFlagged(field)
+    assert isflagged[isflagged].index.equals(pd.DatetimeIndex([]))
+
+    # check right-open / ffill
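+    # each mdata value is forward-filled onto the data timestamps up to, but
+    # excluding, the next mdata timestamp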
+    index = pd.date_range(start="2016-01-01", end="2018-12-31", periods=11)
+    mdata = pd.Series(0, index=index)
+    mdata.loc[index[[1, 5, 6, 7, 9, 10]]] = 1
+    # >>> mdata
+    # 2016-01-01 00:00:00    0
+    # 2016-04-19 12:00:00    1
+    # 2016-08-07 00:00:00    0
+    # 2016-11-24 12:00:00    0
+    # 2017-03-14 00:00:00    0
+    # 2017-07-01 12:00:00    1
+    # 2017-10-19 00:00:00    1
+    # 2018-02-05 12:00:00    1
+    # 2018-05-26 00:00:00    0
+    # 2018-09-12 12:00:00    1
+    # 2018-12-31 00:00:00    1
+    # dtype: int64
+
+    # add first and last index from data
+    expected = mdata.copy()
+    expected.loc[dat.index[0]] = 0
+    expected.loc[dat.index[-1]] = 1
+    expected = expected.astype(bool)
+
+    _, fl = flagManual(*args, mdata=mdata, mflag=1, method="right-open")
+    isflagged = fl.isFlagged(field)
+    last = expected.index[0]
+    for curr in expected.index[1:]:
+        expected_value = mdata[last]
+        # datetime slicing is inclusive !
+        i = isflagged[last:curr].index[:-1]
+        chunk = isflagged.loc[i]
+        assert (chunk == expected_value).all()
+        last = curr
+    # check last value
+    assert isflagged[curr] == expected[curr]
+
+    # check left-open / bfill
+    expected.loc[dat.index[-1]] = 0  # this time the last is False
+    _, fl = flagManual(*args, mdata=mdata, mflag=1, method="left-open")
+    isflagged = fl.isFlagged(field)
+    last = expected.index[0]
+    assert isflagged[last] == expected[last]
+    for curr in expected.index[1:]:
+        expected_value = mdata[curr]
+        # datetime slicing is inclusive !
+        i = isflagged[last:curr].index[1:]
+        chunk = isflagged.loc[i]
+        assert (chunk == expected_value).all()
+        last = curr
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_1")])
+def test_flagDriftFromNormal(dat, flagger):
+    data = dat(periods=200, peak_level=5, name='d1')[0]
+    data['d2'] = dat(periods=200, peak_level=10, name='d2')[0]['d2']
+    data['d3'] = dat(periods=200, peak_level=100, name='d3')[0]['d3']
+    data['d4'] = 3 + 4 * data['d1']
+    data['d5'] = 3 + 4 * data['d1']
+
+    flagger = flagger.initFlags(data)
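+    # d3 deviates strongly from the group (peak 100 vs. 5 and 10), so every
+    # drift check is expected to flag it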
+    data_norm, flagger_norm = flagDriftFromNorm(data, 'dummy', flagger, ['d1', 'd2', 'd3'], segment_freq="200min",
+                                      norm_spread=5)
+
+    data_ref, flagger_ref = flagDriftFromReference(data, 'd1', flagger, ['d1', 'd2', 'd3'], segment_freq="3D",
+                                      thresh=20)
+
+    data_scale, flagger_scale = flagDriftScale(data, 'dummy', flagger, ['d1', 'd3'], ['d4', 'd5'], segment_freq="3D",
+                                                   thresh=20,  norm_spread=5)
+    assert flagger_norm.isFlagged()['d3'].all()
+    assert flagger_ref.isFlagged()['d3'].all()
+    assert flagger_scale.isFlagged()['d3'].all()
diff --git a/test/funcs/test_generic_api_functions.py b/test/funcs/test_generic_api_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..c800178cae77684a29c1e11f2b0ddf8f7e32001b
--- /dev/null
+++ b/test/funcs/test_generic_api_functions.py
@@ -0,0 +1,61 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+import ast
+
+import pytest
+import numpy as np
+import pandas as pd
+
+from dios import DictOfSeries
+
+from test.common import TESTFLAGGER, TESTNODATA, initData, writeIO, flagAll
+from saqc.core.visitor import ConfigFunctionParser
+from saqc.core.config import Fields as F
+from saqc.core.register import register
+from saqc import SaQC, SimpleFlagger
+from saqc.funcs.functions import _execGeneric
+
+
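+# register the test helper `flagAll` (imported from test.common) with the
+# SaQC function registry; `register` is called directly here instead of
+# being used as a decorator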
+register(masking='field')(flagAll)
+
+
+@pytest.fixture
+def data():
+    return initData()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_addFieldFlagGeneric(data, flagger):
+    saqc = SaQC(data=data, flagger=flagger)
+
+    data, flags = saqc.flagGeneric(
+        "tmp1",
+        func=lambda var1: pd.Series(False, index=data[var1.name].index)
+    ).getResult()
+    assert "tmp1" in flags.columns and "tmp1" not in data
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_addFieldProcGeneric(data, flagger):
+    saqc = SaQC(data=data, flagger=flagger)
+
+    data, flagger = saqc.procGeneric("tmp1", func=lambda: pd.Series([])).getResult(raw=True)
+    assert "tmp1" in data.columns and data["tmp1"].empty
+
+    data, flagger = saqc.procGeneric("tmp2", func=lambda var1, var2: var1 + var2).getResult()
+    assert "tmp2" in data.columns and (data["tmp2"] == data["var1"] + data["var2"]).all(axis=None)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_mask(data, flagger):
+
+    saqc = SaQC(data=data, flagger=flagger)
+    data_org = data.copy(deep=True)
+    mean = data["var1"] / 2
+
+    data, _ = saqc.procGeneric("var1", lambda var1: mask(var1 < mean)).getResult()
+    assert ((data["var1"].isna()) == (data_org["var1"] < 10) & data_org["var1"].isna()).all(axis=None)
+
+    data, flags = saqc.procGeneric("tmp", lambda var1: mask(var1 < mean)).getResult()
+    assert ("tmp" in data.columns) and ("tmp" in flags.columns)
+    assert ((data["tmp"].isna()) == (data_org["var1"] < 10) & data_org["var1"].isna()).all(axis=None)
diff --git a/test/funcs/test_generic_config_functions.py b/test/funcs/test_generic_config_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..b761fece3127517fc6dbdca65ed539191bcf2c9a
--- /dev/null
+++ b/test/funcs/test_generic_config_functions.py
@@ -0,0 +1,327 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import ast
+
+import pytest
+import numpy as np
+import pandas as pd
+
+from dios import DictOfSeries
+
+from test.common import TESTFLAGGER, TESTNODATA, initData, writeIO
+from saqc.core.visitor import ConfigFunctionParser
+from saqc.core.config import Fields as F
+from saqc.core.register import register
+from saqc import SaQC, SimpleFlagger
+from saqc.funcs.functions import _execGeneric
+
+
+@pytest.fixture
+def data():
+    return initData()
+
+
+@pytest.fixture
+def data_diff():
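+    # two columns with only partially overlapping indices - used to check
+    # generic assignments on variables of unequal length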
+    data = initData(cols=3)
+    col0 = data[data.columns[0]]
+    col1 = data[data.columns[1]]
+    mid = len(col0) // 2
+    offset = len(col0) // 8
+    return DictOfSeries(data={col0.name: col0.iloc[: mid + offset], col1.name: col1.iloc[mid - offset :],})
+
+
+def _compileGeneric(expr, flagger):
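+    # parse a config-style expression and return the compiled callable bound
+    # to its "func" keyword argument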
+    tree = ast.parse(expr, mode="eval")
+    _, kwargs = ConfigFunctionParser(flagger).parse(tree.body)
+    return kwargs["func"]
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_missingIdentifier(data, flagger):
+
+    # NOTE:
+    # - the error is only raised at runtime; raising it during parsing would be better
+    tests = [
+        "fff(var2) < 5",
+        "var3 != NODATA",
+    ]
+
+    for test in tests:
+        func = _compileGeneric(f"flagGeneric(func={test})", flagger)
+        with pytest.raises(NameError):
+            _execGeneric(flagger, data, func, field="", nodata=np.nan)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_syntaxError(flagger):
+
+    tests = [
+        "range(x=5",
+        "rangex=5)",
+        "range[x=5]" "range{x=5}" "int->float(x=4)" "int*float(x=4)",
+    ]
+
+    for test in tests:
+        with pytest.raises(SyntaxError):
+            _compileGeneric(f"flagGeneric(func={test})", flagger)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_typeError(flagger):
+
+    """
+    test that forbidden constructs actually throw an error
+    TODO: find a few more cases or get rid of the test
+    """
+
+    # TODO: think about cases that should be forbidden
+    tests = ("lambda x: x * 2",)
+
+    for test in tests:
+        with pytest.raises(TypeError):
+            _compileGeneric(f"flagGeneric(func={test})", flagger)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_comparisonOperators(data, flagger):
+    flagger = flagger.initFlags(data)
+    var1, var2, *_ = data.columns
+    this = var1
+
+    tests = [
+        ("this > 100", data[this] > 100),
+        (f"10 >= {var2}", 10 >= data[var2]),
+        (f"{var2} < 100", data[var2] < 100),
+        (f"this <= {var2}", data[this] <= data[var2]),
+        (f"{var1} == {var2}", data[this] == data[var2]),
+        (f"{var1} != {var2}", data[this] != data[var2]),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"flagGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, field=var1, nodata=np.nan)
+        assert np.all(result == expected)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_arithmeticOperators(data, flagger):
+    flagger = flagger.initFlags(data)
+    var1, *_ = data.columns
+    this = data[var1]
+
+    tests = [
+        ("var1 + 100 > 110", this + 100 > 110),
+        ("var1 - 100 > 0", this - 100 > 0),
+        ("var1 * 100 > 200", this * 100 > 200),
+        ("var1 / 100 > .1", this / 100 > 0.1),
+        ("var1 % 2 == 1", this % 2 == 1),
+        ("var1 ** 2 == 0", this ** 2 == 0),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"procGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, field=var1, nodata=np.nan)
+        assert np.all(result == expected)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_nonReduncingBuiltins(data, flagger):
+    flagger = flagger.initFlags(data)
+    var1, *_ = data.columns
+    this = var1
+    mean = data[var1].mean()
+
+    tests = [
+        (f"abs({this})", np.abs(data[this])),
+        (f"log({this})", np.log(data[this])),
+        (f"exp({this})", np.exp(data[this])),
+        (f"ismissing(mask({this} < {mean}))", data.mask(data[this] < mean).isna()),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"procGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, field=this, nodata=np.nan)
+        assert (result == expected).all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("nodata", TESTNODATA)
+def test_reduncingBuiltins(data, flagger, nodata):
+
+    data.loc[::4] = nodata
+    flagger = flagger.initFlags(data)
+    var1 = data.columns[0]
+    this = data.iloc[:, 0]
+
+    tests = [
+        ("min(this)", np.nanmin(this)),
+        (f"max({var1})", np.nanmax(this)),
+        (f"sum({var1})", np.nansum(this)),
+        ("mean(this)", np.nanmean(this)),
+        (f"std({this.name})", np.std(this)),
+        (f"len({this.name})", len(this)),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"procGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, field=this.name, nodata=nodata)
+        assert result == expected
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("nodata", TESTNODATA)
+def test_ismissing(data, flagger, nodata):
+
+    data.iloc[: len(data) // 2, 0] = np.nan
+    data.iloc[(len(data) // 2) + 1 :, 0] = -9999
+    this = data.iloc[:, 0]
+
+    tests = [
+        (f"ismissing({this.name})", (pd.isnull(this) | (this == nodata))),
+        (f"~ismissing({this.name})", (pd.notnull(this) & (this != nodata))),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"flagGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, this.name, nodata)
+        assert np.all(result == expected)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("nodata", TESTNODATA)
+def test_bitOps(data, flagger, nodata):
+    var1, var2, *_ = data.columns
+    this = var1
+
+    flagger = flagger.initFlags(data)
+
+    tests = [
+        ("~(this > mean(this))", ~(data[this] > np.nanmean(data[this]))),
+        (f"(this <= 0) | (0 < {var1})", (data[this] <= 0) | (0 < data[var1])),
+        (f"({var2} >= 0) & (0 > this)", (data[var2] >= 0) & (0 > data[this])),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"flagGeneric(func={test})", flagger)
+        result = _execGeneric(flagger, data, func, this, nodata)
+        assert np.all(result == expected)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_isflagged(data, flagger):
+
+    var1, var2, *_ = data.columns
+
+    flagger = flagger.initFlags(data).setFlags(var1, loc=data[var1].index[::2], flag=flagger.BAD)
+
+    tests = [
+        (f"isflagged({var1})", flagger.isFlagged(var1)),
+        (f"isflagged({var1}, flag=BAD)", flagger.isFlagged(var1, flag=flagger.BAD, comparator=">=")),
+        (f"isflagged({var1}, UNFLAGGED, '==')", flagger.isFlagged(var1, flag=flagger.UNFLAGGED, comparator="==")),
+        (f"~isflagged({var2})", ~flagger.isFlagged(var2)),
+        (f"~({var2}>999) & (~isflagged({var2}))", ~(data[var2] > 999) & (~flagger.isFlagged(var2))),
+    ]
+
+    for test, expected in tests:
+        func = _compileGeneric(f"flagGeneric(func={test}, flag=BAD)", flagger)
+        result = _execGeneric(flagger, data, func, field=None, nodata=np.nan)
+        assert np.all(result == expected)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_variableAssignments(data, flagger):
+    var1, var2, *_ = data.columns
+
+    config = f"""
+    {F.VARNAME}  ; {F.TEST}
+    dummy1       ; procGeneric(func=var1 + var2)
+    dummy2       ; flagGeneric(func=var1 + var2 > 0)
+    """
+
+    fobj = writeIO(config)
+    saqc = SaQC(flagger, data).readConfig(fobj)
+    result_data, result_flagger = saqc.getResult(raw=True)
+
+    assert set(result_data.columns) == set(data.columns) | {
+        "dummy1",
+    }
+    assert set(result_flagger.getFlags().columns) == set(data.columns) | {"dummy1", "dummy2"}
+
+
+@pytest.mark.xfail(strict=True)
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_procGenericMultiple(data_diff, flagger):
+    var1, var2, *_ = data_diff.columns
+
+    config = f"""
+    {F.VARNAME} ; {F.TEST}
+    dummy       ; procGeneric(func=var1 + 1)
+    dummy       ; procGeneric(func=var2 - 1)
+    """
+
+    fobj = writeIO(config)
+    saqc = SaQC(flagger, data_diff).readConfig(fobj)
+    result_data, result_flagger = saqc.getResult()
+    assert len(result_data["dummy"]) == len(result_flagger.getFlags("dummy"))
+
+
+def test_callableArgumentsUnary(data):
+
+    window = 5
+
+    @register(masking='field')
+    def testFuncUnary(data, field, flagger, func, **kwargs):
+        data[field] = data[field].rolling(window=window).apply(func)
+        return data, flagger.initFlags(data=data)
+
+    flagger = SimpleFlagger()
+    var = data.columns[0]
+
+    config = f"""
+    {F.VARNAME} ; {F.TEST}
+    {var}       ; testFuncUnary(func={{0}})
+    """
+
+    tests = [
+        ("sum", np.sum),
+        ("std(exp(x))", lambda x: np.std(np.exp(x))),
+    ]
+
+    for (name, func) in tests:
+        fobj = writeIO(config.format(name))
+        result_config, _ = SaQC(flagger, data).readConfig(fobj).getResult()
+        result_api, _ = SaQC(flagger, data).testFuncUnary(var, func=func).getResult()
+        expected = data[var].rolling(window=window).apply(func)
+        assert (result_config[var].dropna() == expected.dropna()).all(axis=None)
+        assert (result_api[var].dropna() == expected.dropna()).all(axis=None)
+
+
+def test_callableArgumentsBinary(data):
+
+    flagger = SimpleFlagger()
+    var1, var2 = data.columns[:2]
+
+    @register(masking='field')
+    def testFuncBinary(data, field, flagger, func, **kwargs):
+        data[field] = func(data[var1], data[var2])
+        return data, flagger.initFlags(data=data)
+
+    config = f"""
+    {F.VARNAME} ; {F.TEST}
+    {var1}      ; testFuncBinary(func={{0}})
+    """
+
+    tests = [
+        ("x + y", lambda x, y: x + y),
+        ("y - (x * 2)", lambda y, x: y - (x * 2)),
+    ]
+
+    for (name, func) in tests:
+        fobj = writeIO(config.format(name))
+        result_config, _ = SaQC(flagger, data).readConfig(fobj).getResult()
+        result_api, _ = SaQC(flagger, data).testFuncBinary(var1, func=func).getResult()
+        expected = func(data[var1], data[var2])
+        assert (result_config[var1].dropna() == expected.dropna()).all(axis=None)
+        assert (result_api[var1].dropna() == expected.dropna()).all(axis=None)
diff --git a/test/funcs/test_generic_functions.py b/test/funcs/test_generic_functions.py
deleted file mode 100644
index 383504230f1ae2cc64b9bed0abc3b87ede66536f..0000000000000000000000000000000000000000
--- a/test/funcs/test_generic_functions.py
+++ /dev/null
@@ -1,240 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import pytest
-import numpy as np
-import pandas as pd
-
-from test.common import initData, TESTFLAGGER, TESTNODATA
-from saqc.core.core import run
-from saqc.core.config import Fields as F
-
-from test.common import initData, TESTFLAGGER, TESTNODATA, initMetaDict, initMetaString
-
-from saqc.core.evaluator import (
-    DslTransformer,
-    initLocalEnv,
-    parseExpression,
-    evalExpression,
-    compileTree,
-    evalCode,
-)
-
-
-def _evalDslExpression(expr, data, field, flagger, nodata=np.nan):
-    env = initLocalEnv(data, field, flagger, nodata)
-    tree = parseExpression(expr)
-    transformed_tree = DslTransformer(env).visit(tree)
-    code = compileTree(transformed_tree)
-    return evalCode(code, local_env=env)
-
-
-@pytest.fixture
-def data():
-    return initData()
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_missingIdentifier(data, flagger):
-
-    flagger = flagger.initFlags(data)
-    tests = ["flagGeneric(func=fff(var2) < 5)", "flagGeneric(func=var3 != NODATA)"]
-    for expr in tests:
-        with pytest.raises(NameError):
-            evalExpression(expr, data, data.columns[0], flagger, np.nan)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_comparisonOperators(data, flagger):
-    flagger = flagger.initFlags(data)
-    var1, var2, *_ = data.columns
-    this = var1
-
-    tests = [
-        ("this > 100", data[this] > 100),
-        (f"10 >= {var2}", 10 >= data[var2]),
-        (f"{var2} < 100", data[var2] < 100),
-        (f"this <= {var2}", data[this] <= data[var2]),
-        (f"{var1} == {var2}", data[this] == data[var2]),
-        (f"{var1} != {var2}", data[this] != data[var2]),
-    ]
-
-    # check within the usually enclosing scope
-    for expr, mask in tests:
-        _, result_flagger = evalExpression(f"flagGeneric(func={expr})", data, this, flagger, np.nan)
-        expected_flagger = flagger.setFlags(this, loc=mask, test="generic")
-        assert np.all(result_flagger.isFlagged() == expected_flagger.isFlagged())
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_arithmeticOperators(data, flagger):
-    flagger = flagger.initFlags(data)
-    var1, *_ = data.columns
-    this = data[var1]
-
-    tests = [
-        ("this + 100", this + 100),
-        ("this - 1000", this - 1000),
-        ("this * 2", this * 2),
-        ("this / 100", this / 100),
-        ("this % 2", this % 2),
-        ("this ** 2", this ** 2),
-    ]
-
-    # check within the usually enclosing scope
-    for expr, expected in tests:
-        result_data, _ = evalExpression(f"procGeneric(func={expr})", data, var1, flagger, np.nan)
-        assert np.all(result_data[expected.name] == expected)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_nonReduncingBuiltins(data, flagger):
-    flagger = flagger.initFlags(data)
-    var1, *_ = data.columns
-    this = data[var1]
-
-    tests = [
-        ("abs(this)", np.abs(this)),
-        ("sqrt(this)", np.sqrt(this)),
-        ("exp(this)", np.exp(this)),
-        ("log(this)", np.log(this)),
-    ]
-
-    for expr, expected in tests:
-        result_data, _ = evalExpression(f"procGeneric(func={expr})", data, var1, flagger, np.nan)
-        assert np.all(result_data[expected.name] == expected)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("nodata", TESTNODATA)
-def test_reduncingBuiltins(data, flagger, nodata):
-    data.loc[::4] = nodata
-    flagger = flagger.initFlags(data)
-    var1, *_ = data.columns
-    this = data[var1]
-
-    tests = [
-        ("min(this)", np.min(this)),
-        (f"max(this)", np.max(this)),
-        (f"sum(this)", np.nansum(this)),
-        ("mean(this)", np.nanmean(this)),
-        (f"std(this)", np.std(this)),
-        (f"len(this)", len(this)),
-    ]
-    for expr, expected in tests:
-        result_data, _ = evalExpression(f"procGeneric(func={expr})", data, var1, flagger, np.nan)
-        assert np.all(result_data[var1] == expected)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("nodata", TESTNODATA)
-def test_ismissing(data, flagger, nodata):
-
-    data.iloc[: len(data) // 2, 0] = np.nan
-    data.iloc[(len(data) // 2) + 1 :, 0] = -9999
-    var1, *_ = data.columns
-
-    flagger = flagger.initFlags(data)
-
-    tests = [
-        (f"ismissing({var1})", lambda data: (pd.isnull(data) | (data == nodata)).all()),
-        (f"~ismissing({var1})", lambda data: (pd.notnull(data) & (data != nodata)).all(),),
-    ]
-
-    for expr, checkFunc in tests:
-        idx = _evalDslExpression(expr, data, var1, flagger, nodata)
-        assert checkFunc(data.loc[idx, var1])
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("nodata", TESTNODATA)
-def test_bitOps(data, flagger, nodata):
-    var1, var2, *_ = data.columns
-    this = data[var1]
-
-    flagger = flagger.initFlags(data)
-
-    tests = [
-        (f"~(this > mean(this))", ~(this > np.nanmean(this))),
-        (f"(this <= 0) | (0 < {var1})", (this <= 0) | (0 < data[var1])),
-        (f"({var2} >= 0) & (0 > this)", (data[var2] >= 0) & (0 > this)),
-    ]
-
-    for expr, expected in tests:
-        _, flagger_result = evalExpression(f"flagGeneric(func={expr})", data, this.name, flagger, nodata)
-        assert (flagger_result.isFlagged(this.name) == expected).all()
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_isflagged(data, flagger):
-
-    flagger = flagger.initFlags(data)
-    var1, var2, *_ = data.columns
-
-    flagger = flagger.setFlags(var1, iloc=slice(None, None, 2))
-    flagger = flagger.setFlags(var2, iloc=slice(None, None, 2))
-
-    idx = _evalDslExpression(f"isflagged({var1})", data, var2, flagger)
-
-    flagged = flagger.isFlagged(var1)
-    assert (flagged == idx).all
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_invertIsFlagged(data, flagger):
-
-    flagger = flagger.initFlags(data)
-    var1, var2, *_ = data.columns
-
-    flagger = flagger.setFlags(var2, iloc=slice(None, None, 2))
-
-    tests = [
-        (f"~isflagged({var2})", ~flagger.isFlagged(var2)),
-        (f"~({var2}>999) & (~isflagged({var2}))", ~(data[var2] > 999) & (~flagger.isFlagged(var2))),
-    ]
-
-    for expr, flags_expected in tests:
-        _, flagger_result = evalExpression(f"flagGeneric(func={expr})", data, var1, flagger, np.nan)
-        flags_result = flagger_result.isFlagged(var1)
-        assert np.all(flags_result == flags_expected)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_isflaggedArgument(data, flagger):
-
-    var1, var2, *_ = data.columns
-
-    flagger = flagger.initFlags(data).setFlags(var1, iloc=slice(None, None, 2), flag=flagger.BAD)
-
-    tests = [
-        (_evalDslExpression(f"isflagged({var1}, BAD)", data, var2, flagger), flagger.isFlagged(var1, flag=flagger.BAD)),
-        (
-            _evalDslExpression(f"isflagged({var1}, UNFLAGGED, '==')", data, var2, flagger),
-            flagger.isFlagged(var1, flag=flagger.UNFLAGGED, comparator="=="),
-        ),
-    ]
-
-    for result, expected in tests:
-        assert np.all(result == expected)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_variableAssignments(data, flagger):
-    var1, var2, *_ = data.columns
-
-    from saqc.core.core import run
-    from saqc.core.config import Fields as F
-    from test.common import writeIO
-
-    config = f"""
-    {F.VARNAME}  ; {F.TESTS}
-    dummy1       ; procGeneric(func=var1 + var2)
-    dummy2       ; flagGeneric(func=var1 + var2 > 0)
-    """
-
-    result_data, result_flagger = run(writeIO(config), flagger, data)
-
-    assert set(result_data.columns) == set(data.columns) | {
-        "dummy1",
-    }
-    assert set(result_flagger.getFlags().columns) == set(data.columns) | {"dummy1", "dummy2"}
diff --git a/test/funcs/test_harm_funcs.py b/test/funcs/test_harm_funcs.py
index c5ddfacdc27185ce9b5586b52c5c72e0a43d0d87..d8825f9689c4c7108b53e9dc22772d203449ab04 100644
--- a/test/funcs/test_harm_funcs.py
+++ b/test/funcs/test_harm_funcs.py
@@ -1,39 +1,27 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+
+# see test/funcs/conftest.py for global fixtures "course_..."
 import pytest
 
 import numpy as np
 import pandas as pd
+import dios
 
-from test.common import TESTFLAGGER, initData
+from test.common import TESTFLAGGER
 
 from saqc.funcs.harm_functions import (
-    harm_harmonize,
-    harm_deharmonize,
-    _interpolate,
-    _interpolateGrid,
-    _insertGrid,
-    _outsortCrap,
     harm_linear2Grid,
     harm_interpolate2Grid,
     harm_shift2Grid,
     harm_aggregate2Grid,
-    harm_downsample,
+    harm_deharmonize,
 )
 
+RESHAPERS = ["nshift", "fshift", "bshift", "nagg", "bagg", "fagg", "interpolation"]
 
-RESHAPERS = ["nshift", "fshift", "bshift"]
-
-COFLAGGING = [False, True]
-
-SETSHIFTCOMMENT = [False, True]
-
-INTERPOLATIONS = ["fshift", "bshift", "nshift", "nagg", "bagg"]
-
-INTERPOLATIONS2 = ["fagg", "time", "polynomial"]
-
-FREQS = ["15min", "30min"]
+INTERPOLATIONS = ["time", "polynomial"]
 
 
 @pytest.fixture
@@ -48,263 +36,178 @@ def data():
     dat = pd.Series(np.linspace(-50, 50, index.size), index=index, name="data")
     # good to have some nan
     dat[-3] = np.nan
-    data = dat.to_frame()
-    return data
-
-
-@pytest.fixture
-def multi_data():
-    index = pd.date_range(start="1.1.2011 00:00:00", end="1.1.2011 01:00:00", freq="15min")
-    index = index.insert(2, pd.Timestamp(2011, 1, 1, 0, 29, 0))
-    index = index.insert(2, pd.Timestamp(2011, 1, 1, 0, 28, 0))
-    index = index.insert(5, pd.Timestamp(2011, 1, 1, 0, 32, 0))
-    index = index.insert(5, pd.Timestamp(2011, 1, 1, 0, 31, 0))
-    index = index.insert(0, pd.Timestamp(2010, 12, 31, 23, 57, 0))
-    index = index.drop(pd.Timestamp("2011-01-01 00:30:00"))
-    dat = pd.Series(np.linspace(-50, 50, index.size), index=index, name="data")
-    # good to have some nan
-    dat[-3] = np.nan
-    data = dat.to_frame()
-    data.index = data.index.shift(1, "2min")
-    dat2 = data.copy()
-    dat2.index = dat2.index.shift(1, "17min")
-    dat2.rename(columns={"data": "data2"}, inplace=True)
-    dat3 = data.copy()
-    dat3.index = dat3.index.shift(1, "1h")
-    dat3.rename(columns={"data": "data3"}, inplace=True)
-    dat3.drop(dat3.index[2:-2], inplace=True)
-    # merge
-    data = pd.merge(data, dat2, how="outer", left_index=True, right_index=True)
-    data = pd.merge(data, dat3, how="outer", left_index=True, right_index=True)
+    data = dios.DictOfSeries(dat)
     return data
 
 
-@pytest.mark.skip(reason="makes all other tests in this module fail")
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_heapConsistency(data, flagger):
-
-    # NOTE:
-    #
-    # We currently rely on a heap usage, that breaks a situation
-    # like the one tested here:
-    # 1. harmonize a dataset `d_1` with index `i_1`
-    # 2. harmonize a dateset `d_2` with index `i_2` and
-    #    `i_1[0] != i_2[0]` and/or `i_1[-1] != i_2[-1]`
-    # 3. deharmonize `d_2`
-    #
-    # Expected behaviour:
-    # `deharmonize(harmonize(d_2)).index == i_2`
-    #
-    # Actual behaviour:
-    # `deharmonize(harmonize(d_2)).index == i_1`
-    #
-    # We cannot fix that right now, because this would break the more
-    # common usage pattern where SaQC only sees one dataset during the
-    # entire lifetime of the harmonization heap (we used to be CLI-first,
-    # after all).
-    #
-    # Merging `dios` should fix that issue, though.
-
-    freq = "15Min"
-
-    # harmonize `other_data` and prefill the HEAP
-    other_data = initData(3)
-    other_flagger = flagger.initFlags(other_data)
-    harm_harmonize(other_data, other_data.columns[0], other_flagger, freq, "time", "nshift")
-
-    # harmonize and deharmonize `data`
-    # -> we want both harmonizations (`data` and `other_data`) to not interfere
-    flagger = flagger.initFlags(data)
-    data_harm, flagger_harm = harm_harmonize(data, "data", flagger, freq, "time", "nshift")
-    data_deharm, flagger_deharm = harm_deharmonize(data_harm, "data", flagger_harm)
-    assert np.all(data.dropna() == data_deharm.dropna())
-
-
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("reshaper", RESHAPERS)
-@pytest.mark.parametrize("co_flagging", COFLAGGING)
-def test_harmSingleVarIntermediateFlagging(data, flagger, reshaper, co_flagging):
-
+def test_harmSingleVarIntermediateFlagging(data, flagger, reshaper):
     flagger = flagger.initFlags(data)
-    # flags = flagger.initFlags(data)
     # make pre harm copies:
     pre_data = data.copy()
     pre_flags = flagger.getFlags()
     freq = "15min"
-
-    # harmonize data:
-    data, flagger = harm_harmonize(data, "data", flagger, freq, "time", reshaper)
-
+    assert len(data.columns) == 1
+    field = data.columns[0]
+    data, flagger = harm_linear2Grid(data, "data", flagger, freq)
     # flag something bad
-    flagger = flagger.setFlags("data", loc=data.index[3:4])
-    data, flagger = harm_deharmonize(data, "data", flagger, co_flagging=co_flagging)
-
+    flagger = flagger.setFlags("data", loc=data[field].index[3:4])
+    data, flagger = harm_deharmonize(data, "data", flagger, method="inverse_" + reshaper)
+    d = data[field]
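+    # the flag set at position 3 of the regular grid is projected back onto
+    # the original timestamps; which of them end up flagged depends on the
+    # inverse of the reshaping method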
+    if reshaper == "nagg":
+        assert flagger.isFlagged(loc=d.index[3:7]).squeeze().all()
+        assert (~flagger.isFlagged(loc=d.index[0:3]).squeeze()).all()
+        assert (~flagger.isFlagged(loc=d.index[7:]).squeeze()).all()
     if reshaper == "nshift":
-        if co_flagging is True:
-            assert flagger.isFlagged(loc=data.index[3:7]).squeeze().all()
-            assert (~flagger.isFlagged(loc=data.index[0:3]).squeeze()).all()
-            assert (~flagger.isFlagged(loc=data.index[7:]).squeeze()).all()
-        if co_flagging is False:
-            assert (
-                flagger.isFlagged().squeeze() == [False, False, False, False, True, False, True, False, False]
-            ).all()
+        assert (flagger.isFlagged().squeeze() == [False, False, False, False, True, False, False, False, False]).all()
+    if reshaper == "bagg":
+        assert flagger.isFlagged(loc=d.index[5:7]).squeeze().all()
+        assert (~flagger.isFlagged(loc=d.index[0:5]).squeeze()).all()
+        assert (~flagger.isFlagged(loc=d.index[7:]).squeeze()).all()
     if reshaper == "bshift":
-        if co_flagging is True:
-            assert flagger.isFlagged(loc=data.index[5:7]).squeeze().all()
-            assert (~flagger.isFlagged(loc=data.index[0:5]).squeeze()).all()
-            assert (~flagger.isFlagged(loc=data.index[7:]).squeeze()).all()
-        if co_flagging is False:
-            assert (
-                flagger.isFlagged().squeeze() == [False, False, False, False, False, True, True, False, False]
-            ).all()
+        assert (flagger.isFlagged().squeeze() == [False, False, False, False, False, True, False, False, False]).all()
+    if reshaper == "fagg":
+        assert flagger.isFlagged(loc=d.index[3:5]).squeeze().all()
+        assert (~flagger.isFlagged(loc=d.index[0:3]).squeeze()).all()
+        assert (~flagger.isFlagged(loc=d.index[5:]).squeeze()).all()
     if reshaper == "fshift":
-        if co_flagging is True:
-            assert flagger.isFlagged(loc=data.index[3:5]).squeeze().all()
-            assert flagger.isFlagged(loc=data.index[6:7]).squeeze().all()
-            assert (~flagger.isFlagged(loc=data.index[0:3]).squeeze()).all()
-            assert (~flagger.isFlagged(loc=data.index[7:]).squeeze()).all()
-        if co_flagging is False:
-            assert (
-                flagger.isFlagged().squeeze() == [False, False, False, False, True, False, True, False, False]
-            ).all()
+        assert (flagger.isFlagged().squeeze() == [False, False, False, False, True, False, False, False, False]).all()
 
     flags = flagger.getFlags()
-    assert pre_data.equals(data)
-    assert len(data) == len(flags)
-    assert (pre_flags.index == flags.index).all()
+    assert pre_data[field].equals(data[field])
+    assert len(data[field]) == len(flags[field])
+    assert (pre_flags[field].index == flags[field].index).all()
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("interpolation", INTERPOLATIONS)
-@pytest.mark.parametrize("freq", FREQS)
-def test_harmSingleVarInterpolations(data, flagger, interpolation, freq):
+def test_harmSingleVarInterpolations(data, flagger):
     flagger = flagger.initFlags(data)
-    flags = flagger.getFlags()
-    # make pre harm copies:
-    pre_data = data.copy()
-    pre_flags = flags.copy()
-
-    harm_start = data.index[0].floor(freq=freq)
-    harm_end = data.index[-1].ceil(freq=freq)
-    test_index = pd.date_range(start=harm_start, end=harm_end, freq=freq)
-    data, flagger = harm_harmonize(
-        data, "data", flagger, freq, interpolation, "fshift", reshape_shift_comment=False, inter_agg="sum",
-    )
-
-    if interpolation == "fshift":
-        if freq == "15min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -37.5, -25.0, 0.0, 37.5, 50.0]}, index=test_index))
-        if freq == "30min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -37.5, 0.0, 50.0]}, index=test_index))
-    if interpolation == "bshift":
-        if freq == "15min":
-            assert data.equals(pd.DataFrame({"data": [-50.0, -37.5, -25.0, 12.5, 37.5, 50.0]}, index=test_index))
-        if freq == "30min":
-            assert data.equals(pd.DataFrame({"data": [-50.0, -37.5, 12.5, 50.0]}, index=test_index))
-    if interpolation == "nshift":
-        if freq == "15min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -37.5, -25.0, 12.5, 37.5, 50.0]}, index=test_index))
-        if freq == "30min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -37.5, 12.5, 50.0]}, index=test_index))
-    if interpolation == "nagg":
-        if freq == "15min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -87.5, -25.0, 0.0, 37.5, 50.0]}, index=test_index))
-        if freq == "30min":
-            assert data.equals(pd.DataFrame({"data": [np.nan, -87.5, -25.0, 87.5]}, index=test_index))
-    if interpolation == "bagg":
-        if freq == "15min":
-            assert data.equals(pd.DataFrame({"data": [-50.0, -37.5, -37.5, 12.5, 37.5, 50.0]}, index=test_index))
-        if freq == "30min":
-            assert data.equals(pd.DataFrame({"data": [-50.0, -75.0, 50.0, 50.0]}, index=test_index))
-
-    data, flagger = harm_deharmonize(data, "data", flagger, co_flagging=True)
-
-    # data, flagger = harm_deharmonize(data, "data", flagger, co_flagging=True)
-    flags = flagger.getFlags()
-
-    assert pre_data.equals(data)
-    assert len(data) == len(flags)
-    assert (pre_flags.index == flags.index).all()
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-@pytest.mark.parametrize("shift_comment", SETSHIFTCOMMENT)
-def test_multivariatHarmonization(multi_data, flagger, shift_comment):
-    flagger = flagger.initFlags(multi_data)
-    flags = flagger.getFlags()
-    # for comparison
-    pre_data = multi_data.copy()
-    pre_flags = flags.copy()
-    freq = "15min"
-
-    harm_start = multi_data.index[0].floor(freq=freq)
-    harm_end = multi_data.index[-1].ceil(freq=freq)
-    test_index = pd.date_range(start=harm_start, end=harm_end, freq=freq)
-    # harm:
-    multi_data, flagger = harm_harmonize(
-        multi_data, "data", flagger, freq, "time", "nshift", reshape_shift_comment=shift_comment,
-    )
-
-    multi_data, flagger = harm_harmonize(
-        multi_data,
-        "data2",
-        flagger,
-        freq,
-        "bagg",
-        "bshift",
-        inter_agg="sum",
-        reshape_agg="max",
-        reshape_shift_comment=shift_comment,
-    )
-
-    multi_data, flagger = harm_harmonize(
-        multi_data, "data3", flagger, freq, "fshift", "fshift", reshape_shift_comment=shift_comment,
-    )
-    assert multi_data.index.equals(test_index)
-    assert pd.Timedelta(pd.infer_freq(multi_data.index)) == pd.Timedelta(freq)
-
-    multi_data, flagger = harm_deharmonize(multi_data, "data3", flagger, co_flagging=False)
-    multi_data, flagger = harm_deharmonize(multi_data, "data2", flagger, co_flagging=True)
-    multi_data, flagger = harm_deharmonize(multi_data, "data", flagger, co_flagging=True)
-
-    flags = flagger.getFlags()
-    assert pre_data.equals(multi_data[pre_data.columns.to_list()])
-    assert len(multi_data) == len(flags)
-    assert (pre_flags.index == flags.index).all()
-
-
-@pytest.mark.parametrize("method", INTERPOLATIONS2)
+    field = data.columns[0]
+    pre_data = data[field]
+    pre_flags = flagger.getFlags(field)
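+    # each case below checks the values on the regular grid and that the
+    # inverse deharmonization restores the original data and flags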
+    tests = [
+        (
+            "nagg",
+            "15Min",
+            pd.Series(
+                data=[-87.5, -25.0, 0.0, 37.5, 50.0],
+                index=pd.date_range("2011-01-01 00:00:00", "2011-01-01 01:00:00", freq="15min"),
+            ),
+        ),
+        (
+            "nagg",
+            "30Min",
+            pd.Series(
+                data=[-87.5, -25.0, 87.5],
+                index=pd.date_range("2011-01-01 00:00:00", "2011-01-01 01:00:00", freq="30min"),
+            ),
+        ),
+        (
+            "bagg",
+            "15Min",
+            pd.Series(
+                data=[-50.0, -37.5, -37.5, 12.5, 37.5, 50.0],
+                index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15min"),
+            ),
+        ),
+        (
+            "bagg",
+            "30Min",
+            pd.Series(
+                data=[-50.0, -75.0, 50.0, 50.0],
+                index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30min"),
+            ),
+        ),
+    ]
+
+    for interpolation, freq, expected in tests:
+        data_harm, flagger_harm = harm_aggregate2Grid(
+            data, field, flagger, freq, value_func=np.sum, method=interpolation
+        )
+        assert data_harm[field].equals(expected)
+        data_deharm, flagger_deharm = harm_deharmonize(
+            data_harm, "data", flagger_harm, method="inverse_" + interpolation
+        )
+        assert data_deharm[field].equals(pre_data)
+        assert flagger_deharm.getFlags([field]).squeeze().equals(pre_flags)
+
+    tests = [
+        (
+            "fshift",
+            "15Min",
+            pd.Series(
+                data=[np.nan, -37.5, -25.0, 0.0, 37.5, 50.0],
+                index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"),
+            ),
+        ),
+        (
+            "fshift",
+            "30Min",
+            pd.Series(
+                data=[np.nan, -37.5, 0.0, 50.0],
+                index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"),
+            ),
+        ),
+        (
+            "bshift",
+            "15Min",
+            pd.Series(
+                data=[-50.0, -37.5, -25.0, 12.5, 37.5, 50.0],
+                index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"),
+            ),
+        ),
+        (
+            "bshift",
+            "30Min",
+            pd.Series(
+                data=[-50.0, -37.5, 12.5, 50.0],
+                index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"),
+            ),
+        ),
+        (
+            "nshift",
+            "15min",
+            pd.Series(
+                data=[np.nan, -37.5, -25.0, 12.5, 37.5, 50.0],
+                index=pd.date_range("2010-12-31 23:45:00", "2011-01-01 01:00:00", freq="15Min"),
+            ),
+        ),
+        (
+            "nshift",
+            "30min",
+            pd.Series(
+                data=[np.nan, -37.5, 12.5, 50.0],
+                index=pd.date_range("2010-12-31 23:30:00", "2011-01-01 01:00:00", freq="30Min"),
+            ),
+        ),
+    ]
+
+    for interpolation, freq, expected in tests:
+        data_harm, flagger_harm = harm_shift2Grid(data, field, flagger, freq, method=interpolation)
+        assert data_harm[field].equals(expected)
+        data_deharm, flagger_deharm = harm_deharmonize(
+            data_harm, "data", flagger_harm, method="inverse_" + interpolation
+        )
+        assert data_deharm[field].equals(pre_data)
+        assert flagger_deharm.getFlags([field]).squeeze().equals(pre_flags)
+
+
+@pytest.mark.parametrize("method", INTERPOLATIONS)
 def test_gridInterpolation(data, method):
     freq = "15min"
-    data = (data * np.sin(data)).append(data.shift(1, "2h")).shift(1, "3s")
     data = data.squeeze()
-    # we are just testing if the interpolation gets passed to the series without causing an error:
-    _interpolateGrid(data, freq, method, order=1, agg_method="sum", downcast_interpolation=True)
-    if method == "polynomial":
-        _interpolateGrid(data, freq, method, order=2, agg_method="sum", downcast_interpolation=True)
-        _interpolateGrid(data, freq, method, order=10, agg_method="sum", downcast_interpolation=True)
-        data = _insertGrid(data, freq)
-        _interpolate(data, method, inter_limit=3)
-
-
-@pytest.mark.parametrize("flagger", TESTFLAGGER)
-def test_outsortCrap(data, flagger):
-
-    field = data.columns[0]
-    flagger = flagger.initFlags(data)
-    flagger = flagger.setFlags(field, iloc=slice(5, 7))
-
-    drop_index = data.index[5:7]
-    d, _ = _outsortCrap(data, field, flagger, drop_flags=flagger.BAD)
-    assert drop_index.difference(d.index).equals(drop_index)
+    field = data.name
+    data = (data * np.sin(data)).append(data.shift(1, "2h")).shift(1, "3s")
+    data = dios.DictOfSeries(data)
+    flagger = TESTFLAGGER[0].initFlags(data)
 
-    flagger = flagger.setFlags(field, iloc=slice(0, 1), flag=flagger.GOOD)
-    drop_index = drop_index.insert(-1, data.index[0])
-    d, _ = _outsortCrap(data, field, flagger, drop_flags=[flagger.BAD, flagger.GOOD],)
-    assert drop_index.sort_values().difference(d.index).equals(drop_index.sort_values())
+    # we are just testing if the interpolation gets passed to the series without causing an error:
 
-    f_drop, _ = _outsortCrap(data, field, flagger, drop_flags=[flagger.BAD, flagger.GOOD], return_drops=True,)
-    assert f_drop.index.sort_values().equals(drop_index.sort_values())
+    harm_interpolate2Grid(data, field, flagger, freq, method=method, downcast_interpolation=True)
+    if method == "polynomial":
+        harm_interpolate2Grid(data, field, flagger, freq, order=2, method=method, downcast_interpolation=True)
+        harm_interpolate2Grid(data, field, flagger, freq, order=10, method=method, downcast_interpolation=True)
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
@@ -313,8 +216,8 @@ def test_wrapper(data, flagger):
     field = data.columns[0]
     freq = "15min"
     flagger = flagger.initFlags(data)
-    harm_downsample(data, field, flagger, "15min", "30min", agg_func="sum", sample_func="mean")
-    harm_linear2Grid(data, field, flagger, freq, method="nagg", func="max", drop_flags=None)
-    harm_aggregate2Grid(data, field, flagger, freq, value_func="sum", flag_func="max", method="nagg", drop_flags=None)
-    harm_shift2Grid(data, field, flagger, freq, method="nshift", drop_flags=None)
+
+    harm_linear2Grid(data, field, flagger, freq, to_drop=None)
+    harm_aggregate2Grid(data, field, flagger, freq, value_func=np.nansum, method="nagg", to_drop=None)
+    harm_shift2Grid(data, field, flagger, freq, method="nshift", to_drop=None)
     harm_interpolate2Grid(data, field, flagger, freq, method="spline")
diff --git a/test/funcs/test_modelling.py b/test/funcs/test_modelling.py
new file mode 100644
index 0000000000000000000000000000000000000000..f221944f1c6c2fcfd1c23acba4dd13f552b9063f
--- /dev/null
+++ b/test/funcs/test_modelling.py
@@ -0,0 +1,74 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+# see test/funcs/conftest.py for global fixtures "course_..."
+
+import pytest
+
+import numpy as np
+import pandas as pd
+import dios
+
+from test.common import TESTFLAGGER
+
+from saqc.funcs.modelling import modelling_polyFit, modelling_rollingMean, modelling_mask
+
+TF = TESTFLAGGER[:1]
+
+
+@pytest.mark.parametrize("flagger", TF)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
+def test_modelling_polyFit_forRegular(dat, flagger):
+    data, _ = dat(freq="10min", periods=30, initial_level=0, final_level=100, out_val=-100)
+    # add some nice sine distortion
+    data = data + 10 * np.sin(np.arange(0, len(data.indexes[0])))
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
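+    # the numba and plain-python fits must agree up to floating point noise,
+    # and a "110min" window on 10min data behaves like an 11-period window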
+    result1, _ = modelling_polyFit(data, "data", flagger, 11, 2, numba=False)
+    result2, _ = modelling_polyFit(data, "data", flagger, 11, 2, numba=True)
+    assert (result1["data"] - result2["data"]).abs().max() < 10 ** -10
+    result3, _ = modelling_polyFit(data, "data", flagger, "110min", 2, numba=False)
+    assert result3["data"].equals(result1["data"])
+    result4, _ = modelling_polyFit(data, "data", flagger, 11, 2, numba=True, min_periods=11)
+    assert (result4["data"] - result2["data"]).abs().max() < 10 ** -10
+    data.iloc[13:16] = np.nan
+    result5, _ = modelling_polyFit(data, "data", flagger, 11, 2, numba=True, min_periods=9)
+    assert result5["data"].iloc[10:19].isna().all()
+
+
+@pytest.mark.parametrize("flagger", TF)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_2")])
+def test_modelling_rollingMean_forRegular(dat, flagger):
+    data, _ = dat(freq="10min", periods=30, initial_level=0, final_level=100, out_val=-100)
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
+    modelling_rollingMean(data, "data", flagger, 5, eval_flags=True, min_periods=0, center=True)
+    modelling_rollingMean(data, "data", flagger, 5, eval_flags=True, min_periods=0, center=False)
+
+@pytest.mark.parametrize("flagger", TF)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_1")])
+def test_modelling_mask(dat, flagger):
+    data, _ = dat()
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
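+    # in 'seasonal' mode all values whose timestamp falls inside the given
+    # season bounds are blanked out (NaN in the flagger's internal flags);
+    # minute-, hour- and day-based bound formats are checked in turn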
+    data_seasonal, flagger_seasonal = modelling_mask(data, "data", flagger, mode='seasonal', season_start="20:00",
+                                                     season_end="40:00", include_bounds=False)
+    flaggs = flagger_seasonal._flags["data"]
+    assert flaggs[np.logical_and(20 <= flaggs.index.minute, 40 >= flaggs.index.minute)].isna().all()
+    data_seasonal, flagger_seasonal = modelling_mask(data, "data", flagger, mode='seasonal', season_start="15:00:00",
+                                                     season_end="02:00:00")
+    flaggs = flagger_seasonal._flags["data"]
+    assert flaggs[np.logical_and(15 <= flaggs.index.hour, 2 >= flaggs.index.hour)].isna().all()
+    data_seasonal, flagger_seasonal = modelling_mask(data, "data", flagger, mode='seasonal', season_start="03T00:00:00",
+                                                     season_end="10T00:00:00")
+    flaggs = flagger_seasonal._flags["data"]
+    assert flaggs[np.logical_and(3 <= flaggs.index.hour, 10 >= flaggs.index.hour)].isna().all()
+
+    mask_ser = pd.Series(False, index=data["data"].index)
+    mask_ser[::5] = True
+    data["mask_ser"] = mask_ser
+    flagger = flagger.initFlags(data)
+    data_masked, flagger_masked = modelling_mask(data, "data", flagger, mode='mask_var', mask_var="mask_ser")
+    flaggs = flagger_masked._flags["data"]
+    assert flaggs[data_masked['mask_ser']].isna().all()
\ No newline at end of file
diff --git a/test/funcs/test_pattern_rec.py b/test/funcs/test_pattern_rec.py
new file mode 100644
index 0000000000000000000000000000000000000000..66ebcbfd1fdf13f5cb30cb5bd34a5a457a31dc3d
--- /dev/null
+++ b/test/funcs/test_pattern_rec.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import pytest
+from dios import dios
+
+from saqc.funcs.pattern_rec import *
+from test.common import initData, TESTFLAGGER
+
+
+@pytest.fixture
+def data():
+    return initData(cols=1, start_date="2016-01-01", end_date="2018-12-31", freq="1D")
+
+
+@pytest.fixture
+def field(data):
+    return data.columns[0]
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_flagPattern_wavelet(flagger):
+
+    data = pd.Series(0, index=pd.date_range(start="2000", end='2001', freq='1d'))
+    data.iloc[2:4] = 7
+    pattern = data.iloc[1:6]
+
+    data = dios.DictOfSeries(dict(data=data, pattern_data=pattern))
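+    # "pattern_data" holds the reference shape (the bump at indices 2 and 3);
+    # the detection should flag the matching region in "data"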
+
+    flagger = flagger.initFlags(data)
+    data, flagger = flagPattern_wavelet(data, "data", flagger, ref_field="pattern_data")
+    assert (flagger.isFlagged("data")[1:6]).all()
+    assert (flagger.isFlagged("data")[:1]).any()
+    assert (flagger.isFlagged("data")[7:]).any()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_flagPattern_dtw(flagger):
+
+    data = pd.Series(0, index=pd.date_range(start="2000", end='2001', freq='1d'))
+    data.iloc[2:4] = 7
+    pattern = data.iloc[1:6]
+
+    data = dios.DictOfSeries(dict(data=data, pattern_data=pattern))
+
+    flagger = flagger.initFlags(data)
+    data, flagger = flagPattern_dtw(data, "data", flagger, ref_field="pattern_data")
+    assert (flagger.isFlagged("data")[1:6]).all()
+    assert (flagger.isFlagged("data")[:1]).any()
+    assert (flagger.isFlagged("data")[7:]).any()
diff --git a/test/funcs/test_proc_functions.py b/test/funcs/test_proc_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..457c56f06b0da92dbe372a71ed9b570aa351dbd1
--- /dev/null
+++ b/test/funcs/test_proc_functions.py
@@ -0,0 +1,109 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+# see test/funcs/conftest.py for global fixtures "course_..."
+
+import pytest
+import numpy as np
+import pandas as pd
+import dios
+
+from saqc.funcs.proc_functions import (
+    proc_interpolateMissing,
+    proc_resample,
+    proc_transform,
+    proc_rollingInterpolateMissing,
+    proc_interpolateGrid,
+    proc_offsetCorrecture
+)
+from saqc.lib.ts_operators import linearInterpolation, polynomialInterpolation
+
+from test.common import TESTFLAGGER
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_rollingInterpolateMissing(course_5, flagger):
+    data, characteristics = course_5(periods=10, nan_slice=[5, 6])
+    field = data.columns[0]
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
+    dataInt, *_ = proc_rollingInterpolateMissing(
+        data, field, flagger, 3, func=np.median, center=True, min_periods=0, interpol_flag="UNFLAGGED"
+    )
+    assert dataInt[field][characteristics["missing"]].notna().all()
+    dataInt, *_ = proc_rollingInterpolateMissing(
+        data, field, flagger, 3, func=np.nanmean, center=False, min_periods=3, interpol_flag="UNFLAGGED"
+    )
+    assert dataInt[field][characteristics["missing"]].isna().all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_interpolateMissing(course_5, flagger):
+    data, characteristics = course_5(periods=10, nan_slice=[5])
+    field = data.columns[0]
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
+    dataLin, *_ = proc_interpolateMissing(data, field, flagger, method="linear")
+    dataPoly, *_ = proc_interpolateMissing(data, field, flagger, method="polynomial")
+    assert dataLin[field][characteristics["missing"]].notna().all()
+    assert dataPoly[field][characteristics["missing"]].notna().all()
+    data, characteristics = course_5(periods=10, nan_slice=[5, 6, 7])
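+    # the gap now spans three values; it is only filled when inter_limit
+    # exceeds the gap length, i.e. gaps of length >= inter_limit stay NaN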
+    dataLin1, *_ = proc_interpolateMissing(data, field, flagger, method="linear", inter_limit=2)
+    dataLin2, *_ = proc_interpolateMissing(data, field, flagger, method="linear", inter_limit=3)
+    dataLin3, *_ = proc_interpolateMissing(data, field, flagger, method="linear", inter_limit=4)
+    assert dataLin1[field][characteristics["missing"]].isna().all()
+    assert dataLin2[field][characteristics["missing"]].isna().all()
+    assert dataLin3[field][characteristics["missing"]].notna().all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_transform(course_5, flagger):
+    data, characteristics = course_5(periods=10, nan_slice=[5, 6])
+    field = data.columns[0]
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
+    data1, *_ = proc_transform(data, field, flagger, func=linearInterpolation)
+    assert data1[field][characteristics["missing"]].isna().all()
+    data1, *_ = proc_transform(data, field, flagger, func=lambda x: linearInterpolation(x, inter_limit=3))
+    assert data1[field][characteristics["missing"]].notna().all()
+    data1, *_ = proc_transform(
+        data, field, flagger, func=lambda x: polynomialInterpolation(x, inter_limit=3, inter_order=3)
+    )
+    assert data1[field][characteristics["missing"]].notna().all()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_resample(course_5, flagger):
+    data, characteristics = course_5(freq="1min", periods=30, nan_slice=[1, 11, 12, 22, 24, 26])
+    field = data.columns[0]
+    data = dios.DictOfSeries(data)
+    flagger = flagger.initFlags(data)
+    data1, *_ = proc_resample(data, field, flagger, "10min", np.mean, max_invalid_total_d=2, max_invalid_consec_d=1)
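+    # bucket 0 contains one NaN (still valid), bucket 1 two consecutive NaNs
+    # (violates max_invalid_consec_d=1), bucket 2 three NaNs in total
+    # (violates max_invalid_total_d=2) - hence the NaN pattern asserted below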
+    assert ~np.isnan(data1[field].iloc[0])
+    assert np.isnan(data1[field].iloc[1])
+    assert np.isnan(data1[field].iloc[2])
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_interpolateGrid(course_5, course_3, flagger):
+    data, _ = course_5()
+    data_grid, characteristics = course_3()
+    data['grid'] = data_grid.to_df()
+    flagger = flagger.initFlags(data)
+    dataInt, *_ = proc_interpolateGrid(data, 'data', flagger, '1h', 'time', grid_field='grid', inter_limit=10)
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+def test_offsetCorrecture(flagger):
+    data = pd.Series(0, index=pd.date_range('2000', freq='1d', periods=100), name='dat')
+    data.iloc[30:40] = -100
+    data.iloc[70:80] = 100
+    data = dios.DictOfSeries(data)
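+    # two artificial offset blocks (-100 and +100) are injected; after the
+    # correction the series is expected to be flat zero again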
+    flagger = flagger.initFlags(data)
+    data, flagger = proc_offsetCorrecture(data, 'dat', flagger, 40, 20, '3d', 1)
+    assert (data == 0).all()[0]
+
diff --git a/test/funcs/test_soil_moisture_tests.py b/test/funcs/test_soil_moisture_tests.py
index a5047d0e7b7a4c01cfab3ce5c29d617091ec1236..d4eb78f788c94a2999a6093c20528954e7e20394 100644
--- a/test/funcs/test_soil_moisture_tests.py
+++ b/test/funcs/test_soil_moisture_tests.py
@@ -4,6 +4,7 @@
 import pytest
 import numpy as np
 import pandas as pd
+import dios
 
 from saqc.funcs.soil_moisture_tests import sm_flagFrost, sm_flagPrecipitation, sm_flagConstants, sm_flagRandomForest
 
@@ -13,10 +14,11 @@ from test.common import TESTFLAGGER, initData
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_sm_flagFrost(flagger):
     index = pd.date_range(start="2011-01-01 00:00:00", end="2011-01-01 03:00:00", freq="5min")
-    data = pd.DataFrame(
-        {"soil_moisture": np.linspace(0, 1, index.size), "soil_temperature": np.linspace(1, -1, index.size),},
-        index=index,
-    )
+
+    sm = pd.Series(data=np.linspace(0, +1, index.size), index=index)
+    st = pd.Series(data=np.linspace(1, -1, index.size), index=index)
+    data = dios.DictOfSeries([sm, st], columns=["soil_moisture", "soil_temperature"])
+
     flagger = flagger.initFlags(data)
     data, flagger_result = sm_flagFrost(data, "soil_moisture", flagger, "soil_temperature")
     flag_assertion = np.arange(19, 37)
@@ -27,13 +29,17 @@ def test_sm_flagFrost(flagger):
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_flagSoilMoisturePrecipitationEvents(flagger):
     index = pd.date_range(start="2011-01-01 00:00:00", end="2011-01-04 00:00:00", freq="15min")
-    data = pd.DataFrame(
-        {"soil_moisture": np.linspace(0, 1, index.size), "precipitation": np.linspace(1, 1, index.size),}, index=index,
-    )
-    data["precipitation"]["2011-01-03"] = 0
-    data["precipitation"]["2011-01-04"] = 0
+
+    sm = pd.Series(data=np.linspace(0, 1, index.size), index=index)
+    pr = pd.Series(data=np.linspace(1, 1, index.size), index=index)
+    data = dios.DictOfSeries([sm, pr], columns=["soil_moisture", "precipitation"])
+
+    data.loc["2011-01-03", "precipitation"] = 0
+    data.loc["2011-01-04", "precipitation"] = 0
+
     flagger = flagger.initFlags(data)
     data, flag_result = sm_flagPrecipitation(data, "soil_moisture", flagger, "precipitation")
+
     flag_assertion = [288, 287]
     flag_result = flag_result.getFlags("soil_moisture")
     test_sum = (flag_result[flag_assertion] == flagger.BAD).sum()
@@ -42,10 +48,9 @@ def test_flagSoilMoisturePrecipitationEvents(flagger):
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 def test_sm_flagConstantss(flagger):
-
     data = initData(1, start_date="2011-01-01 00:00:00", end_date="2011-01-02 00:00:00", freq="5min")
     data.iloc[5:25] = 0
-    data.iloc[100:120] = data.max()[0]
+    data.iloc[100:120] = data.apply(max)[0]
     field = data.columns[0]
     flagger = flagger.initFlags(data)
     data, flagger = sm_flagConstants(data, field, flagger, window="1h", precipitation_window="1h")
@@ -70,6 +75,7 @@ def test_sm_flagRandomForest(flagger):
     field = "SM2"
 
     # prepare flagsframe
+    data = dios.to_dios(data)
     flagger = flagger.initFlags(data)
     flagger = flagger.setFlags(field, loc=mask_bad[field])
     flagger = flagger.setFlags(field, loc=mask_unflagged[field], flag=flagger.UNFLAGGED)
diff --git a/test/funcs/test_spikes_detection.py b/test/funcs/test_spikes_detection.py
index b7876483a26cda6f96659da9db9c4b2e536fa071..cfdeb79b0a6a5f612f3b2c5a88cdd1e8fdaa61c6 100644
--- a/test/funcs/test_spikes_detection.py
+++ b/test/funcs/test_spikes_detection.py
@@ -1,9 +1,11 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+# see test/funcs/conftest.py for global fixtures "course_..."
 import pytest
 import numpy as np
 import pandas as pd
+import dios
 
 from saqc.funcs.spikes_detection import (
     spikes_flagSpektrumBased,
@@ -11,7 +13,8 @@ from saqc.funcs.spikes_detection import (
     spikes_flagSlidingZscore,
     spikes_flagBasic,
     spikes_flagRaise,
-    spikes_flagOddWater,
+    spikes_flagMultivarScores,
+    spikes_flagGrubbs,
 )
 
 from test.common import TESTFLAGGER
@@ -20,11 +23,11 @@ from test.common import TESTFLAGGER
 @pytest.fixture(scope="module")
 def spiky_data():
     index = pd.date_range(start="2011-01-01", end="2011-01-05", freq="5min")
-    spiky_series = pd.DataFrame(dict(spiky_data=np.linspace(1, 2, index.size)), index=index)
-    spiky_series.iloc[100] = 100
-    spiky_series.iloc[1000] = -100
+    s = pd.Series(np.linspace(1, 2, index.size), index=index, name="spiky_data")
+    s.iloc[100] = 100
+    s.iloc[1000] = -100
     flag_assertion = [100, 1000]
-    return spiky_series, flag_assertion
+    return dios.DictOfSeries(s), flag_assertion
 
 
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
@@ -52,7 +55,6 @@ def test_flagMad(spiky_data, flagger):
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("method", ["modZ", "zscore"])
 def test_slidingOutlier(spiky_data, flagger, method):
-
     # test for numeric input
     data = spiky_data[0]
     field, *_ = data.columns
@@ -74,12 +76,13 @@ def test_flagSpikesBasic(spiky_data, flagger):
     data = spiky_data[0]
     field, *_ = data.columns
     flagger = flagger.initFlags(data)
-    data, flagger_result = spikes_flagBasic(data, field, flagger, thresh=60, tolerance=10, window_size="20min")
+    data, flagger_result = spikes_flagBasic(data, field, flagger, thresh=60, tolerance=10, window="20min")
     flag_result = flagger_result.getFlags(field)
     test_sum = (flag_result[spiky_data[1]] == flagger.BAD).sum()
     assert test_sum == len(spiky_data[1])
 
 
+# see test/funcs/conftest.py for the 'course_N' fixtures
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize(
     "dat",
@@ -102,16 +105,36 @@ def test_flagSpikesLimitRaise(dat, flagger):
     assert not flagger_result.isFlagged(field)[characteristics["drop"]].any()
 
 
+# see test/funcs/conftest.py for the 'course_N' fixtures
 @pytest.mark.parametrize("flagger", TESTFLAGGER)
 @pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_3")])
-def test_flagSpikesOddWater(dat, flagger):
+def test_flagMultivarScores(dat, flagger):
     data1, characteristics = dat(periods=1000, initial_level=5, final_level=15, out_val=50)
     data2, characteristics = dat(periods=1000, initial_level=20, final_level=1, out_val=30)
     field = "dummy"
     fields = ["data1", "data2"]
-    data = pd.DataFrame({"data1": data1.squeeze(), "data2": data2.squeeze()}, index=data1.index)
+    s1, s2 = data1.squeeze(), data2.squeeze()
+    s1 = pd.Series(data=s1.values, index=s1.index)
+    s2 = pd.Series(data=s2.values, index=s1.index)
+    data = dios.DictOfSeries([s1, s2], columns=["data1", "data2"])
     flagger = flagger.initFlags(data)
-    _, flagger_result = spikes_flagOddWater(
-        data, field, flagger, fields=fields, bin_frac=50, trafo="np.log", iter_start=0.95, n_neighbors=10
+    _, flagger_result = spikes_flagMultivarScores(
+        data, field, flagger, fields=fields, binning=50, trafo=np.log, iter_start=0.95, n_neighbors=10
+    )
+    for field in fields:
+        isflagged = flagger_result.isFlagged(field)
+        assert isflagged[characteristics["raise"]].all()
+        assert not isflagged[characteristics["return"]].any()
+        assert not isflagged[characteristics["drop"]].any()
+
+
+@pytest.mark.parametrize("flagger", TESTFLAGGER)
+@pytest.mark.parametrize("dat", [pytest.lazy_fixture("course_3")])
+def test_grubbs(dat, flagger):
+    data, char_dict = dat(
+        freq="10min", periods=45, initial_level=0, final_level=0, crowd_size=1, crowd_spacing=3, out_val=-10
     )
-    assert flagger_result.isFlagged(fields[0])[characteristics["raise"]].all()
+    flagger = flagger.initFlags(data)
+    data, result_flagger = spikes_flagGrubbs(data, "data", flagger, winsz=20, min_periods=15)
+    assert result_flagger.isFlagged("data")[char_dict["drop"]].all()
+
diff --git a/test/lib/test_rolling.py b/test/lib/test_rolling.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7245b3b0c7564c8c8ba50a8f643597cd84487d0
--- /dev/null
+++ b/test/lib/test_rolling.py
@@ -0,0 +1,220 @@
+import pytest
+
+from saqc.lib.rolling import customRoller, Rolling
+import pandas as pd
+import numpy as np
+
+FUNCTS = ['count', 'sum', 'mean', 'median', 'var', 'std', 'min', 'max', 'corr', 'cov', 'skew', 'kurt', ]
+
+OTHA = ['apply',
+        'aggregate',  # needs param func, e.g. func='min'
+        'quantile',  # needs param quantile=0.5 (0<=q<=1)
+        ]
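+# NOTE: the OTHA methods are not part of the FUNCTS parametrisation below,
+# since each of them needs additional arguments to be called.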
+
+
+@pytest.fixture
+def data():
+    return data_()
+
+
+def data_():
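+    # Build the test series: a monthly series (shifted by one day) and a short
+    # daily series in May 2000, concatenated and sorted by time; one value is
+    # replaced by NaN.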
+    s1 = pd.Series(1., index=pd.date_range("1999/12", periods=12, freq='1M') + pd.Timedelta('1d'))
+    s2 = pd.Series(1., index=pd.date_range('2000/05/15', periods=8, freq='1d'))
+    s = pd.concat([s1, s2]).sort_index()
+    s.name = 's'
+    s.iloc[15] = np.nan  # inject one NaN by position
+    return s
+
+
+len_s = len(data_())
+
+
+def make_num_kws():
+    kws_list = []
+    n = list(range(len_s))
+    for window in n:
+        mp = list(range(window))
+        for min_periods in [None] + mp:
+            if min_periods is not None and min_periods > window:
+                continue
+            for center in [False, True]:
+                kws_list.append(dict(window=window, min_periods=min_periods, center=center))
+    return kws_list
+
+
+def make_dt_kws():
+    kws_list = []
+    n = [0, 1, 2, 10, 32, 70, 120]
+    mp = list(range(len_s))
+    for closed in ['right', 'both', 'neither', 'left']:
+        for window in n:
+            for min_periods in [None] + mp:
+                kws_list.append(dict(window=f'{window}d', min_periods=min_periods, closed=closed))
+    return kws_list
+
+
+def check_series(result, expected):
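+    # NaN positions must match exactly; the remaining values are compared
+    # elementwise after dropping the NaNs.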
+    if not (result.isna() == expected.isna()).all():
+        return False
+    result = result.dropna()
+    expected = expected.dropna()
+    if not (result == expected).all():
+        return False
+    return True
+
+
+def print_diff(s, result, expected):
+    df = pd.DataFrame()
+    df['s'] = s
+    df['exp'] = expected
+    df['res'] = result
+    print(df)
+
+
+def call_rolling_function(roller, func):
+    if isinstance(func, str):
+        return getattr(roller, func)()
+    else:
+        return getattr(roller, 'apply')(func)
+
+
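+# The conformity tests below run customRoller against pandas' built-in rolling:
+# for each parameter combination either both must raise the same exception type,
+# or both must produce equal results (including NaN positions).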
+@pytest.mark.parametrize("kws", make_dt_kws(), ids=lambda x: str(x))
+@pytest.mark.parametrize("func", FUNCTS)
+def test_pandas_conform_dt(data, kws, func):
+    s = data
+    try:
+        expR = s.rolling(**kws)
+        expected = call_rolling_function(expR, func)
+    except Exception as e0:
+        # pandas failed, so we should also fail
+        try:
+            resR = customRoller(s, **kws)
+            result = call_rolling_function(resR, func)
+        except Exception as e1:
+            assert type(e0) == type(e1)
+            return
+        assert False, 'pandas failed, but we succeeded'
+
+    resR = customRoller(s, **kws)
+    result = call_rolling_function(resR, func)
+    success = check_series(result, expected)
+    if success:
+        return
+    print_diff(s, result, expected)
+    assert False
+
+
+@pytest.mark.parametrize("kws", make_num_kws(), ids=lambda x: str(x))
+@pytest.mark.parametrize("func", FUNCTS)
+def test_pandas_conform_num(data, kws, func):
+    s = data
+    try:
+        expR = s.rolling(**kws)
+        expected = call_rolling_function(expR, func)
+    except Exception as e0:
+        # pandas failed, so we should also fail
+        try:
+            resR = customRoller(s, **kws)
+            result = call_rolling_function(resR, func)
+        except Exception as e1:
+            assert type(e0) == type(e1)
+            return
+        assert False, 'pandas failed, but we succeeded'
+
+    resR = customRoller(s, **kws)
+    result = call_rolling_function(resR, func)
+    success = check_series(result, expected)
+    if success:
+        return
+    print_diff(s, result, expected)
+    assert False
+
+
+@pytest.mark.parametrize("kws", make_dt_kws(), ids=lambda x: str(x))
+@pytest.mark.parametrize("func", FUNCTS)
+def test_forward_dt(data, kws, func):
+    s = data
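+    # Forward-looking windows are emulated with plain pandas as a reference:
+    # reverse the series, apply an ordinary (backward-looking) rolling window,
+    # and reverse the result again. customRoller(forward=True) must agree.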
+    try:
+        expR = pd.Series(s.values[::-1], s.index[::-1]).rolling(**kws)
+        expected = call_rolling_function(expR, func)[::-1]
+    except Exception as e0:
+        # pandas failed, so we should also fail
+        try:
+            resR = customRoller(s, forward=True, **kws)
+            result = call_rolling_function(resR, func)
+        except Exception as e1:
+            assert type(e0) == type(e1)
+            return
+        assert False, 'pandas failed, but we succeeded'
+
+    resR = customRoller(s, forward=True, **kws)
+    result = call_rolling_function(resR, func)
+    success = check_series(result, expected)
+    if success:
+        return
+    print_diff(s, result, expected)
+    assert False
+
+
+@pytest.mark.parametrize("kws", make_num_kws(), ids=lambda x: str(x))
+@pytest.mark.parametrize("func", FUNCTS)
+def test_forward_num(data, kws, func):
+    s = data
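+    # Forward-looking windows are emulated with plain pandas as a reference:
+    # reverse the series, apply an ordinary (backward-looking) rolling window,
+    # and reverse the result again. customRoller(forward=True) must agree.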
+    try:
+        expR = pd.Series(s.values[::-1], s.index[::-1]).rolling(**kws)
+        expected = call_rolling_function(expR, func)[::-1]
+    except Exception as e0:
+        # pandas failed, so we should also fail
+        try:
+            resR = customRoller(s, forward=True, **kws)
+            result = call_rolling_function(resR, func)
+        except Exception as e1:
+            assert type(e0) == type(e1)
+            return
+        assert False, 'pandas failed, but we succeeded'
+
+    resR = customRoller(s, forward=True, **kws)
+    result = call_rolling_function(resR, func)
+    success = check_series(result, expected)
+    if success:
+        return
+    print_diff(s, result, expected)
+    assert False
+
+
+def dt_center_kws():
+    kws_list = []
+    for window in range(2, 10, 2):
+        for min_periods in range(1, window + 1):
+            kws_list.append(dict(window=window, min_periods=min_periods))
+    return kws_list
+
+
+@pytest.mark.parametrize("kws", dt_center_kws(), ids=lambda x: str(x))
+def test_centering_w_dtindex(kws):
+    print(kws)
+    s = pd.Series(0., index=pd.date_range("2000", periods=10, freq='1H'))
+    s[4:7] = 1
+
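+    # Asserts that customRoller's centering on a datetime index matches pandas'
+    # integer-window centering on this evenly spaced (hourly) series: first with
+    # closed='both', then (with window and min_periods each reduced by one) with
+    # closed='neither'.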
+    w = kws.pop('window')
+    mp = kws.pop('min_periods')
+
+    pd_kw = dict(window=w, center=True, min_periods=mp)
+    our_kw = dict(window=f'{w}h', center=True, closed='both', min_periods=mp)
+    expected = s.rolling(**pd_kw).sum()
+    result = customRoller(s, **our_kw).sum()
+    success = check_series(result, expected)
+    if not success:
+        print_diff(s, result, expected)
+        assert False
+
+    w -= 1
+    mp -= 1
+    pd_kw = dict(window=w, center=True, min_periods=mp)
+    our_kw = dict(window=f'{w}h', center=True, closed='neither', min_periods=mp)
+    expected = s.rolling(**pd_kw).sum()
+    result = customRoller(s, **our_kw).sum()
+    success = check_series(result, expected)
+    if not success:
+        print_diff(s, result, expected)
+        assert False