Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (283)
Showing with 636 additions and 222 deletions
@@ -5,3 +5,4 @@
*.feather filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
resources/machine_learning/data/soil_moisture_mwe.feather filter=lfs diff=lfs merge=lfs -text
saqc/_version.py export-subst
@@ -29,7 +29,7 @@ jobs:
fail-fast: false
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
defaults:
run:
# somehow this also works for windows O.o ??
@@ -58,7 +58,6 @@ jobs:
- name: run SaQC test suite
run: |
pytest tests dios/test -Werror
python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
# - name: run doc tests
@@ -2,6 +2,20 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
# ===========================================================
# Hints
# ===========================================================
# $PYPI_PKG_NAME
# The variable PYPI_PKG_NAME is used in setup.py to determine
# how to name the tarball package. If not set the package is
# named 'saqc'.
# $TESTPYPI_TOKEN
# The upload token used for testpypi. Set it on the GitLab
# page and enable masking to prevent it from being revealed.
# ===========================================================
# preparation
# ===========================================================
@@ -16,11 +30,13 @@ stages:
- deploy
default:
image: python:3.10
image: python:3.11
before_script:
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install -r tests/requirements.txt
- apt update
- apt install -y xvfb
# ===========================================================
# Compliance stage
@@ -61,8 +77,10 @@ coverage:
stage: test
allow_failure: true
script:
- export DISPLAY=:99
- Xvfb :99 &
- pip install pytest-cov coverage
- pytest --cov=saqc tests --ignore=tests/fuzzy -Werror
- pytest --cov=saqc tests --ignore=tests/fuzzy tests/extras -Werror
after_script:
- coverage xml
# regex to find the coverage percentage in the job output
@@ -75,11 +93,13 @@ coverage:
path: coverage.xml
python38:
python39:
stage: test
image: python:3.8
image: python:3.9
script:
- pytest tests -Werror --junitxml=report.xml
- export DISPLAY=:99
- Xvfb :99 &
- pytest tests -Werror --junitxml=report.xml --ignore=tests/extras
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
artifacts:
when: always
@@ -87,42 +107,49 @@ python38:
junit: report.xml
python39:
python310:
stage: test
image: python:3.9
image: python:3.10
script:
- pytest tests -Werror --junitxml=report.xml
- export DISPLAY=:99
- Xvfb :99 &
- pytest tests -Werror --junitxml=report.xml --ignore=tests/extras
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
artifacts:
when: always
reports:
junit: report.xml
python310:
python311:
stage: test
image: python:3.10
image: python:3.11
script:
- pytest tests -Werror --junitxml=report.xml
- export DISPLAY=:99
- Xvfb :99 &
- pytest tests -Werror --junitxml=report.xml --ignore=tests/extras
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
artifacts:
when: always
reports:
junit: report.xml
# python311:
# stage: test
# image: python:3.11
# script:
# - pytest tests -Werror --junitxml=report.xml
# - python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
# artifacts:
# when: always
# reports:
# junit: report.xml
python312:
stage: test
image: python:3.12
script:
- export DISPLAY=:99
- Xvfb :99 &
- pytest tests -Werror --junitxml=report.xml --ignore=tests/extras
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
artifacts:
when: always
reports:
junit: report.xml
doctest:
stage: test
variables:
COLUMNS: 200
script:
- cd docs
- pip install -r requirements.txt
@@ -134,24 +161,72 @@ doctest:
# Building stage
# ===========================================================
# check if we are able to build a wheel
wheel:
# and if the import works
wheel39:
stage: build
image: python:3.9
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
docs:
wheel310:
stage: build
image: python:3.10
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- cd docs
- pip install -r requirements.txt
- make doc
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
wheel311:
stage: build
image: python:3.11
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
wheel312:
stage: build
image: python:3.12
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
# ===========================================================
# Extra Pipeline (run with a successful run of all other jobs on develop)
# ===========================================================
upload_testpypi:
stage: deploy
only:
- develop
except:
- schedules
variables:
PYPI_PKG_NAME: "saqc-dev"
TWINE_USERNAME: __token__
TWINE_PASSWORD: $TESTPYPI_TOKEN
script:
- pip install build twine
- python -m build
- twine check --strict dist/*
- twine upload -r testpypi dist/*
# make html docu with sphinx
pages:
stage: deploy
@@ -5,13 +5,101 @@ SPDX-License-Identifier: GPL-3.0-or-later
-->
# Changelog
## Unreleased
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.6.0...develop)
### Added
- `flagPlateaus`: added function to search and flag outlierish value plateaus of certain temporal extension
- `flagUniLOF`: added dispatch to Local Outlier Probability (*LoOP*) variant
- `flagUniLOF`: made `thresh` optional
- `flagPlateaus`: added function to search and flag anomalous value plateaus of certain temporal extension
### Changed
### Removed
### Fixed
- `flagConstants`: fixed bug where the last `min_periods` values would never get flagged
### Deprecated
## [2.6.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.6.0) - 2024-04-15
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.5.0...v2.6.0)
### Added
- `reindex`: base reindexer function
- `flagGeneric`, `processGeneric`: target broadcasting and numpy array support
- `SaQC`: automatic translation of incoming flags
- Option to change the flagging scheme after initialization
- `flagByClick`: manually assign flags using a graphical user interface
- `SaQC`: support for selection, slicing and setting of items by subscription on `SaQC` objects
- `transferFlags` is a multivariate function
- `plot`: added `yscope` keyword
- `setFlags`: function to replace `flagManual`
- `flagUniLOF`: added parameter `slope_correct` to correct for overflagging at relatively steep data value slopes
- `History`: added option to change aggregation behavior
- "horizontal" axis / multivariate mode for `rolling`
- Translation scheme `AnnotatedFloatScheme`
### Changed
- `SaQC.flags` always returns a `DictOfSeries`
### Removed
- `SaQC` methods deprecated in version 2.4: `interpolate`, `interpolateIndex`, `interpolateInvalid`, `roll`, `linear`, `shift`, `flagCrossStatistics`
- Method `Flags.toDios` deprecated in version 2.4
- Method `DictOfSeries.index_of` deprecated in version 2.4
- Option `"complete"` for parameter `history` of method `plot`
- Option `"cycleskip"` for parameter `ax_kwargs` of method `plot`
- Parameter `phaseplot` from method `plot`
### Fixed
- `flagConstants`: fixed flagging of rolling ramps
- `Flags`: add meta entry to imported flags
- group operations were overwriting existing flags
- `SaQC._construct`: was not working for inherited classes
- `processGeneric`: improved numpy function compatibility
### Deprecated
- `flagManual` in favor of `setFlags`
- `inverse_**` options for `concatFlags` parameter `method` in favor of `invert=True`
- `flagRaise` with delegation to better replacements `flagZScore`, `flagUniLOF`, `flagJumps` or `flagOffset`
- `flagByGrubbs` with delegation to better replacements `flagZScore`, `flagUniLOF`
- `flagMVScore` with delegation to manual application of the steps
## [2.5.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.5.0) - 2023-09-05
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.1...v2.5.0)
### Added
- WMO standard mean aggregations
- Function selection via strings for most function-expecting parameters
- `SaQC.plot`:
- enable multivariate plots
- keyword `plot_kwargs` to pass matplotlib related arguments
- CLI:
- `--version` to print the SaQC version
- `-ll` as a shorthand for `--log-level`
- `--json-field` to use a non-root element of a json file.
- basic json support for CLI config files, which are detected by their `.json` extension.
- `SaQC.flagScatterLowpass`: option to select function based on string names.
- Checks and unified error message for common function inputs.
### Changed
- Require pandas >= 2.0
- `SaQC.flagUniLOF` and `SaQC.assignUniLOF`: changed parameter `fill_na` to type `bool`.
- `SaQC.plot`:
- changed default color for single variables to `black` with `80% transparency`
- added separate legend for flags
### Removed
- `SaQC.plot`: option to plot with complete history (`history="complete"`)
- Support for Python 3.8
### Fixed
- `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed to `min_period`
  was only recognised if `window` was also a tuple.
- `SaQC.propagateFlags` was overwriting existing flags
### Deprecated
- `SaQC.andGroup` and `SaQC.orGroup`: option to pass dictionaries to `group`.
- `SaQC.plot`:
- `phaseplot` in favor of usage with `mode="biplot"`
- `cyclestart` in favor of usage with `marker_kwargs`
- `SaQC.flagStatLowPass` in favor of `SaQC.flagScatterLowpass`
## [2.4.1](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...v.2.4.1)
### Added
### Changed
- pin pandas to versions >= 2.0
### Removed
- removed deprecated `DictOfSeries.to_df`
### Fixed
### Deprecated
## [2.4.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.0) - 2023-04-25
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.3.0...v2.4.0)
@@ -21,11 +109,9 @@ SPDX-License-Identifier: GPL-3.0-or-later
- Expose the `History` via `SaQC._history`
- Config function `cv` (coefficient of variation)
### Changed
- Deprecate `interpolate`, `linear` and `shift` in favor of `align`
- Deprecate `roll` in favor of `rolling`
- Rename `interpolateInvalid` to `interpolate`
- Rename `interpolateIndex` to `align`
- Deprecate `flagMVScore` parameters: `partition_min` in favor of `window`, `partition_min` in favor of `min_periods`, `min_periods` in favor of `min_periods_r`
- Rewrite of `dios.DictOfSeries`
### Removed
- Parameter `limit` from `align`
- Parameter `max_na_group_flags`, `max_na_flags`, `flag_func`, `freq_check` from `resample`
@@ -33,9 +119,13 @@ SPDX-License-Identifier: GPL-3.0-or-later
- `func` arguments in text configurations were not parsed correctly
- fail on duplicated arguments to test methods
- `resample` was not writing meta entries
- `flagByStatLowPass` was overwriting existing flags
- `flagByScatterLowpass` was overwriting existing flags
- `flagUniLOF` and `flagLOF` were overwriting existing flags
### Deprecated
- Deprecate `flagMVScore` parameters: `partition` in favor of `window`, `partition_min` in favor of `min_periods`, `min_periods` in favor of `min_periods_r`
- Deprecate `interpolate`, `linear` and `shift` in favor of `align`
- Deprecate `roll` in favor of `rolling`
- Deprecate `DictOfSeries.to_df` in favor of `DictOfSeries.to_pandas`
## [2.3.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.3.0) - 2023-01-17
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.2.1...v2.3.0)
### Added
......
@@ -3,7 +3,7 @@ title: SaQC - System for automated Quality Control
message: "Please cite this software using these metadata."
type: software
version: 2.0.0
doi: https://doi.org/10.5281/zenodo.5888547
doi: 10.5281/zenodo.5888547
date-released: "2021-11-25"
license: "GPL-3.0"
repository-code: "https://git.ufz.de/rdm-software/saqc"
@@ -59,7 +59,7 @@ It is not a shame to name a parameter just `n` or `alpha` etc., if, for example,
### Test Functions
- testnames: [testmodule_]flagTestName
- testnames: flagTestName
## Formatting
We use [black](https://black.readthedocs.io/en/stable/) in its default settings.
@@ -70,13 +70,17 @@ Only absolute imports are accepted.
# Development Workflow
## Releases
Every release is planned through an associated milestone. This milestone should have an end date, usually the first of the month the next release is planned for, and should contain all issues/merge requests to include.
## Repository Structure
- `master` - branch:
- `main` - branch:
+ Stable and usually protected.
+ Regular merges from `develop`; these merges are tagged and increase at least the minor version.
+ Irregular merges from `develop` in case of critical bugs. Such merges increase at least the patch level.
+ Merges into `master` usually lead to a PyPI release.
+ Merges into `main` usually lead to a PyPI release.
- `develop` - branch:
+ The main development branch, no hard stability requirements/guarantees.
+ Merges into `develop` should mostly follow a [Merge Request Workflow](#merge-request-workflow); minor changes can, however, be committed directly. Such minor changes include:
@@ -105,6 +109,6 @@ Only absolute imports are accepted.
release date. Commits to `develop` after the merge window of a release closes need to be integrated during the subsequent release
cycle.
- The release cycle is organized by Gitlab Milestones, the expiration date of a certain milestone indicates the end of the
related merge window, the actual merge into `master` and the accompanying release is scheduled for the week after the
related merge window, the actual merge into `main` and the accompanying release is scheduled for the week after the
milestone's expiration date.
- Issues and Merge Requests can and should be associated with these milestones, as this helps in the organization of review activities.
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
@@ -14,30 +14,39 @@ SPDX-License-Identifier: GPL-3.0-or-later
# SaQC: System for automated Quality Control
Anomalies and errors are the rule, not the exception, when working with
time series data. This is especially true if such data originates
from in-situ measurements of environmental properties.
Almost all applications, however, implicitly rely on data that complies
with some definition of 'correct'.
In order to infer reliable data products and tools, there is no alternative
to quality control. SaQC provides all the building blocks to comfortably
bridge the gap between 'usually faulty' and 'expected to be corrected' in
an accessible, consistent, objective and reproducible way.
`SaQC` is a tool/framework/application to quality control time series data.
It provides a growing collection of algorithms and methods to analyze, annotate and
process timeseries data. It supports the end-to-end enrichment of metadata
and provides various user interfaces: 1) a Python API, 2) a command line interface
with a text-based configuration system, and 3) a
[web-based user interface](https://webapp.ufz.de/saqc-config-app/).
`SaQC` is designed with a particular focus on the needs of active data professionals,
including sensor hardware-oriented engineers, domain experts, and data scientists,
all of whom can benefit from its capabilities to improve the quality standards of given data products.
For a (continuously improving) overview of features, typical usage patterns,
the specific system components and how to customize `SaQC` to your specific
the specific system components and how to customize `SaQC` to your own
needs, please refer to our
[online documentation](https://rdm-software.pages.ufz.de/saqc/index.html).
## Installation
SaQC is available on the Python Package Index ([PyPI](https://pypi.org/)) and
`SaQC` is available on the Python Package Index ([PyPI](https://pypi.org/)) and
can be installed using [pip](https://pip.pypa.io/en/stable/):
```sh
python -m pip install saqc
```
For a more detailed installation guide, see the [installation guide](https://rdm-software.pages.ufz.de/saqc/gettingstarted/InstallationGuide.html).
Additionally, `SaQC` is available via conda and can be installed with:
```sh
conda create -c conda-forge -n saqc saqc
```
For more details, see the [installation guide](https://rdm-software.pages.ufz.de/saqc/gettingstarted/InstallationGuide.html).
## Usage
@@ -53,11 +62,11 @@ could look like [this](https://git.ufz.de/rdm-software/saqc/raw/develop/docs/res
```
varname ; test
#----------; ---------------------------------------------------------------------
SM2 ; shift(freq="15Min")
SM2 ; align(freq="15Min")
'SM(1|2)+' ; flagMissing()
SM1 ; flagRange(min=10, max=60)
SM2 ; flagRange(min=10, max=40)
SM2 ; flagMAD(window="30d", z=3.5)
SM2 ; flagZScore(window="30d", thresh=3.5, method='modified', center=False)
Dummy ; flagGeneric(field=["SM1", "SM2"], func=(isflagged(x) | isflagged(y)))
```
@@ -92,30 +101,27 @@ data = pd.read_csv(
index_col=0, parse_dates=True,
)
saqc = SaQC(data=data)
saqc = (saqc
.shift("SM2", freq="15Min")
.flagMissing("SM(1|2)+", regex=True)
.flagRange("SM1", min=10, max=60)
.flagRange("SM2", min=10, max=40)
.flagMAD("SM2", window="30d", z=3.5)
.flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y))))
qc = SaQC(data=data)
qc = (qc
.align("SM2", freq="15Min")
.flagMissing("SM(1|2)+", regex=True)
.flagRange("SM1", min=10, max=60)
.flagRange("SM2", min=10, max=40)
.flagZScore("SM2", window="30d", thresh=3.5, method='modified', center=False)
.flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y))))
```
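After processing, both the data and the flags can be retrieved from the `qc` object. A quick sketch (continuing the example above; both accessors return `DictOfSeries` containers):

```python
# Retrieve processed data and final flags from the pipeline above.
# Both are DictOfSeries containers, convertible to plain pandas objects.
processed = qc.data
flags = qc.flags
```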
A more detailed description of the Python API is available in the
A more detailed description of the Python API is available in the
[respective section](https://rdm-software.pages.ufz.de/saqc/gettingstarted/TutorialAPI.html)
of the documentation.
## Changelog
All notable changes to this project will be documented in [CHANGELOG.md](CHANGELOG.md).
## Get involved
### Contributing
You found a bug or you want to suggest some cool features? Please refer to our [contributing guidelines](CONTRIBUTING.md) to see how you can contribute to SaQC.
You found a bug or you want to suggest new features? Please refer to our [contributing guidelines](CONTRIBUTING.md) to see how you can contribute to SaQC.
### User support
If you need help or have a question, you can use the SaQC user support mailing list: [saqc-support@ufz.de](mailto:saqc-support@ufz.de)
If you need help or have questions, send us an email to [saqc-support@ufz.de](mailto:saqc-support@ufz.de)
## Copyright and License
Copyright(c) 2021, [Helmholtz-Zentrum für Umweltforschung GmbH -- UFZ](https://www.ufz.de). All rights reserved.
@@ -125,17 +131,18 @@ Copyright(c) 2021, [Helmholtz-Zentrum für Umweltforschung GmbH -- UFZ](https://
For full details, see [LICENSE](LICENSE.md).
## Acknowledgements
...
## Publications
coming soon...
> Lennart Schmidt, David Schäfer, Juliane Geller, Peter Lünenschloss, Bert Palm, Karsten Rinke, Corinna Rebmann, Michael Rode, Jan Bumberger, System for automated Quality Control (SaQC) to enable traceable and reproducible data streams in environmental science, Environmental Modelling & Software, 2023, 105809, ISSN 1364-8152, https://doi.org/10.1016/j.envsoft.2023.105809. (https://www.sciencedirect.com/science/article/pii/S1364815223001950)
## How to cite SaQC
If SaQC is advancing your research, please cite as:
> Schäfer, David, Palm, Bert, Lünenschloß, Peter, Schmidt, Lennart, & Bumberger, Jan. (2023). System for automated Quality Control - SaQC (2.3.0). Zenodo. https://doi.org/10.5281/zenodo.5888547
or
> Lennart Schmidt, David Schäfer, Juliane Geller, Peter Lünenschloss, Bert Palm, Karsten Rinke, Corinna Rebmann, Michael Rode, Jan Bumberger, System for automated Quality Control (SaQC) to enable traceable and reproducible data streams in environmental science, Environmental Modelling & Software, 2023, 105809, ISSN 1364-8152, https://doi.org/10.1016/j.envsoft.2023.105809. (https://www.sciencedirect.com/science/article/pii/S1364815223001950)
-----------------
<a href="https://www.ufz.de/index.php?en=33573">
@@ -30,7 +30,7 @@ clean:
# make documentation
doc:
# generate environment table from dictionary
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@ $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# run tests
test:
@@ -24,11 +24,14 @@ package_path = os.path.abspath("..")
os.environ["PYTHONPATH"] = ":".join((package_path, os.environ.get("PYTHONPATH", "")))
# ---------- Version string --------------------------------------------------
# read the version string without importing it
vdict = {}
with open("../saqc/version.py") as f:
exec(f.read(), vdict)
version = vdict["__version__"]
# TODO: why do we need both the `version` and the `release` variables?
# import saqc for versioning, but prevent plots to pop up
# by setting mpl backend to non-interactive
import saqc.funcs
version = saqc.__version__
saqc.funcs.tools._MPL_DEFAULT_BACKEND = "Agg"
# -- Customize logging -------------------------------------------------------
@@ -7,6 +7,18 @@
Cook Books
==========
.. toctree::
:caption: Cookbooks
:maxdepth: 1
:hidden:
DataRegularisation
OutlierDetection
ResidualOutlierDetection
DriftDetection
MultivariateFlagging
../documentation/GenericFunctions
.. grid:: 2
:gutter: 2
......@@ -52,6 +64,16 @@ Cook Books
+++
*Wrap your custom logical and arithmetic expressions with the generic functions*
.. grid-item-card:: Drift Detection
:link: DriftDetection
:link-type: doc
* define metrics to measure distance between data series
* automatically determine majority and anomalous data groups
+++
*Detecting data chunks drifting apart from a reference group*
.. grid-item-card:: Modelling, Residuals and Arithmetics
:link: ResidualOutlierDetection
:link-type: doc
@@ -315,10 +315,10 @@ Aggregation
If we want to comprise several values by aggregation and assign the result to the new regular timestamp, instead of
selecting a single one, we can do this, with the :py:meth:`~saqc.SaQC.resample` method.
Let's resample the *SoilMoisture* data to have a *20* minutes sample rate by aggregating every *20* minutes interval's
content with the arithmetic mean (which is provided by the ``numpy.mean`` function for example).
content with the arithmetic mean.
>>> import numpy as np
>>> qc = qc.resample('SoilMoisture', target='SoilMoisture_mean', freq='20min', method='bagg', func=np.mean)
>>> qc = qc.resample('SoilMoisture', target='SoilMoisture_mean', freq='20min', method='bagg', func="mean")
>>> qc.data # doctest: +SKIP
SoilMoisture | SoilMoisture_mean |
================================ | ===================================== |
.. SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
..
.. SPDX-License-Identifier: GPL-3.0-or-later
Drift Detection
===============
Overview
--------
The guide briefly introduces the usage of the :py:meth:`~saqc.SaQC.flagDriftFromNorm` method.
The method detects sections in timeseries that deviate from the majority in a group of variables.
* :ref:`Parameters <cookbooks/DriftDetection:Parameters>`
* :ref:`Algorithm <cookbooks/DriftDetection:Algorithm>`
* :ref:`Example Data import <cookbooks/DriftDetection:Example Data import>`
* :ref:`Example Algorithm Application <cookbooks/DriftDetection:Example Algorithm Application>`
Parameters
----------
Although there seems to be a lot of user input to parametrize, most parameters are easy to interpret and can be left
at their defaults.
window
^^^^^^
Length of the partitions the target group of data series is divided into.
For example, if ``1D`` (one day) is selected, the group to check will be divided into one-day chunks, and every chunk is checked for time series deviating from the normal group.
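For intuition, this chunking corresponds to a plain time grouping. A minimal sketch in pandas (an illustration of the windowing idea, not SaQC internals):

.. code-block:: python

   import pandas as pd

   # two dummy series sampled hourly over two days
   frame = pd.DataFrame(
       {"temp1": 0.0, "temp2": 0.0},
       index=pd.date_range("2017-01-01", periods=48, freq="h"),
   )
   # split into one-day chunks, as ``window='1D'`` would
   for _, chunk in frame.groupby(pd.Grouper(freq="1D")):
       pass  # every chunk is checked for deviating series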
frac
^^^^
The fraction of variables needed to define the "normal" group, expressed as a number in :math:`[0,1]`.
This must, of course, be above 50 percent (:math:`0.5`) and can be
selected according to the maximum number of drifting variables one expects in the data.
method
^^^^^^
The linkage method can have some impact on the clustering, but sticking to the default value ``single`` might be
sufficient for most tasks.
spread
^^^^^^
The main parameter to control the algorithm's behavior. It has to be selected carefully.
It determines the maximum spread of a normal group by limiting the costs a cluster agglomeration must not exceed in
any linkage step.
For singleton clusters, that cost equals half the distance the timeseries in the clusters have to each other. So, only timeseries with a distance of less than two times the spreading norm can be clustered.
When timeseries get clustered together, the new cluster's distance to all the other timeseries/clusters is calculated
according to the linkage method specified. By default, it is the minimum distance the members of the clusters have to
each other.
Having that in mind, it is advisable to choose as metric a distance function that can be well interpreted in the unit
dimension of the measurement and whose interpretation is invariant over the length of the timeseries.
metric
^^^^^^
The default *averaged Manhattan metric* roughly represents the averaged value distance of two timeseries (as opposed to *euclidean*, which scales non-linearly with the
compared timeseries' length). For the selection of the :py:attr:`spread` parameter the default metric is helpful, since it allows interpreting the spread in the dimension of the measurements.
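To get a feeling for sensible ``spread`` values in the unit of your measurements, the averaged Manhattan distance between two series can be computed directly. A minimal sketch in plain numpy (an illustration, not SaQC internals):

.. code-block:: python

   import numpy as np

   def averaged_manhattan(x: np.ndarray, y: np.ndarray) -> float:
       """Averaged value distance of two equally indexed series."""
       return float(np.abs(x - y).mean())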
Algorithm
---------
The aim of the algorithm is to flag sections in timeseries that significantly deviate from a normal group of timeseries running in parallel within a given section.
"Normality" is determined in terms of a maximum spreading distance that members of a normal group must not exceed.
In addition, a group is only considered "normal" if it contains more than a certain percentage of the timeseries to be clustered into "normal" and "abnormal" ones.
The steps of the algorithm are the following (a sketch of the clustering core follows below):

* Calculate the distances :math:`d(x_i,x_j)` for all timeseries :math:`x_i` that are to be clustered, with a metric specified by the user.
* Calculate a dendrogram using a hierarchical linkage algorithm specified by the user.
* Flatten the dendrogram at the level where the agglomeration costs exceed the value given by the spreading norm specified by the user.
* Check if there is a cluster containing more than a certain percentage of variables, as specified by the user.

  * If yes: flag all the variables that are not in that cluster.
  * If no: flag nothing.
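The clustering core of these steps can be sketched as follows, assuming ``scipy`` is available. This is an illustration of the idea only, not SaQC's actual implementation:

.. code-block:: python

   import numpy as np
   from scipy.cluster.hierarchy import fcluster, linkage
   from scipy.spatial.distance import pdist

   def normal_group(chunk: np.ndarray, spread: float, frac: float = 0.5):
       """Return the indices of the 'normal' series in a
       (n_series, n_samples) chunk, or None if no cluster is large enough."""
       # averaged Manhattan distances between all pairs of series
       dists = pdist(chunk, metric="cityblock") / chunk.shape[1]
       # hierarchical clustering, flattened where costs exceed `spread`
       labels = fcluster(
           linkage(dists, method="single"), t=spread, criterion="distance"
       )
       values, counts = np.unique(labels, return_counts=True)
       if counts.max() <= frac * chunk.shape[0]:
           return None  # no cluster qualifies as 'normal' -> flag nothing
       return np.flatnonzero(labels == values[counts.argmax()])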
Example Data Import
-------------------
.. plot::
:context: reset
:include-source: False
import matplotlib
import saqc
import pandas as pd
data = pd.read_csv('../resources/data/tempSensorGroup.csv', index_col=0)
data.index = pd.DatetimeIndex(data.index)
variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
qc = saqc.SaQC(data)
We load the example `data set <https://git.ufz.de/rdm-software/saqc/-/blob/develop/docs/resources/data/tempSensorGroup.csv>`_
from the *saqc* repository using the `pandas <https://pandas.pydata.org/>`_ csv
file reader. Subsequently, we cast the index of the imported data to `DatetimeIndex`,
instantiate a saqc object, and plot the data:
.. doctest:: flagDriftFromNorm
>>> import saqc
>>> data = pd.read_csv('./resources/data/tempSensorGroup.csv', index_col=0)
>>> data.index = pd.DatetimeIndex(data.index)
>>> variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
>>> qc = saqc.SaQC(data)
>>> qc.plot(variables) # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot(variables)
Example Algorithm Application
-----------------------------
Looking at the example data set more closely, we see that 2 of the 5 variables start to drift away.
.. plot::
:context: close-figs
:include-source: False
:class: center
:caption: Two variables have departed from the majority group (the group containing more than ``frac`` of the variables) by the end of the year.
qc.plot(variables, xscope=slice('2017-05', '2017-11'))
Let's try to detect those drifts via saqc. The changes we observe in the data only develop significantly over temporal spans of more than a month,
so we go for ``"1ME"`` as the value of the
``window`` parameter. We identified the majority group as a group containing three variables, with two variables
scattering away, so we can leave the ``frac`` value at its default level of ``.5``.
The majority group seems, on average, not to be spread out by more than 3 or 4 degrees, so for the ``spread`` value
we go for ``3``. This can be interpreted as follows: for every member of the group, there is another member that
is, on average over one month, not more than ``3`` degrees away from it. This should be sufficient to bundle
the majority group and to discriminate against the drifting variables, which seem to deviate from any member of the
majority group by more than 3 degrees on average in a month.
.. doctest:: flagDriftFromNorm
>>> variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
>>> qc = qc.flagDriftFromNorm(variables, window='1ME', spread=3)
.. plot::
:context: close-figs
:include-source: False
:class: center
variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
qc = qc.flagDriftFromNorm(variables, window='1ME', spread=3)
Lets check the results:
.. doctest:: flagDriftFromNorm
>>> qc.plot(variables, marker_kwargs={'alpha':.3, 's': 1, 'color': 'red', 'edgecolor': 'face'}) # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot(variables, marker_kwargs={'alpha':.3, 's': 1, 'color': 'red', 'edgecolors': 'face'})
@@ -191,7 +191,6 @@ The resulting timeseries now has a regular timestamp.
.. doctest:: exampleMV
>>> qc.data['sac254_raw'] #doctest:+NORMALIZE_WHITESPACE
Timestamp
2016-01-01 00:00:00 NaN
2016-01-01 00:15:00 18.617873
2016-01-01 00:30:00 18.942700
@@ -246,17 +245,14 @@ Check out the results for the year *2016*
.. doctest:: exampleMV
>>> plt.plot(qc.data['sac254_raw']['2016'], alpha=.5, color='black', label='original') # doctest:+SKIP
>>> plt.plot(qc.data['sac254_corrected']['2016'], color='black', label='corrected') # doctest:+SKIP
>>> qc.plot(['sac254_raw','sac254_corrected'], xscope='2016', plot_kwargs={'color':['black', 'black'], 'alpha':[.5, 1], 'label':['original', 'corrected']}) # doctest:+SKIP
.. plot::
:context:
:include-source: False
plt.figure(figsize=(16,9))
plt.plot(qc.data['sac254_raw']['2016'], alpha=.5, color='black', label='original')
plt.plot(qc.data['sac254_corrected']['2016'], color='black', label='corrected')
plt.legend()
qc.plot(['sac254_raw','sac254_corrected'], xscope='2016', plot_kwargs={'color':['black', 'black'], 'alpha':[.5, 1], 'label':['original', 'corrected']})
Multivariate Flagging Procedure
-------------------------------
@@ -345,7 +341,7 @@ correlated with relatively high *kNNscores*, we could try to calculate a thresho
`STRAY <https://arxiv.org/pdf/1908.04000.pdf>`_ algorithm, which is available as the method:
:py:meth:`~saqc.SaQC.flagByStray`. This method will mark some samples of the `kNNscore` variable as anomaly.
Subsequently we project this marks (or *flags*) on to the *sac* variable with a call to
:py:meth:`~saqc.SaQC.concatFlags`. For the sake of demonstration, we also project the flags
:py:meth:`~saqc.SaQC.transferFlags`. For the sake of demonstration, we also project the flags
on the normalized *sac* and plot the flagged values in the *sac254_norm* - *level_norm* feature space.
@@ -353,8 +349,8 @@ on the normalized *sac* and plot the flagged values in the *sac254_norm* - *leve
.. doctest:: exampleMV
>>> qc = qc.flagByStray(field='kNNscores', freq='30D', alpha=.3)
>>> qc = qc.concatFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
>>> qc = qc.concatFlags(field='kNNscores', target='sac254_norm', label='STRAY')
>>> qc = qc.transferFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
>>> qc = qc.transferFlags(field='kNNscores', target='sac254_norm', label='STRAY')
>>> qc.plot('sac254_corrected', xscope='2016-11') # doctest:+SKIP
>>> qc.plot('sac254_norm', phaseplot='level_norm', xscope='2016-11') # doctest:+SKIP
@@ -363,8 +359,8 @@ on the normalized *sac* and plot the flagged values in the *sac254_norm* - *leve
:include-source: False
qc = qc.flagByStray(field='kNNscores', freq='30D', alpha=.3)
qc = qc.concatFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
qc = qc.concatFlags(field='kNNscores', target='sac254_norm', label='STRAY')
qc = qc.transferFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
qc = qc.transferFlags(field='kNNscores', target='sac254_norm', label='STRAY')
.. plot::
:context: close-figs
@@ -393,4 +389,4 @@ Config
To configure `saqc` to execute the above data processing and flagging steps, the config file would have to look
as follows:
.. literalinclude:: ../resources/data/hydro_config.csv
\ No newline at end of file
.. literalinclude:: ../resources/data/hydro_config.csv
@@ -147,19 +147,19 @@ Rolling Mean
^^^^^^^^^^^^
The easiest thing to do would be to apply a rolling mean
model via the method :py:meth:`saqc.SaQC.roll`.
model via the method :py:meth:`saqc.SaQC.rolling`.
.. doctest:: exampleOD
>>> import numpy as np
>>> qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D')
>>> qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D')
.. plot::
:context:
:include-source: False
import numpy as np
qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D')
qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D')
The ``field`` parameter is passed the name of the variable we want to calculate the rolling mean of.
The ``target`` parameter holds the name we want to store the results of the calculation under.
@@ -174,13 +174,13 @@ under the name ``np.median``. We just calculate another model curve for the ``"i
.. doctest:: exampleOD
>>> qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D')
>>> qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D')
.. plot::
:context:
:include-source: False
qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D')
qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D')
We chose another :py:attr:`target` value for the rolling *median* calculation, in order not to overwrite our results from
the previous rolling *mean* calculation.
@@ -255,25 +255,11 @@ This function object, we can pass on to the :py:meth:`~saqc.SaQC.processGeneric`
Visualisation
-------------
We can obtain this updated information by generating a `pandas dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
representation of it, with the :py:attr:`data <saqc.core.core.SaQC.data>` method:
To see all the results obtained so far, plotted in one figure window, we make use of the :py:meth:`~saqc.SaQC.plot` method.
.. doctest:: exampleOD
>>> data = qc.data
.. plot::
:context:
:include-source: False
data = qc.data
To see all the results obtained so far, plotted in one figure window, we make use of the dataframes `plot <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html>`_ method.
.. doctest:: exampleOD
>>> data.to_df().plot()
<Axes...>
>>> qc.plot(".", regex=True) # doctest: +SKIP
.. plot::
:context:
@@ -281,7 +267,7 @@ To see all the results obtained so far, plotted in one figure window, we make us
:width: 80 %
:class: center
data.to_df().plot()
qc.plot(".", regex=True)
Residuals and Scores
@@ -332,18 +318,18 @@ for the point lying in the center of every window, we would define our function,
z_score = lambda D: abs((D[14] - np.mean(D)) / np.std(D))
And subsequently, use the :py:meth:`~saqc.SaQC.roll` method to make a rolling window application with the scoring
And subsequently, use the :py:meth:`~saqc.SaQC.rolling` method to make a rolling window application with the scoring
function:
.. doctest:: exampleOD
>>> qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D')
>>> qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
.. plot::
:context: close-figs
:include-source: False
qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D')
qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
Optimization by Decomposition
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
So the attempt works fine only because our data set is small and strictly regular,
meaning that it has constant temporal distances between subsequent measurements.
In order to tweak our calculations and make them much more stable, it might be useful to decompose the scoring
into separate calls to the :py:meth:`~saqc.SaQC.roll` function, by calculating the series of the
into separate calls to the :py:meth:`~saqc.SaQC.rolling` function, by calculating the series of the
residuals *mean* and *standard deviation* separately:
.. doctest:: exampleOD
>>> qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
>>> qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
>>> qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
>>> qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
>>> qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm",
... func=lambda this, mean, std: (this - mean) / std)
@@ -376,15 +362,15 @@ residuals *mean* and *standard deviation* separately:
:context: close-figs
:include-source: False
qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", func=lambda this, mean, std: (this - mean) / std)
With huge datasets, this will be noticeably faster compared to the method presented :ref:`initially <cookbooks/ResidualOutlierDetection:Scores>`\ ,
because ``saqc`` dispatches the rolling with the basic numpy statistic methods to an optimized pandas built-in.
Also, as a result of the :py:meth:`~saqc.SaQC.roll` assigning its results to the center of every window,
Also, as a result of the :py:meth:`~saqc.SaQC.rolling` assigning its results to the center of every window,
all the values are centered and we don't have to care about window center indices when we are generating
the *Z*\ -Scores from the two series.
@@ -5,6 +5,13 @@
Developers Resources
====================
.. toctree::
:caption: Developer Resources
:hidden:
:maxdepth: 1
Documentation Guide <HowToDoc>
Writing Functions <WritingFunctions>
.. grid:: 3
:gutter: 2
@@ -5,88 +5,136 @@
Customizations
==============
SaQC comes with a continuously growing number of pre-implemented
quality checking and processing routines as well as flagging schemes.
For any sufficiently large use case, however, it is very likely that the
functions provided won't fulfill all your needs and requirements.
Acknowledging the impossibility of addressing all imaginable use cases, we
designed the system to allow for extensions and customizations. The main extension options, namely
SaQC comes with a continuously growing number of pre-implemented quality-checking and processing
routines as well as flagging schemes. For a sufficiently large use case, however, it might
nevertheless become necessary to extend the system. The main extension options, namely
:ref:`quality check routines <documentation/Customizations:custom quality check routines>`
and the :ref:`flagging scheme <documentation/Customizations:custom flagging schemes>`
are described within this document.
and the :ref:`flagging scheme <documentation/Customizations:custom flagging schemes>`.
Both of these mechanisms are described within this document.
Custom quality check routines
Custom Quality Check Routines
-----------------------------
In case you are missing quality check routines, you are of course very
welcome to file a feature request issue on the project's
`gitlab repository <https://git.ufz.de/rdm-software/saqc>`_. However, if
you are more the "I-get-this-done-by-myself" type of person,
SaQC provides two ways to integrate custom routines into the system:
In case you are missing quality check routines, you are, of course, very welcome to file a feature request issue on the project's `GitLab repository <https://git.ufz.de/rdm-software/saqc>`_. However, if you are more the "I-get-this-done-by-myself" type of person, SaQC offers the possibility to directly extend its functionality using its interface to the evaluation machinery.
#. The :ref:`extension language <documentation/GenericFunctions:Generic Functions>`
#. An :ref:`interface <documentation/Customizations:interface>` to the evaluation machinery
In order to make a function usable within the evaluation framework of SaQC, it needs to implement the following function interface:
Interface
^^^^^^^^^
In order to make a function usable within the evaluation framework of SaQC, it needs to
implement the following function interface
.. code-block:: python
import pandas
import saqc
def yourTestFunction(
    saqc: SaQC,
    field: str,
    *args,
    **kwargs
) -> SaQC:
def yourTestFunction(qc: SaQC, field: str | list[str], *args, **kwargs) -> SaQC:
# your code
return qc
Argument Descriptions
~~~~~~~~~~~~~~~~~~~~~
with the following parameters
.. list-table::
:header-rows: 1
* - Name
- Description
* - ``data``
- The actual dataset, an instance of ``saqc.DictOfSeries``.
* - ``qc``
- An instance of ``SaQC``
* - ``field``
- The field/column within ``data``, that function is processing.
* - ``flags``
- An instance of saqc.Flags, responsible for the translation of test results into quality attributes.
- The field(s)/column(s) of ``data`` the function is processing/flagging.
* - ``args``
- Any other arguments needed to parameterize the function.
- Any number of named arguments needed to parameterize the function.
* - ``kwargs``
- Any other keyword arguments needed to parameterize the function.
- Any number of named keyword arguments needed to parameterize the function. ``kwargs``
need to be present, even if the function needs no keyword arguments at all.
Integrate into SaQC
^^^^^^^^^^^^^^^^^^^
In order to make your function available to the system, it needs to be registered. We provide the decorator
`flagging <saqc/functions/register.py>`_ with saqc to integrate your
test functions into SaQC. Here is a complete dummy example:
SaQC provides two decorators, :py:func:`@flagging` and :py:func:`@register`, to integrate custom functions
into its workflow. The choice between them depends on the nature of your algorithm. :py:func:`@register`
is a more versatile decorator, allowing you to handle masking, demasking, and squeezing of data and flags, while
:py:func:`@flagging` is simpler and suitable for univariate flagging functions without the need for complex
data manipulations.
Use :py:func:`@flagging` for simple univariate flagging tasks without the need for complex data manipulations.
:py:func:`@flagging` is especially suitable when your algorithm operates on a single column:
.. code-block:: python
from saqc import register
from saqc import SaQC
from saqc.core.register import flagging
@flagging()
def yourTestFunction(saqc: SaQC, field: str, *args, **kwargs):
def simpleFlagging(saqc: SaQC, field: str | list[str], param1: ..., param2: ..., **kwargs) -> SaQC:
"""
Your simple univariate flagging logic goes here.
Parameters
----------
saqc : SaQC
The SaQC instance.
field : str
The field or fields on which to apply anomaly detection.
param1 : ...
Additional parameters needed for your algorithm.
param2 : ...
Additional parameters needed for your algorithm.
Returns
-------
SaQC
The modified SaQC instance.
"""
# Your flagging logic here
# Modify saqc._flags as needed
return saqc
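Once registered, the decorated function is available like any built-in method on ``SaQC`` objects. A usage sketch (``simpleFlagging`` and its parameters are the hypothetical names from the example above):

.. code-block:: python

   import pandas as pd
   import saqc

   data = pd.DataFrame(
       {"a": [1.0, 2.0, 3.0]},
       index=pd.date_range("2021-01-01", periods=3, freq="D"),
   )
   qc = saqc.SaQC(data)
   # the decorated function can now be called as a regular SaQC method
   qc = qc.simpleFlagging("a", param1=1, param2=2)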
Use :py:func:`@register` when your algorithm needs to handle multiple columns simultaneously (``multivariate=True``)
and/or you need explicit control over masking, demasking, and squeezing of data and flags.
:py:func:`@register` is especially suited for complex algorithms that involve interactions between different columns:
.. code-block:: python
from saqc import SaQC
from saqc.core.register import register
@register(
mask=["field"], # Parameter(s) of the decorated functions giving the names of columns in SaQC._data to mask before the call
demask=["field"], # Parameter(s) of the decorated functions giving the names of columns in SaQC._data to unmask after the call
squeeze=["field"], # Parameter(s) of the decorated functions giving the names of columns in SaQC._flags to squeeze into a single flags column after the call
multivariate=True, # Set to True to handle multiple columns
handles_target=False,
)
def complexAlgorithm(
saqc: SaQC, field: str | list[str], param1: ..., param2: ..., **kwargs
) -> SaQC:
"""
Your custom anomaly detection logic goes here.
Parameters
----------
saqc : SaQC
The SaQC instance.
field : str or list of str
The field or fields on which to apply anomaly detection.
param1 : ...
Additional parameters needed for your algorithm.
param2 : ...
Additional parameters needed for your algorithm.
Returns
-------
SaQC
The modified SaQC instance.
"""
# Your anomaly detection logic here
# Modify saqc._flags and saqc._data as needed
return saqc
Example
^^^^^^^
The function `flagRange <saqc/funcs/outliers.py>`_ provides a simple, yet complete implementation of
a quality check routine. You might want to look into its implementation as an example.
Custom flagging schemes
-----------------------
@@ -51,8 +51,7 @@ dummy dataset, to lead us through the following code snippets:
.. testsetup:: python
from saqc import fromConfig
from tests.common import writeIO
from saqc.parsing.reader import _ConfigReader as ConfigReader
.. testcode:: python
@@ -116,16 +115,13 @@ Simple constraints
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------
... x ; flagGeneric(func=x < 30)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc1.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -177,16 +173,13 @@ Cross variable constraints
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------
... x ; flagGeneric(field="y", func=y > 30)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc2.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -241,16 +234,13 @@ need to be put in parentheses.
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;--------------------------------------------------------
... x ; flagGeneric(field=["y", "z"], func=(y > 30) & (z < 50))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc3.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -293,16 +283,13 @@ Arithmetics
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;-------------------------------------------------------
... x ; flagGeneric(field=["x", "y", "z"], func=x > (y + z)/2)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc4.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -351,16 +338,13 @@ Special functions
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;---------------------------------------------------
... x ; flagGeneric(field=["x", "z"], func=x > std(z) * 2)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc5.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -402,17 +386,14 @@ Special functions
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------------
... y ; flagRange(min=10, max=60)
... x ; flagGeneric(field="y", func=isflagged(y))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc6.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -481,16 +462,13 @@ Let's consider the following dataset:
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;---------------------------------------------------------------
... meas ; flagGeneric(field=["fan", "volt"], func=(x == 0) | (y < 12.0))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc7.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -533,8 +511,7 @@ But we could also quality check our independent variables first and then leverag
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;--------------------------------------------------------------------------
@@ -543,9 +520,7 @@ But we could also quality check our independent variables first and then leverag
... volt ; flagGeneric(func=volt < 12.0)
... meas ; flagGeneric(field=["fan", "volt"], func=isflagged(fan) | isflagged(volt))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc8.flags #doctest:+NORMALIZE_WHITESPACE
True
@@ -634,16 +609,13 @@ variables in a given dataset. We start with dummy data again:
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------------------------
... mean ; processGeneric(field=["x", "y", "z"], func=(x+y+z)/2)
... """
... ),
... data
... )
... ).run()
>>> tmp.data == qc1.data #doctest:+NORMALIZE_WHITESPACE
True
@@ -37,7 +37,6 @@ Example Data
:context: close-figs
:include-source: False
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import saqc
@@ -7,7 +7,18 @@ Documentation
=============
.. grid:: 3
.. toctree::
:caption: Documentation
:maxdepth: 1
:hidden:
ConfigurationFiles
GlobalKeywords
Customizations
SourceTarget
FlaggingTranslation
.. grid:: 2
:gutter: 2
.. grid-item-card:: Configuration files (csv)
@@ -30,5 +41,12 @@ Documentation
+++
*Keywords shared by all the flagging functions*
.. grid-item-card:: Customizations
:link: Customizations
:link-type: doc
* add custom functions to SaQC
+++
*Add your own functions and flagging schemes to SaQC*