
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 250 additions and 558 deletions
@@ -18,7 +18,7 @@ from saqc.funcs.interpolation import *
 from saqc.funcs.outliers import *
 from saqc.funcs.pattern import *
 from saqc.funcs.resampling import *
-from saqc.funcs.residues import *
+from saqc.funcs.residuals import *
 from saqc.funcs.rolling import *
 from saqc.funcs.scores import *
 from saqc.funcs.tools import *
...
@@ -319,9 +319,9 @@ def _assignChangePointCluster(
 result_arr = stat_arr > thresh_arr
 if model_by_resids:
-residues = pd.Series(np.nan, index=data[field].index)
-residues[masked_index] = stat_arr
-data[field] = residues
+residuals = pd.Series(np.nan, index=data[field].index)
+residuals[masked_index] = stat_arr
+data[field] = residuals
 flags[:, field] = UNFLAGGED
 return data, flags
...
@@ -47,7 +47,7 @@ def fitPolynomial(
 In case your data is sampled at an equidistant frequency grid:
 (1) If you know your data to have no significant number of missing values,
-or if you do not want to calculate residues for windows containing missing values
+or if you do not want to calculate residuals for windows containing missing values
 any way, performance can be increased by setting min_periods=window.
 Note, that the initial and final window/2 values do not get fitted.
@@ -92,7 +92,7 @@ def fitPolynomial(
 flags : saqc.Flags
 Flags
 """
-reserved = ["residues", "set_flags"]
+reserved = ["residuals", "set_flags"]
 filterKwargs(kwargs, reserved)
 return _fitPolynomial(
 data=data,
@@ -103,7 +103,7 @@ def fitPolynomial(
 min_periods=min_periods,
 **kwargs,
 # ctrl args
-return_residues=False,
+return_residuals=False,
 set_flags=True,
 )
@@ -116,7 +116,7 @@ def _fitPolynomial(
 order: int,
 set_flags: bool = True,
 min_periods: int = 0,
-return_residues: bool = False,
+return_residuals: bool = False,
 **kwargs,
 ) -> Tuple[DictOfSeries, Flags]:
@@ -140,7 +140,7 @@ def _fitPolynomial(
 ).floor()
 centers = centers.drop(centers[centers.isna()].index)
 centers = centers.astype(int)
-residues = to_fit.rolling(
+residuals = to_fit.rolling(
 pd.Timedelta(window), closed="both", min_periods=min_periods
 ).apply(polyRollerIrregular, args=(centers, order))
@@ -153,11 +153,11 @@ def _fitPolynomial(
 .apply(center_func, raw=False)
 .astype(int)
 )
-temp = residues.copy()
+temp = residuals.copy()
 for k in centers_iloc.iteritems():
-residues.iloc[k[1]] = temp[k[0]]
-residues[residues.index[0] : residues.index[centers_iloc[0]]] = np.nan
-residues[residues.index[centers_iloc[-1]] : residues.index[-1]] = np.nan
+residuals.iloc[k[1]] = temp[k[0]]
+residuals[residuals.index[0] : residuals.index[centers_iloc[0]]] = np.nan
+residuals[residuals.index[centers_iloc[-1]] : residuals.index[-1]] = np.nan
 else:
 if isinstance(window, str):
 window = pd.Timedelta(window) // regular
@@ -185,7 +185,7 @@ def _fitPolynomial(
 na_mask = to_fit.isna()
 to_fit[na_mask] = miss_marker
 if numba:
-residues = to_fit.rolling(window).apply(
+residuals = to_fit.rolling(window).apply(
 polyRollerNumba,
 args=(miss_marker, val_range, center_index, order),
 raw=True,
@@ -194,18 +194,18 @@ def _fitPolynomial(
 )
 # due to a tiny bug - rolling with center=True doesnt work
 # when using numba engine.
-residues = residues.shift(-int(center_index))
+residuals = residuals.shift(-int(center_index))
 else:
-residues = to_fit.rolling(window, center=True).apply(
+residuals = to_fit.rolling(window, center=True).apply(
 polyRoller,
 args=(miss_marker, val_range, center_index, order),
 raw=True,
 )
-residues[na_mask] = np.nan
+residuals[na_mask] = np.nan
 else:
 # we only fit fully populated intervals:
 if numba:
-residues = to_fit.rolling(window).apply(
+residuals = to_fit.rolling(window).apply(
 polyRollerNoMissingNumba,
 args=(val_range, center_index, order),
 engine="numba",
@@ -214,18 +214,18 @@ def _fitPolynomial(
 )
 # due to a tiny bug - rolling with center=True doesnt work
 # when using numba engine.
-residues = residues.shift(-int(center_index))
+residuals = residuals.shift(-int(center_index))
 else:
-residues = to_fit.rolling(window, center=True).apply(
+residuals = to_fit.rolling(window, center=True).apply(
 polyRollerNoMissing,
 args=(val_range, center_index, order),
 raw=True,
 )
-if return_residues:
-residues = to_fit - residues
-data[field] = residues
+if return_residuals:
+residuals = to_fit - residuals
+data[field] = residuals
 if set_flags:
 # TODO: we does not get any flags here, because of masking=field
 worst = flags[field].rolling(window, center=True, min_periods=min_periods).max()
...
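The rename does not touch the underlying logic: a polynomial of degree `order` is fitted to each window and, with `return_residuals=True`, the difference between the data and the fitted curve is written back. Below is a minimal, self-contained sketch of that idea in plain numpy/pandas, assuming an odd integer window; it is not the saqc implementation, which additionally handles irregular sampling, numba and `min_periods`.

import numpy as np
import pandas as pd

def rolling_poly_residuals(s: pd.Series, window: int, order: int) -> pd.Series:
    # fit a degree-`order` polynomial to every centered window and keep the
    # residual of the window's center value (NaN where the window is incomplete)
    half = window // 2
    x = np.arange(window)
    out = pd.Series(np.nan, index=s.index)
    for i in range(half, len(s) - half):
        y = s.iloc[i - half : i + half + 1].to_numpy()
        if np.isnan(y).any():  # comparable to min_periods=window
            continue
        coeffs = np.polynomial.polynomial.polyfit(x, y, order)
        fitted_center = np.polynomial.polynomial.polyval(half, coeffs)
        out.iloc[i] = y[half] - fitted_center
    return out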
@@ -14,6 +14,7 @@ import numba
 import numpy as np
 import numpy.polynomial.polynomial as poly
 import pandas as pd
+import warnings
 from dios import DictOfSeries
 from outliers import smirnov_grubbs
@@ -308,8 +309,8 @@ def _expFit(
 Niveau of significance by which it is tested, if a score might be drawn from another distribution, than the
 majority of the data.
 bin_frac : {int, str}, default 10
-Controls the binning for the histogram in the fitting step. If an integer is passed, the residues will
-equidistantly be covered by `bin_frac` bins, ranging from the minimum to the maximum of the residues.
+Controls the binning for the histogram in the fitting step. If an integer is passed, the residuals will
+equidistantly be covered by `bin_frac` bins, ranging from the minimum to the maximum of the residuals.
 If a string is passed, it will be passed on to the ``numpy.histogram_bin_edges`` method.
 """
@@ -1369,6 +1370,14 @@ def flagCrossStatistics(
 The quality flags of data
 Flags values may have changed relatively to the input flags.
+Notes
+-----
+The input variables dont necessarily have to be aligned. If the variables are unaligned, scoring
+and flagging will be only performed on the subset of inices shared among all input variables.
 References
 ----------
 [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
@@ -1376,13 +1385,6 @@ def flagCrossStatistics(
 fields = toSequence(field)
-for src in fields[1:]:
-if (data[src].index != data[fields[0]].index).any():
-raise ValueError(
-f"indices of '{fields[0]}' and '{src}' are not compatibble, "
-"please resample all variables to a common (time-)grid"
-)
 df = data[fields].loc[data[fields].index_of("shared")].to_df()
 if isinstance(method, str):
@@ -1419,6 +1421,7 @@ def flagCrossStatistics(
 return data, flags
 for f in fields:
-flags[mask[f], f] = flag
+m = mask[f].reindex(index=flags[f].index, fill_value=False)
+flags[m, f] = flag
 return data, flags
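The removed index check is replaced by reindexing the result mask onto each variable's own index, which is what allows unaligned inputs: timestamps a variable does not share with the others simply receive False and stay unflagged. The pandas behaviour the new lines rely on, shown in isolation with hypothetical toy series (not the saqc API):

import pandas as pd

full_index = pd.date_range("2021-01-01", periods=6, freq="D")
shared_index = full_index[1:5]  # timestamps common to all input variables

# mask computed on the shared index only
mask = pd.Series([True, False, True, False], index=shared_index)

# broadcast back to the variable's full index; missing timestamps become False
m = mask.reindex(index=full_index, fill_value=False)
print(m.tolist())  # [False, True, False, True, False, False]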
@@ -21,7 +21,7 @@ from saqc.lib.tools import filterKwargs
 @register(mask=["field"], demask=[], squeeze=[])
-def calculatePolynomialResidues(
+def calculatePolynomialResiduals(
 data: DictOfSeries,
 field: str,
 flags: Flags,
@@ -31,19 +31,19 @@ def calculatePolynomialResidues(
 **kwargs
 ) -> Tuple[DictOfSeries, Flags]:
 """
-Fits a polynomial model to the data and calculate the residues.
-The residue is calculated by fitting a polynomial of degree `order` to a data
+Fits a polynomial model to the data and calculate the residuals.
+The residual is calculated by fitting a polynomial of degree `order` to a data
 slice of size `window`, that has x at its center.
-Note, that calculating the residues tends to be quite costy, because a function
+Note, that calculating the residuals tends to be quite costy, because a function
 fitting is performed for every sample. To improve performance, consider the
 following possibilities:
 In case your data is sampled at an equidistant frequency grid:
 (1) If you know your data to have no significant number of missing values,
-or if you do not want to calculate residues for windows containing missing values
+or if you do not want to calculate residuals for windows containing missing values
 any way, performance can be increased by setting min_periods=window.
 Note, that the initial and final window/2 values do not get fitted.
@@ -85,7 +85,7 @@ def calculatePolynomialResidues(
 data : dios.DictOfSeries
 flags : saqc.Flags
 """
-reserved = ["residues", "set_flags"]
+reserved = ["residuals", "set_flags"]
 filterKwargs(kwargs, reserved)
 return _fitPolynomial(
 data=data,
@@ -96,13 +96,13 @@ def calculatePolynomialResidues(
 min_periods=min_periods,
 **kwargs,
 # ctrl args
-return_residues=True,
+return_residuals=True,
 set_flags=True,
 )
 @register(mask=["field"], demask=[], squeeze=[])
-def calculateRollingResidues(
+def calculateRollingResiduals(
 data: DictOfSeries,
 field: str,
 flags: Flags,
@@ -146,7 +146,7 @@ def calculateRollingResidues(
 flags : saqc.Flags
 The quality flags of data
 """
-reserved = ["return_residues", "set_flags"]
+reserved = ["return_residuals", "set_flags"]
 kwargs = filterKwargs(kwargs, reserved)
 return _roll(
 data=data,
@@ -159,5 +159,5 @@ def calculateRollingResidues(
 **kwargs,
 # ctrl args
 set_flags=True,
-return_residues=True,
+return_residuals=True,
 )
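With the rename, the registered functions are exposed on the SaQC object as calculatePolynomialResiduals and calculateRollingResiduals. A hedged usage sketch follows; the parameter names are taken from the hunks above and the test file further down, everything else (field names, window sizes, the rolling statistic) is made up for illustration.

import numpy as np
import pandas as pd
import saqc

data = pd.DataFrame(
    {"incidents": np.random.default_rng(1).normal(size=200)},
    index=pd.date_range("2021-01-01", periods=200, freq="1H"),
)
qc = saqc.SaQC(data)

# residuals of a centered rolling polynomial fit (11 samples wide, degree 2)
qc = qc.calculatePolynomialResiduals("incidents", window=11, order=2)

# residuals against a rolling statistic (here: the rolling mean over one day)
qc = qc.calculateRollingResiduals("incidents", window="1D", func=np.mean)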
@@ -61,7 +61,7 @@ def roll(
 flags : saqc.Flags
 The quality flags of data
 """
-reserved = ["return_residues", "set_flags"]
+reserved = ["return_residuals", "set_flags"]
 kwargs = filterKwargs(kwargs, reserved)
 return _roll(
 data=data,
@@ -74,7 +74,7 @@ def roll(
 **kwargs,
 # ctrl args
 set_flags=True,
-return_residues=False,
+return_residuals=False,
 )
@@ -87,7 +87,7 @@ def _roll(
 set_flags: bool = True,
 min_periods: int = 0,
 center: bool = True,
-return_residues=False,
+return_residuals=False,
 **kwargs
 ):
 to_fit = data[field].copy()
@@ -153,7 +153,7 @@ def _roll(
 func
 )
-if return_residues:
+if return_residuals:
 means = to_fit - means
 data[field] = means
...
@@ -334,10 +334,10 @@ def plot(
 """
 interactive = path is None
-level = kwargs.get("flag", BAD)
+level = kwargs.get("flag", UNFLAGGED)
 if dfilter < np.inf:
-data = data.copy()
+data_temp = data[field].copy()
 data.loc[flags[field] >= dfilter, field] = np.nan
 if store_kwargs is None:
@@ -374,4 +374,7 @@ def plot(
 else:
 fig.savefig(path, **store_kwargs)
+if dfilter < np.inf:
+data[field] = data_temp
 return data, flags
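The second change here fixes a side effect: previously the dfilter masking overwrote values in the shared data object. The new code remembers the column and puts it back before returning, so only the figure sees the NaNs. The pattern in isolation, as a plain pandas sketch rather than the saqc function:

import numpy as np
import pandas as pd

def plot_masked(data: pd.DataFrame, field: str, flags: pd.Series, dfilter: float):
    data_temp = data[field].copy()              # keep the original values
    data.loc[flags >= dfilter, field] = np.nan  # hide filtered points for plotting only
    try:
        data[field].plot()                      # stand-in for the actual figure code
    finally:
        data[field] = data_temp                 # restore before handing data back
    return data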
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import re

FUNC_NAPOLEAN_STYLE_ORDER = [
    "Head",
    "Parameters",
    "Returns",
    "Notes",
    "See also",
    "Examples",
    "References",
]


def doc(doc_string: str, template="saqc_methods", source="function_string"):
    def docFunc(meth):
        if template == "saqc_methods":
            meth.__doc__ = saqcMethodsTemplate(doc_string, source)
        return meth

    return docFunc


def getDocstringIndent(doc_string: list) -> str:
    """returns a whitespace string matching the indent size of the passed docstring_list"""
    regular_line = False
    current_line = 0
    while not regular_line:
        # check if line is empty
        if len(doc_string[current_line]) == 0 or re.match(
            " *$", doc_string[current_line]
        ):
            current_line += 1
        else:
            regular_line = True
    # get indent-string (smth. like " ")
    indent_str = re.match(" *", doc_string[current_line])[0]
    return indent_str


def getSections(doc_string: list, indent_str: str) -> dict:
    """Returns a dictionary of sections, with section names as keys"""
    section_lines = [0]
    section_headings = ["Head"]
    for k in range(len(doc_string) - 1):
        # check if next line is an underscore line (section signator):
        if re.match(indent_str + "-+$", doc_string[k + 1]):
            # check if underscore length matches heading length
            if len(doc_string[k + 1]) == len(doc_string[k]):
                section_lines.append(k)
                # skip leading whitespaces
                skip = re.match("^ *", doc_string[k]).span()[-1]
                section_headings.append(doc_string[k][skip:])
    section_lines.append(len(doc_string))
    section_content = [
        doc_string[section_lines[k] : section_lines[k + 1]]
        for k in range(len(section_lines) - 1)
    ]
    section_content = [clearTrailingWhitespace(p) for p in section_content]
    sections = dict(zip(section_headings, section_content))
    return sections


def getParameters(section: list, indent_str: str) -> dict:
    """Returns a dictionary of Parameter documentations, with parameter names as keys"""
    parameter_lines = []
    parameter_names = []
    for k in range(len(section)):
        # try catch a parameter definition start (implicitly assuming parameter names have no
        # whitespaces):
        param = re.match(indent_str + r"(\S+) *:", section[k])
        if param:
            parameter_lines.append(k)
            parameter_names.append(param.group(1))
    parameter_lines.append(len(section))
    parameter_content = [
        section[parameter_lines[k] : parameter_lines[k + 1]]
        for k in range(len(parameter_lines) - 1)
    ]
    parameter_content = [clearTrailingWhitespace(p) for p in parameter_content]
    parameter_dict = dict(zip(parameter_names, parameter_content))
    return parameter_dict


def mkParameter(
    parameter_name: str, parameter_type: str, parameter_doc: str, indent_str: str
) -> dict:
    parameter_doc = parameter_doc.splitlines()
    parameter_doc = [indent_str + " " * 4 + p for p in parameter_doc]
    content = [indent_str + f"{parameter_name} : {parameter_type}"]
    content += parameter_doc
    return {parameter_name: content}


def makeSection(section_name: str, indent_str: str, doc_content: str = None) -> dict:
    content = [indent_str + section_name]
    content += [indent_str + "_" * len(section_name)]
    content += [" "]
    if doc_content:
        content += doc_content.splitlines()
    return {section_name: content}


def composeDocstring(
    section_dict: dict, order: list = FUNC_NAPOLEAN_STYLE_ORDER
) -> str:
    """Compose final docstring from a sections dictionary"""
    doc_string = []
    section_dict = section_dict.copy()
    for sec in order:
        dc = section_dict.pop(sec, [])
        doc_string += dc
        # blank line at section end
        if len(dc) > 0:
            doc_string += [""]
    return "\n".join(doc_string)


def clearTrailingWhitespace(doc: list) -> list:
    """Clears trailing whitespace lines"""
    for k in range(len(doc), 0, -1):
        if not re.match(r"^\s*$", doc[k - 1]):
            break
    return doc[:k]


def saqcMethodsTemplate(doc_string: str, source="function_string"):
    if source == "function_string":
        doc_string = doc_string.splitlines()
        indent_string = getDocstringIndent(doc_string)
        sections = getSections(doc_string, indent_str=indent_string)
        sections.pop("Returns", None)
        returns_section = makeSection(section_name="Returns", indent_str=indent_string)
        out_para = mkParameter(
            parameter_name="out",
            parameter_type="saqc.SaQC",
            parameter_doc="An :py:meth:`saqc.SaQC` object, holding the (possibly) modified data",
            indent_str=indent_string,
        )
        returns_section["Returns"] += out_para["out"]
        sections.update(returns_section)
        doc_string = composeDocstring(
            section_dict=sections, order=FUNC_NAPOLEAN_STYLE_ORDER
        )
    return doc_string
@@ -225,7 +225,7 @@ def _plotVarWithFlags(
 flags_i[~mask] = np.nan
 # Skip plot, if the test did not have no effect on the all over flagging result. This avoids
 # legend overflow
-if ~(flags_i >= level).any():
+if ~(flags_i > level).any():
 continue
 # Also skip plot, if all flagged values are np.nans (to catch flag missing and masked results mainly)
@@ -254,7 +254,7 @@ def _plotVarWithFlags(
 def _plotFlags(ax, datser, flags, na_mask, level, scatter_kwargs):
-is_flagged = flags.astype(float) >= level
+is_flagged = flags.astype(float) > level
 is_flagged = is_flagged[~na_mask]
 is_flagged = datser[is_flagged[is_flagged].index]
 ax.scatter(is_flagged.index, is_flagged.values, **scatter_kwargs)
...
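The switch from >= to > goes together with the new default level = UNFLAGGED in the plot function above: since UNFLAGGED is the smallest possible flag value, flags >= UNFLAGGED is true for every sample, whereas flags > UNFLAGGED picks out only samples that actually received a flag. A tiny check, assuming the numeric values saqc uses for these constants (-inf and 255.0):

import numpy as np
import pandas as pd

UNFLAGGED, BAD = -np.inf, 255.0
flags = pd.Series([UNFLAGGED, UNFLAGGED, BAD, 100.0])

print((flags >= UNFLAGGED).sum())  # 4 -> every point would be scattered as "flagged"
print((flags > UNFLAGGED).sum())   # 2 -> only the genuinely flagged points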
@@ -20,20 +20,20 @@ with open("README.md", "r") as fh:
 setup(
 name="saqc",
 version=version,
-author="Bert Palm, David Schaefer, Peter Luenenschloss, Lennard Schmidt",
+author="Bert Palm, David Schaefer, Peter Luenenschloss, Lennart Schmidt",
 author_email="david.schaefer@ufz.de",
 description="Data quality checking and processing tool/framework",
 long_description=long_description,
 long_description_content_type="text/markdown",
 url="https://git.ufz.de/rdm-software/saqc",
 packages=find_packages(exclude=("tests",)),
-python_requires=">=3.7, <3.10",
+python_requires=">=3.7",
 install_requires=[
 "Click==8.0.*",
 "dtw==1.4.*",
 "matplotlib>=3.4,<3.6",
-"numba==0.54.*",
-"numpy==1.20.*",
+"numba>=0.54",
+"numpy==1.21.5",
 "outlier-utils==0.0.3",
 "pyarrow==6.0.*",
 "pandas==1.3.*",
...
@@ -34,35 +34,25 @@ clean:
 # make doctest, make documentation, make clean
 doc:
-# generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
 # generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M doctest "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-python scripts/modify_html_API.py
 rm -f *.automodsumm
 rm -f *.automodapi
 rm -f moduleAPIs/*.automodsumm
 rm -f moduleAPIs/*.automodapi
 rm -f */*.automodsumm
-rm -f -r coredoc
 # make documentation
 doconly:
-# generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
 # generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-python scripts/modify_html_API.py
 # make test, clean up
 testonly:
 # generate parent fake module for the functions to be documented
-python scripts/make_doc_module.py -p "saqc/funcs" -sr ".." -su "funcSummaries"
-# generate environment table from dictionary
 python scripts/make_env_tab.py
 @$(SPHINXBUILD) -M doctest "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 rm -f *.automodsumm
...
@@ -16,7 +16,7 @@ The tutorial guides through the following steps:
 * :ref:`Data <cook_books/OutlierDetection:Data>`
 * :ref:`Initialisation <cook_books/OutlierDetection:Initialisation>`
-#. We will see how to apply different smoothing methods and models to the data in order to obtain usefull residue
+#. We will see how to apply different smoothing methods and models to the data in order to obtain usefull residual
 variables.
@@ -29,12 +29,12 @@ The tutorial guides through the following steps:
 * :ref:`Evaluation and Visualisation <cook_books/OutlierDetection:Visualisation>`
-#. We will see how we can obtain residues and scores from the calculated model curves.
+#. We will see how we can obtain residuals and scores from the calculated model curves.
-* :ref:`Residues and Scores <cook_books/OutlierDetection:Residues and Scores>`
-* :ref:`Residues <cook_books/OutlierDetection:Residues>`
+* :ref:`Residuals and Scores <cook_books/OutlierDetection:Residuals and Scores>`
+* :ref:`Residuals <cook_books/OutlierDetection:Residuals>`
 * :ref:`Scores <cook_books/OutlierDetection:Scores>`
 * :ref:`Optimization by Decomposition <cook_books/OutlierDetection:Optimization by Decomposition>`
@@ -218,31 +218,31 @@ To see all the results obtained so far, plotted in one figure window, we make use
 :alt:
-Residues and Scores
+Residuals and Scores
 -------------------
-Residues
+Residuals
 ^^^^^^^^
-We want to evaluate the residues of one of our models model, in order to score the outlierish-nes of every point.
+We want to evaluate the residuals of one of our models model, in order to score the outlierish-nes of every point.
 Therefor we just stick to the initially calculated rolling mean curve.
-First, we retrieve the residues via the :py:meth:`~saqc.SaQC.processGeneric` method.
+First, we retrieve the residuals via the :py:meth:`~saqc.SaQC.processGeneric` method.
 This method always comes into play, when we want to obtain variables, resulting from basic algebraic
 manipulations of one or more input variables.
-For obtaining the models residues, we just subtract the model data from the original data and assign the result
-of this operation to a new variable, called ``incidents_residues``. This Assignment, we, as usual,
+For obtaining the models residuals, we just subtract the model data from the original data and assign the result
+of this operation to a new variable, called ``incidents_residuals``. This Assignment, we, as usual,
 control via the ``target`` parameter.
 .. doctest:: exampleOD
->>> qc = qc.processGeneric(['incidents', 'incidents_mean'], target='incidents_residues', func=lambda x, y: x - y)
+>>> qc = qc.processGeneric(['incidents', 'incidents_mean'], target='incidents_residuals', func=lambda x, y: x - y)
 Scores
 ^^^^^^
-Next, we score the residues simply by computing their `Z-scores <https://en.wikipedia.org/wiki/Standard_score>`_.
+Next, we score the residuals simply by computing their `Z-scores <https://en.wikipedia.org/wiki/Standard_score>`_.
 The Z-score of a point $\ ``x``\ $, relative to its surrounding $\ ``D``\ $, evaluates to $\ ``Z(x) = \frac{x - \mu(D)}{\sigma(D)}``\ $.
 So, if we would like to roll with a window of a fixed size of *27* periods through the data and calculate the *Z*\ -score
@@ -257,7 +257,7 @@ function:
 .. doctest:: exampleOD
->>> qc = qc.roll(field='incidents_residues', target='incidents_scores', func=z_score, window='27D')
+>>> qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D')
 Optimization by Decomposition
@@ -277,13 +277,13 @@ Meaning that it has constant temporal distances between subsequent meassurements
 In order to tweak our calculations and make them much more stable, it might be useful to decompose the scoring
 into seperate calls to the :py:meth:`~saqc.SaQC.roll` function, by calculating the series of the
-residues *mean* and *standard deviation* seperately:
+residuals *mean* and *standard deviation* seperately:
 .. doctest:: exampleOD
->>> qc = qc.roll(field='incidents_residues', target='residues_mean', window='27D', func=np.mean)
->>> qc = qc.roll(field='incidents_residues', target='residues_std', window='27D', func=np.std)
->>> qc = qc.processGeneric(field=['incidents_scores', "residues_mean", "residues_std"], target="residues_norm", func=lambda this, mean, std: (this - mean) / std)
+>>> qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
+>>> qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
+>>> qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", func=lambda this, mean, std: (this - mean) / std)
 With huge datasets, this will be noticably faster, compared to the method presented :ref:`initially <cook_books/OutlierDetection:Scores>`\ ,
 because ``saqc`` dispatches the rolling with the basic numpy statistic methods to an optimized pandas built-in.
@@ -297,7 +297,7 @@ We simply combine them via the
 .. doctest:: exampleOD
->>> qc = qc.processGeneric(field=['incidents_residues','incidents_mean','incidents_median'], target='incidents_scores', func=lambda x,y,z: abs((x-y) / z))
+>>> qc = qc.processGeneric(field=['incidents_residuals','incidents_mean','incidents_median'], target='incidents_scores', func=lambda x,y,z: abs((x-y) / z))
 Let's have a look at the resulting scores:
@@ -376,7 +376,7 @@ In order to improve our flagging result, we could additionally assume, that the
 are those with an incidents count that is deviating by a margin of more than
 *20* from the 2 week average.
-This is equivalent to imposing the additional condition, that an outlier must relate to a sufficiently large residue.
+This is equivalent to imposing the additional condition, that an outlier must relate to a sufficiently large residual.
 Unflagging
 ^^^^^^^^^^
@@ -385,19 +385,19 @@ We can do that posterior to the preceeding flagging step, by *removing*
 some flags based on some condition.
 In order want to *unflag* those values, that do not relate to
-sufficiently large residues, we assign them the :py:const:`~saqc.constants.UNFLAGGED` flag.
+sufficiently large residuals, we assign them the :py:const:`~saqc.constants.UNFLAGGED` flag.
 Therefore, we make use of the :py:meth:`~saqc.SaQC.flagGeneric` method.
 This method usually comes into play, when we want to assign flags based on the evaluation of logical expressions.
-So, we check out, which residues evaluate to a level below *20*\ , and assign the
+So, we check out, which residuals evaluate to a level below *20*\ , and assign the
 flag value for :py:const:`~saqc.constants.UNFLAGGED`. This value defaults to
 to ``-np.inf`` in the default translation scheme, wich we selected implicitly by not specifying any special scheme in the
 generation of the :py:class:`~Core.Core.SaQC>` object in the :ref:`beginning <cook_books/OutlierDetection:Initialisation>`.
 .. doctest:: exampleOD
->>> qc = qc.flagGeneric(field=['incidents','incidents_residues'], target="incidents", func=lambda x,y: isflagged(x) & (y < 50), flag=-np.inf)
+>>> qc = qc.flagGeneric(field=['incidents','incidents_residuals'], target="incidents", func=lambda x,y: isflagged(x) & (y < 50), flag=-np.inf)
 Notice, that we passed the desired flag level to the :py:attr:`flag` keyword in order to perform an
 "unflagging" instead of the usual flagging. The :py:attr:`flag` keyword can be passed to all the functions
@@ -419,11 +419,11 @@ Including multiple conditions
 If we do not want to first set flags, only to remove the majority of them in the next step, we also
 could circumvent the :ref:`unflagging <cook_books/OutlierDetection:Unflagging>` step, by adding to the call to
-:py:meth:`~saqc.SaQC.flagRange` the condition for the residues having to be above *20*
+:py:meth:`~saqc.SaQC.flagRange` the condition for the residuals having to be above *20*
 .. doctest:: exampleOD
->>> qc = qc.flagGeneric(field=['incidents_scores', 'incidents_residues'], target='incidents', func=lambda x, y: (x > 3) & (y > 20))
+>>> qc = qc.flagGeneric(field=['incidents_scores', 'incidents_residuals'], target='incidents', func=lambda x, y: (x > 3) & (y > 20))
 >>> qc.plot("incidents") # doctest: +SKIP
...
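The doctests in this file pass a z_score callable into :py:meth:`~saqc.SaQC.roll`; its definition sits outside the excerpt. The sketch below is a plausible stand-in consistent with the surrounding text (window passed as an array ``D``, score ``(x - mean(D)) / std(D)``, here evaluated for the window's last value; the tutorial may score the window center instead), not the tutorial's exact code:

import numpy as np

def z_score(D):
    # Z-score of the window's last value relative to the whole window D
    return (D[-1] - np.mean(D)) / np.std(D)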
@@ -2,8 +2,8 @@
 ..
 .. SPDX-License-Identifier: GPL-3.0-or-later
-residues
-========
+residuals
+=========
@@ -11,5 +11,5 @@ residues
 .. autosummary::
-~SaQC.calculatePolynomialResidues
-~SaQC.calculateRollingResidues
+~SaQC.calculatePolynomialResiduals
+~SaQC.calculateRollingResiduals
@@ -59,7 +59,7 @@ Getting Started
 drift <funcSummaries/drift>
 curvefit <funcSummaries/curvefit>
 interpolation <funcSummaries/interpolation>
-residues <funcSummaries/residues>
+residuals <funcSummaries/residuals>
 tools <funcSummaries/tools>
 flagtools <funcSummaries/flagtools>
 rolling <funcSummaries/rolling>
...
@@ -80,6 +80,6 @@ Features
 * define and use custom schemes to translate your flags to and from SaQC
 * - |sacProc|
 - * modify your data by :ref:`interpolations <cook_books/DataRegularisation:Interpolation>`, corrections and :ref:`transformations <cook_books/DataRegularisation:Aggregation>`
-* calculate data products, such as :ref:`residues or outlier scores <cook_books/OutlierDetection:Residues and Scores>`
+* calculate data products, such as :ref:`residuals or outlier scores <cook_books/OutlierDetection:Residuals and Scores>`
 * - |sacMV|
 - * apply :ref:`multivariate flagging functions <cook_books/MultivariateFlagging:Multivariate Flagging>`
.. SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
..
.. SPDX-License-Identifier: GPL-3.0-or-later
SaQC
====
.. automodapi:: sphinxdoc.coredoc
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import ast
import os
import click
import pkgutil
import shutil
import re
from collections import OrderedDict
import pickle

new_line_re = "(\r\n|[\r\n])"


def rm_section(dcstring, section, _return_section=False):
    """
    Detects a section in a docstring and (default) removes it, or (_return_section=True) returns it
    """
    section_re = f"{new_line_re}(?P<s_name>[^\n\r]{{2,}}){new_line_re}(?P<s_dash>-{{2,}}){new_line_re}"
    triggers = re.finditer(section_re, dcstring)
    matches = [
        (trigger.groupdict()["s_name"], trigger.span())
        for trigger in triggers
        if len(trigger.groupdict()["s_name"]) == len(trigger.groupdict()["s_dash"])
    ] + [(None, (len(dcstring), None))]
    sections = [m[0] for m in matches]
    starts = ends = 0
    if section in sections:
        i = sections.index(section)
        starts = matches[i][1][0]
        ends = matches[i + 1][1][0]
    if _return_section:
        return dcstring[starts:ends]
    else:
        return dcstring[:starts] + dcstring[ends:]


def rm_parameter(dcstring, parameter):
    """
    remove a parameters documentation from a function docstring
    """
    paramatches = _get_paramatches(dcstring)
    start = end = 0
    for p in paramatches:
        if parameter == p.groupdict()["paraname"]:
            start = re.search(p[0], dcstring).span()[0]
            try:
                end = dcstring.find(next(paramatches)[0])
            except StopIteration:
                end = len(re.sub(new_line_re + "$", "", dcstring))
    return dcstring[0:start] + dcstring[end:]


def get_parameter(dcstr):
    """
    returns the list of parameters and their defaults, documented in a docstrings Parameters section
    """
    paramatches = _get_paramatches(dcstr)
    return [
        (p.groupdict()["paraname"], p.groupdict()["paradefaults"]) for p in paramatches
    ]


def _get_paramatches(dcstr):
    parastr = rm_section(dcstr, "Parameters", _return_section=True)
    match_re = f"{new_line_re}(?P<paraname>[\S]+) : [^\n\r]*(default (?P<paradefaults>[^\n\r]*))?"
    return re.finditer(match_re, parastr)


def parse_func_dcstrings(m_paths):
    func_dict = {}
    for m in m_paths:
        with open(m) as f:
            lines = f.readlines()
        module_ast = ast.parse("".join(lines))
        funcs = [node for node in module_ast.body if isinstance(node, ast.FunctionDef)]
        for func in funcs:
            dcstr = ast.get_docstring(func)
            if func.name[0] == "_" or (dcstr is None):
                continue
            dcstr = rm_section(dcstr, "Returns")
            dcstr = rm_parameter(dcstr, "data")
            dcstr = rm_parameter(dcstr, "flags")
            parameters = get_parameter(dcstr)
            parameters = [f"{p[0]}={p[1]}" if p[1] else p[0] for p in parameters]
            signature = f"def {func.name}({', '.join(parameters)}):"
            # get @register module registration if present
            reg_module = None
            r = [d for d in func.decorator_list if d.func.id == "register"]
            if r:
                rm = [kw.value.s for kw in r[0].keywords if kw.arg == "module"]
                if rm:
                    reg_module = rm[0]
            func_dict[f"{os.path.splitext(os.path.basename(m))[0]}.{func.name}"] = (
                signature,
                dcstr,
                reg_module,
            )
    return func_dict


def parse_module_dcstrings(m_paths):
    mod_dict = {}
    for m in m_paths:
        with open(m) as f:
            lines = f.readlines()
        mod_docstr = ast.get_docstring(ast.parse("".join(lines)))
        mod_dict[f"{os.path.splitext(os.path.basename(m))[0]}"] = mod_docstr or ""
    return mod_dict


def make_doc_module(targetpath, func_dict, doc_mod_structure):
    for doc_mod in [
        d for d in doc_mod_structure.keys() if not re.search("_dcstring$", d)
    ]:
        with open(os.path.join(targetpath, f"{doc_mod}.py"), "w+") as f:
            mod_string = [
                '"""\n' + doc_mod_structure.get(doc_mod + "_dcstring", "") + '\n"""'
            ]
            mod_funcs = doc_mod_structure[doc_mod]
            for func in mod_funcs:
                mod_string.append(func_dict[func][0])
                mod_string.append(' """')
                # indent the docstring:
                indented_doc_string = "\n".join(
                    [f" {l}" for l in func_dict[func][1].splitlines()]
                )
                mod_string.append(indented_doc_string)
                mod_string.append(' """')
                mod_string.append(" pass")
                mod_string.append("")
                mod_string.append("")
            f.write("\n".join(mod_string))
    return 0


def make_doc_core(sphinxroot, func_dict, doc_mod_structure):
    targetfolder = os.path.join(sphinxroot, "sphinxdoc/coredoc")
    coresource = os.path.join(sphinxroot, os.path.normpath("saqc/core/core.py"))
    if os.path.isdir(targetfolder):
        shutil.rmtree(targetfolder)
    os.makedirs(targetfolder, exist_ok=True)
    # parse real core.py
    with open(coresource) as f:
        corelines = f.readlines()
    # find SaQC class def
    coreast = ast.parse("".join(corelines))
    startline = None
    endline = None
    for node in coreast.body:
        if isinstance(node, ast.ClassDef):
            if node.name == "SaQC":
                startline = node.lineno
        elif startline and (not endline):
            endline = node.lineno
    start = corelines[: endline - 1]
    end = corelines[endline - 1 :]
    tab = " "
    for doc_mod in [
        d for d in doc_mod_structure.keys() if not re.search("_dcstring$", d)
    ]:
        with open(os.path.join(targetfolder, f"core.py"), "w+") as f:
            mod_string = []
            mod_funcs = doc_mod_structure[doc_mod]
            for func in mod_funcs:
                def_string = func_dict[func][0]
                i_pos = re.match("def [^ ]*\(", def_string).span()[-1]
                def_string = def_string[:i_pos] + "self, " + def_string[i_pos:]
                def_string = tab + def_string
                mod_string.append(def_string)
                mod_string.append(2 * tab + '"""')
                # indent the docstring:
                indented_doc_string = "\n".join(
                    [2 * tab + f"{l}" for l in func_dict[func][1].splitlines()]
                )
                mod_string.append(indented_doc_string)
                mod_string.append(2 * tab + '"""')
                mod_string.append(2 * tab + "pass")
                mod_string.append("")
                mod_string.append("")
            newcore = (
                "".join(start) + "\n" + "\n".join(mod_string) + "\n" + "".join(end)
            )
            f.write(newcore)
    with open(os.path.join(targetfolder, f"__init__.py"), "w+") as f:
        init_content = [
            "# ! /usr/bin/env python",
            "# -*- coding: utf-8 -*-",
            "from sphinxdoc.coredoc.core import SaQC",
        ]
        f.write("\n".join(init_content))
    return 0


def makeModuleAPIs(modules, folder_path="moduleAPIs", pck_path="Functions"):
    f_path = os.path.abspath(folder_path)
    for m in modules:
        lines = []
        lines += [m]
        lines += ["=" * len(m)]
        lines += [""]
        lines += [f".. automodapi:: {pck_path}.{m}"]
        lines += [" " * 3 + ":no-heading:"]
        with open(os.path.join(f_path, f"{pck_path}{m}.rst"), "w") as f:
            for l in lines:
                f.write(l + "\n")
    pass


def makeModuleSummaries(modules, folder_path="funcSummaries"):
    f_path = os.path.abspath(folder_path)
    if os.path.isdir(f_path):
        shutil.rmtree(f_path)
    os.makedirs(f_path, exist_ok=True)
    for m in [m for m in modules.keys() if m.split("_")[-1] != "dcstring"]:
        lines = []
        lines += [m]
        lines += ["=" * len(m)]
        lines += [""]
        lines += [modules[m + "_dcstring"]]
        lines += [""]
        lines += [f".. currentmodule:: saqc", ""]
        lines += [".. autosummary::", ""]
        for func in modules[m]:
            lines += [3 * " " + f"~SaQC.{func.split('.')[-1]}"]
        with open(os.path.join(f_path, f"{m}.rst"), "w") as f:
            for l in lines:
                f.write(l + "\n")
    pass


@click.command()
@click.option(
    "-p",
    "--pckpath",
    type=str,
    required=True,
    default="saqc/funcs",
    help="Relative path to the package to be documented (relative to sphinx root).",
)
@click.option(
    "-sr",
    "--sphinxroot",
    type=str,
    required=True,
    default="../..",
    help="Relative path to the sphinx root.",
)
@click.option(
    "-su",
    "--summaries",
    type=str,
    required=True,
    default="funcSummaries",
    help="Target path for summaries.",
)
def main(pckpath, sphinxroot, summaries):
    root_path = os.path.abspath(sphinxroot)
    pkg_path = os.path.join(root_path, pckpath)
    coretrg = os.path.join(sphinxroot, "sphinxdoc/coredoc")
    modules = []
    # collect modules
    for _, modname, _ in pkgutil.walk_packages(path=[pkg_path], onerror=lambda x: None):
        modules.append(modname)
    # if os.path.isdir(coretrg):
    #     shutil.rmtree(coretrg)
    # os.makedirs(coretrg, exist_ok=True)
    # parse all the functions
    module_paths = [os.path.join(pkg_path, f"{m}.py") for m in modules]
    mod_dict = parse_module_dcstrings(module_paths)
    mod_dict = dict(
        zip([k + "_dcstring" for k in mod_dict.keys()], list(mod_dict.values()))
    )
    func_dict = parse_func_dcstrings(module_paths)
    # module docs
    doc_struct = {m: [] for m in modules}
    for dm in func_dict.keys():
        module = re.search("([^ .]*)\.[^ ]*$", dm).group(1)
        doc_struct[module].append(dm)
    doc_struct.update(mod_dict)
    makeModuleSummaries(doc_struct, summaries)
    doc_mod_structure = {"saqc": [f for f in func_dict.keys()], "saqc_dcstring": ""}
    make_doc_core(root_path, func_dict, doc_mod_structure)


if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import click
import pkgutil
import ast
import shutil


def parse_imports(path):
    modules = []
    file = open(path)
    lines = file.readlines()
    for node in ast.iter_child_nodes(ast.parse("".join(lines))):
        if isinstance(node, ast.ImportFrom) | isinstance(node, ast.Import):
            modules += [x.name for x in node.names] + [
                x.asname for x in node.names if x.asname is not None
            ]
    file.close()
    return modules


@click.command()
@click.option(
    "-p",
    "--pckpath",
    type=str,
    required=True,
    default="saqc/funcs",
    help="Relative path to the package to be documented (relative to sphinx root).",
)
@click.option(
    "-t",
    "--targetpath",
    type=str,
    required=True,
    default="sphinxdoc/internal_doc_rst",
    help="Output folder path (relative to sphinx root). Will be overridden if already existent.",
)
@click.option(
    "-sr",
    "--sphinxroot",
    type=str,
    required=True,
    default="..",
    help="Relative path to the sphinx root.",
)
def main(pckpath, targetpath, sphinxroot):
    root_path = os.path.abspath(sphinxroot)
    targetpath = os.path.join(root_path, targetpath)
    pkg_path = os.path.join(root_path, pckpath)
    modules = []
    for _, modname, _ in pkgutil.walk_packages(path=[pkg_path], onerror=lambda x: None):
        modules.append(modname)
    emptyline = [""]
    # clear target directory:
    if os.path.isdir(targetpath):
        shutil.rmtree(targetpath)
    os.mkdir(targetpath)
    for module in modules:
        imports = parse_imports(os.path.join(pkg_path, f"{module}.py"))
        skiplist = [f"\t:skip: {k}" for k in imports]
        section = [module] + ["=" * len(module)]
        automodapi_directive = [
            ".. automodapi:: " + pckpath.replace("/", ".") + "." + module
        ]
        no_heading = [f"\t:no-heading:"]
        to_write = (
            emptyline
            + section
            + emptyline
            + automodapi_directive
            + skiplist
            + no_heading
        )
        to_write = "".join([f"{k}\r\n" for k in to_write])
        with open(os.path.join(targetpath, f"{module}.rst"), "w+") as f:
            f.write(to_write)


if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import click
import time


@click.command()
@click.option(
    "-src",
    "--source",
    type=str,
    required=True,
    default="sphinxdoc.coredoc.SaQC",
)
@click.option(
    "-trg",
    "--target",
    type=str,
    required=True,
    default="saqc.SaQC",
)
@click.option(
    "-br",
    "--builddir",
    type=str,
    required=True,
    default="_build",
    help="Relative path to the build dir.",
)
def main(source, target, builddir):
    builddir = os.path.abspath(builddir)
    apidir = os.path.join(builddir, os.path.normpath("html/_api"))
    os.remove(os.path.join(apidir, target + ".html"))
    with open(os.path.join(apidir, source + ".html"), "r") as f:
        APIstring = f.read()
    # APIstring = APIstring.replace('sphinxdoc.coredoc.core', 'saqc')
    APIstring = APIstring.replace(source, target)
    with open(os.path.join(apidir, target + ".html"), "w+") as f:
        f.write(APIstring)


if __name__ == "__main__":
    main()
@@ -15,7 +15,7 @@ import dios
 from saqc import BAD, UNFLAGGED
 from saqc.core import initFlagsLike
 from saqc.funcs.tools import maskTime
-from saqc.funcs.residues import calculatePolynomialResidues, calculateRollingResidues
+from saqc.funcs.residuals import calculatePolynomialResiduals, calculateRollingResiduals
 from tests.fixtures import *
@@ -30,19 +30,19 @@ def test_modelling_polyFit_forRegular(dat):
 data = data + 10 * np.sin(np.arange(0, len(data.indexes[0])))
 data = dios.DictOfSeries(data)
 flags = initFlagsLike(data)
-result1, _ = calculatePolynomialResidues(data, "data", flags, 11, 2, numba=False)
-result2, _ = calculatePolynomialResidues(data, "data", flags, 11, 2, numba=True)
+result1, _ = calculatePolynomialResiduals(data, "data", flags, 11, 2, numba=False)
+result2, _ = calculatePolynomialResiduals(data, "data", flags, 11, 2, numba=True)
 assert (result1["data"] - result2["data"]).abs().max() < 10**-10
-result3, _ = calculatePolynomialResidues(
+result3, _ = calculatePolynomialResiduals(
 data, "data", flags, "110min", 2, numba=False
 )
 assert result3["data"].equals(result1["data"])
-result4, _ = calculatePolynomialResidues(
+result4, _ = calculatePolynomialResiduals(
 data, "data", flags, 11, 2, numba=True, min_periods=11
 )
 assert (result4["data"] - result2["data"]).abs().max() < 10**-10
 data.iloc[13:16] = np.nan
-result5, _ = calculatePolynomialResidues(
+result5, _ = calculatePolynomialResiduals(
 data, "data", flags, 11, 2, numba=True, min_periods=9
 )
 assert result5["data"].iloc[10:19].isna().all()
@@ -55,7 +55,7 @@ def test_modelling_rollingMean_forRegular(dat):
 )
 data = dios.DictOfSeries(data)
 flags = initFlagsLike(data)
-calculateRollingResidues(
+calculateRollingResiduals(
 data,
 "data",
 flags,
@@ -64,7 +64,7 @@ def test_modelling_rollingMean_forRegular(dat):
 min_periods=0,
 center=True,
 )
-calculateRollingResidues(
+calculateRollingResiduals(
 data,
 "data",
 flags,
...