From 56482aeb37cbe0397f095a9ec1a5e3cde7da54ee Mon Sep 17 00:00:00 2001
From: Peter Luenenschloss <peter.luenenschloss@ufz.de>
Date: Mon, 3 Aug 2020 14:40:10 +0200
Subject: [PATCH] flag generic documented

---
 saqc/funcs/functions.py | 85 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/saqc/funcs/functions.py b/saqc/funcs/functions.py
index 2b46b7f97..011e4426c 100644
--- a/saqc/funcs/functions.py
+++ b/saqc/funcs/functions.py
@@ -63,6 +63,17 @@ def procGeneric(data, field, flagger, func, nodata=np.nan, **kwargs):
     transport the name of the test function (here: `procGeneric`)
     into the flagger, but as we don't set flags here, we simply
     ignore them
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, where you want the result from the generic expression's evaluation to be projected
+        to.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+
     """
     data[field] = _execGeneric(flagger, data, func, field, nodata).squeeze()
     # NOTE:
@@ -83,6 +94,80 @@ def procGeneric(data, field, flagger, func, nodata=np.nan, **kwargs):
 
 @register
 def flagGeneric(data, field, flagger, func, nodata=np.nan, **kwargs):
+    """
+    A function to flag a data column by evaluation of a generic expression.
+
+    The expression can depend on any of the fields present in data.
+
+    Formally, what the function does, is the following:
+
+    Let X be an expression, depending on fields f_1, f_2,...f_K, (X = X(f_1, f_2,...f_K))
+    Then, for every timestamp t_i in data[field]:
+    data[field][t_i] is flagged if X(f_1, f_2, ..., f_K) evaluates to True at t_i.
+
+    Note, that all value series included in the expression to evaluate must be labeled identically to field.
+
+    Note, that the expression is passed in the form of a Callable and that this callable's parameter names are
+    interpreted as actual names in the data header. See the examples section to get an idea.
+
+    Note, that all the numpy functions are available within the generic expressions.
+
+    Parameters
+    ----------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    field : str
+        The fieldname of the column, where you want the result from the generic expression's evaluation to be projected
+        to.
+    flagger : saqc.flagger
+        A flagger object, holding flags and additional information related to `data`.
+    func : Callable
+        The expression that is to be evaluated is passed in form of a callable, with parameter names that will be
+        interpreted as data column entries. The Callable must return a boolean array-like.
+        See the examples section to learn more.
+    nodata : any, default np.nan
+        The value that indicates missing/invalid data
+
+    Returns
+    -------
+    data : dios.DictOfSeries
+        A dictionary of pandas.Series, holding all the data.
+    flagger : saqc.flagger
+        The flagger object, holding flags and additional information related to `data`.
+        Flag values may have changed relative to the flagger input.
+
+    Examples
+    --------
+    Some examples on what to pass to the func parameter:
+    To flag the variable `field`, if the sum of the variables
+    "temperature" and "uncertainty" is below zero, you would pass the function:
+
+    >>> lambda temperature, uncertainty: temperature + uncertainty < 0
+
+    There is the reserved name 'this', that always refers to `field`. So, to flag field if field is negative, you can
+    also pass:
+
+    >>> lambda this: this < 0
+
+    If you want to make the flagging dependent on flags already present in the data, you can use the built-in
+    ``isflagged`` method. For example, to flag the 'temperature', if 'level' is flagged, you would use:
+
+    >>> lambda level: isflagged(level)
+
+    You can furthermore specify a flagging level, you want to compare the flags to. For example, for flagging
+    'temperature', if 'level' is flagged at a level named 'doubtfull' or worse, use:
+
+    >>> lambda level: isflagged(level, flag='doubtfull', comparator='<=')
+
+    If you are unsure about the used flagger's flagging level names, you can use the reserved keywords BAD, UNFLAGGED
+    and GOOD, to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example:
+
+    >>> lambda level: isflagged(level, flag=UNFLAGGED, comparator='==')
+
+    Your expression is also allowed to include pandas and numpy functions:
+
+    >>> lambda level: np.sqrt(level) > 7
+    """
     # NOTE:
     # The naming of the func parameter is pretty confusing
     # as it actually holds the result of a generic expression
-- 
GitLab