From 56482aeb37cbe0397f095a9ec1a5e3cde7da54ee Mon Sep 17 00:00:00 2001 From: Peter Luenenschloss <peter.luenenschloss@ufz.de> Date: Mon, 3 Aug 2020 14:40:10 +0200 Subject: [PATCH] flag generic documented --- saqc/funcs/functions.py | 85 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/saqc/funcs/functions.py b/saqc/funcs/functions.py index 2b46b7f97..011e4426c 100644 --- a/saqc/funcs/functions.py +++ b/saqc/funcs/functions.py @@ -63,6 +63,17 @@ def procGeneric(data, field, flagger, func, nodata=np.nan, **kwargs): transport the name of the test function (here: `procGeneric`) into the flagger, but as we don't set flags here, we simply ignore them + + Parameters + ---------- + data : dios.DictOfSeries + A dictionary of pandas.Series, holding all the data. + field : str + The fieldname of the column, where you want the result from the generic expression's evaluation to be projected + to. + flagger : saqc.flagger + A flagger object, holding flags and additional information related to `data`. + """ data[field] = _execGeneric(flagger, data, func, field, nodata).squeeze() # NOTE: @@ -83,6 +94,80 @@ def procGeneric(data, field, flagger, func, nodata=np.nan, **kwargs): @register def flagGeneric(data, field, flagger, func, nodata=np.nan, **kwargs): + """ + A function to flag a data column by evaluation of a generic expression. + + The expression can depend on any of the fields present in data. + + Formally, what the function does, is the following: + + Let X be an expression, depending on fields f_1, f_2, ..., f_K, (X = X(f_1, f_2, ..., f_K)) + Then for every timestamp t_i in data[field]: + data[field][t_i] is flagged if X(f_1, f_2, ..., f_K) is True. + + Note, that all value series included in the expression to evaluate must be labeled identically to field. + + Note, that the expression is passed in the form of a Callable and that this callable's parameter names are + interpreted as actual names in the data header.
See the examples section to get an idea. + + Note, that all the numpy functions are available within the generic expressions. + + Parameters + ---------- + data : dios.DictOfSeries + A dictionary of pandas.Series, holding all the data. + field : str + The fieldname of the column, where you want the result from the generic expression's evaluation to be projected + to. + flagger : saqc.flagger + A flagger object, holding flags and additional information related to `data`. + func : Callable + The expression that is to be evaluated is passed in form of a callable, with parameter names that will be + interpreted as data column entries. The Callable must return a boolean array-like. + See the examples section to learn more. + nodata : any, default np.nan + The value that indicates missing/invalid data. + + Returns + ------- + data : dios.DictOfSeries + A dictionary of pandas.Series, holding all the data. + flagger : saqc.flagger + The flagger object, holding flags and additional information related to `data`. + Flag values may have changed relative to the flagger input. + + Examples + -------- + Some examples on what to pass to the func parameter: + To flag the variable `field`, if the sum of the variables + "temperature" and "uncertainty" is below zero, you would pass the function: + + >>> lambda temperature, uncertainty: temperature + uncertainty < 0 + + There is the reserved name 'this', that always refers to `field`. So, to flag field if field is negative, you can + also pass: + + >>> lambda this: this < 0 + + If you want to make the flagging dependent on flags already present in the data, you can use the built-in + ``isflagged`` method. For example, to flag the 'temperature', if 'level' is flagged, you would use: + + >>> lambda level: isflagged(level) + + You can furthermore specify a flagging level you want to compare the flags to.
For example, for flagging + 'temperature', if 'level' is flagged at a level named 'doubtfull' or worse, use: + + >>> lambda level: isflagged(level, flag='doubtfull', comparator='<=') + + If you are unsure about the used flagger's flagging level names, you can use the reserved keywords BAD, UNFLAGGED + and GOOD, to refer to the worst (BAD), best (GOOD) or unflagged (UNFLAGGED) flagging levels. For example: + + >>> lambda level: isflagged(level, flag=UNFLAGGED, comparator='==') + + Your expression is also allowed to include pandas and numpy functions: + + >>> lambda level: np.sqrt(level) > 7 + """ # NOTE: # The naming of the func parameter is pretty confusing # as it actually holds the result of a generic expression -- GitLab