Skip to content
Snippets Groups Projects
Commit 410a3ea7 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

docstrings in constants.py.

flagConstants now also takes ints in window.
parent 96af27c4
No related branches found
No related tags found
7 merge requests!685Release 2.4,!684Release 2.4,!567Release 2.2.1,!566Release 2.2,!501Release 2.1,!372fix doctest snippets,!355docstring cleanup - part1
#! /usr/bin/env python #! /usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from operator import mod
from typing import Tuple from typing import Tuple
import numpy as np import numpy as np
...@@ -15,7 +14,6 @@ from saqc.core import register, Flags ...@@ -15,7 +14,6 @@ from saqc.core import register, Flags
from saqc.core.register import flagging from saqc.core.register import flagging
from saqc.lib.ts_operators import varQC from saqc.lib.ts_operators import varQC
from saqc.lib.tools import customRoller, getFreqDelta, statPass from saqc.lib.tools import customRoller, getFreqDelta, statPass
from saqc.lib.types import FreqString
@flagging() @flagging()
...@@ -24,46 +22,53 @@ def flagConstants( ...@@ -24,46 +22,53 @@ def flagConstants(
field: str, field: str,
flags: Flags, flags: Flags,
thresh: float, thresh: float,
window: FreqString, window: str,
flag: float = BAD, flag: float = BAD,
**kwargs **kwargs
) -> Tuple[DictOfSeries, Flags]: ) -> Tuple[DictOfSeries, Flags]:
""" """
This function flags plateaus/series of constant values of length `window` if Flag constant data.
their maximum total change is smaller than thresh.
Flags plateaus of constant data if their maximum total change in
a rolling window does not exceed a certain threshold.
Function flags plateaus/series of constant values. Any interval of values y(t),..y(t+n) is flagged, if: Any interval of values y(t),..y(t+n) is flagged, if:
- (1): n > `window`
(1) n > `window` - (2): |y(t + i) - y(t + j)| < `thresh`, for all i,j in [0, 1, ..., n]
(2) |y(t + i) - y(t + j)| < `thresh`, for all i,j in [0, 1, ..., n]
Flag values are (semi-)constant.
Parameters Parameters
---------- ----------
data : dios.DictOfSeries data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data. The data container.
field : str field : str
Name of the column, holding the data-to-be-flagged. A column in flags and data.
flags : saqc.Flags flags : saqc.Flags
Container to store quality flags to data. The flags container.
thresh : float thresh : float
Upper bound for the maximum total change of an interval to be flagged constant. Maximum total change allowed per window.
window : str
Lower bound for the size of an interval to be flagged constant. window : str | int
Size of the moving window. This is the number of observations used
for calculating the statistic. Each window will be a fixed size.
If it is an offset, then this will be the time period of each window.
Each window will be variable-sized, based on the observations included
in the time-period.
flag : float, default BAD flag : float, default BAD
flag to set. Flag to set.
Returns Returns
------- -------
data : dios.DictOfSeries data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data. Unmodified data container
flags : saqc.Flags flags : saqc.Flags
The flags object, holding flags and additional information related to `data`. The flags container
Flags values may have changed, relative to the flags input.
""" """
if not isinstance(window, str): if not isinstance(window, (str, int)):
raise TypeError("window must be offset string.") raise TypeError("window must be offset string or int.")
d = data[field] d = data[field]
# min_periods=2 ensures that at least two non-nan values are present # min_periods=2 ensures that at least two non-nan values are present
...@@ -87,7 +92,7 @@ def flagByVariance( ...@@ -87,7 +92,7 @@ def flagByVariance(
data: DictOfSeries, data: DictOfSeries,
field: str, field: str,
flags: Flags, flags: Flags,
window: FreqString = "12h", window: str,
thresh: float = 0.0005, thresh: float = 0.0005,
maxna: int = None, maxna: int = None,
maxna_group: int = None, maxna_group: int = None,
...@@ -95,7 +100,12 @@ def flagByVariance( ...@@ -95,7 +100,12 @@ def flagByVariance(
**kwargs **kwargs
) -> Tuple[DictOfSeries, Flags]: ) -> Tuple[DictOfSeries, Flags]:
""" """
Function flags plateaus/series of constant values. Any interval of values y(t),..y(t+n) is flagged, if: Flag constant data calculated by its variance.
Flags plateaus of constant data if the variance in a rolling window does not
exceed a certain threshold.
Any interval of values y(t),..y(t+n) is flagged, if:
(1) n > `window` (1) n > `window`
(2) variance(y(t),...,y(t+n)) < `thresh` (2) variance(y(t),...,y(t+n)) < `thresh`
...@@ -103,30 +113,40 @@ def flagByVariance( ...@@ -103,30 +113,40 @@ def flagByVariance(
Parameters Parameters
---------- ----------
data : dios.DictOfSeries data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data. The data container.
field : str field : str
The fieldname of the column, holding the data-to-be-flagged. A column in flags and data.
flags : saqc.Flags flags : saqc.Flags
Container to store quality flags to data. The flags container.
window : str
Only intervals of minimum size "window" have the chance to get flagged as constant intervals window : str | int
thresh : float Size of the moving window. This is the number of observations used
The upper bound, the variance of an interval must not exceed, if the interval wants to be flagged a plateau. for calculating the statistic. Each window will be a fixed size.
If it is an offset, then this will be the time period of each window.
Each window will be variable-sized, based on the observations included
in the time-period.
thresh : float, default 0.0005
Maximum total variance allowed per window.
maxna : int, default None maxna : int, default None
Maximum number of NaNs tolerated in an interval. If more NaNs are present, the Maximum number of NaNs allowed in window.
interval is not flagged as plateau. If more NaNs are present, the window is not flagged.
maxna_group : int, default None maxna_group : int, default None
Same as `maxna` but for consecutive NaNs. Same as `maxna` but for consecutive NaNs.
flag : float, default BAD flag : float, default BAD
flag to set. Flag to set.
Returns Returns
------- -------
data : dios.DictOfSeries data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data. Unmodified data container
flags : saqc.Flags flags : saqc.Flags
The flags object, holding flags and additional information related to `data`. The flags container
Flags values may have changed, relative to the flags input.
""" """
dataseries = data[field] dataseries = data[field]
delta = getFreqDelta(dataseries.index) delta = getFreqDelta(dataseries.index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment