Skip to content
Snippets Groups Projects
Commit a13af8b8 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

fixed breaks.py

parent 6fefd4dd
No related branches found
No related tags found
4 merge requests: !271 Static expansion of regular expressions, !260 Follow-Up Translations, !237 Flagger Translations, !232 WIP: Fuzzy testing
...@@ -12,6 +12,7 @@ from typing import Tuple ...@@ -12,6 +12,7 @@ from typing import Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import pandas.tseries.frequencies
from dios import DictOfSeries from dios import DictOfSeries
...@@ -27,7 +28,7 @@ def flagMissing( ...@@ -27,7 +28,7 @@ def flagMissing(
data: DictOfSeries, data: DictOfSeries,
field: ColumnName, field: ColumnName,
flagger: Flagger, flagger: Flagger,
nodata: float=np.nan, nodata: float = np.nan,
**kwargs **kwargs
) -> Tuple[DictOfSeries, Flagger]: ) -> Tuple[DictOfSeries, Flagger]:
""" """
...@@ -59,7 +60,7 @@ def flagMissing( ...@@ -59,7 +60,7 @@ def flagMissing(
else: else:
mask = datacol == nodata mask = datacol == nodata
flagger = flagger.setFlags(field, loc=mask, **kwargs) flagger[mask, field] = kwargs['flag']
return data, flagger return data, flagger
...@@ -76,7 +77,7 @@ def flagIsolated( ...@@ -76,7 +77,7 @@ def flagIsolated(
The function flags arbitrary large groups of values, if they are surrounded by sufficiently The function flags arbitrary large groups of values, if they are surrounded by sufficiently
large data gaps. large data gaps.
A gap is a timespan containing either no data or invalid (usually `nan`) and flagged data only. A gap is a timespan containing either no data or invalid data only (usually `nan`).
Parameters Parameters
---------- ----------
...@@ -85,7 +86,7 @@ def flagIsolated( ...@@ -85,7 +86,7 @@ def flagIsolated(
field : str field : str
The fieldname of the column, holding the data-to-be-flagged. The fieldname of the column, holding the data-to-be-flagged.
flagger : saqc.flagger.Flagger flagger : saqc.flagger.Flagger
A flagger object, holding flags and additional informations related to `data`. A flagger object
gap_window : str gap_window : str
The minimum size of the gap before and after a group of valid values, making this group considered an The minimum size of the gap before and after a group of valid values, making this group considered an
isolated group. See condition (2) and (3) isolated group. See condition (2) and (3)
...@@ -98,8 +99,7 @@ def flagIsolated( ...@@ -98,8 +99,7 @@ def flagIsolated(
data : dios.DictOfSeries data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data. A dictionary of pandas.Series, holding all the data.
flagger : saqc.flagger.Flagger flagger : saqc.flagger.Flagger
The flagger object, holding flags and additional Informations related to `data`. The flagger object, holding flags and additional information related to `data`.
Flags values may have changed relatively to the flagger input.
Notes Notes
----- -----
...@@ -107,8 +107,8 @@ def flagIsolated( ...@@ -107,8 +107,8 @@ def flagIsolated(
is considered to be isolated, if: is considered to be isolated, if:
1. :math:`t_{k+1} - t_n <` `group_window` 1. :math:`t_{k+1} - t_n <` `group_window`
2. None of the :math:`x_j` with :math:`0 < t_k - t_j <` `gap_window`, is valid or unflagged (preceding gap). 2. None of the :math:`x_j` with :math:`0 < t_k - t_j <` `gap_window`, is valid (preceding gap).
3. None of the :math:`x_j` with :math:`0 < t_j - t_{k+n} <` `gap_window`, is valid or unflagged (succeeding gap). 3. None of the :math:`x_j` with :math:`0 < t_j - t_{k+n} <` `gap_window`, is valid (succeeding gap).
See Also See Also
-------- --------
...@@ -118,10 +118,9 @@ def flagIsolated( ...@@ -118,10 +118,9 @@ def flagIsolated(
gap_window = pd.tseries.frequencies.to_offset(gap_window) gap_window = pd.tseries.frequencies.to_offset(gap_window)
group_window = pd.tseries.frequencies.to_offset(group_window) group_window = pd.tseries.frequencies.to_offset(group_window)
col = data[field].mask(flagger.isFlagged(field)) mask = data[field].isna()
mask = col.isnull()
flags = pd.Series(data=0, index=col.index, dtype=bool) flags = pd.Series(data=0, index=mask.index, dtype=bool)
for srs in groupConsecutives(mask): for srs in groupConsecutives(mask):
if np.all(~srs): if np.all(~srs):
start = srs.index[0] start = srs.index[0]
...@@ -133,8 +132,7 @@ def flagIsolated( ...@@ -133,8 +132,7 @@ def flagIsolated(
if right.all(): if right.all():
flags[start:stop] = True flags[start:stop] = True
flagger = flagger.setFlags(field, flags, **kwargs) flagger[mask, field] = kwargs['flag']
return data, flagger return data, flagger
...@@ -145,7 +143,7 @@ def flagJumps( ...@@ -145,7 +143,7 @@ def flagJumps(
flagger: Flagger, flagger: Flagger,
thresh: float, thresh: float,
winsz: FreqString, winsz: FreqString,
min_periods: IntegerWindow=1, min_periods: IntegerWindow = 1,
**kwargs **kwargs
) -> Tuple[DictOfSeries, Flagger]: ) -> Tuple[DictOfSeries, Flagger]:
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment