Skip to content
Snippets Groups Projects
Commit c9b6fe73 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

fixed shift and resample

parent bb792270
No related branches found
No related tags found
3 merge requests!271Static expansion of regular expressions,!260Follow-Up Translations,!237Flagger Translations
......@@ -337,40 +337,16 @@ def mapToOriginal(
@register(masking='none', module="resampling")
def shift(
data: DictOfSeries,
field: str,
flagger: Flagger,
freq: str,
method: Literal["fshift", "bshift", "nshift"]="nshift",
to_drop: Optional[Union[Any, Sequence[Any]]]=None,
empty_intervals_flag: Optional[str]=None,
freq_check: Optional[Literal["check", "auto"]]=None, # TODO: not a user decision
**kwargs
) -> Tuple[DictOfSeries, Flagger]:
data, flagger = copy(data, field, flagger, field + '_original')
data, flagger = _shift(
data, field, flagger, freq, method=method, to_drop=to_drop,
empty_intervals_flag=empty_intervals_flag, freq_check=freq_check, **kwargs
)
return data, flagger
def _shift(
data: DictOfSeries,
field: str,
flagger: Flagger,
freq: str,
method: Literal["fshift", "bshift", "nshift"] = "nshift",
freq_check: Optional[Literal["check", "auto"]] = None,
freq_check: Optional[Literal["check", "auto"]] = None, # TODO: not a user decision
**kwargs
) -> Tuple[DictOfSeries, Flagger]:
"""
Function to shift data points to regular (equidistant) timestamps.
Values and Flags get shifted according to the keyword passed to the `method` parameter.
Note: all data nans get excluded defaultly from shifting. If `to_drop` is ``None``, - all *BAD* flagged values get
excluded as well.
Function to shift data and flags to a regular (equidistant) timestamp grid, according to ``method``.
Parameters
----------
......@@ -412,6 +388,26 @@ def _shift(
The flagger object, holding flags and additional Informations related to `data`.
Flags values and shape may have changed relatively to the flagger input.
"""
data, flagger = copy(data, field, flagger, field + '_original')
return _shift(data, field, flagger, freq, method=method, freq_check=freq_check, **kwargs)
def _shift(
data: DictOfSeries,
field: str,
flagger: Flagger,
freq: str,
method: Literal["fshift", "bshift", "nshift"] = "nshift",
freq_check: Optional[Literal["check", "auto"]] = None,
**kwargs
) -> Tuple[DictOfSeries, Flagger]:
"""
Function to shift data points to regular (equidistant) timestamps.
See Also
--------
shift : Main caller, docstring
"""
flagged = isflagged(flagger[field], kwargs['to_mask'])
datcol = data[field]
datcol[flagged] = np.nan
......@@ -436,7 +432,7 @@ def _shift(
return data, flagger
@register(masking='field', module="resampling")
@register(masking='none', module="resampling")
def resample(
data: DictOfSeries,
field: str,
......@@ -449,9 +445,6 @@ def resample(
max_invalid_consec_f: Optional[int]=None,
max_invalid_total_f: Optional[int]=None,
flag_agg_func: Callable[[pd.Series], float]=max,
empty_intervals_flag: float = BAD,
to_drop: Optional[Union[Any, Sequence[Any]]]=None,
all_na_2_empty: bool=False,
freq_check: Optional[Literal["check", "auto"]]=None,
**kwargs
) -> Tuple[DictOfSeries, Flagger]:
......@@ -480,45 +473,48 @@ def resample(
----------
data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data.
field : str
The fieldname of the column, holding the data-to-be-resampled.
flagger : saqc.flagger.Flagger
A flagger object, holding flags and additional Informations related to `data`.
freq : str
An Offset String, that will be interpreted as the frequency you want to resample your data with.
agg_func : Callable
The function you want to use for aggregation.
method: {'fagg', 'bagg', 'nagg'}, default 'bagg'
Specifies which intervals to be aggregated for a certain timestamp. (preceding, succeeding or
"surrounding" interval). See description above for more details.
max_invalid_total_d : {None, int}, default None
Maximum number of invalid (nan) datapoints, allowed per resampling interval. If max_invalid_total_d is
exceeded, the interval gets resampled to nan. By default (``np.inf``), there is no bound to the number of nan
values in an interval and only intervals containing ONLY nan values or those, containing no values at all,
get projected onto nan
max_invalid_consec_d : {None, int}, default None
Maximum number of consecutive invalid (nan) data points, allowed per resampling interval.
If max_invalid_consec_d is exceeded, the interval gets resampled to nan. By default (np.inf),
there is no bound to the number of consecutive nan values in an interval and only intervals
containing ONLY nan values, or those containing no values at all, get projected onto nan.
max_invalid_total_f : {None, int}, default None
Same as `max_invalid_total_d`, only applying for the flags. The flag regarded as "invalid" value,
is the one passed to empty_intervals_flag (default=``BAD``).
Also this is the flag assigned to invalid/empty intervals.
max_invalid_consec_f : {None, int}, default None
Same as `max_invalid_total_f`, only applying onto flags. The flag regarded as "invalid" value, is the one passed
to empty_intervals_flag. Also this is the flag assigned to invalid/empty intervals.
flag_agg_func : Callable, default: max
The function you want to aggregate the flags with. It should be capable of operating on the flags dtype
(usually ordered categorical).
empty_intervals_flag : float, default BAD
A Flag, that you want to assign to invalid intervals. Invalid are those intervals, that contain nan values only,
or no values at all. Furthermore the empty_intervals_flag is the flag, serving as "invalid" identifyer when
checking for `max_total_invalid_f` and `max_consec_invalid_f patterns`.
to_drop : {None, str, List[str]}, default None
Flags that refer to values you want to drop before resampling - effectively excluding values that are flagged
with a flag in to_drop from the resampling process - this means that they also will not be counted in the
the `max_consec`/`max_total evaluation`. `to_drop` = ``None`` results in NO flags being dropped initially.
freq_check : {None, 'check', 'auto'}, default None
* ``None``: do not validate frequency-string passed to `freq`
......@@ -535,63 +531,38 @@ def resample(
The flagger object, holding flags and additional Informations related to `data`.
Flags values and shape may have changed relatively to the flagger input.
"""
data = data.copy()
flagged = isflagged(flagger[field], kwargs['to_mask'])
datcol = data[field]
flagscol = flagger[field]
drop_mask = getDropMask(field, to_drop, flagger, [])
datcol.drop(datcol[drop_mask].index, inplace=True)
datcol[flagged] = np.nan
freq = evalFreqStr(freq, freq_check, datcol.index)
flagscol.drop(flagscol[drop_mask].index, inplace=True)
# create a dummys
if all_na_2_empty and datcol.dropna().empty:
# Todo: This needs discussion. See issue #GL170
datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
flagscol = pd.Series([], index=pd.DatetimeIndex([]), name=field)
# clear the past
flagger.history[field] = flagger.history[field].reindex(datcol.index)
flagger[field] = flagscol
# do the resampling
else:
datcol = aggregate2Freq(
datcol,
method,
freq,
agg_func,
fill_value=np.nan,
max_invalid_total=max_invalid_total_d,
max_invalid_consec=max_invalid_consec_d,
)
flagscol = aggregate2Freq(
flagscol,
method,
freq,
flag_agg_func,
fill_value=empty_intervals_flag,
max_invalid_total=max_invalid_total_f,
max_invalid_consec=max_invalid_consec_f,
)
kws = dict(
method=method,
freq=freq,
agg_func=flag_agg_func,
fill_value=UNTOUCHED,
max_invalid_total=max_invalid_total_f,
max_invalid_consec=max_invalid_consec_f,
)
flagger = applyFunctionOnHistory(
flagger, field,
hist_func=aggregate2Freq, hist_kws=kws,
mask_func=aggregate2Freq, mask_kws=kws,
last_column=flagscol
)
datcol = aggregate2Freq(
datcol,
method,
freq,
agg_func,
fill_value=np.nan,
max_invalid_total=max_invalid_total_d,
max_invalid_consec=max_invalid_consec_d,
)
dummy = pd.Series(UNTOUCHED, index=datcol.index, dtype=float)
kws = dict(
method=method,
freq=freq,
agg_func=flag_agg_func,
fill_value=UNTOUCHED,
max_invalid_total=max_invalid_total_f,
max_invalid_consec=max_invalid_consec_f,
)
flagger = applyFunctionOnHistory(
flagger, field,
hist_func=aggregate2Freq, hist_kws=kws,
mask_func=aggregate2Freq, mask_kws=kws,
last_column=dummy
)
data[field] = datcol
return data, flagger
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment