Skip to content
Snippets Groups Projects
Commit 9c7b797c authored by Bert Palm's avatar Bert Palm 🎇
Browse files

moved and simplified `applyFunctionOnHistory`

parent f9d01122
No related branches found
No related tags found
3 merge requests: !271 Static expansion of regular expressions, !260 Follow-Up Translations, !237 Flagger Translations
......@@ -345,73 +345,5 @@ def initFlagsLike(
return Flags(result)
def applyFunctionOnHistory(
        flags: Flags, column, hist_func, hist_kws, mask_func, mask_kws, last_column=None, func_handle_df=False
):
    """
    Apply functions on the history of a single column.

    Two functions must be given: one is applied to ``History.hist``, the other
    to ``History.mask``. With ``func_handle_df=False`` both are called once per
    history column and receive that column as first argument. With
    ``func_handle_df=True`` both are called once and receive the whole
    ``hist`` / ``mask`` object as first argument.

    Parameters
    ----------
    flags : Flags
        Flags object holding the History in question

    column : str
        name of the column holding the history in question

    hist_func : callable
        function to apply on `History.hist` (flags)

    hist_kws : dict
        hist-function keywords dict

    mask_func : callable
        function to apply on `History.mask` (force mask)

    mask_kws : dict
        mask-function keywords dict

    last_column : pd.Series or 'dummy' or None, default None
        The last column to append. If None, no extra column is appended and
        the mask of the last resulting column is set to `True` instead. If the
        string ``'dummy'`` is given, a float column filled with `UNTOUCHED` is
        appended.

    func_handle_df : bool
        If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the
        function must handle `pd.Dataframes` as first input. If `False`, each column is passed
        separately, thus the functions must handle those.

    Notes
    -----
    After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`,
    and the `.mask` is casted to bool, to ensure a consistent History.

    Returns
    -------
    Copy of Flags with altered History (in column)
    """
    # work on a copy, the Flags object passed in stays untouched
    flags = flags.copy()
    history = flags.history[column]
    new_history = History()

    if func_handle_df:
        # The results must end up in `new_history`, as everything below
        # (and the final assignment) operates on it. The mask has its own
        # function and keywords.
        new_history.hist = hist_func(history.hist, **hist_kws)
        new_history.mask = mask_func(history.mask, **mask_kws)

    else:
        for pos in history.columns:
            new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws)
            new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws)

    # handle unstable state
    if last_column is None:
        # no column gets appended, so the mask of the last column is forced to True
        new_history.mask.iloc[:, -1:] = True
    else:
        if isinstance(last_column, str) and last_column == 'dummy':
            last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float)

        new_history.append(last_column, force=True)

    # assure a boolean mask
    new_history.mask = new_history.mask.fillna(False).astype(bool)

    flags.history[column] = new_history
    return flags
# `Flagger` is kept (for now) as an alias of `Flags`, so that code still
# importing the name `Flagger` keeps working.
Flagger = Flags
#!/usr/bin/env python
from __future__ import annotations
from typing import Tuple, Type
from typing import Tuple, Type, Union, Literal
import pandas as pd
import numpy as np
from saqc.constants import *
......@@ -420,8 +420,6 @@ def appendNewerHistory(original: History, newer: History) -> History:
-------
History with appended columns
"""
original = original.copy()
if not original.index.equals(newer.index):
raise ValueError("Index of histories does not match")
......@@ -432,6 +430,78 @@ def appendNewerHistory(original: History, newer: History) -> History:
original.mask.loc[:, append_mask.columns] = append_mask
assert original.columns.equals(pd.Index(range(len(original.columns))))
return original
def applyFunctionOnHistory(
        history: History,
        hist_func: callable,
        hist_kws: dict,
        mask_func: callable,
        mask_kws: dict,
        last_column: Union[pd.Series, Literal['dummy'], None] = None,
        func_handle_df: bool = False,
) -> History:
    """
    Apply functions on each column in history.

    Two functions must be given. Both are called for each column in the History unless
    ``func_handle_df=True`` is given. One is called on ``History.hist``, the other on
    ``History.mask``.

    Both functions must take a ``pd.Series`` as first argument, which is the column from
    hist or mask respectively. If ``func_handle_df=True``, each function must take a
    ``pd.DataFrame`` as first argument, holding all columns at once. The functions must
    return the same type as their first argument.

    Parameters
    ----------
    history : History
        History object to take the columns from. The result is built in a new
        History object, which is returned.

    hist_func : callable
        function to apply on `History.hist` (flags DataFrame)

    hist_kws : dict
        hist-function keywords dict

    mask_func : callable
        function to apply on `History.mask` (force mask DataFrame)

    mask_kws : dict
        mask-function keywords dict

    last_column : pd.Series or 'dummy' or None, default None
        The last column to append. If None, no extra column is appended and
        the mask of the last resulting column is set to `True` instead. If the
        string ``'dummy'`` is given, a float column filled with `UNTOUCHED` is
        appended.

    func_handle_df : bool
        If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the
        function must handle `pd.Dataframes` as first input. If `False`, each column is passed
        separately, thus the functions must handle those.

    Notes
    -----
    After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`,
    and the `.mask` is casted to bool, to ensure a consistent History.

    Returns
    -------
    history with altered columns
    """
    new_history = History()

    if func_handle_df:
        # The results must end up in `new_history` (the object that is
        # post-processed and returned below), not in the History passed in.
        # The mask has its own function and keywords.
        new_history.hist = hist_func(history.hist, **hist_kws)
        new_history.mask = mask_func(history.mask, **mask_kws)

    else:
        for pos in history.columns:
            new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws)
            new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws)

    # handle unstable state
    if last_column is None:
        # no column gets appended, so the mask of the last column is forced to True
        new_history.mask.iloc[:, -1:] = True
    else:
        if isinstance(last_column, str) and last_column == 'dummy':
            last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float)

        new_history.append(last_column, force=True)

    # assure a boolean mask
    new_history.mask = new_history.mask.fillna(False).astype(bool)

    return new_history
......@@ -12,7 +12,7 @@ from dios import DictOfSeries
from saqc.constants import *
from saqc.core.register import register, isflagged
from saqc.flagger import Flagger
from saqc.flagger.flags import applyFunctionOnHistory
from saqc.flagger.history import applyFunctionOnHistory
from saqc.lib.ts_operators import interpolateNANs
_SUPPORTED_METHODS = Literal[
......@@ -278,8 +278,8 @@ def interpolateIndex(
data[field] = inter_data[grid_index]
# do the reshaping on the history
flagger = applyFunctionOnHistory(
flagger, field,
flagger.history[field] = applyFunctionOnHistory(
flagger.history[field],
hist_func=_resampleOverlapping, hist_kws=dict(freq=freq, fill_value=UNFLAGGED),
mask_func=_resampleOverlapping, mask_kws=dict(freq=freq, fill_value=False),
last_column='dummy'
......
......@@ -13,8 +13,8 @@ from dios import DictOfSeries
from saqc.constants import *
from saqc.core.register import register, isflagged
from saqc.flagger.history import appendNewerHistory
from saqc.flagger.flags import Flagger, applyFunctionOnHistory
from saqc.flagger.history import appendNewerHistory, applyFunctionOnHistory
from saqc.flagger.flags import Flagger
from saqc.funcs.tools import copy, drop, rename
from saqc.funcs.interpolation import interpolateIndex, _SUPPORTED_METHODS
from saqc.lib.tools import evalFreqStr, getFreqDelta
......@@ -540,8 +540,8 @@ def resample(
max_invalid_consec=max_invalid_consec_f,
)
flagger = applyFunctionOnHistory(
flagger, field,
flagger.history[field] = applyFunctionOnHistory(
flagger.history[field],
hist_func=aggregate2Freq, hist_kws=kws,
mask_func=aggregate2Freq, mask_kws=kws,
last_column='dummy'
......@@ -712,7 +712,6 @@ def reindexFlags(
else:
raise ValueError(f"unknown method {method}")
tmp_flagger = applyFunctionOnHistory(flagger, source, func, func_kws, func, mask_kws, last_column=dummy)
new_hist = appendNewerHistory(flagger.history[field], tmp_flagger.history[source])
flagger.history[field] = new_hist
history = applyFunctionOnHistory(flagger.history[source], func, func_kws, func, mask_kws, last_column=dummy)
flagger.history[field] = appendNewerHistory(flagger.history[field], history)
return data, flagger
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment