From 9c7b797c9d058d71294c40f1553d7a000d02df0e Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Wed, 24 Mar 2021 18:36:21 +0100 Subject: [PATCH] moved and simplified `applyFunctionOnHistory` --- saqc/flagger/flags.py | 68 --------------------------------- saqc/flagger/history.py | 76 +++++++++++++++++++++++++++++++++++-- saqc/funcs/interpolation.py | 6 +-- saqc/funcs/resampling.py | 13 +++---- 4 files changed, 82 insertions(+), 81 deletions(-) diff --git a/saqc/flagger/flags.py b/saqc/flagger/flags.py index d2bd79def..11e5f598f 100644 --- a/saqc/flagger/flags.py +++ b/saqc/flagger/flags.py @@ -345,73 +345,5 @@ def initFlagsLike( return Flags(result) -def applyFunctionOnHistory( - flags: Flags, column, hist_func, hist_kws, mask_func, mask_kws, last_column=None, func_handle_df=False -): - """ - Apply function on history. - - Two functions must be given. Both are called for each column in the History. One on History.hist, the - other on History.mask. Both take a pd.Series as first arg, which is the column from hist or mask respectively. - - Parameters - ---------- - flags : Flags - Flags object holding the History in question - column : str - name of the column holding the history in question - hist_func : callable - function to apply on `History.hist` (flags) - hist_kws : dict - hist-function keywords dict - mask_func : callable - function to apply on `History.mask` (force mask) - mask_kws : dict - mask-function keywords dict - last_column : pd.Series or None, default None - The last column to apply. If None, no extra column is appended. - func_handle_df : bool - If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the - function must handle `pd.Dataframes` as first input. If `False`, each column is passed - separately, thus the functions must handle those. 
- - Notes - ----- - After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`, - and the `.mask` is casted to bool, to ensure a consistent History. - - Returns - ------- - Copy of Flags with altered History (in column) - """ - flags = flags.copy() - history = flags.history[column] - new_history = History() - - if func_handle_df: - history.hist = hist_func(history.hist, **hist_kws) - history.mask = hist_func(history.mask, **mask_kws) - - else: - for pos in history.columns: - new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws) - new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws) - - # handle unstable state - if last_column is None: - new_history.mask.iloc[:, -1:] = True - else: - if isinstance(last_column, str) and last_column == 'dummy': - last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float) - - new_history.append(last_column, force=True) - - # assure a boolean mask - new_history.mask = new_history.mask.fillna(False).astype(bool) - - flags.history[column] = new_history - return flags - - # for now we keep this name Flagger = Flags diff --git a/saqc/flagger/history.py b/saqc/flagger/history.py index 011a2dd41..0f122db1f 100644 --- a/saqc/flagger/history.py +++ b/saqc/flagger/history.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from __future__ import annotations -from typing import Tuple, Type +from typing import Tuple, Type, Union, Literal import pandas as pd import numpy as np from saqc.constants import * @@ -420,8 +420,6 @@ def appendNewerHistory(original: History, newer: History) -> History: ------- History with appended columns """ - original = original.copy() - if not original.index.equals(newer.index): raise ValueError("Index of histories does not match") @@ -432,6 +430,78 @@ def appendNewerHistory(original: History, newer: History) -> History: original.mask.loc[:, append_mask.columns] = append_mask assert original.columns.equals(pd.Index(range(len(original.columns)))) + return original 
+def applyFunctionOnHistory( + history: History, + hist_func: callable, + hist_kws: dict, + mask_func: callable, + mask_kws: dict, + last_column: Union[pd.Series, Literal['dummy'], None] = None, + func_handle_df: bool = False, +): + """ + Apply function on each column in history. + + Two functions must be given. Both are called for each column in the History unless ``func_handle_df=True`` is + given. One is called on ``History.hist``, the other on ``History.mask``. + Both functions must take a pd.Series as first arg, which is the column from hist or mask respectively. If + ``func_handle_df=True`` each function must take a ``pd.DataFrame`` as first argument, holding all columns + at once. Each function must return the same type as its first argument. + + Parameters + ---------- + history : History + History object to alter + hist_func : callable + function to apply on `History.hist` (flags DataFrame) + hist_kws : dict + hist-function keywords dict + mask_func : callable + function to apply on `History.mask` (force mask DataFrame) + mask_kws : dict + mask-function keywords dict + last_column : pd.Series or None, default None + The last column to apply. If None, no extra column is appended. + func_handle_df : bool + If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the + functions must handle a `pd.DataFrame` as first input. If `False`, each column is passed + separately, thus the functions must handle those. + + Notes + ----- + After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`, + and the `.mask` is cast to bool, to ensure a consistent History.
+ + Returns + ------- + history with altered columns + """ + new_history = History() + + if func_handle_df: + history.hist = hist_func(history.hist, **hist_kws) + history.mask = hist_func(history.mask, **mask_kws) + + else: + for pos in history.columns: + new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws) + new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws) + + # handle unstable state + if last_column is None: + new_history.mask.iloc[:, -1:] = True + else: + if isinstance(last_column, str) and last_column == 'dummy': + last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float) + + new_history.append(last_column, force=True) + + # assure a boolean mask + new_history.mask = new_history.mask.fillna(False).astype(bool) + + return new_history + diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index c5d4f0768..5c9e8974f 100644 --- a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -12,7 +12,7 @@ from dios import DictOfSeries from saqc.constants import * from saqc.core.register import register, isflagged from saqc.flagger import Flagger -from saqc.flagger.flags import applyFunctionOnHistory +from saqc.flagger.history import applyFunctionOnHistory from saqc.lib.ts_operators import interpolateNANs _SUPPORTED_METHODS = Literal[ @@ -278,8 +278,8 @@ def interpolateIndex( data[field] = inter_data[grid_index] # do the reshaping on the history - flagger = applyFunctionOnHistory( - flagger, field, + flagger.history[field] = applyFunctionOnHistory( + flagger.history[field], hist_func=_resampleOverlapping, hist_kws=dict(freq=freq, fill_value=UNFLAGGED), mask_func=_resampleOverlapping, mask_kws=dict(freq=freq, fill_value=False), last_column='dummy' diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py index 33518f96a..a459e22b7 100644 --- a/saqc/funcs/resampling.py +++ b/saqc/funcs/resampling.py @@ -13,8 +13,8 @@ from dios import DictOfSeries from saqc.constants import * from saqc.core.register 
import register, isflagged -from saqc.flagger.history import appendNewerHistory -from saqc.flagger.flags import Flagger, applyFunctionOnHistory +from saqc.flagger.history import appendNewerHistory, applyFunctionOnHistory +from saqc.flagger.flags import Flagger from saqc.funcs.tools import copy, drop, rename from saqc.funcs.interpolation import interpolateIndex, _SUPPORTED_METHODS from saqc.lib.tools import evalFreqStr, getFreqDelta @@ -540,8 +540,8 @@ def resample( max_invalid_consec=max_invalid_consec_f, ) - flagger = applyFunctionOnHistory( - flagger, field, + flagger.history[field] = applyFunctionOnHistory( + flagger.history[field], hist_func=aggregate2Freq, hist_kws=kws, mask_func=aggregate2Freq, mask_kws=kws, last_column='dummy' @@ -712,7 +712,6 @@ def reindexFlags( else: raise ValueError(f"unknown method {method}") - tmp_flagger = applyFunctionOnHistory(flagger, source, func, func_kws, func, mask_kws, last_column=dummy) - new_hist = appendNewerHistory(flagger.history[field], tmp_flagger.history[source]) - flagger.history[field] = new_hist + history = applyFunctionOnHistory(flagger.history[source], func, func_kws, func, mask_kws, last_column=dummy) + flagger.history[field] = appendNewerHistory(flagger.history[field], history) return data, flagger -- GitLab