From 9c7b797c9d058d71294c40f1553d7a000d02df0e Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 24 Mar 2021 18:36:21 +0100
Subject: [PATCH] moved and simplified `applyFunctionOnHistory`

---
 saqc/flagger/flags.py       | 68 ---------------------------------
 saqc/flagger/history.py     | 76 +++++++++++++++++++++++++++++++++++--
 saqc/funcs/interpolation.py |  6 +--
 saqc/funcs/resampling.py    | 13 +++----
 4 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/saqc/flagger/flags.py b/saqc/flagger/flags.py
index d2bd79def..11e5f598f 100644
--- a/saqc/flagger/flags.py
+++ b/saqc/flagger/flags.py
@@ -345,73 +345,5 @@ def initFlagsLike(
     return Flags(result)
 
 
-def applyFunctionOnHistory(
-        flags: Flags, column, hist_func, hist_kws, mask_func, mask_kws, last_column=None, func_handle_df=False
-):
-    """
-    Apply function on history.
-
-    Two functions must be given. Both are called for each column in the History. One on History.hist, the
-    other on History.mask. Both take a pd.Series as first arg, which is the column from hist or mask respectively.
-
-    Parameters
-    ----------
-    flags : Flags
-        Flags object holding the History in question
-    column : str
-        name of the column holding the history in question
-    hist_func : callable
-        function to apply on `History.hist` (flags)
-    hist_kws : dict
-        hist-function keywords dict
-    mask_func : callable
-        function to apply on `History.mask` (force mask)
-    mask_kws : dict
-        mask-function keywords dict
-    last_column : pd.Series or None, default None
-        The last column to apply. If None, no extra column is appended.
-    func_handle_df : bool
-        If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the
-        function must handle `pd.Dataframes` as first input. If `False`, each column is passed
-        separately, thus the functions must handle those.
-
-    Notes
-    -----
-    After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`,
-    and the `.mask` is casted to bool, to ensure a consistent History.
-
-    Returns
-    -------
-    Copy of Flags with altered History (in column)
-    """
-    flags = flags.copy()
-    history = flags.history[column]
-    new_history = History()
-
-    if func_handle_df:
-        history.hist = hist_func(history.hist, **hist_kws)
-        history.mask = hist_func(history.mask, **mask_kws)
-
-    else:
-        for pos in history.columns:
-            new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws)
-            new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws)
-
-    # handle unstable state
-    if last_column is None:
-        new_history.mask.iloc[:, -1:] = True
-    else:
-        if isinstance(last_column, str) and last_column == 'dummy':
-            last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float)
-
-        new_history.append(last_column, force=True)
-
-    # assure a boolean mask
-    new_history.mask = new_history.mask.fillna(False).astype(bool)
-
-    flags.history[column] = new_history
-    return flags
-
-
 # for now we keep this name
 Flagger = Flags
diff --git a/saqc/flagger/history.py b/saqc/flagger/history.py
index 011a2dd41..0f122db1f 100644
--- a/saqc/flagger/history.py
+++ b/saqc/flagger/history.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 from __future__ import annotations
 
-from typing import Tuple, Type
+from typing import Tuple, Type, Union, Literal
 import pandas as pd
 import numpy as np
 from saqc.constants import *
@@ -420,8 +420,6 @@ def appendNewerHistory(original: History, newer: History) -> History:
     -------
     History with appended columns
     """
-    original = original.copy()
-
     if not original.index.equals(newer.index):
         raise ValueError("Index of histories does not match")
 
@@ -432,6 +430,78 @@ def appendNewerHistory(original: History, newer: History) -> History:
     original.mask.loc[:, append_mask.columns] = append_mask
 
     assert original.columns.equals(pd.Index(range(len(original.columns))))
+
     return original
 
 
+def applyFunctionOnHistory(
+        history: History,
+        hist_func: callable,
+        hist_kws: dict,
+        mask_func: callable,
+        mask_kws: dict,
+        last_column: Union[pd.Series, Literal['dummy'], None] = None,
+        func_handle_df: bool = False,
+):
+    """
+    Apply functions on the columns of a history and return the result as a new History.
+
+    Two functions must be given. Both are called for each column in the History unless ``func_handle_df=True`` is
+    given. One is called on ``History.hist``, the other on ``History.mask``.
+    Both functions must take a pd.Series as first arg, which is the column from hist or mask respectively. If
+    ``func_handle_df=True`` each function must take a ``pd.DataFrame`` as first argument, holding all columns
+    at once. Each function must return the same type as its first argument.
+
+    Parameters
+    ----------
+    history : History
+        History object to read the columns from
+    hist_func : callable
+        function to apply on `History.hist` (flags DataFrame)
+    hist_kws : dict
+        hist-function keywords dict
+    mask_func : callable
+        function to apply on `History.mask` (force mask DataFrame)
+    mask_kws : dict
+        mask-function keywords dict
+    last_column : pd.Series, 'dummy' or None, default None
+        Column to force-append. 'dummy' appends an all-`UNTOUCHED` column. If None, no column is appended.
+    func_handle_df : bool
+        If `True`, the whole History{.hist, .mask} are passed to the given functions, thus the
+        function must handle `pd.Dataframes` as first input. If `False`, each column is passed
+        separately, thus the functions must handle those.
+
+    Notes
+    -----
+    After the functions are called, all `NaN`'s in `History.mask` are replaced with `False`,
+    and the `.mask` is cast to bool, to ensure a consistent History.
+
+    Returns
+    -------
+    new History holding the altered columns
+    """
+    new_history = History()
+
+    if func_handle_df:
+        new_history.hist = hist_func(history.hist, **hist_kws)
+        new_history.mask = mask_func(history.mask, **mask_kws)
+
+    else:
+        for pos in history.columns:
+            new_history.hist[pos] = hist_func(history.hist[pos], **hist_kws)
+            new_history.mask[pos] = mask_func(history.mask[pos], **mask_kws)
+
+    # handle unstable state: without an appended column, set the last mask column to True
+    if last_column is None:
+        new_history.mask.iloc[:, -1:] = True
+    else:
+        if isinstance(last_column, str) and last_column == 'dummy':
+            last_column = pd.Series(UNTOUCHED, index=new_history.index, dtype=float)
+
+        new_history.append(last_column, force=True)
+
+    # assure a boolean mask
+    new_history.mask = new_history.mask.fillna(False).astype(bool)
+
+    return new_history
+
diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py
index c5d4f0768..5c9e8974f 100644
--- a/saqc/funcs/interpolation.py
+++ b/saqc/funcs/interpolation.py
@@ -12,7 +12,7 @@ from dios import DictOfSeries
 from saqc.constants import *
 from saqc.core.register import register, isflagged
 from saqc.flagger import Flagger
-from saqc.flagger.flags import applyFunctionOnHistory
+from saqc.flagger.history import applyFunctionOnHistory
 from saqc.lib.ts_operators import interpolateNANs
 
 _SUPPORTED_METHODS = Literal[
@@ -278,8 +278,8 @@ def interpolateIndex(
     data[field] = inter_data[grid_index]
 
     # do the reshaping on the history
-    flagger = applyFunctionOnHistory(
-        flagger, field,
+    flagger.history[field] = applyFunctionOnHistory(
+        flagger.history[field],
         hist_func=_resampleOverlapping, hist_kws=dict(freq=freq, fill_value=UNFLAGGED),
         mask_func=_resampleOverlapping, mask_kws=dict(freq=freq, fill_value=False),
         last_column='dummy'
diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py
index 33518f96a..a459e22b7 100644
--- a/saqc/funcs/resampling.py
+++ b/saqc/funcs/resampling.py
@@ -13,8 +13,8 @@ from dios import DictOfSeries
 
 from saqc.constants import *
 from saqc.core.register import register, isflagged
-from saqc.flagger.history import appendNewerHistory
-from saqc.flagger.flags import Flagger, applyFunctionOnHistory
+from saqc.flagger.history import appendNewerHistory, applyFunctionOnHistory
+from saqc.flagger.flags import Flagger
 from saqc.funcs.tools import copy, drop, rename
 from saqc.funcs.interpolation import interpolateIndex, _SUPPORTED_METHODS
 from saqc.lib.tools import evalFreqStr, getFreqDelta
@@ -540,8 +540,8 @@ def resample(
         max_invalid_consec=max_invalid_consec_f,
     )
 
-    flagger = applyFunctionOnHistory(
-        flagger, field,
+    flagger.history[field] = applyFunctionOnHistory(
+        flagger.history[field],
         hist_func=aggregate2Freq, hist_kws=kws,
         mask_func=aggregate2Freq, mask_kws=kws,
         last_column='dummy'
@@ -712,7 +712,6 @@ def reindexFlags(
     else:
         raise ValueError(f"unknown method {method}")
 
-    tmp_flagger = applyFunctionOnHistory(flagger, source, func, func_kws, func, mask_kws, last_column=dummy)
-    new_hist = appendNewerHistory(flagger.history[field], tmp_flagger.history[source])
-    flagger.history[field] = new_hist
+    history = applyFunctionOnHistory(flagger.history[source], func, func_kws, func, mask_kws, last_column=dummy)
+    flagger.history[field] = appendNewerHistory(flagger.history[field], history)
     return data, flagger
-- 
GitLab