Skip to content
Snippets Groups Projects
Commit 88fab5d3 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

Merge branch 'develop' into interpolation

parents 01aa3f28 64670e1c
No related branches found
No related tags found
3 merge requests!271Static expansion of regular expressions,!260Follow-Up Translations,!237Flagger Translations
...@@ -23,19 +23,19 @@ MaskingStrT = Literal["all", "field", "none"] ...@@ -23,19 +23,19 @@ MaskingStrT = Literal["all", "field", "none"]
@dataclasses.dataclass @dataclasses.dataclass
class CallCtrl: class CallState:
func: callable func: callable
data: dios.DictOfSeries data: dios.DictOfSeries
field: str
flagger: Flagger flagger: Flagger
field: str
args: tuple args: tuple
kwargs: dict kwargs: dict
masking: MaskingStrT = None masking: MaskingStrT
mthresh: float = None mthresh: float
mask: dios.DictOfSeries = None mask: dios.DictOfSeries
def register(masking: MaskingStrT = "all", module: Optional[str] = None): def register(masking: MaskingStrT = "all", module: Optional[str] = None):
...@@ -50,9 +50,9 @@ def register(masking: MaskingStrT = "all", module: Optional[str] = None): ...@@ -50,9 +50,9 @@ def register(masking: MaskingStrT = "all", module: Optional[str] = None):
# nevertheless if it is called plain or via `SaQC.func`. # nevertheless if it is called plain or via `SaQC.func`.
@wraps(func) @wraps(func)
def callWrapper(*args, **kwargs): def callWrapper(*args, **kwargs):
args, kwargs, ctrl = _preCall(func, args, kwargs, masking, func_name) args, kwargs, old_state = _preCall(func, args, kwargs, masking, func_name)
result = func(*args, **kwargs) result = func(*args, **kwargs)
return _postCall(result, ctrl, func_name) return _postCall(result, old_state)
FUNC_MAP[func_name] = SaQCFunction(func_name, callWrapper) FUNC_MAP[func_name] = SaQCFunction(func_name, callWrapper)
...@@ -92,7 +92,7 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn ...@@ -92,7 +92,7 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
arguments to be passed to the actual call arguments to be passed to the actual call
kwargs: dict kwargs: dict
keyword-arguments to be passed to the actual call keyword-arguments to be passed to the actual call
ctrl: CallCtrl state: CallState
control keyword-arguments passed to `_postCall` control keyword-arguments passed to `_postCall`
""" """
...@@ -100,23 +100,28 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn ...@@ -100,23 +100,28 @@ def _preCall(func: callable, args: tuple, kwargs: dict, masking: MaskingStrT, fn
kwargs['to_mask'] = mthresh kwargs['to_mask'] = mthresh
data, field, flagger, *args = args data, field, flagger, *args = args
ctrl = CallCtrl(func, data.copy(), field, flagger.copy(), args, kwargs, masking=masking, mthresh=mthresh)
# handle data - masking # handle data - masking
columns = _getMaskingColumns(data, field, masking) columns = _getMaskingColumns(data, field, masking)
data, mask = _maskData(data, flagger, columns, mthresh) masked_data, mask = _maskData(data, flagger, columns, mthresh)
# store mask # store current state
ctrl.mask = mask state = CallState(
func=func,
data=data, flagger=flagger, field=field,
args=args, kwargs=kwargs,
masking=masking, mthresh=mthresh,
mask=mask
)
# handle flags - clearing # handle flags - clearing
flagger = _prepareFlags(flagger, masking) prepped_flagger = _prepareFlags(flagger, masking)
args = data, field, flagger, *args args = masked_data, field, prepped_flagger, *args
return args, kwargs, ctrl return args, kwargs, state
def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT: def _postCall(result, old_state: CallState) -> FuncReturnT:
""" """
Handler that runs after any call to a saqc-function. Handler that runs after any call to a saqc-function.
...@@ -128,19 +133,16 @@ def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT: ...@@ -128,19 +133,16 @@ def _postCall(result, ctrl: CallCtrl, fname: str) -> FuncReturnT:
result : tuple result : tuple
the result from the called function, namely: data and flagger the result from the called function, namely: data and flagger
ctrl : dict old_state : dict
control keywords from `_preCall` control keywords from `_preCall`
fname : str
Name of the (just) called saqc-function
Returns Returns
------- -------
data, flagger : dios.DictOfSeries, saqc.flagger.Flagger data, flagger : dios.DictOfSeries, saqc.flagger.Flagger
""" """
data, flagger = result data, flagger = result
flagger = _restoreFlags(flagger, ctrl) flagger = _restoreFlags(flagger, old_state)
data = _unmaskData(data, ctrl) data = _unmaskData(data, old_state)
return data, flagger return data, flagger
...@@ -162,7 +164,7 @@ def _getMaskingColumns(data: dios.DictOfSeries, field: str, masking: MaskingStrT ...@@ -162,7 +164,7 @@ def _getMaskingColumns(data: dios.DictOfSeries, field: str, masking: MaskingStrT
if masking == 'field': if masking == 'field':
return pd.Index([field]) return pd.Index([field])
raise ValueError(f"wrong use of `register(masking={ctrl.masking})`") raise ValueError(f"wrong use of `register(masking={masking})`")
def _getMaskingThresh(masking, kwargs, fname): def _getMaskingThresh(masking, kwargs, fname):
...@@ -220,9 +222,18 @@ def _getMaskingThresh(masking, kwargs, fname): ...@@ -220,9 +222,18 @@ def _getMaskingThresh(masking, kwargs, fname):
# TODO: this is heavily undertested # TODO: this is heavily undertested
def _maskData(data, flagger, columns, thresh) -> Tuple[dios.DictOfSeries, dios.DictOfSeries]: def _maskData(data, flagger, columns, thresh) -> Tuple[dios.DictOfSeries, dios.DictOfSeries]:
""" """
Mask data with Nans by flags worse that a threshold and according to masking keyword in decorator. Mask data with Nans by flags worse that a threshold and according to ``masking`` keyword
from the functions decorator.
Returns
-------
masked : dios.DictOfSeries
masked data, same dim as original
mask : dios.DictOfSeries
boolean dios of same dim as `masked`. True, where data was masked, elsewhere False.
""" """
mask = dios.DictOfSeries(columns=columns) mask = dios.DictOfSeries(columns=columns)
data = data.copy()
# we use numpy here because it is faster # we use numpy here because it is faster
for c in columns: for c in columns:
...@@ -250,38 +261,41 @@ def _getMask(flags: Union[np.array, pd.Series], thresh: float) -> Union[np.array ...@@ -250,38 +261,41 @@ def _getMask(flags: Union[np.array, pd.Series], thresh: float) -> Union[np.array
def _prepareFlags(flagger: Flagger, masking) -> Flagger: def _prepareFlags(flagger: Flagger, masking) -> Flagger:
""" """
Clear flags before each call. Prepare flags before each call. Always returns a copy.
Currently this only clears the flags, but in future,
this should be sliced the flagger to the columns, that
the saqc-function needs.
""" """
# Either the index or the columns itself changed # Either the index or the columns itself changed
if masking == 'none': if masking == 'none':
return flagger return flagger.copy()
return initFlagsLike(flagger, initial_value=UNTOUCHED) return initFlagsLike(flagger, initial_value=UNTOUCHED)
def _restoreFlags(flagger: Flagger, ctrl: CallCtrl): def _restoreFlags(flagger: Flagger, old_state: CallState):
if ctrl.masking == 'none': if old_state.masking == 'none':
return flagger return flagger
result = ctrl.flagger
columns = flagger.columns columns = flagger.columns
# take field column and all possibly newly added columns # take field column and all possibly newly added columns
if ctrl.masking == 'field': if old_state.masking == 'field':
columns = columns.difference(ctrl.flagger.columns) columns = columns.difference(old_state.flagger.columns)
columns = columns.append(pd.Index([ctrl.field])) columns = columns.append(pd.Index([old_state.field]))
out = old_state.flagger.copy()
for c in columns: for c in columns:
# this implicitly squash the new-flagger history (RHS) to a single column, which than is appended to # this implicitly squash the new-flagger history (RHS) to a single column, which than is appended to
# the old history (LHS). The new-flagger history possibly consist of multiple columns, one for each # the old history (LHS). The new-flagger history possibly consist of multiple columns, one for each
# time flags was set to the flagger. # time flags was set to the flagger.
result[c] = flagger[c] out[c] = flagger[c]
return result return out
# TODO: this is heavily undertested # TODO: this is heavily undertested
def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries: def _unmaskData(data: dios.DictOfSeries, old_state: CallState) -> dios.DictOfSeries:
""" """
Restore the masked data. Restore the masked data.
...@@ -289,7 +303,7 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries: ...@@ -289,7 +303,7 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
----- -----
Even if this returns data, it work inplace ! Even if this returns data, it work inplace !
""" """
if ctrl.masking == 'none': if old_state.masking == 'none':
return data return data
# we have two options to implement this: # we have two options to implement this:
...@@ -313,28 +327,27 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries: ...@@ -313,28 +327,27 @@ def _unmaskData(data: dios.DictOfSeries, ctrl: CallCtrl) -> dios.DictOfSeries:
# col in new only : new (keep column) # col in new only : new (keep column)
# col in old only : new (ignore, was deleted) # col in old only : new (ignore, was deleted)
old = ctrl # this alias simplifies reading a lot columns = old_state.mask.columns.intersection(data.columns) # in old, in masked, in new
columns = old.mask.columns.intersection(data.columns) # in old, in masked, in new
for c in columns: for c in columns:
# ignore # ignore
if old.data[c].empty or data[c].empty or old.mask[c].empty: if old_state.data[c].empty or data[c].empty or old_state.mask[c].empty:
continue continue
# on index changed, we simply ignore the old data # on index changed, we simply ignore the old data
if not old.data[c].index.equals(data[c].index): if not old_state.data[c].index.equals(data[c].index):
continue continue
restore_old_mask = old.mask[c].to_numpy() & data[c].isna().to_numpy() restore_old_mask = old_state.mask[c].to_numpy() & data[c].isna().to_numpy()
# we have nothing to restore # we have nothing to restore
if not any(restore_old_mask): if not any(restore_old_mask):
continue continue
# restore old values if no new are present # restore old values if no new are present
v_old, v_new = old.data[c].to_numpy(), data[c].to_numpy() old, new = old_state.data[c].to_numpy(), data[c].to_numpy()
data.loc[:, c] = np.where(restore_old_mask, v_old, v_new) data.loc[:, c] = np.where(restore_old_mask, old, new)
return data return data
...@@ -31,11 +31,7 @@ class _HistAccess: ...@@ -31,11 +31,7 @@ class _HistAccess:
self.obj = obj self.obj = obj
def __getitem__(self, key: str) -> History: def __getitem__(self, key: str) -> History:
# we don't know, what the user wants. Although we're not return self.obj._data[key].copy()
# encouraging inplace modification of the history, the
# user may do it, so we remove the cached column here.
self.obj._cache.pop(key, None)
return self.obj._data[key]
def __setitem__(self, key: str, value: Union[History, pd.DataFrame]): def __setitem__(self, key: str, value: Union[History, pd.DataFrame]):
if not isinstance(value, History): if not isinstance(value, History):
......
...@@ -136,6 +136,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd ...@@ -136,6 +136,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
data[field] = _execGeneric(flagger, data, func, field, nodata).squeeze() data[field] = _execGeneric(flagger, data, func, field, nodata).squeeze()
# TODO: the former comment wished to overwrite the column, but i'm not sure -- palmb # TODO: the former comment wished to overwrite the column, but i'm not sure -- palmb
# see #GL177
if field in flagger: if field in flagger:
flagger.drop(field) flagger.drop(field)
...@@ -146,6 +147,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd ...@@ -146,6 +147,7 @@ def process(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd
@register(masking='all', module="generic") @register(masking='all', module="generic")
def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Series], pd.Series], def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Series], pd.Series],
nodata: float = np.nan, flag=BAD, **kwargs) -> Tuple[DictOfSeries, Flagger]: nodata: float = np.nan, flag=BAD, **kwargs) -> Tuple[DictOfSeries, Flagger]:
# TODO : fix docstring, check if all still works
""" """
a function to flag a data column by evaluation of a generic expression. a function to flag a data column by evaluation of a generic expression.
...@@ -211,7 +213,6 @@ def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Se ...@@ -211,7 +213,6 @@ def flag(data: DictOfSeries, field: str, flagger: Flagger, func: Callable[[pd.Se
>>> lambda level: isflagged(level, flag=DOUBTFUL, comparator='>') >>> lambda level: isflagged(level, flag=DOUBTFUL, comparator='>')
# TODO : fix text
If you are unsure about the used flaggers flagging level names, you can use the reserved key words BAD, UNFLAGGED If you are unsure about the used flaggers flagging level names, you can use the reserved key words BAD, UNFLAGGED
and GOOD, to refer to the worst (BAD), best(GOOD) or unflagged (UNFLAGGED) flagging levels. For example. and GOOD, to refer to the worst (BAD), best(GOOD) or unflagged (UNFLAGGED) flagging levels. For example.
......
...@@ -44,6 +44,7 @@ def copy(data: DictOfSeries, field: str, flagger: Flagger, new_field: str, **kwa ...@@ -44,6 +44,7 @@ def copy(data: DictOfSeries, field: str, flagger: Flagger, new_field: str, **kwa
raise ValueError(f"{field}: field already exist") raise ValueError(f"{field}: field already exist")
data[new_field] = data[field].copy() data[new_field] = data[field].copy()
# implicit copy in history access
flagger.history[new_field] = flagger.history[field] flagger.history[new_field] = flagger.history[field]
return data, flagger return data, flagger
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment