diff --git a/saqc/funcs/harm_functions.py b/saqc/funcs/harm_functions.py index 093c235c2c811fe81ca803e602cccb9c50097056..88e1fe533da9292bd5fd314257d3aafa92d826b2 100644 --- a/saqc/funcs/harm_functions.py +++ b/saqc/funcs/harm_functions.py @@ -11,8 +11,6 @@ from saqc.funcs.proc_functions import proc_interpolateGrid, proc_shift, proc_for logger = logging.getLogger("SaQC") -# some wrapper functions to mimicking classic harmonization look and feel - @register def harm_shift2Grid(data, field, flagger, freq, method="nshift", drop_flags=None, empty_intervals_flag=None, **kwargs): @@ -32,7 +30,7 @@ def harm_aggregate2Grid( data, flagger = proc_fork(data, field, flagger) data, flagger = proc_resample(data, field, flagger, freq, agg_func=value_func, flag_agg_func=flag_func, method=method, empty_intervals_flag=empty_intervals_flag, drop_flags=drop_flags, - **kwargs) + all_na_2_empty=True, **kwargs) return data, flagger diff --git a/saqc/funcs/proc_functions.py b/saqc/funcs/proc_functions.py index 509e1d0e70e5b18a2b8c1023aeb137259277d7c2..8dbf6c695cb14c28fead3d2b755bf2029f44b7bf 100644 --- a/saqc/funcs/proc_functions.py +++ b/saqc/funcs/proc_functions.py @@ -163,7 +163,7 @@ def proc_interpolateGrid(data, field, flagger, freq, method, inter_order=2, drop a grid point to be interpolated. Parameters - ---------.copy() + --------- freq : Offset String The frequency of the grid you want to interpolate your data at. 
@@ -198,8 +198,6 @@ def proc_interpolateGrid(data, field, flagger, freq, method, inter_order=2, drop """ datcol = data[field] - if datcol.empty: - return data, flagger datcol = datcol.copy() flagscol = flagger.getFlags(field) if empty_intervals_flag is None: @@ -210,7 +208,11 @@ def proc_interpolateGrid(data, field, flagger, freq, method, inter_order=2, drop drop_mask |= datcol.isna() datcol[drop_mask] = np.nan datcol.dropna(inplace=True) - + if datcol.empty: + data[field] = datcol + reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, **kwargs) + flagger = flagger.slice(drop=field).merge(reshaped_flagger) + return data, flagger # account for annoying case of subsequent frequency aligned values, differing exactly by the margin # 2*freq: spec_case_mask = datcol.index.to_series() @@ -313,7 +315,7 @@ def proc_interpolateGrid(data, field, flagger, freq, method, inter_order=2, drop @register def proc_resample(data, field, flagger, freq, agg_func=np.mean, method='bagg', max_invalid_total_d=np.inf, max_invalid_consec_d=np.inf, max_invalid_consec_f=np.inf, max_invalid_total_f=np.inf, - flag_agg_func=max, empty_intervals_flag=None, drop_flags=None, **kwargs): + flag_agg_func=max, empty_intervals_flag=None, drop_flags=None, all_na_2_empty=False, **kwargs): """ Function to resample the data. Afterwards the data will be sampled at regular (equidistant) timestamps (or Grid points). Sampling intervals therefor get aggregated with a function, specifyed by 'agg_func' parameter and @@ -342,7 +344,7 @@ def proc_resample(data, field, flagger, freq, agg_func=np.mean, method='bagg', m agg_func : function The function you want to use for aggregation. - + method: {'fagg', 'bagg', 'nagg'}, default 'bagg' Specifies which intervals to be aggregated for a certain timestamp. (preceeding, succeeding or "surrounding" interval). See description above for more details. 
@@ -394,6 +396,17 @@ def proc_resample(data, field, flagger, freq, agg_func=np.mean, method='bagg', m drop_mask = dropper(field, drop_flags, flagger, []) datcol.drop(datcol[drop_mask].index, inplace=True) flagscol.drop(flagscol[drop_mask].index, inplace=True) + if all_na_2_empty: + if datcol.dropna().empty: + datcol = pd.Series([], index=pd.DatetimeIndex([]), name=field) + + if datcol.empty: + # for consistency reasons - return empty data/flags column when there is no valid data left + # after filtering. + data[field] = datcol + reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, **kwargs) + flagger = flagger.slice(drop=field).merge(reshaped_flagger) + return data, flagger datcol = aggregate2Freq(datcol, method, freq, agg_func, fill_value=np.nan, max_invalid_total=max_invalid_total_d, max_invalid_consec=max_invalid_consec_d) @@ -442,9 +455,6 @@ def proc_shift(data, field, flagger, freq, method, drop_flags=None, empty_interv values being dropped initially. 
""" - if data[field].empty: - return data, flagger - data = data.copy() datcol = data[field] flagscol = flagger.getFlags(field) @@ -456,6 +466,12 @@ def proc_shift(data, field, flagger, freq, method, drop_flags=None, empty_interv drop_mask |= datcol.isna() datcol[drop_mask] = np.nan datcol.dropna(inplace=True) + if datcol.empty: + data[field] = datcol + reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, **kwargs) + flagger = flagger.slice(drop=field).merge(reshaped_flagger) + return data, flagger + flagscol.drop(drop_mask[drop_mask].index, inplace=True) datcol = shift2Freq(datcol, method, freq, fill_value=np.nan) diff --git a/saqc/funcs/spikes_detection.py b/saqc/funcs/spikes_detection.py index e54f936e00c448042aa8f612ca606565ca37152c..0f1f3a6177851f470896f1a94906d8b00992fb1e 100644 --- a/saqc/funcs/spikes_detection.py +++ b/saqc/funcs/spikes_detection.py @@ -391,6 +391,8 @@ def spikes_flagSlidingZscore( # prepare data, work on numpy arrays for the fulfilling pleasure of performance d = data[field].dropna() + if d.empty: + return data, flagger all_indices = np.arange(len(d.index)) x = (d.index - d.index[0]).total_seconds().values y = d.values diff --git a/saqc/lib/plotting.py b/saqc/lib/plotting.py index bbe898b98850d55951d177cf3a43171497f09d04..099c41555bb48e141c5f46a7cc56747834ea35b9 100644 --- a/saqc/lib/plotting.py +++ b/saqc/lib/plotting.py @@ -34,7 +34,6 @@ _figsize = (16, 9) _layout_data_to_table_ratio = [5, 1] _show_info_table = True - # order is important, because # latter may overwrite former _cols = [ @@ -52,7 +51,6 @@ _cols = [ "changed", ] - _plotstyle: Dict[str, dict] = { # flags "unflagged": dict(marker='.', ls='none', c="silver", label="UNFLAGGED"), @@ -71,7 +69,13 @@ def _show(): plt.show() -def plotAllHook(data, flagger, targets=None, show_info_table: bool = True, ): +def plotAllHook( + data, + flagger, + targets=None, + show_info_table: bool = True, + annotations: Optional[dios.DictOfSeries] = None, +): 
__importHelper() targets = flagger.flags.columns if targets is None else targets _plotMultipleVariables( @@ -80,7 +84,8 @@ def plotAllHook(data, flagger, targets=None, show_info_table: bool = True, ): data_new=data, flagger_new=flagger, targets=targets, - show_info_table=show_info_table + show_info_table=show_info_table, + annotations=annotations, ) plt.tight_layout() _show() @@ -94,6 +99,7 @@ def plotHook( sources: List[str], targets: List[str], plot_name: str = "", + annotations: Optional[dios.DictOfSeries] = None, ): assert len(targets) > 0 __importHelper() @@ -105,6 +111,7 @@ def plotHook( flagger_new=flagger_new, targets=targets, show_info_table=_show_info_table, + annotations=annotations, ) if len(targets) == 1: @@ -122,6 +129,7 @@ def _plotMultipleVariables( flagger_new: BaseFlagger, targets: List[str], show_info_table: bool = True, + annotations=None, ): """ Plot data and flags for a multiple target-variables. @@ -144,7 +152,7 @@ def _plotMultipleVariables( flagger_new flagger that hold flags corresponding to data_new targets - a single(!) 
string that indicates flags in flagger_new.flags + a list of strings, each indicating a column in flagger_new.flags show_info_table Show a info-table on the right of reference-data and data or not @@ -175,15 +183,22 @@ def _plotMultipleVariables( allaxs = [] for n in range(nfig): fig, axs = plt.subplots(nrows=ncols[n], ncols=2 if show_tab else 1, **layout) + for ax in axs: var = next(tgen) tar, _ = _getDataFromVar(data_old, data_new, flagger_old, flagger_new, var) + if show_tab: plot_ax, tab_ax = ax _plotInfoTable(tab_ax, tar, _plotstyle, len(tar['data'])) else: plot_ax = ax + _plotFromDicts(plot_ax, tar, _plotstyle) + + if annotations is not None and var in annotations: + _annotate(plot_ax, tar, annotations[var]) + plot_ax.set_title(str(var)) allaxs.append(plot_ax) @@ -195,6 +210,28 @@ def _plotMultipleVariables( ax.autoscale() +def simplePlot( + data: dios.DictOfSeries, + flagger: BaseFlagger, + field: str, + plot_name=None, + show_info_table: bool = True, + annotations=None, +): + __importHelper() + _plotSingleVariable(data_old=None, + data_new=data, + flagger_old=None, + flagger_new=flagger, + sources=[], + targets=[field], + show_reference_data=False, + show_info_table=show_info_table, + plot_name=plot_name or str(field), + annotations=annotations,) + _show() + + def _plotSingleVariable( data_old: dios.DictOfSeries, data_new: dios.DictOfSeries, @@ -204,7 +241,8 @@ def _plotSingleVariable( targets: List[str], show_reference_data=True, show_info_table: bool = True, - plot_name="current data" + plot_name="current data", + annotations=None, ): """ Plot data and flags for a single target-variable. 
@@ -295,14 +333,16 @@ def _plotSingleVariable( gs_count += 1 # plot data - if show_tab: - ax = _plotDataWithTable(fig, outer_gs[gs_count], curr, show_tab=show_tab) - ax.set_title(f"{plot_name}") - # also share y-axis with ref - if ref and show_ref: - ax.get_shared_y_axes().join(ax, allaxs[-1]) - allaxs.append(ax) - gs_count += 1 + ax = _plotDataWithTable(fig, outer_gs[gs_count], curr, show_tab=show_tab) + ax.set_title(f"{plot_name}") + # also share y-axis with ref + if ref and show_ref: + ax.get_shared_y_axes().join(ax, allaxs[-1]) + allaxs.append(ax) + gs_count += 1 + + if annotations is not None and var in annotations: + _annotate(ax, curr, annotations[var]) # share all x-axis ax0 = allaxs[0] @@ -609,6 +649,15 @@ def _plotFromDicts(ax, plotdict, styledict): ax.plot(data, **style) +def _annotate(ax, plotdict, txtseries: pd.Series): + for x, txt in txtseries.iteritems(): + try: + y = plotdict['data'].loc[x] + except KeyError: + continue + ax.annotate(txt, xy=(x, y), rotation=45) + + def _plotInfoTable(ax, plotdict, styledict, total): """ Make a nice table with information about the quantity of elements. 
diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 27886f064e6b8c91afc08cfaa7819dba42d6ef68..a2395da3d610f8b80bbb1362b9be35b664259de7 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -338,8 +338,8 @@ def dropper(field, drop_flags, flagger, default): if drop_flags is None: drop_flags = default drop_flags = toSequence(drop_flags) - for f in drop_flags: - drop_mask |= flagger.isFlagged(field, flag=f) + if len(drop_flags) > 0: + drop_mask |= flagger.isFlagged(field, flag=drop_flags) return drop_mask diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py index d8791723f83f75a2da52cf13e62079324a2f6cf3..8833f6ee298ad38603b3ce7d54fb4786b0c51903 100644 --- a/saqc/lib/ts_operators.py +++ b/saqc/lib/ts_operators.py @@ -319,6 +319,10 @@ def aggregate2Freq(data, method, freq, agg_func, fill_value=np.nan, max_invalid_ else: check_name = agg_func.__name__ + # another nasty special case: if function "count" was passed, we do not want empty intervals to be replaced by nan: + if check_name == 'count': + empty_intervals[:] = False + data = getattr(data_resampler, check_name)() except AttributeError: data = data_resampler.apply(agg_func) diff --git a/test/funcs/test_data_modelling.py b/test/funcs/test_data_modelling.py index 36f966eb0542b3531a7c2ee3da21cbce7f1cd1ea..b239ef03da8a2c273d41029196d8c645bd3eafe7 100644 --- a/test/funcs/test_data_modelling.py +++ b/test/funcs/test_data_modelling.py @@ -1,6 +1,9 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- + +# see test/funcs/conftest.py for global fixtures "course_..." + import pytest import numpy as np diff --git a/test/funcs/test_harm_funcs.py b/test/funcs/test_harm_funcs.py index f8742ddc863d0db4f7718e83214b99581e20a531..accde45bc986333805d294247e9278b18c41be43 100644 --- a/test/funcs/test_harm_funcs.py +++ b/test/funcs/test_harm_funcs.py @@ -1,6 +1,8 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- + +# see test/funcs/conftest.py for global fixtures "course_..." 
import pytest import numpy as np diff --git a/test/funcs/test_proc_functions.py b/test/funcs/test_proc_functions.py index 79d7a42e0031fe03f9d63a4f96701da6e133263c..5534d9b00d9ebee3351970d154e7d049cd83953d 100644 --- a/test/funcs/test_proc_functions.py +++ b/test/funcs/test_proc_functions.py @@ -1,6 +1,9 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- + +# see test/funcs/conftest.py for global fixtures "course_..." + import pytest import numpy as np import pandas as pd diff --git a/test/funcs/test_spikes_detection.py b/test/funcs/test_spikes_detection.py index e020ec0fe35c4ac6d19c4d0490d2bcf53be6daa4..7a30aeb34de0290e6cfa87fe2c4e51bd2aa83b4a 100644 --- a/test/funcs/test_spikes_detection.py +++ b/test/funcs/test_spikes_detection.py @@ -1,6 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- +# see test/funcs/conftest.py for global fixtures "course_..." import pytest import numpy as np import pandas as pd