diff --git a/docs/FunctionDescriptions.md b/docs/FunctionDescriptions.md index 66b78a567d5b87329f47880c1a38983b3f78dc0b..2d53066540da9e74a8b93696cb3afdfa934fb5f4 100644 --- a/docs/FunctionDescriptions.md +++ b/docs/FunctionDescriptions.md @@ -25,7 +25,10 @@ Main documentation of the implemented functions, their purpose and parameters an - [machinelearning](#machinelearning) - [harmonize](#harmonize) - [deharmonize](#deharmonize) - - [harmonize_shift2Grid](#harmonize_shift2Grid) + - [harmonize_shift2Grid](#harmonize_shift2grid) + - [harmonize_aggregate2Grid](#harmonize_aggregate2grid) + - [harmonize_linear2Grid](#harmonize_linear2grid) + - [harmonize_interpolate2Grid](#harmonize_interpolate2grid) ## range @@ -774,7 +777,7 @@ and than: (According to the flagging order of the current flagger.) -## harmonize_shift2Grid +## harmonize_shift2grid ``` harmonize_shift2Grid(freq, shift_method='nearest_shift', drop_flags=None) @@ -819,7 +822,7 @@ In detail, the process includes: if there is one available in the succeeding sampling interval. If not, BAD/np.nan - flag gets assigned. * `"nearest_shift"`: every grid point gets assigned the closest flag/datapoint in its range. ( range = +/- `freq`/2 ). -## harmonize_aggregate2Grid +## harmonize_aggregate2grid ``` harmonize_aggregate2Grid(freq, agg_func, agg_method='nearest_agg', flag_agg_func=max, drop_flags=None) @@ -876,7 +879,7 @@ In detail, the process includes: aggregated with the function passed to agg_method and assigned to it. -## harmonize_linear2Grid +## harmonize_linear2grid ``` harmonize_linear2Grid(freq, flag_assignment_method='nearest_agg', flag_agg_func=max, drop_flags=None) @@ -929,7 +932,7 @@ Linear interpolation of an inserted equidistant frequency grid of sampling rate aggregated with the function passed to `agg_func` and assigned to it. -## harmonize_interpolate2Grid +## harmonize_interpolate2grid ``` harmonize_interpolate2Grid(freq, interpolation_method, interpolation_order=1, flag_assignment_method='nearest_agg', diff --git a/saqc/funcs/harm_functions.py b/saqc/funcs/harm_functions.py index 9f96141d1ef884d06db4dca6743fd84f5be4a03b..d5bb8f577946ca246b114420b4ff71a62f2059c6 100644 --- a/saqc/funcs/harm_functions.py +++ b/saqc/funcs/harm_functions.py @@ -829,7 +829,8 @@ def linear2Grid(data, field, flagger, freq, flag_assignment_method='nearest_agg' @register('harmonize_interpolate2Grid') -def interpolate2Grid(data, field, flagger, freq, interpolation_method, interpolation_order=1, flag_assignment_method='nearest_agg', flag_agg_func=max, drop_flags=None, **kwargs): +def interpolate2Grid(data, field, flagger, freq, interpolation_method, interpolation_order=1, + flag_assignment_method='nearest_agg', flag_agg_func=max, drop_flags=None, **kwargs): return harmonize( data, field, @@ -842,4 +843,61 @@ def interpolate2Grid(data, field, flagger, freq, interpolation_method, interpola drop_flags=drop_flags, **kwargs) -#def aggregate(data, field, flagger, freq_base, freq_target, agg_func, **kwargs): \ No newline at end of file + +def aggregate(data, field, flagger, source_freq, target_freq, agg_func=np.mean, sample_func=np.mean, + invalid_flags=None, max_invalid=np.inf, **kwargs): + + # define the "fastest possible" aggregator + if sample_func is None: + if max_invalid < np.inf: + + def aggregator(x): + if x.isna().sum() < max_invalid: + return agg_func(x) + else: + return np.nan + else: + + def aggregator(x): + return agg_func(x) + else: + + dummy_resampler = pd.Series(np.nan, index=[pd.Timedelta('1min')]).resample('1min') + if hasattr(dummy_resampler, sample_func.__name__): + + sample_func_name = sample_func.__name__ + if max_invalid < np.inf: + def aggregator(x): + y = getattr(x.resample(source_freq), sample_func_name)() + if y.isna().sum() < max_invalid: + return agg_func(y) + else: + return np.nan + + else: + def aggregator(x): + return agg_func(getattr(x.resample(source_freq), sample_func_name)()) + + else: + if max_invalid < np.inf: + def aggregator(x): + y = x.resample(source_freq).apply(sample_func) + if y.isna().sum() < max_invalid: + return agg_func(y) + else: + return np.nan + else: + def aggregator(x): + return agg_func(x.resample(source_freq).apply(sample_func)) + + return harmonize( + data, + field, + flagger, + target_freq, + inter_method='bagg', + reshape_method='bagg', + inter_agg=aggregator, + reshape_agg=max, + drop_flags=invalid_flags, + **kwargs) diff --git a/test/funcs/test_harm_funcs.py b/test/funcs/test_harm_funcs.py index 2419370e319243a54417d18225c9aba99586a48d..825f442651cade79ecdf64184e317bbe34458ef3 100644 --- a/test/funcs/test_harm_funcs.py +++ b/test/funcs/test_harm_funcs.py @@ -18,7 +18,8 @@ from saqc.funcs.harm_functions import ( linear2Grid, interpolate2Grid, shift2Grid, - aggregate2Grid + aggregate2Grid, + aggregate ) @@ -334,20 +335,10 @@ def test_wrapper(data, flagger): field = data.columns[0] freq = '15min' flagger = flagger.initFlags(data) + aggregate(data, field, flagger, '15min', '30min', agg_func=np.sum, sample_func=np.mean) + linear2Grid(data, field, flagger, freq, flag_assignment_method='nearest_agg', flag_agg_func=max, drop_flags=None) aggregate2Grid(data, field, flagger, freq, agg_func=sum, agg_method='nearest_agg', flag_agg_func=max, drop_flags=None) shift2Grid(data, field, flagger, freq, shift_method='nearest_shift', drop_flags=None) - -if __name__ == "__main__": - dat = data() - flagger = TESTFLAGGER[1] - test_gridInterpolation(data(), 'polynomial') - flagger2 = TESTFLAGGER[2] - flagger = flagger.initFlags(dat) - flagger2 = flagger.initFlags(dat2) - dat_out, flagger = interpolate2Grid(dat, 'data', flagger, '15min', interpolation_method="polynomial", flag_assignment_method='nearest_agg', - flag_agg_func=max, drop_flags=None) - - print("stop") \ No newline at end of file