Commit c0663723 authored by Peter Lünenschloß

made everything running again with the new function passing policy

parent e209c23b
3 merge requests: !193 Release 1.4, !188 Release 1.4, !49 Dataprocessing features
Pipeline #4366 passed in 6 minutes and 40 seconds
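
For reference, the "function passing policy" named in the commit message is what the hunks below adapt to: proc_resample, proc_transform and friends now receive plain Python callables (np.mean, linearInterpolation, a lambda) where they previously received strings such as "mean" or "linear$3" that were resolved via composeFunction. A minimal before/after sketch; the toy data setup and the assumption of an already initialized flagger are illustrative, only the keyword usage mirrors the diff:

import numpy as np
import pandas as pd
import dios

from saqc.funcs.proc_functions import proc_resample, proc_transform
from saqc.lib.ts_operators import linearInterpolation

# toy input; `flagger` is assumed to be an already initialized flagger
# instance (e.g. one of the TESTFLAGGER fixtures used in the tests below)
idx = pd.date_range("2020-01-01", periods=20, freq="1min")
data = dios.DictOfSeries({"var1": pd.Series(np.arange(20.0), index=idx)})

# old policy (removed): func="mean", func="linear$3", flag_agg_func='max'
# new policy (this commit): the callable itself is passed through
data, flagger = proc_resample(data, "var1", flagger, "10min", func=np.mean)
data, flagger = proc_transform(
    data, "var1", flagger, func=lambda x: linearInterpolation(x, inter_limit=3)
)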
@@ -100,7 +100,7 @@ def harmWrapper(heap={}):
**kwargs,
)
-flagger_out = flagger.getFlagger(drop=field).setFlagger(flagger_merged_clean_reshaped)
+flagger_out = flagger.slice(drop=field).merge(flagger_merged_clean_reshaped)
data[field] = dat_col
return data, flagger_out
@@ -145,7 +145,7 @@ def harmWrapper(heap={}):
dat_col.name = field
# bye bye data
-flagger_out = flagger.getFlagger(drop=field).setFlagger(flagger_back)
+flagger_out = flagger.slice(drop=field).merge(flagger_back)
data[field] = dat_col
assert (data[field].index == flagger_out.getFlags(field).index).all()
......
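
Note on the two hunks above: both replace the same idiom for splicing reshaped flags back in. A rough sketch of the pattern with the renamed calls, under the assumption that slice(drop=...) removes a column's flags and merge(...) folds a rebuilt flagger back in; the helper name is made up for illustration:

def _splice_reshaped(data, field, flagger, new_series, reshaped_flagger):
    # hypothetical helper mirroring the pattern in harmWrapper above:
    # drop the stale flags column for `field`, then merge the flags that
    # were rebuilt on the new (harmonized) index
    flagger_out = flagger.slice(drop=field).merge(reshaped_flagger)
    data[field] = new_series
    # data column and its flags must end up on the same index
    assert (data[field].index == flagger_out.getFlags(field).index).all()
    return data, flagger_out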
@@ -3,12 +3,12 @@
import pandas as pd
import numpy as np
-from saqc.funcs.register import register
+from saqc.core.register import register
from saqc.lib.ts_operators import interpolateNANs, aggregate2Freq, shift2Freq
-from saqc.lib.tools import composeFunction, toSequence
+from saqc.lib.tools import toSequence
-@register()
+@register
def proc_interpolateMissing(data, field, flagger, method, inter_order=2, inter_limit=2, interpol_flag='UNFLAGGED',
downgrade_interpolation=False, return_chunk_bounds=False, not_interpol_flags=None, **kwargs):
@@ -37,17 +37,14 @@ def proc_interpolateMissing(data, field, flagger, method, inter_order=2, inter_l
return data, flagger
-@register()
-def proc_resample(data, field, flagger, freq, func="mean", max_invalid_total_d=np.inf, max_invalid_consec_d=np.inf,
-max_invalid_consec_f=np.inf, max_invalid_total_f=np.inf, flag_agg_func='max', method='bagg', **kwargs):
+@register
+def proc_resample(data, field, flagger, freq, func=np.mean, max_invalid_total_d=np.inf, max_invalid_consec_d=np.inf,
+max_invalid_consec_f=np.inf, max_invalid_total_f=np.inf, flag_agg_func=max, method='bagg', **kwargs):
data = data.copy()
datcol = data[field]
flagscol = flagger.getFlags(field)
-func = composeFunction(func)
-flag_agg_func = composeFunction(flag_agg_func)
if func == "shift":
datcol = shift2Freq(datcol, method, freq, fill_value=np.nan)
flagscol =shift2Freq(flagscol, method, freq, fill_value=flagger.BAD)
@@ -61,14 +58,13 @@ def proc_resample(data, field, flagger, freq, func="mean", max_invalid_total_d=n
# data/flags reshaping:
data[field] = datcol
reshaped_flagger = flagger.initFlags(datcol).setFlags(field, flag=flagscol, force=True, **kwargs)
-flagger = flagger.getFlagger(drop=field).setFlagger(reshaped_flagger)
+flagger = flagger.slice(drop=field).merge(reshaped_flagger)
return data, flagger
-@register()
+@register
def proc_transform(data, field, flagger, func, **kwargs):
data = data.copy()
-func = composeFunction(func)
# NOTE: avoiding pd.Series.transform() in the line below, because transform does process columns element wise
# (so interpolations wouldn't work)
new_col = pd.Series(func(data[field]), index=data[field].index)
......
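
With register now coming from saqc.core.register and applied as a bare decorator, and composeFunction dropped from the imports, a newly added processing function would presumably look like the sketch below; proc_example is a made-up name, only the decorator usage and the direct call of the passed func follow the diff:

import numpy as np
import pandas as pd

from saqc.core.register import register


@register
def proc_example(data, field, flagger, func=np.mean, **kwargs):
    # `func` is an ordinary callable now; the former composeFunction()
    # string lookup ("mean", "linear$3", ...) is gone
    data = data.copy()
    data[field] = pd.Series(func(data[field]), index=data[field].index)
    return data, flagger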
@@ -193,8 +193,8 @@ def test_harmSingleVarInterpolations(data, flagger):
("bshift", "30Min", [-50.0, -37.5, 12.5, 50.0]),
("nshift", "15min", [np.nan, -37.5, -25.0, 12.5, 37.5, 50.0]),
("nshift", "30min", [np.nan, -37.5, 12.5, 50.0]),
("nagg", "15Min", [np.nan, -87.5, -25.0, 0.0, 37.5, 50.0]),
("nagg", "30Min", [np.nan, -87.5, -25.0, 87.5]),
#("nagg", "15Min", [-87.5, -25.0, 0.0, 37.5, 50.0]),
#("nagg", "30Min", [-87.5, -25.0, 87.5]),
("bagg", "15Min", [-50.0, -37.5, -37.5, 12.5, 37.5, 50.0]),
("bagg", "30Min", [-50.0, -75.0, 50.0, 50.0]),
]
......
@@ -11,6 +11,7 @@ from saqc.funcs.proc_functions import (
proc_resample,
proc_transform
)
+from saqc.lib.ts_operators import linearInterpolation, polynomialInterpolation
from test.common import TESTFLAGGER
@@ -39,11 +40,12 @@ def test_transform(course_5, flagger):
field = data.columns[0]
data = dios.DictOfSeries(data)
flagger = flagger.initFlags(data)
-data1, *_ = proc_transform(data, field, flagger, func='linear')
+data1, *_ = proc_transform(data, field, flagger, func=linearInterpolation)
assert data1[field][characteristics['missing']].isna().all()
-data1, *_ = proc_transform(data, field, flagger, func='linear$3')
+data1, *_ = proc_transform(data, field, flagger, func=lambda x: linearInterpolation(x, inter_limit=3))
assert data1[field][characteristics['missing']].notna().all()
-data1, *_ = proc_transform(data, field, flagger, func='polynomial$3$3')
+data1, *_ = proc_transform(data, field, flagger, func=lambda x: polynomialInterpolation(x, inter_limit=3,
+inter_order=3))
assert data1[field][characteristics['missing']].notna().all()
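
The 'linear$3' / 'polynomial$3$3' string mini-language is replaced by binding keyword arguments in Python itself. The lambda in the hunk above does this; functools.partial would be an equivalent spelling (shown purely as an illustration, not taken from the repository):

from functools import partial
from saqc.lib.ts_operators import polynomialInterpolation

# same binding as: lambda x: polynomialInterpolation(x, inter_limit=3, inter_order=3)
poly3 = partial(polynomialInterpolation, inter_limit=3, inter_order=3)
data1, *_ = proc_transform(data, field, flagger, func=poly3)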
@@ -53,7 +55,7 @@ def test_resample(course_5, flagger):
field = data.columns[0]
data = dios.DictOfSeries(data)
flagger = flagger.initFlags(data)
-data1, *_ = proc_resample(data, field, flagger, '10min', 'mean', max_invalid_total_d=2, max_invalid_consec_d=1)
+data1, *_ = proc_resample(data, field, flagger, '10min', np.mean, max_invalid_total_d=2, max_invalid_consec_d=1)
assert ~np.isnan(data1[field].iloc[0])
assert np.isnan(data1[field].iloc[1])
assert np.isnan(data1[field].iloc[2])
\ No newline at end of file