Skip to content
Snippets Groups Projects
Commit af25b889 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

added new modelling func: rolling mean.

parent 42f8cdf5
No related branches found
No related tags found
3 merge requests!193Release 1.4,!188Release 1.4,!49Dataprocessing features
......@@ -21,7 +21,6 @@ def modelling_polyFit(data, field, flagger, winsz, polydeg, numba='auto', eval_f
Note that calculating the residues tends to be quite cost intensive, because a function fitting is performed for every
sample. To improve performance, consider the following possibilities:
data, flagger = modelling_polyFit(SEEFOdata, 'efield', flagger, '1h', 2, eval_flags=False)
In case your data is sampled at an equidistant frequency grid:
......@@ -88,7 +87,7 @@ data, flagger = modelling_polyFit(SEEFOdata, 'efield', flagger, '1h', 2, eval_fl
pos = x.index[int(len(x) - y[x.index[-1]])]
return y.index.get_loc(pos)
centers_iloc = centers.rolling('1h', closed='both').apply(center_func, raw=False).astype(int)
centers_iloc = centers.rolling(winsz, closed='both').apply(center_func, raw=False).astype(int)
temp = residues.copy()
for k in centers_iloc.iteritems():
residues.iloc[k[1]] = temp[k[0]]
......@@ -157,3 +156,83 @@ data, flagger = modelling_polyFit(SEEFOdata, 'efield', flagger, '1h', 2, eval_fl
flagger = flagger.setFlags(field, flags=to_flag, **kwargs)
return data, flagger
@register
def modelling_rollingMean(data, field, flagger, winsz, eval_flags=True, min_periods=0, center=True, **kwargs):
    """
    Models the timeseries passed with the rolling mean and replaces it by the residues.

    The residues written to ``data[field]`` are calculated as ``rolling mean - original value``.

    Parameters
    ----------
    data :
        The data container. It is copied, the passed object is not modified. ``data[field]`` has to be a
        datetime indexed series.
    field : str
        The name of the variable in ``data`` to be modelled.
    flagger :
        The flagger object holding the flags related to ``data``. Only its ``getFlags`` and ``setFlags``
        methods are used.
    winsz : integer or offset String
        The size of the window you want to roll with. If an integer is passed, the size
        refers to the number of periods for every fitting window. If an offset string is passed,
        the size refers to the total temporal extension.
        For regularly sampled timeseries, the period number will be cast down to an odd number if
        center = True.
    eval_flags : boolean, default True
        Whether or not to assign new flags to the calculated residuals. If True, a residual gets assigned the worst
        flag present in the interval, the data for its calculation was obtained from.
        Currently not implemented in combination with not-harmonized timeseries and center = True.
    min_periods : integer, default 0
        The minimum number of periods, that has to be available in every values fitting surrounding for the mean
        fitting to be performed. If there are not enough values, np.nan gets assigned. Default (0) results in fitting
        regardless of the number of values present.
    center : boolean, default True
        Whether or not to center the window the mean is calculated of around the reference value. If False,
        the reference value is placed to the right of the window (classic rolling mean with lag.)
    kwargs
        Passed on to ``flagger.setFlags``.

    Returns
    -------
    data :
        Copy of the input data, with ``data[field]`` replaced by the residues.
    flagger :
        The flagger returned by ``flagger.setFlags`` if ``eval_flags`` is True, the passed flagger otherwise.

    Raises
    ------
    NotImplementedError
        If an integer ``winsz`` is combined with a not-harmonized series and ``center=True``.
    """
    data = data.copy()
    to_fit = data[field]
    flags = flagger.getFlags(field)
    # a series counts as harmonized, if its index carries a frequency
    harmonized = to_fit.index.freqstr is not None

    # starting with the annoying case: finding the rolling interval centers of not-harmonized input time series:
    if center and not harmonized:
        if isinstance(winsz, int):
            raise NotImplementedError(
                'Integer based window size is not supported for not-harmonized '
                'sample series when rolling with "center=True".'
            )
        # get interval centers: number of samples falling into the trailing half window of every sample
        # (counts are integral already, so no rounding is needed before the integer cast)
        half_window = pd.Timedelta(winsz) / 2
        centers = to_fit.rolling(half_window, closed='both', min_periods=min_periods).count()
        centers = centers.dropna().astype(int)
        # trailing means - they get shifted onto their window centers below
        means = to_fit.rolling(pd.Timedelta(winsz), closed='both', min_periods=min_periods).mean()

        def center_func(x, y=centers):
            # x is the trailing window ending at x.index[-1]; step back by the half window sample count
            # to find the timestamp of the window center and return its integer position
            pos = x.index[int(len(x) - y[x.index[-1]])]
            return y.index.get_loc(pos)

        centers_iloc = centers.rolling(winsz, closed='both').apply(center_func, raw=False).astype(int)
        temp = means.copy()
        # ``items`` replaces ``iteritems``, which was removed in pandas 2.0
        for window_end, center_pos in centers_iloc.items():
            means.iloc[center_pos] = temp[window_end]
        # last values are false, due to structural reasons:
        last_center = means.index[centers_iloc.iloc[-1]]
        means.loc[last_center:means.index[-1]] = np.nan

    # everything is more easy if data[field] is harmonized (or a plain lagged window is requested):
    else:
        # Offset windows are translated to period numbers only when a sampling frequency is available.
        # A not-harmonized series (center=False here) is rolled with the offset itself.
        if isinstance(winsz, str) and harmonized:
            # use the frequency object: frequency strings without multiplier (like "H") are no valid Timedelta input
            winsz = int(np.floor(pd.Timedelta(winsz) / pd.Timedelta(to_fit.index.freq)))
        if center and (winsz % 2 == 0):
            winsz = int(winsz - 1)
        means = to_fit.rolling(window=winsz, center=center, closed='both', min_periods=min_periods).mean()

    residues = means - to_fit
    data[field] = residues

    if eval_flags:
        # factorize returns (integer codes, unique values)
        # NOTE(review): the "worst" flag is taken to be the one with the highest factorize code - confirm that
        # the code order reflects the flag order for the flagger in use.
        flag_codes, flag_values = flags.factorize()
        # aggregate over the same window geometry the means were calculated with
        worst_codes = pd.Series(flag_codes, index=flags.index).rolling(
            winsz, center=center, min_periods=min_periods
        ).max()
        # samples without a valid window keep their original flag
        nan_samples = worst_codes[worst_codes.isna()]
        worst_codes.drop(nan_samples.index, inplace=True)
        to_flag = pd.Series(flag_values[worst_codes.astype(int)], index=worst_codes.index)
        to_flag = to_flag.align(nan_samples)[0]
        to_flag[nan_samples.index] = flags[nan_samples.index]
        flagger = flagger.setFlags(field, flags=to_flag, **kwargs)

    return data, flagger
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment