Skip to content
Snippets Groups Projects
Commit 81ade885 authored by Peter Lünenschloß's avatar Peter Lünenschloß
Browse files

working on NaN marking of interpol chunk bounds...in progress

parent cc9bb117
No related branches found
No related tags found
No related merge requests found
......@@ -89,7 +89,7 @@ def harmWrapper(heap={}):
)
# interpolation! (yeah)
dat_col = _interpolateGrid(
dat_col, chunk_bounds = _interpolateGrid(
dat_col,
freq,
method=inter_method,
......@@ -108,6 +108,7 @@ def harmWrapper(heap={}):
agg_method=reshape_agg,
missing_flag=reshape_missing_flag,
set_shift_comment=reshape_shift_comment,
block_flags=chunk_bounds,
**kwargs
)
......@@ -325,6 +326,7 @@ def _interpolateGrid(
:return: pd.DataFrame. ['data'].
"""
chunk_bounds = None
aggregations = ["nearest_agg", "bagg", "fagg"]
shifts = ["fshift", "bshift", "nearest_shift"]
interpolations = [
......@@ -392,7 +394,7 @@ def _interpolateGrid(
elif method in interpolations:
data = _insertGrid(data, freq)
data = _interpolate(
data, chunk_bounds = _interpolate(
data,
method,
order=order,
......@@ -412,7 +414,7 @@ def _interpolateGrid(
if total_range is not None:
data = data.reindex(total_index)
return data
return data, chunk_bounds
def _interpolate(data, method, order=2, inter_limit=2, downcast_interpolation=False):
......@@ -450,6 +452,11 @@ def _interpolate(data, method, order=2, inter_limit=2, downcast_interpolation=Fa
.replace(np.nan, True)
.astype(bool)
)
# start and end points of interpolation chunks have to be memorized to block their flagging:
chunk_switches = gap_mask.astype(int).diff()
chunk_starts = chunk_switches[chunk_switches == -1].index
chunk_ends = chunk_switches[(chunk_switches.shift(-1) == 1)].index
chunk_bounds = chunk_starts.join(chunk_ends, how='outer', sort=True)
data = data[gap_mask]
......@@ -486,7 +493,7 @@ def _interpolate(data, method, order=2, inter_limit=2, downcast_interpolation=Fa
# squeezing the 1-dimensional frame resulting from groupby for consistency reasons
data = data.squeeze(axis=1)
data.name = dat_name
return data
return data, chunk_bounds
def _reshapeFlags(
......@@ -497,6 +504,7 @@ def _reshapeFlags(
agg_method=max,
missing_flag=None,
set_shift_comment=True,
block_flags=None,
**kwargs
):
"""To continue processing flags after harmonization/interpolation, old pre-harm flags have to be distributed onto
......@@ -536,6 +544,9 @@ def _reshapeFlags(
however, the methods used do not allow to 'reflag' and to apply any **kwargs that were passed.
If set_shift_comment is set to True, **kwargs will be applied, but the whole process will slow
down significantly.
:block_flags: DatetimeIndex. A DatetimeIndex containing labels that will get the "nan-flag" assigned.
This option is mainly introduced to account for the backtracking inconsistencies at the end
and beginning of interpolation chunks.
:return: flags: pd.Series/pd.DataFrame. The reshaped pandas like Flags object, referring to the harmonized data.
"""
......@@ -565,7 +576,10 @@ def _reshapeFlags(
# if you want to keep previous comments - only newly generated missing flags get commented:
flags_series = flags.squeeze()
# block flagging/backtracking of chunk_starts/chunk_ends
if block_flags is not None:
flags_series[block_flags] = np.nan
# TODO: here NaN values get cast to missing_flag -> that's why interpolation chunks won't be properly NaN-marked!
flagger_new = flagger.initFlags(flags=flags).setFlags(
field, loc=flags_series.isna(), flag=missing_flag, force=True, **kwargs
)
......@@ -623,6 +637,10 @@ def _reshapeFlags(
if ref_index[-1] != flags.index[-1]:
flags = flags.append(pd.Series(data=flagger.BAD, index=[ref_index[-1]]).astype(flagger.dtype))
# block flagging/backtracking of chunk_starts/chunk_ends
if block_flags is not None:
flags[block_flags] = np.nan
flagger_new = flagger.initFlags(flags=flags.to_frame(name=field))
else:
......
......@@ -36,7 +36,7 @@ INTERPOLATIONS2 = ["fagg", "time", "polynomial"]
FREQS = ["15min", "30min"]
@pytest.fixture
#@pytest.fixture
def data():
index = pd.date_range(
start="1.1.2011 00:00:00", end="1.1.2011 01:00:00", freq="15min"
......@@ -222,7 +222,7 @@ def test_harmSingleVarInterpolations(data, flagger, interpolation, freq):
data, flagger = deharmonize(data, "data", flagger, co_flagging=True)
data, flagger = deharmonize(data, "data", flagger, co_flagging=True)
#data, flagger = deharmonize(data, "data", flagger, co_flagging=True)
flags = flagger.getFlags()
assert pre_data.equals(data)
......@@ -343,3 +343,12 @@ def test_wrapper(data, flagger):
flag_agg_func="max", drop_flags=None)
shift2Grid(data, field, flagger, freq, shift_method='nearest_shift', drop_flags=None)
if __name__ == "__main__":
    # Manual debugging entry point: runs a single interpolation test directly,
    # without going through the pytest runner.
    # NOTE(review): calling data() directly presumably requires that it is a
    # plain function (i.e. not wrapped by @pytest.fixture) - confirm.
    dat = data()
    # Append a copy whose timestamps are shifted by 91 minutes, so the combined
    # index contains labels that are not multiples of the 15 minute frequency.
    dat2 = dat.shift(1, '91min')
    # pd.concat replaces the deprecated (pandas >= 1.4) and removed
    # (pandas >= 2.0) `.append` method; the result is the same.
    dat = pd.concat([dat, dat2])
    # Remove the label found at position 8 of the combined index.
    dat = dat.drop(dat.index[8])
    flagger = TESTFLAGGER[2]
    interpolation = 'linear'
    freq = "15min"
    test_harmSingleVarInterpolations(dat, flagger, interpolation, freq)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment