Skip to content
Snippets Groups Projects
Commit d418d18f authored by Bert Palm 🎇
Browse files

harm next steps

parent ce170c12
No related branches found
No related tags found
3 merge requests: !193 Release 1.4, !188 Release 1.4, !24 Dios integration
Pipeline #3162 failed with stage
in 7 minutes and 12 seconds
...@@ -127,6 +127,7 @@ class BaseFlagger(ABC): ...@@ -127,6 +127,7 @@ class BaseFlagger(ABC):
If `force=False` (default) only flags with a lower priority are overwritten, If `force=False` (default) only flags with a lower priority are overwritten,
otherwise, if `force=True`, flags are overwritten unconditionally. otherwise, if `force=True`, flags are overwritten unconditionally.
""" """
assert "iloc" not in kwargs, "deprecated keyword, iloc"
assertScalar("field", field, optional=False) assertScalar("field", field, optional=False)
flag = self.BAD if flag is None else flag flag = self.BAD if flag is None else flag
...@@ -144,12 +145,16 @@ class BaseFlagger(ABC): ...@@ -144,12 +145,16 @@ class BaseFlagger(ABC):
def clearFlags(self, field: str, loc: LocT = None, **kwargs) -> BaseFlaggerT: def clearFlags(self, field: str, loc: LocT = None, **kwargs) -> BaseFlaggerT:
assertScalar("field", field, optional=False) assertScalar("field", field, optional=False)
if "force" in kwargs:
raise ValueError("Keyword 'force' is not allowed here.")
if "flag" in kwargs:
raise ValueError("Keyword 'flag' is not allowed here.")
return self.setFlags(field=field, loc=loc, flag=self.UNFLAGGED, force=True, **kwargs) return self.setFlags(field=field, loc=loc, flag=self.UNFLAGGED, force=True, **kwargs)
def isFlagged(self, field=None, loc: LocT = None, flag: FlagT = None, comparator: str = ">", **kwargs) -> PandasT: def isFlagged(self, field=None, loc: LocT = None, flag: FlagT = None, comparator: str = ">") -> PandasT:
assertScalar("flag", flag, optional=True) assertScalar("flag", flag, optional=True)
flag = self.GOOD if flag is None else flag flag = self.GOOD if flag is None else flag
flags = self.getFlags(field, loc, **kwargs) flags = self.getFlags(field, loc)
cp = COMPARATOR_MAP[comparator] cp = COMPARATOR_MAP[comparator]
# use notna() to prevent nans to become True, # use notna() to prevent nans to become True,
......
...@@ -5,6 +5,7 @@ import pdb ...@@ -5,6 +5,7 @@ import pdb
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import logging import logging
import dios
from saqc.funcs.functions import flagMissing from saqc.funcs.functions import flagMissing
from saqc.funcs.register import register from saqc.funcs.register import register
...@@ -113,9 +114,9 @@ def harmWrapper(heap={}): ...@@ -113,9 +114,9 @@ def harmWrapper(heap={}):
# finally we happily blow up the data and flags frame again, # finally we happily blow up the data and flags frame again,
# to release them on their ongoing journey through saqc. # to release them on their ongoing journey through saqc.
data, flagger_out = _toMerged( data, flagger_out = _toMerged(data, flagger, field,
data, flagger, field, data_to_insert=dat_col, flagger_to_insert=flagger_merged_clean_reshaped, **kwargs data_to_insert=dat_col,
) flagger_to_insert=flagger_merged_clean_reshaped, **kwargs)
return data, flagger_out return data, flagger_out
...@@ -194,6 +195,8 @@ def _outsortCrap( ...@@ -194,6 +195,8 @@ def _outsortCrap(
Depending on passed key word options the function will remove nan entries and as-suspicious-flagged values from Depending on passed key word options the function will remove nan entries and as-suspicious-flagged values from
the data and the flags passed. In deharmonization the function is used to reconstruct original flags field shape. the data and the flags passed. In deharmonization the function is used to reconstruct original flags field shape.
FIXME: parameter
:param data: pd.Series. ['data']. :param data: pd.Series. ['data'].
:param flagger: saqc.flagger. :param flagger: saqc.flagger.
:param drop_suspicious: Boolean. Default = True. If True, only values that are flagged GOOD or UNFLAGGED get :param drop_suspicious: Boolean. Default = True. If True, only values that are flagged GOOD or UNFLAGGED get
...@@ -207,6 +210,7 @@ def _outsortCrap( ...@@ -207,6 +210,7 @@ def _outsortCrap(
dropped. dropped.
If return_drops=True. Returns the dropped flags. If return_drops=True. Returns the dropped flags.
""" """
assert isinstance(data, pd.Series), "data must be pd.Series"
drop_mask = pd.Series(data=False, index=data.index) drop_mask = pd.Series(data=False, index=data.index)
...@@ -216,7 +220,7 @@ def _outsortCrap( ...@@ -216,7 +220,7 @@ def _outsortCrap(
flagger_out = flagger.getFlagger(loc=~drop_mask) flagger_out = flagger.getFlagger(loc=~drop_mask)
if return_drops: if return_drops:
return flagger.getFlags(loc=drop_mask), flagger_out return flagger.getFlags(field=field, loc=drop_mask), flagger_out
return data[~drop_mask], flagger_out return data[~drop_mask], flagger_out
...@@ -553,14 +557,15 @@ def _reshapeFlags( ...@@ -553,14 +557,15 @@ def _reshapeFlags(
direction = "nearest" direction = "nearest"
tolerance = pd.Timedelta(freq) / 2 tolerance = pd.Timedelta(freq) / 2
flags = flagger.getFlags().reindex(ref_index, tolerance=tolerance, method=direction, fill_value=np.nan) # if you want to keep previous comments
# only newly generated missing flags get commented:
# if you want to keep previous comments - only newly generated missing flags get commented: f = flagger.getFlags(field)
flags_series = flags.squeeze() flags_series = f.reindex(ref_index, tolerance=tolerance, method=direction, fill_value=np.nan)
flags = flagger.getFlags(loc=flags_series)
flagger_new = flagger.initFlags(flags=flags).setFlags( flagger_new = flagger.initFlags(flags=flags)
field, loc=flags_series.isna(), flag=missing_flag, force=True, **kwargs flagger_new.setFlags(field, loc=flags_series.isna(), flag=missing_flag, force=True, **kwargs)
)
if set_shift_comment: if set_shift_comment:
flagger_new = flagger_new.setFlags(field, flag=flags_series, force=True, **kwargs) flagger_new = flagger_new.setFlags(field, flag=flags_series, force=True, **kwargs)
...@@ -701,9 +706,10 @@ def _backtrackFlags(flagger_post, flagger_pre, freq, track_method="invert_fshift ...@@ -701,9 +706,10 @@ def _backtrackFlags(flagger_post, flagger_pre, freq, track_method="invert_fshift
def _fromMerged(data, flagger, fieldname): def _fromMerged(data, flagger, fieldname):
# we need a not-na mask for the flags data to be retrieved: """kill nans that came from a cross harmonisation"""
mask = flagger.getFlags(fieldname).notna() data_series = data[fieldname].notna()
return data.loc[mask[mask].index, fieldname], flagger.getFlagger(field=fieldname, loc=mask) # fixme flagger = flagger.getFlagger(field=fieldname, loc=data_series)
return data_series, flagger
def _toMerged(data, flagger, fieldname, data_to_insert, flagger_to_insert, target_index=None, **kwargs): def _toMerged(data, flagger, fieldname, data_to_insert, flagger_to_insert, target_index=None, **kwargs):
...@@ -712,16 +718,18 @@ def _toMerged(data, flagger, fieldname, data_to_insert, flagger_to_insert, targe ...@@ -712,16 +718,18 @@ def _toMerged(data, flagger, fieldname, data_to_insert, flagger_to_insert, targe
flags = flagger._flags flags = flagger._flags
flags_to_insert = flagger_to_insert._flags flags_to_insert = flagger_to_insert._flags
if isinstance(data, pd.Series): # this should never happen, but if this could happen in general,
data = data.to_frame() # the caller has to ensure that we get a dios
assert not isinstance(data, pd.Series)
data.drop(fieldname, axis="columns", errors="ignore", inplace=True) data = data[data.columns.difference([fieldname])]
flags.drop(fieldname, axis="columns", errors="ignore", inplace=True) flags = flags[data.columns.difference([fieldname])]
# first case: there is no data, the data-to-insert would have # first case: there is no data, the data-to-insert would have
# to be merged with, and also are we not deharmonizing: # to be merged with, and also are we not deharmonizing:
if (data.empty) and (target_index is None): if data.empty and target_index is None:
return data_to_insert.to_frame(name=fieldname), flagger_to_insert data_to_insert.name = fieldname
return dios.DictOfSeries(data_to_insert), flagger_to_insert
# if that's not the case: generate the drop mask for the remaining data: # if that's not the case: generate the drop mask for the remaining data:
mask = data.isna().all(axis=1) mask = data.isna().all(axis=1)
......
...@@ -313,22 +313,23 @@ def test_gridInterpolation(data, method): ...@@ -313,22 +313,23 @@ def test_gridInterpolation(data, method):
def test_outsortCrap(data, flagger): def test_outsortCrap(data, flagger):
field = data.columns[0] field = data.columns[0]
s = data[field]
flagger = flagger.initFlags(data) flagger = flagger.initFlags(data)
flagger = flagger.setFlags(field, iloc=slice(5, 7))
drop_index = data.index[5:7] drop_index = s.index[5:7]
d, _ = _outsortCrap(data, field, flagger, drop_flags=flagger.BAD) flagger = flagger.setFlags(field, loc=drop_index)
assert drop_index.difference(d.index).equals(drop_index) res, _ = _outsortCrap(s, field, flagger, drop_flags=flagger.BAD)
assert drop_index.difference(res.index).equals(drop_index)
flagger = flagger.setFlags(field, iloc=slice(0, 1), flag=flagger.GOOD) flagger = flagger.setFlags(field, loc=s.iloc[0:1], flag=flagger.GOOD)
drop_index = drop_index.insert(-1, data.index[0]) drop_index = drop_index.insert(-1, s.index[0])
d, _ = _outsortCrap(data, field, flagger, drop_flags=[flagger.BAD, flagger.GOOD],) to_drop = [flagger.BAD, flagger.GOOD]
assert drop_index.sort_values().difference(d.index).equals(drop_index.sort_values()) res, _ = _outsortCrap(s, field, flagger, drop_flags=to_drop)
assert drop_index.sort_values().difference(res.index).equals(drop_index.sort_values())
res, _ = _outsortCrap(s, field, flagger, drop_flags=to_drop, return_drops=True)
assert res.index.sort_values().equals(drop_index.sort_values())
f_drop, _ = _outsortCrap(
data, field, flagger, drop_flags=[flagger.BAD, flagger.GOOD], return_drops=True,
)
assert f_drop.index.sort_values().equals(drop_index.sort_values())
@pytest.mark.parametrize("flagger", TESTFLAGGER) @pytest.mark.parametrize("flagger", TESTFLAGGER)
def test_wrapper(data, flagger): def test_wrapper(data, flagger):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment