Skip to content
Snippets Groups Projects
Commit b7f296d6 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

renamed 'to_mask' to 'dfilter'

parent 76cdc2b2
No related branches found
No related tags found
2 merge requests!370Release 2.0,!366renamed 'to_mask' to 'dfilter'
Pipeline #53570 passed with stage
in 2 minutes and 57 seconds
Showing with 66 additions and 66 deletions
......@@ -176,7 +176,7 @@ class SaQC(FunctionsMixin):
prepare user function input:
- expand fields and targets
- translate user given ``flag`` values or set the default ``BAD``
- translate user given ``to_mask`` values or set the scheme default
- translate user given ``dfilter`` values or set the scheme default
- depending on the workflow: initialize ``target`` variables
Here we add the following parameters to all registered functions, regardless
......@@ -195,7 +195,7 @@ class SaQC(FunctionsMixin):
**kwargs,
) -> SaQC:
kwargs.setdefault("to_mask", self._scheme.TO_MASK)
kwargs.setdefault("dfilter", self._scheme.DFILTER_DEFAULT)
if not isinstance(flag, OptionalNone):
# translation schemes might want to use a flag,
......
......@@ -8,7 +8,7 @@ import saqc
class Breaks:
def flagMissing(
self, field: str, flag: float = BAD, to_mask: float = UNFLAGGED, **kwargs
self, field: str, flag: float = BAD, dfilter: float = UNFLAGGED, **kwargs
) -> saqc.SaQC:
return self._defer("flagMissing", locals())
......
......@@ -16,7 +16,7 @@ class Generic:
func: GenericFunction,
target: str | Sequence[str] = None,
flag: float = UNFLAGGED,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
**kwargs,
) -> saqc.SaQC:
return self._defer("processGeneric", locals())
......@@ -27,7 +27,7 @@ class Generic:
func: GenericFunction,
target: Union[str, Sequence[str]] = None,
flag: float = BAD,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
**kwargs,
) -> saqc.SaQC:
return self._defer("flagGeneric", locals())
......@@ -42,7 +42,7 @@ class Tools:
phaseplot: Optional[str] = None,
stats_dict: Optional[dict] = None,
store_kwargs: Optional[dict] = None,
to_mask: Optional[float] = np.inf,
dfilter: Optional[float] = np.inf,
**kwargs,
) -> saqc.SaQC:
return self._defer("plot", locals())
......@@ -184,10 +184,10 @@ class FunctionWrapper:
@staticmethod
def _checkKwargs(kwargs: dict) -> dict[str, Any]:
if "to_mask" in kwargs and not isinstance(
kwargs["to_mask"], (bool, float, int)
if "dfilter" in kwargs and not isinstance(
kwargs["dfilter"], (bool, float, int)
):
raise TypeError(f"'to_mask' must be of type bool or float")
raise TypeError(f"'dfilter' must be of type bool or float")
return kwargs
def _prepareArgs(self) -> Tuple[tuple, dict[str, Any]]:
......@@ -201,7 +201,7 @@ class FunctionWrapper:
keyword-arguments to be passed to the actual call
"""
kwargs = self.kwargs.copy()
kwargs["to_mask"] = self.mask_thresh
kwargs["dfilter"] = self.mask_thresh
# always pass a list to multivariate functions and
# unpack single element lists for univariate functions
......@@ -215,7 +215,7 @@ class FunctionWrapper:
def _getMaskingThresh(self) -> float:
"""
Generate a float threshold by the value of the `to_mask` keyword
Generate a float threshold by the value of the `dfilter` keyword
Returns
-------
......@@ -224,13 +224,13 @@ class FunctionWrapper:
Notes
-----
If ``to_mask`` is **not** in the kwargs, the threshold defaults to
If ``dfilter`` is **not** in the kwargs, the threshold defaults to
- ``-np.inf``
If a floatish ``to_mask`` is found in the kwargs, this value is taken as the threshold.
If a floatish ``dfilter`` is found in the kwargs, this value is taken as the threshold.
"""
if "to_mask" not in self.kwargs:
if "dfilter" not in self.kwargs:
return UNFLAGGED
return float(self.kwargs["to_mask"]) # handle int
return float(self.kwargs["dfilter"]) # handle int
def _createMeta(self) -> dict:
return {
......@@ -368,7 +368,7 @@ def register(
Generalized decorator for any saqc functions.
Before the call of the decorated function:
- data gets masked by flags according to `to_mask`
- data gets masked by flags according to `dfilter`
After the call of the decorated function:
- data gets demasked (original data is written back)
......@@ -383,7 +383,7 @@ def register(
The masking takes place before the call of the decorated function and
temporarily sets data to `NaN` at flagged locations. It is undone by ``demask``.
The threshold above which data is considered to be flagged can be controlled
via ``to_mask``, a parameter each function takes.
via ``dfilter``, a parameter each function takes.
demask : list of string
A list of all parameters of the decorated function, that specify a column in
......@@ -430,7 +430,7 @@ def flagging(**kwargs):
Default decorator for univariate flagging functions.
Before the call of the decorated function:
- `data[field]` gets masked by `flags[field]` according to `to_mask`
- `data[field]` gets masked by `flags[field]` according to `dfilter`
After the call of the decorated function:
- `data[field]` gets demasked (original data is written back)
- `flags[field]` gets squeezed (only one history column append per call) if needed
......
......@@ -49,7 +49,7 @@ class TranslationScheme:
"""
# (internal) threshold flag above which values will be masked
TO_MASK: float = UNFLAGGED
DFILTER_DEFAULT: float = UNFLAGGED
# additional arguments and default values the translation scheme accepts
ARGUMENTS: Dict[str, Any] = {}
......
......@@ -27,7 +27,7 @@ class PositionalScheme(TranslationScheme):
Implements the translation from and to the flagging scheme implemented by CHS
"""
TO_MASK = DOUBTFUL + 1
DFILTER_DEFAULT = DOUBTFUL + 1
_FORWARD: ForwardMap = {
-6: UNFLAGGED,
......
......@@ -30,7 +30,7 @@ def flagMissing(
field: str,
flags: Flags,
flag: float = BAD,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
**kwargs
) -> Tuple[DictOfSeries, Flags]:
"""
......@@ -58,7 +58,7 @@ def flagMissing(
datacol = data[field]
mask = datacol.isna()
mask = ~_isflagged(flags[field], to_mask) & mask
mask = ~_isflagged(flags[field], dfilter) & mask
flags[mask, field] = flag
return data, flags
......
......@@ -73,7 +73,7 @@ def clearFlags(
Notes
-----
This function ignores the ``to_mask`` keyword, because the data is not relevant
This function ignores the ``dfilter`` keyword, because the data is not relevant
for processing.
A warning is triggered if the ``flag`` keyword is given, because the flags are
always set to `UNFLAGGED`.
......@@ -122,7 +122,7 @@ def flagUnflagged(
Notes
-----
This function ignores the ``to_mask`` keyword, because the data is not relevant
This function ignores the ``dfilter`` keyword, because the data is not relevant
for processing.
See Also
......
......@@ -18,11 +18,11 @@ from saqc.core.register import register, _isflagged, FunctionWrapper
def _prepare(
data: DictOfSeries, flags: Flags, columns: Sequence[str], to_mask: float
data: DictOfSeries, flags: Flags, columns: Sequence[str], dfilter: float
) -> Tuple[DictOfSeries, Flags]:
fchunk = Flags({f: flags[f] for f in columns})
dchunk, _ = FunctionWrapper._maskData(
data=data.loc[:, columns].copy(), flags=fchunk, columns=columns, thresh=to_mask
data=data.loc[:, columns].copy(), flags=fchunk, columns=columns, thresh=dfilter
)
return dchunk, fchunk.copy()
......@@ -31,11 +31,11 @@ def _execGeneric(
flags: Flags,
data: PandasLike,
func: GenericFunction,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
) -> DictOfSeries:
globs = {
"isflagged": lambda data: _isflagged(flags[data.name], thresh=to_mask),
"isflagged": lambda data: _isflagged(flags[data.name], thresh=dfilter),
**ENVIRONMENT,
}
......@@ -62,7 +62,7 @@ def processGeneric(
func: GenericFunction,
target: str | Sequence[str] = None,
flag: float = UNFLAGGED,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
**kwargs,
) -> Tuple[DictOfSeries, Flags]:
"""
......@@ -99,8 +99,8 @@ def processGeneric(
The quality flag to set. The default ``UNFLAGGED`` states the general idea, that
``processGeneric`` generates 'new' data without direct relation to the potentially
already present flags.
to_mask: float, default ``UNFLAGGED``
Threshold flag. Flag values greater than ``to_mask`` indicate that the associated
dfilter: float, default ``UNFLAGGED``
Threshold flag. Flag values greater than ``dfilter`` indicate that the associated
data value is inappropriate for further usage.
Returns
......@@ -127,8 +127,8 @@ def processGeneric(
fields = toSequence(field)
targets = fields if target is None else toSequence(target)
dchunk, fchunk = _prepare(data, flags, fields, to_mask)
result = _execGeneric(fchunk, dchunk, func, to_mask=to_mask)
dchunk, fchunk = _prepare(data, flags, fields, dfilter)
result = _execGeneric(fchunk, dchunk, func, dfilter=dfilter)
meta = {
"func": "procGeneric",
......@@ -138,7 +138,7 @@ def processGeneric(
"func": func.__name__,
"target": target,
"flag": flag,
"to_mask": to_mask,
"dfilter": dfilter,
},
}
......@@ -176,7 +176,7 @@ def flagGeneric(
func: GenericFunction,
target: Union[str, Sequence[str]] = None,
flag: float = BAD,
to_mask: float = UNFLAGGED,
dfilter: float = UNFLAGGED,
**kwargs,
) -> Tuple[DictOfSeries, Flags]:
"""
......@@ -209,8 +209,8 @@ def flagGeneric(
The quality flag to set. The default ``UNFLAGGED`` states the general idea, that
``processGeneric`` generates 'new' data without direct relation to the potentially
already present flags.
to_mask: float, default ``UNFLAGGED``
Threshold flag. Flag values greater than ``to_mask`` indicate that the associated
dfilter: float, default ``UNFLAGGED``
Threshold flag. Flag values greater than ``dfilter`` indicate that the associated
data value is inappropriate for further usage.
Returns
......@@ -243,8 +243,8 @@ def flagGeneric(
fields = toSequence(field)
targets = fields if target is None else toSequence(target)
dchunk, fchunk = _prepare(data, flags, fields, to_mask)
result = _execGeneric(fchunk, dchunk, func, to_mask=to_mask)
dchunk, fchunk = _prepare(data, flags, fields, dfilter)
result = _execGeneric(fchunk, dchunk, func, dfilter=dfilter)
if len(targets) != len(result.columns):
raise ValueError(
......@@ -262,7 +262,7 @@ def flagGeneric(
"func": func.__name__,
"target": target,
"flag": flag,
"to_mask": to_mask,
"dfilter": dfilter,
},
}
......@@ -279,7 +279,7 @@ def flagGeneric(
if col not in data:
data[col] = pd.Series(np.nan, index=maskcol.index)
maskcol = maskcol & ~_isflagged(flags[col], to_mask)
maskcol = maskcol & ~_isflagged(flags[col], dfilter)
flagcol = maskcol.replace({False: np.nan, True: flag}).astype(float)
# we need equal indices to work on
......
......@@ -277,7 +277,7 @@ def interpolateIndex(
# todo:
# in future we could use `register(mask=[field], [], [])`
# and don't handle masking manually here
flagged = _isflagged(flags[field], kwargs["to_mask"])
flagged = _isflagged(flags[field], kwargs["dfilter"])
# drop all points that hold no relevant grid information
datcol = datcol[~flagged].dropna()
......
......@@ -141,7 +141,7 @@ def interpolate(
)
# for @processing this would need to handle to_mask
# for @processing this would need to handle dfilter
@register(mask=["field"], demask=[], squeeze=[])
def shift(
data: DictOfSeries,
......@@ -219,7 +219,7 @@ def shift(
return data, flags
# for @processing this would need to handle to_mask
# for @processing this would need to handle dfilter
@register(mask=["field"], demask=[], squeeze=[])
def resample(
data: DictOfSeries,
......@@ -530,7 +530,7 @@ def concatFlags(
elif method[-5:] == "shift":
drop_mask = target_datcol.isna() | _isflagged(
target_flagscol, kwargs["to_mask"]
target_flagscol, kwargs["dfilter"]
)
projection_method = METHOD2ARGS[method][0]
tolerance = METHOD2ARGS[method][1](freq)
......
......@@ -231,15 +231,15 @@ def maskTime(
datcol_idx = data[field].index
if mode == "periodic":
to_mask = periodicMask(datcol_idx, start, end, closed)
dfilter = periodicMask(datcol_idx, start, end, closed)
elif mode == "mask_field":
idx = data[mask_field].index.intersection(datcol_idx)
to_mask = data.loc[idx, mask_field]
dfilter = data.loc[idx, mask_field]
else:
raise ValueError("Keyword passed as masking mode is unknown ({})!".format(mode))
data.aloc[to_mask, field] = np.nan
flags[to_mask, field] = UNFLAGGED
data.aloc[dfilter, field] = np.nan
flags[dfilter, field] = UNFLAGGED
return data, flags
......@@ -256,7 +256,7 @@ def plot(
phaseplot: Optional[str] = None,
stats_dict: Optional[dict] = None,
store_kwargs: Optional[dict] = None,
to_mask: float = np.inf,
dfilter: float = np.inf,
**kwargs,
):
"""
......@@ -350,9 +350,9 @@ def plot(
interactive = path is None
level = kwargs.get("flag", BAD)
if to_mask < np.inf:
if dfilter < np.inf:
data = data.copy()
data.loc[flags[field] >= to_mask, field] = np.nan
data.loc[flags[field] >= dfilter, field] = np.nan
if store_kwargs is None:
store_kwargs = {}
......
......@@ -175,7 +175,7 @@ def periodicMask(dtindex, season_start, season_end, include_bounds):
Returns
-------
to_mask : pandas.Series[bool]
dfilter : pandas.Series[bool]
A series, indexed with the input index and having value `True` for all the values that are to be masked.
Examples
......
......@@ -225,8 +225,8 @@ def _buildupSaQCObjects():
out = []
for _ in range(2):
saqc = SaQC(data=data, flags=flags)
saqc = saqc.flagRange(field=col, min=5, max=6, to_mask=np.inf).flagRange(
col, min=3, max=10, to_mask=np.inf
saqc = saqc.flagRange(field=col, min=5, max=6, dfilter=np.inf).flagRange(
col, min=3, max=10, dfilter=np.inf
)
flags = saqc._flags
out.append(saqc)
......
......@@ -35,7 +35,7 @@ def test_writeTargetFlagGeneric(data):
"func": func.__name__,
"target": targets,
"flag": BAD,
"to_mask": UNFLAGGED,
"dfilter": UNFLAGGED,
},
}
......@@ -67,7 +67,7 @@ def test_overwriteFieldFlagGeneric(data):
"target": fields,
"func": func.__name__,
"flag": flag,
"to_mask": UNFLAGGED,
"dfilter": UNFLAGGED,
},
}
......@@ -100,7 +100,7 @@ def test_writeTargetProcGeneric(data):
(["tmp"], lambda x, y: x + y),
(["tmp1", "tmp2"], lambda x, y: (x + y, y * 2)),
]
to_mask = 128
dfilter = 128
for targets, func in params:
expected_data = DictOfSeries(
......@@ -115,7 +115,7 @@ def test_writeTargetProcGeneric(data):
"target": targets,
"func": func.__name__,
"flag": BAD,
"to_mask": to_mask,
"dfilter": dfilter,
},
}
saqc = SaQC(
......@@ -125,7 +125,7 @@ def test_writeTargetProcGeneric(data):
),
)
res = saqc.processGeneric(
field=fields, target=targets, func=func, flag=BAD, to_mask=to_mask
field=fields, target=targets, func=func, flag=BAD, dfilter=dfilter
)
assert (expected_data == res.data[targets].squeeze()).all(axis=None)
# check that new histories were created
......@@ -139,7 +139,7 @@ def test_overwriteFieldProcGeneric(data):
(["var1"], lambda x: x * 2),
(["var1", "var2"], lambda x, y: (x + y, y * 2)),
]
to_mask = 128
dfilter = 128
flag = 12
for fields, func in params:
expected_data = DictOfSeries(
......@@ -154,7 +154,7 @@ def test_overwriteFieldProcGeneric(data):
"target": fields,
"func": func.__name__,
"flag": flag,
"to_mask": to_mask,
"dfilter": dfilter,
},
}
......@@ -165,7 +165,7 @@ def test_overwriteFieldProcGeneric(data):
),
)
res = saqc.processGeneric(field=fields, func=func, flag=flag, to_mask=to_mask)
res = saqc.processGeneric(field=fields, func=func, flag=flag, dfilter=dfilter)
assert (expected_data == res.data[fields].squeeze()).all(axis=None)
# check that the histories got appended
for field in fields:
......
......@@ -116,10 +116,10 @@ def test_unmaskingInvertsMasking(data_field_flags):
# data_in, field, flags = data_field_flags
# data_masked, mask = _maskData(data_in, flags, columns=[field], to_mask=flags.BAD)
# data_masked, mask = _maskData(data_in, flags, columns=[field], dfilter=flags.BAD)
# func, kwargs = func_kwargs
# data_masked, _ = func(data_masked, field, flags, **kwargs)
# data_out = _unmaskData(data_in, mask, data_masked, flags, to_mask=flags.BAD)
# data_out = _unmaskData(data_in, mask, data_masked, flags, dfilter=flags.BAD)
# flags_in = flags.isFlagged(flag=flags.BAD)
# assert data_in.aloc[flags_in].equals(data_out.aloc[flags_in])
......@@ -139,7 +139,7 @@ def test_unmaskingInvertsMasking(data_field_flags):
# flagged_in = flags.isFlagged(flag=flags.BAD, comparator=">=")
# # mask and call
# data_left, _ = _maskData(data, flags, columns=[field], to_mask=flags.BAD)
# data_left, _ = _maskData(data, flags, columns=[field], dfilter=flags.BAD)
# data_left, _ = func(data_left, field, flags, **kwargs)
# # remove and call
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment