Skip to content
Snippets Groups Projects
Commit 96af27c4 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

docstrings in changepoints.py

parent 0a4a002f
No related branches found
No related tags found
7 merge requests!685Release 2.4,!684Release 2.4,!567Release 2.2.1,!566Release 2.2,!501Release 2.1,!372fix doctest snippets,!355docstring cleanup - part1
......@@ -20,7 +20,6 @@ from dios import DictOfSeries
from saqc.constants import *
from saqc.lib.tools import groupConsecutives
from saqc.lib.types import FreqString
from saqc.funcs.changepoints import _assignChangePointCluster
from saqc.core.flags import Flags
from saqc.core.register import _isflagged, register, flagging
......@@ -80,8 +79,8 @@ def flagIsolated(
data: DictOfSeries,
field: str,
flags: Flags,
gap_window: FreqString,
group_window: FreqString,
gap_window: str,
group_window: str,
flag: float = BAD,
**kwargs
) -> Tuple[DictOfSeries, Flags]:
......@@ -166,7 +165,7 @@ def flagJumps(
field: str,
flags: Flags,
thresh: float,
window: FreqString,
window: str,
min_periods: int = 1,
flag: float = BAD,
**kwargs
......
......@@ -15,7 +15,6 @@ from saqc.constants import *
from saqc.core.register import flagging
from saqc.lib.tools import customRoller, filterKwargs
from saqc.core import register, Flags
from saqc.lib.types import FreqString
@flagging()
......@@ -25,15 +24,17 @@ def flagChangePoints(
flags: Flags,
stat_func: Callable[[np.ndarray, np.ndarray], float],
thresh_func: Callable[[np.ndarray, np.ndarray], float],
window: FreqString | Tuple[FreqString, FreqString],
window: str | Tuple[str, str],
min_periods: int | Tuple[int, int],
closed: Literal["right", "left", "both", "neither"] = "both",
reduce_window: FreqString = None,
reduce_window: str = None,
reduce_func: Callable[[np.ndarray, np.ndarray], int] = lambda x, _: x.argmax(),
flag: float = BAD,
**kwargs,
) -> Tuple[DictOfSeries, Flags]:
"""
Flag data where it significantly changes.
Flag data points where the parametrization of the process that is assumed to
generate the data changes significantly.
......@@ -42,26 +43,29 @@ def flagChangePoints(
Parameters
----------
data : dios.DictOfSeries
A dictionary of pandas.Series, holding all the data.
The data container.
field : str
The reference variable, the deviation from which determines the flagging.
A column in flags and data.
flags : saqc.flags
A flags object, holding flags and additional information related to `data`.
flags : saqc.Flags
The flags container.
stat_func : Callable[numpy.array, numpy.array]
stat_func : Callable
A function that assigns a value to every twin window. The backward-facing
window content will be passed as the first array, the forward-facing window
content as the second.
thresh_func : Callable[numpy.array, numpy.array]
thresh_func : Callable
A function that determines the value level, exceeding which qualifies a
timestamp's `stat_func` value as denoting a change-point.
window : str, tuple of string
Size of the rolling windows the calculation is performed in. If it is a single
frequency offset, it applies for the backward- and the forward-facing window.
window : str, tuple of str
Size of the moving windows. This is the number of observations used for
calculating the statistic.
If it is a single frequency offset, it applies for the backward- and the
forward-facing window.
If two offsets (as a tuple) are passed, the first defines the size of the
backward-facing window, the second the size of the forward-facing window.
......@@ -74,25 +78,35 @@ def flagChangePoints(
closed : {'right', 'left', 'both', 'neither'}, default 'both'
Determines the closure of the sliding windows.
reduce_window : {None, str}, default None
The sliding window search method is not an exact CP search method and usually it will not
detect a single changepoint, but a "region" of change around a changepoint.
reduce_window : str or None, default None
The sliding window search method is not an exact CP search method and usually
it will not detect a single changepoint, but a "region" of change around
a changepoint.
If `reduce_window` is given, for every window of size `reduce_window`, there
will be selected the value with index `reduce_func(x, y)` and the others will be dropped.
If `reduce_window` is None, the reduction window size equals the
twin window size, the changepoints have been detected with.
will be selected the value with index `reduce_func(x, y)` and the others will
be dropped.
reduce_func : Callable[[numpy.ndarray, numpy.ndarray], int], default lambda x, y: x.argmax()
If `reduce_window` is None, the reduction window size equals the twin window
size that the changepoints have been detected with.
reduce_func : Callable, default ``lambda x, y: x.argmax()``
A function that must return an index value upon input of two arrays x and y.
First input parameter will hold the result from the stat_func evaluation for every
reduction window. Second input parameter holds the result from the thresh_func evaluation.
The default reduction function just selects the value that maximizes the stat_func.
First input parameter will hold the result from the stat_func evaluation for
every reduction window. Second input parameter holds the result from the
`thresh_func` evaluation.
The default reduction function just selects the value that maximizes the
`stat_func`.
flag : float, default BAD
flag to set.
Returns
-------
data : dios.DictOfSeries
Unmodified data container
flags : saqc.Flags
The flags container
"""
return _assignChangePointCluster(
data,
......@@ -129,12 +143,15 @@ def assignChangePointCluster(
**kwargs,
):
"""
Label data where it changes significantly.
The labels will be stored in data. Unless `target` is given the labels will
overwrite the data in `field`. The flags will always be set to `UNFLAGGED`.
Assigns labels to the data, aiming to reflect continuous regimes of the processes
the data is assumed to be generated by. The regime change-point detection is
based on a sliding window search.
Note, that the cluster labels will be stored to the `field` field of the input
data, so that the data that is clustered gets overridden.
Parameters
----------
......@@ -188,10 +205,14 @@ def assignChangePointCluster(
that maximizes the stat_func.
model_by_resids : bool, default False
If True, the stat_funcs results are written, otherwise the regime labels.
If True, the results of `stat_funcs` are written, otherwise the regime labels.
Returns
-------
data : dios.DictOfSeries
Modified data.
flags : saqc.Flags
The flags container
"""
reserved = ["assign_cluster", "set_flags", "flag"]
kwargs = filterKwargs(kwargs, reserved)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment