Commit d01b2fab authored by Peter Lünenschloß
handling empty dist mat case

parent 276df1f9
3 merge requests: !193 Release 1.4, !188 Release 1.4, !138 WIP: Detect and reset offset
Pipeline #9411 passed in 6 minutes and 8 seconds
@@ -14,7 +14,7 @@ from saqc.lib.tools import retrieveTrustworthyOriginal, detectDeviants
 @register(masking='all')
 def breaks_flagRegimeAnomaly(data, field, flagger, cluster_field, norm_spread, linkage_method='single',
                              metric=lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y)),
-                             norm_frac=0.5, reset_cluster=False, **kwargs):
+                             norm_frac=0.5, reset_cluster=True, **kwargs):
     """
     A function to flag values belonging to an anomalous regime regarding modelling regimes of field.
@@ -49,8 +49,8 @@ def breaks_flagRegimeAnomaly(data, field, flagger, cluster_field, norm_spread, l
     norm_frac : float
         Has to be in [0,1]. Determines the minimum percentage of samples,
         the "normal" group has to comprise to be the normal group actually.
-    reset_cluster : bool, default False
-        If True, all data, considered "normal", gets assigned thee cluster Label "0", the remaining
+    reset_cluster : bool, default True
+        If True, all data, considered "normal", gets assigned the cluster Label "0" and the remaining
         cluster get numbered consecutively.
     kwargs
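For illustration of the behavior the new default enables: the relabeling described by reset_cluster=True can be sketched with a small, hypothetical helper (relabel_clusters and its arguments are made up for this note; this is not saqc's implementation):

import pandas as pd

def relabel_clusters(cluster, normal_labels):
    # All samples belonging to a "normal" regime get the cluster label 0,
    # the remaining (anomalous) clusters are renumbered consecutively from 1.
    out = pd.Series(0, index=cluster.index)
    next_label = 1
    for lab in sorted(set(cluster.unique()) - set(normal_labels)):
        out[cluster == lab] = next_label
        next_label += 1
    return out

clusters = pd.Series([1, 1, 2, 2, 3, 3])
print(relabel_clusters(clusters, normal_labels={1, 3}).tolist())  # [0, 0, 1, 1, 0, 0]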
@@ -430,9 +430,9 @@ def _reduceCPCluster(stat_arr, thresh_arr, start, end, obj_func, num_val):
 @register(masking='field')
-def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd,
-                                    fwd_window=None, min_periods_fwd=None, closed='both', try_to_jit=True,
-                                    reduce_window=None, reduce_func=lambda x, y: x.argmax(), flag_changepoints=False, **kwargs):
+def modelling_changePointCluster(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd,
+                                 fwd_window=None, min_periods_fwd=None, closed='both', try_to_jit=True,
+                                 reduce_window=None, reduce_func=lambda x, y: x.argmax(), flag_changepoints=False, **kwargs):
     """
     Assigns label to the data, aiming to reflect continous regimes of the processes the data is assumed to be
     generated by.
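To make the renamed function's interface concrete: judging from the two-argument lambdas in the signature, stat_func and thresh_func each take two arrays (here assumed to be the backward- and forward-window values). A hypothetical pair, purely illustrative and not taken from saqc, could look like this:

import numpy as np

# Hypothetical changepoint statistic: absolute difference of the window means,
# with a threshold of three pooled standard deviations.
stat_func = lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y))
thresh_func = lambda x, y: 3 * np.nanstd(np.concatenate([x, y]))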
@@ -502,7 +502,7 @@ def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func
     indexer = FreqIndexer()
     indexer.index_array = data_ser.index.to_numpy(int)
-    indexer.win_points = np.array([True]*var_len)
+    indexer.win_points = None
     indexer.window_size = int(pd.Timedelta(bwd_window).total_seconds() * 10 ** 9)
     indexer.forward = False
     indexer.center = False
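The surrounding context also shows how the rolling window width is handed to the indexer: as the backward window expressed in nanoseconds, matching the integer view of the datetime index. A quick standalone check (the window string "30min" is an assumed example value):

import pandas as pd

bwd_window = "30min"
window_ns = int(pd.Timedelta(bwd_window).total_seconds() * 10 ** 9)
print(window_ns)  # 1800000000000 -- 30 minutes in nanoseconds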
@@ -540,4 +540,4 @@ def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func
         flagger = flagger.setFlags(field, flag=flagger.UNFLAGGED, force=True, **kwargs)
     if flag_changepoints:
         flagger.setFlags(field, loc=det_index)
-    return data, flagger
\ No newline at end of file
+    return data, flagger
@@ -998,3 +998,23 @@ def proc_seefoLinearDriftCorrecture(data, field, flagger, x_field, y_field, **kw
     data[field] = datcol
     return data, flagger
+
+
+def proc_correctRegimeAnomaly(data, field, flagger, cluster_field, model):
+    """
+    Function fits the passed model to every regime.
+
+    Parameters
+    ----------
+    data
+    field
+    flagger
+    cluster_field
+    model
+
+    Returns
+    -------
+
+    """
+    clusterser = data[cluster_field]
+    # fit phase:
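The new proc_correctRegimeAnomaly is cut off at the fit phase in this hunk. As a rough sketch of what fitting the passed model to every regime could amount to, assuming model is a callable of the form model(x, *params) as accepted by scipy.optimize.curve_fit (the helper name and the positional abscissa are assumptions, not the actual implementation):

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

def fit_model_per_regime(datcol, clusterser, model):
    # Fit the model separately to the data of every cluster/regime label.
    fits = {}
    for label in clusterser.unique():
        regime = datcol[clusterser == label].dropna()
        x = np.arange(len(regime), dtype=float)  # positional abscissa for simplicity
        params, _ = curve_fit(model, x, regime.to_numpy())
        fits[label] = params
    return fits

# e.g. fit_model_per_regime(data[field], data[cluster_field], lambda x, a, b: a * x + b)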
@@ -566,6 +566,8 @@ def detectDeviants(data, metric, norm_spread, norm_frac, linkage_method='single'
     """
     var_num = len(data.columns)
+    if var_num <= 1:
+        return []
     dist_mat = np.zeros((var_num, var_num))
     combs = list(itertools.combinations(range(0, var_num), 2))
     for i, j in combs:
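This guard is what the commit message refers to: with a single column there are no pairwise combinations, the distance matrix stays degenerate, and hierarchical linkage cannot be computed. A self-contained illustration of that edge case (not saqc code):

import itertools
import numpy as np
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform

var_num = 1
combs = list(itertools.combinations(range(0, var_num), 2))
print(combs)  # [] -- no pairs, nothing to fill the distance matrix with

dist_mat = np.zeros((var_num, var_num))
condensed = squareform(dist_mat)  # empty condensed distance matrix
# linkage(condensed, method='single')  # would raise ValueError on the empty matrix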