diff --git a/saqc/funcs/breaks_detection.py b/saqc/funcs/breaks_detection.py index 048a14b0018cf033d9cda938d4dfc23caf012491..8ba8666bddfd0b10c8b6e3a098f24cd0e8f63bd4 100644 --- a/saqc/funcs/breaks_detection.py +++ b/saqc/funcs/breaks_detection.py @@ -14,7 +14,7 @@ from saqc.lib.tools import retrieveTrustworthyOriginal, detectDeviants @register(masking='all') def breaks_flagRegimeAnomaly(data, field, flagger, cluster_field, norm_spread, linkage_method='single', metric=lambda x, y: np.abs(np.nanmean(x) - np.nanmean(y)), - norm_frac=0.5, reset_cluster=False, **kwargs): + norm_frac=0.5, reset_cluster=True, **kwargs): """ A function to flag values belonging to an anomalous regime regarding modelling regimes of field. @@ -49,8 +49,8 @@ def breaks_flagRegimeAnomaly(data, field, flagger, cluster_field, norm_spread, l norm_frac : float Has to be in [0,1]. Determines the minimum percentage of samples, the "normal" group has to comprise to be the normal group actually. - reset_cluster : bool, default False - If True, all data, considered "normal", gets assigned thee cluster Label "0", the remaining + reset_cluster : bool, default True + If True, all data, considered "normal", gets assigned the cluster Label "0" and the remaining cluster get numbered consecutively. 
kwargs diff --git a/saqc/funcs/modelling.py b/saqc/funcs/modelling.py index a3ae852dd668bbc975eee220d8778591d7f86690..a24b45e713a3171e46fb27c1e7c4bbf675080e45 100644 --- a/saqc/funcs/modelling.py +++ b/saqc/funcs/modelling.py @@ -430,9 +430,9 @@ def _reduceCPCluster(stat_arr, thresh_arr, start, end, obj_func, num_val): @register(masking='field') -def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd, - fwd_window=None, min_periods_fwd=None, closed='both', try_to_jit=True, - reduce_window=None, reduce_func=lambda x, y: x.argmax(), flag_changepoints=False, **kwargs): +def modelling_changePointCluster(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd, + fwd_window=None, min_periods_fwd=None, closed='both', try_to_jit=True, + reduce_window=None, reduce_func=lambda x, y: x.argmax(), flag_changepoints=False, **kwargs): """ Assigns label to the data, aiming to reflect continous regimes of the processes the data is assumed to be generated by. 
@@ -502,7 +502,7 @@ def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func indexer = FreqIndexer() indexer.index_array = data_ser.index.to_numpy(int) - indexer.win_points = np.array([True]*var_len) + indexer.win_points = None indexer.window_size = int(pd.Timedelta(bwd_window).total_seconds() * 10 ** 9) indexer.forward = False indexer.center = False @@ -540,4 +540,4 @@ def modelling_clusterByChangePoints(data, field, flagger, stat_func, thresh_func flagger = flagger.setFlags(field, flag=flagger.UNFLAGGED, force=True, **kwargs) if flag_changepoints: flagger.setFlags(field, loc=det_index) - return data, flagger \ No newline at end of file + return data, flagger diff --git a/saqc/funcs/proc_functions.py b/saqc/funcs/proc_functions.py index e7885aa37cf59cd8bf7617c279309b192121dc9b..5c88f5e4b356bf982cebf164d60ee09a32d646c6 100644 --- a/saqc/funcs/proc_functions.py +++ b/saqc/funcs/proc_functions.py @@ -998,3 +998,23 @@ def proc_seefoLinearDriftCorrecture(data, field, flagger, x_field, y_field, **kw data[field] = datcol return data, flagger + +def proc_correctRegimeAnomaly(data, field, flagger, cluster_field, model): + """ + Function fits the passed model to every regime + + Parameters + ---------- + data + field + flagger + cluster_field + model + + Returns + ------- + """ + + clusterser = data[cluster_field] + + # fit phase: diff --git a/saqc/lib/tools.py b/saqc/lib/tools.py index 48a27f4a3c5e13184cf7daf4ea526f40243afd7a..4e722fb4325152ee12d176b76e21e6ee33de666c 100644 --- a/saqc/lib/tools.py +++ b/saqc/lib/tools.py @@ -566,6 +566,8 @@ def detectDeviants(data, metric, norm_spread, norm_frac, linkage_method='single' """ var_num = len(data.columns) + if var_num <= 1: + return [] dist_mat = np.zeros((var_num, var_num)) combs = list(itertools.combinations(range(0, var_num), 2)) for i, j in combs: