Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
79b66a17
Commit
79b66a17
authored
4 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Patches
Plain Diff
added changepoint detection functionality to functions module
parent
6aeb0d34
No related branches found
Branches containing commit
No related tags found
Tags containing commit
3 merge requests
!193
Release 1.4
,
!188
Release 1.4
,
!138
WIP: Detect and reset offset
Pipeline
#8221
passed with stage
in 6 minutes and 20 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
saqc/funcs/functions.py
+59
-1
59 additions, 1 deletion
saqc/funcs/functions.py
with
59 additions
and
1 deletion
saqc/funcs/functions.py
+
59
−
1
View file @
79b66a17
...
@@ -15,7 +15,7 @@ from mlxtend.evaluate import permutation_test
...
@@ -15,7 +15,7 @@ from mlxtend.evaluate import permutation_test
from
scipy.cluster.hierarchy
import
linkage
,
fcluster
from
scipy.cluster.hierarchy
import
linkage
,
fcluster
from
saqc.lib.tools
import
groupConsecutives
,
sesonalMask
from
saqc.lib.tools
import
groupConsecutives
,
sesonalMask
,
FreqIndexer
,
customRolling
from
saqc.core.register
import
register
from
saqc.core.register
import
register
from
saqc.core.visitor
import
ENVIRONMENT
from
saqc.core.visitor
import
ENVIRONMENT
...
@@ -1045,4 +1045,62 @@ def flagDriftFromReference(data, field, flagger, fields, segment_freq, thresh,
...
@@ -1045,4 +1045,62 @@ def flagDriftFromReference(data, field, flagger, fields, segment_freq, thresh,
if
dist
>
thresh
:
if
dist
>
thresh
:
flagger
=
flagger
.
setFlags
(
fields
[
i
],
loc
=
segment
[
1
].
index
,
**
kwargs
)
flagger
=
flagger
.
setFlags
(
fields
[
i
],
loc
=
segment
[
1
].
index
,
**
kwargs
)
return
data
,
flagger
@register(masking='field')
def flagChangePoints(data, field, flagger, stat_func, thresh_func, bwd_window, min_periods_bwd,
                     fwd_window=None, min_periods_fwd=None, closed='both', **kwargs):
    """
    Flag change points found by a sliding "twin window" search.

    For every position (except the last one) a backward facing window and a
    forward facing window are cut out of ``data[field]``. Both value arrays are
    passed to `stat_func` and `thresh_func`; the position is flagged whenever
    ``stat_func(x, y) > thresh_func(x, y)``. This provides the basic architecture
    for applying e.g. a two-sided t-test, maximum-likelihood modelling or
    piecewise regression modelling to change point detection.

    Parameters
    ----------
    data : dios.DictOfSeries
        A dictionary of pandas.Series, holding all the data.
    field : str
        Name of the variable in `data` that is searched for change points.
    flagger : saqc.flagger
        A flagger object, holding flags and additional informations related to `data`.
    stat_func : Callable[[numpy.array, numpy.array], float]
        Called as ``stat_func(x, y)`` with the values of the backward window (`x`)
        and of the forward window (`y`). Returns the test statistic.
    thresh_func : {float, Callable[[numpy.array, numpy.array], float]}
        Either a constant threshold or a function called as ``thresh_func(x, y)``
        that returns the threshold the statistic is compared against.
    bwd_window : str
        Temporal extension of the backward facing window (anything
        ``pandas.Timedelta`` can parse).
    min_periods_bwd : {str, int}
        Passed on as `min_periods` to the window bounds calculation of the
        backward facing window.
    fwd_window : {None, str}, default None
        Temporal extension of the forward facing window. If None, `bwd_window`
        is used.
    min_periods_fwd : {None, str, int}, default None
        Passed on as `min_periods` to the window bounds calculation of the
        forward facing window. If None, `min_periods_bwd` is used.
    closed : {'right', 'left', 'both', 'neither'}, default 'both'
        Passed on as `closed` to the window bounds calculation of both windows.
    **kwargs
        Forwarded to ``flagger.setFlags``.

    Returns
    -------
    data : dios.DictOfSeries
        The input data, unchanged.
    flagger : saqc.flagger
        The flagger object, with flags set at the detected change points of `field`.
    """
    data_ser = data[field]
    var_len = data_ser.shape[0]

    # With fewer than two values there is no pair of adjacent windows to compare.
    if var_len < 2:
        return data, flagger

    # The forward window defaults to mirroring the backward window. Without this
    # fallback, pd.Timedelta(None) yields NaT and the nanosecond conversion fails.
    if fwd_window is None:
        fwd_window = bwd_window
    if min_periods_fwd is None:
        min_periods_fwd = min_periods_bwd

    center = False

    FI = FreqIndexer()
    FI.index_array = data_ser.index.to_numpy(int)
    FI.win_points = np.array([True] * var_len)

    # Backward facing window bounds (window size is given in nanoseconds).
    FI.window_size = int(pd.Timedelta(bwd_window).total_seconds() * 10 ** 9)
    FI.forward = False
    bwd_start, bwd_end = FI.get_window_bounds(var_len, min_periods_bwd, center, closed)

    # Forward facing window bounds.
    FI.window_size = int(pd.Timedelta(fwd_window).total_seconds() * 10 ** 9)
    FI.forward = True
    fwd_start, fwd_end = FI.get_window_bounds(var_len, min_periods_fwd, center, closed)

    # Shift the forward bounds by one position, so that position i is paired with
    # the forward window computed for position i + 1. The last entry wraps around
    # to the bounds of position 0 and is therefore excluded from the loop below.
    fwd_start, fwd_end = np.roll(fwd_start, -1), np.roll(fwd_end, -1)

    # The docstring allows a constant threshold: only call thresh_func if callable.
    thresh_is_callable = callable(thresh_func)

    data_arr = data_ser.values
    # Boolean mask aligned with data_ser.index; the last position stays False.
    change_mask = np.zeros(var_len, dtype=bool)
    for win_i in range(var_len - 1):
        x = data_arr[bwd_start[win_i]:bwd_end[win_i]]
        y = data_arr[fwd_start[win_i]:fwd_end[win_i]]
        thresh = thresh_func(x, y) if thresh_is_callable else thresh_func
        change_mask[win_i] = stat_func(x, y) > thresh

    if change_mask.any():
        # Translate the mask to index labels, like the other flagging functions do.
        flagger = flagger.setFlags(field, loc=data_ser.index[change_mask], **kwargs)
    return data, flagger
return
data
,
flagger
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment