Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
f2d98441
Commit
f2d98441
authored
4 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Patches
Plain Diff
check if timeseries is regularly sampled
parent
477d015e
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
saqc/funcs/curvefit.py
+6
-4
6 additions, 4 deletions
saqc/funcs/curvefit.py
saqc/funcs/outliers.py
+10
-3
10 additions, 3 deletions
saqc/funcs/outliers.py
saqc/funcs/rolling.py
+6
-4
6 additions, 4 deletions
saqc/funcs/rolling.py
saqc/lib/tools.py
+17
-0
17 additions, 0 deletions
saqc/lib/tools.py
with
39 additions
and
11 deletions
saqc/funcs/curvefit.py
+
6
−
4
View file @
f2d98441
...
...
@@ -12,10 +12,13 @@ import pandas as pd
from
dios
import
DictOfSeries
from
saqc.core.register
import
register
from
saqc.lib.tools
import
getFreqDelta
from
saqc.flagger.baseflagger
import
BaseFlagger
from
saqc.lib.ts_operators
import
polyRollerIrregular
,
polyRollerNumba
,
polyRoller
,
polyRollerNoMissingNumba
,
polyRollerNoMissing
@register
(
masking
=
'
field
'
)
def
fitPolynomial
(
data
:
DictOfSeries
,
field
:
str
,
flagger
:
BaseFlagger
,
winsz
:
Union
[
int
,
str
],
...
...
@@ -105,9 +108,8 @@ def fitPolynomial(data: DictOfSeries, field: str, flagger: BaseFlagger,
data
=
data
.
copy
()
to_fit
=
data
[
field
]
flags
=
flagger
.
getFlags
(
field
)
i
=
to_fit
.
index
# checking if index is regular here (index.freqstr property is not reliable)
if
not
pd
.
date_range
(
i
[
0
],
i
[
-
1
],
len
(
i
)).
equals
(
i
):
regular
=
getFreqDelta
(
to_fit
.
index
)
if
not
regular
:
if
isinstance
(
winsz
,
int
):
raise
NotImplementedError
(
"
Integer based window size is not supported for not-harmonized
"
"
sample series.
"
)
# get interval centers
...
...
@@ -130,7 +132,7 @@ def fitPolynomial(data: DictOfSeries, field: str, flagger: BaseFlagger,
residues
[
residues
.
index
[
centers_iloc
[
-
1
]]
:
residues
.
index
[
-
1
]]
=
np
.
nan
else
:
if
isinstance
(
winsz
,
str
):
winsz
=
pd
.
Timedelta
(
winsz
)
//
pd
.
Timedelta
(
to_fit
.
index
.
freqstr
)
winsz
=
pd
.
Timedelta
(
winsz
)
//
regular
if
winsz
%
2
==
0
:
winsz
=
int
(
winsz
-
1
)
if
min_periods
is
None
:
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/outliers.py
+
10
−
3
View file @
f2d98441
...
...
@@ -11,7 +11,8 @@ import numpy.polynomial.polynomial as poly
import
numba
from
saqc.lib.tools
import
(
customRoller
,
findIndex
findIndex
,
getFreqDelta
)
from
saqc.funcs.scores
import
assignKNNScore
from
outliers
import
smirnov_grubbs
...
...
@@ -845,14 +846,20 @@ def flagByGrubbs(data, field, flagger, winsz, alpha=0.05, min_periods=8, check_l
data
=
data
.
copy
()
datcol
=
data
[
field
]
rate
=
getFreqDelta
(
datcol
.
index
)
# if timeseries that is analyzed, is regular, window size can be transformed to a number of periods:
if
rate
and
isinstance
(
winsz
,
str
):
winsz
=
pd
.
Timedelta
(
winsz
)
//
rate
to_group
=
pd
.
DataFrame
(
data
=
{
"
ts
"
:
datcol
.
index
,
"
data
"
:
datcol
})
to_flag
=
pd
.
Series
(
False
,
index
=
datcol
.
index
)
if
isinstance
(
winsz
,
int
):
# period number defined test intervals
grouper_series
=
pd
.
Series
(
data
=
np
.
arange
(
0
,
datcol
.
shape
[
0
]),
index
=
datcol
.
index
)
grouper_series_lagged
=
grouper_series
+
(
winsz
/
2
)
grouper_series
=
grouper_series
.
transform
(
lambda
x
:
int
(
np
.
floor
(
x
/
winsz
)
))
grouper_series_lagged
=
grouper_series_lagged
.
transform
(
lambda
x
:
int
(
np
.
floor
(
x
/
winsz
)
))
grouper_series
=
grouper_series
.
transform
(
lambda
x
:
x
/
/
winsz
)
grouper_series_lagged
=
grouper_series_lagged
.
transform
(
lambda
x
:
x
/
/
winsz
)
partitions
=
to_group
.
groupby
(
grouper_series
)
partitions_lagged
=
to_group
.
groupby
(
grouper_series_lagged
)
else
:
...
...
This diff is collapsed.
Click to expand it.
saqc/funcs/rolling.py
+
6
−
4
View file @
f2d98441
...
...
@@ -4,6 +4,7 @@ import numpy as np
import
pandas
as
pd
from
saqc.core.register
import
register
from
saqc.lib.tools
import
getFreqDelta
@register
(
masking
=
'
field
'
)
...
...
@@ -59,8 +60,9 @@ def roll(data, field, flagger, winsz, func=np.mean, eval_flags=True, min_periods
if
to_fit
.
empty
:
return
data
,
flagger
regular
=
getFreqDelta
(
to_fit
.
index
)
# starting with the annoying case: finding the rolling interval centers of not-harmonized input time series:
if
(
to_fit
.
index
.
freqstr
is
None
)
and
cente
r
:
if
center
and
not
regula
r
:
if
isinstance
(
winsz
,
int
):
raise
NotImplementedError
(
"
Integer based window size is not supported for not-harmonized
"
...
...
@@ -90,7 +92,7 @@ def roll(data, field, flagger, winsz, func=np.mean, eval_flags=True, min_periods
# everything is easier if data[field] is harmonized:
else
:
if
isinstance
(
winsz
,
str
):
winsz
=
int
(
np
.
floor
(
pd
.
Timedelta
(
winsz
)
/
pd
.
Timedelta
(
to_fit
.
index
.
freqstr
)))
winsz
=
pd
.
Timedelta
(
winsz
)
/
/
regular
if
(
winsz
%
2
==
0
)
&
center
:
winsz
=
int
(
winsz
-
1
)
...
...
@@ -101,9 +103,9 @@ def roll(data, field, flagger, winsz, func=np.mean, eval_flags=True, min_periods
means
=
to_fit
.
rolling
(
window
=
winsz
,
center
=
center
,
closed
=
"
both
"
).
apply
(
func
)
if
_return_residues
:
residue
s
=
means
-
to_fit
mean
s
=
means
-
to_fit
data
[
field
]
=
residue
s
data
[
field
]
=
mean
s
if
eval_flags
:
num_cats
,
codes
=
flags
.
factorize
()
num_cats
=
pd
.
Series
(
num_cats
,
index
=
flags
.
index
).
rolling
(
winsz
,
center
=
True
,
min_periods
=
min_periods
).
max
()
...
...
This diff is collapsed.
Click to expand it.
saqc/lib/tools.py
+
17
−
0
View file @
f2d98441
...
...
@@ -568,3 +568,20 @@ def detectDeviants(data, metric, norm_spread, norm_frac, linkage_method='single'
return
[
i
for
i
,
x
in
enumerate
(
cluster
)
if
x
!=
norm_cluster
]
def getFreqDelta(index):
    """
    Check whether the passed index is regularly sampled.

    If the index exposes a ``freq`` attribute that is not ``None``, that value
    is returned unchanged. Otherwise an evenly spaced datetime range with the
    same first value, last value and length is built and compared against the
    index; if both are equal, the spacing between two consecutive entries is
    returned.

    Parameters
    ----------
    index : pd.Index
        The index to check.

    Returns
    -------
    The ``freq`` attribute of the index if it is set, else the constant
    difference between consecutive entries if the index is regularly sampled,
    else ``None``.
    (``None`` will also be returned for pd.RangeIndex type.)

    Notes
    -----
    An index with fewer than two entries has no spacing to infer, so ``None``
    is returned for it unless its ``freq`` attribute is set.
    """
    delta = getattr(index, 'freq', None)
    # At least two entries are needed to derive a spacing: a one-element index
    # trivially equals its reconstructed range, but has no second entry to
    # subtract from.
    if delta is None and len(index) > 1:
        regular = pd.date_range(start=index[0], end=index[-1], periods=len(index))
        if regular.equals(index):
            return regular[1] - regular[0]
    return delta
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment