Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
dd787ea0
Commit
dd787ea0
authored
2 years ago
by
Peter Lünenschloß
Browse files
Options
Downloads
Patches
Plain Diff
removed automatic interpolation downgrade mechanic/ added extrapolation mechanic
parent
52fd5d00
No related branches found
No related tags found
1 merge request
!600
Inter limit fix
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
saqc/funcs/interpolation.py
+2
-3
2 additions, 3 deletions
saqc/funcs/interpolation.py
saqc/lib/ts_operators.py
+39
-40
39 additions, 40 deletions
saqc/lib/ts_operators.py
tests/lib/test_ts_operators.py
+1
-1
1 addition, 1 deletion
tests/lib/test_ts_operators.py
with
42 additions
and
44 deletions
saqc/funcs/interpolation.py
+
2
−
3
View file @
dd787ea0
...
@@ -186,8 +186,7 @@ class InterpolationMixin:
...
@@ -186,8 +186,7 @@ class InterpolationMixin:
self
.
_data
[
field
],
self
.
_data
[
field
],
method
,
method
,
order
=
order
,
order
=
order
,
inter_limit
=
limit
,
gap_limit
=
limit
,
downgrade_interpolation
=
downgrade
,
)
)
interpolated
=
self
.
_data
[
field
].
isna
()
&
inter_data
.
notna
()
interpolated
=
self
.
_data
[
field
].
isna
()
&
inter_data
.
notna
()
...
@@ -281,7 +280,7 @@ class InterpolationMixin:
...
@@ -281,7 +280,7 @@ class InterpolationMixin:
data
=
datcol
,
data
=
datcol
,
method
=
method
,
method
=
method
,
order
=
order
,
order
=
order
,
inter
_limit
=
limit
,
gap
_limit
=
limit
,
downgrade_interpolation
=
downgrade
,
downgrade_interpolation
=
downgrade
,
)
)
...
...
This diff is collapsed.
Click to expand it.
saqc/lib/ts_operators.py
+
39
−
40
View file @
dd787ea0
...
@@ -276,46 +276,37 @@ def meanQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
...
@@ -276,46 +276,37 @@ def meanQC(data, max_nan_total=np.inf, max_nan_consec=np.inf):
)
)
def
_interpolWrapper
(
x
,
order
=
2
,
method
=
"
time
"
,
downgrade_interpola
tion
=
Fals
e
):
def
_interpolWrapper
(
x
,
order
=
1
,
method
=
"
time
"
,
limit_area
=
'
inside
'
,
limit_direc
tion
=
Non
e
):
"""
"""
Function that automatically modifies the interpolation level or returns uninterpolated
Function that automatically modifies the interpolation level or returns uninterpolated
input data if the data configuration breaks the interpolation method at the selected degree.
input data if the data configuration breaks the interpolation method at the selected degree.
"""
"""
if
order
<
0
:
return
x
min_vals_dict
=
{
'
nearest
'
:
2
,
'
slinear
'
:
2
,
'
quadratic
'
:
3
,
'
cubic
'
:
4
,
'
spline
'
:
order
+
1
,
'
polynomial
'
:
order
+
1
,
elif
x
.
count
()
>
order
:
'
piecewise_polynomial
'
:
2
,
'
pchip
'
:
2
,
'
akima
'
:
2
,
'
cubicspline
'
:
2
}
try
:
min_vals
=
min_vals_dict
.
get
(
method
,
0
)
return
x
.
interpolate
(
method
=
method
,
order
=
int
(
order
))
except
(
NotImplementedError
,
ValueError
):
if
(
x
.
size
<
3
)
|
(
x
.
count
()
<
min_vals
):
warnings
.
warn
(
f
"
Interpolation with method
{
method
}
is not supported at order
"
f
"
{
order
}
. and will be performed at order
{
order
-
1
}
"
)
return
_interpolWrapper
(
x
,
int
(
order
-
1
),
method
)
elif
x
.
size
<
3
:
return
x
return
x
else
:
else
:
if
downgrade_interpolation
:
return
x
.
interpolate
(
method
=
method
,
order
=
order
,
limit_area
=
limit_area
,
limit_direction
=
limit_direction
)
return
_interpolWrapper
(
x
,
int
(
x
.
count
()
-
1
),
method
)
else
:
return
x
def
interpolateNANs
(
def
interpolateNANs
(
data
,
method
,
order
=
2
,
inter
_limit
=
2
,
downgrade_interpolation
=
Fals
e
data
,
method
,
order
=
2
,
gap
_limit
=
2
,
extrapolate
=
Non
e
):
):
"""
"""
The function interpolates nan-values (and nan-grids) in timeseries data. It can
The function interpolates nan-values (and nan-grids) in timeseries data. It can
be passed all the method keywords from the pd.Series.interpolate method and will
be passed all the method keywords from the pd.Series.interpolate method and will
than apply this very methods. Note, that the limit keyword really restricts
than apply this very methods. Note, that the limit keyword really restricts
the interpolation to
chunk
s, not containing more than
"
limit
"
nan entries (
the interpolation to
gap
s, not containing more than
"
limit
"
nan entries (
thereby not being identical to the
"
limit
"
keyword of pd.Series.interpolate).
thereby not being identical to the
"
limit
"
keyword of pd.Series.interpolate).
:param data: pd.Series or np.array. The data series to be interpolated
:param data: pd.Series or np.array. The data series to be interpolated
:param method: String. Method keyword designating interpolation method to use.
:param method: String. Method keyword designating interpolation method to use.
:param order: Integer. If your desired interpolation method needs an order to be passed -
:param order: Integer. If your desired interpolation method needs an order to be passed -
here you pass it.
here you pass it.
:param
inter
_limit: Integer. Default = 2. Number up to which consecutive nan - values in the data get
:param
gap
_limit: Integer. Default = 2. Number up to which consecutive nan - values in the data get
replaced by interpolation.
replaced by interpolation.
Its default value suits an interpolation that only will apply to points of an
Its default value suits an interpolation that only will apply to points of an
inserted frequency grid. (regularization by interpolation)
inserted frequency grid. (regularization by interpolation)
...
@@ -327,18 +318,18 @@ def interpolateNANs(
...
@@ -327,18 +318,18 @@ def interpolateNANs(
:return:
:return:
"""
"""
data
=
pd
.
Series
(
data
,
copy
=
True
)
data
=
pd
.
Series
(
data
,
copy
=
True
)
limit_area
=
"
inside
"
if
not
extrapolate
else
"
outside
"
if
inter
_limit
is
None
:
if
gap
_limit
is
None
:
# if there is actually no limit set to the gaps to-be interpolated, generate a dummy mask for the gaps
# if there is actually no limit set to the gaps to-be interpolated, generate a dummy mask for the gaps
gap_mask
=
pd
.
Series
(
True
,
index
=
data
.
index
,
name
=
data
.
name
)
gap_mask
=
pd
.
Series
(
True
,
index
=
data
.
index
,
name
=
data
.
name
)
elif
inter
_limit
<
2
:
elif
gap
_limit
<
2
:
return
data
return
data
else
:
else
:
# if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right side
# if there is a limit to the gaps to be interpolated, generate a mask that evaluates to False at the right side
# of each too-large gap with a rolling.sum combo
# of each too-large gap with a rolling.sum combo
gap_mask
=
data
.
isna
().
rolling
(
inter
_limit
,
min_periods
=
0
).
sum
()
!=
inter
_limit
gap_mask
=
data
.
isna
().
rolling
(
gap
_limit
,
min_periods
=
0
).
sum
()
!=
gap
_limit
if
inter
_limit
==
2
:
if
gap
_limit
==
2
:
# for the common case of
inter
_limit=2 (default "harmonisation"), we efficiently ba
g
propagate the False
# for the common case of
gap
_limit=2 (default "harmonisation"), we efficiently ba
ck
propagate the False
# value to fill the whole too-large gap by a shift and a conjunction.
# value to fill the whole too-large gap by a shift and a conjunction.
gap_mask
&=
gap_mask
&
gap_mask
.
shift
(
-
1
,
fill_value
=
True
)
gap_mask
&=
gap_mask
&
gap_mask
.
shift
(
-
1
,
fill_value
=
True
)
else
:
else
:
...
@@ -346,14 +337,14 @@ def interpolateNANs(
...
@@ -346,14 +337,14 @@ def interpolateNANs(
# Therefor we replace the True values with np.nan so hat they are interpreted as missing periods.
# Therefor we replace the True values with np.nan so hat they are interpreted as missing periods.
gap_mask
=
(
gap_mask
=
(
gap_mask
.
replace
(
True
,
np
.
nan
)
gap_mask
.
replace
(
True
,
np
.
nan
)
.
fillna
(
method
=
"
bfill
"
,
limit
=
inter
_limit
-
1
)
.
fillna
(
method
=
"
bfill
"
,
limit
=
gap
_limit
-
1
)
.
replace
(
np
.
nan
,
True
)
.
replace
(
np
.
nan
,
True
)
.
astype
(
bool
)
.
astype
(
bool
)
)
)
# memorizing the index for later reindexing
# memorizing the index for later reindexing
pre_index
=
data
.
index
pre_index
=
data
.
index
# drop the gaps that are too large with regard to the
inter
_limit from the data-to-be interpolated
# drop the gaps that are too large with regard to the
gap
_limit from the data-to-be interpolated
data
=
data
[
gap_mask
]
data
=
data
[
gap_mask
]
if
data
.
empty
:
if
data
.
empty
:
return
data
return
data
...
@@ -361,22 +352,30 @@ def interpolateNANs(
...
@@ -361,22 +352,30 @@ def interpolateNANs(
if
method
in
[
"
linear
"
,
"
time
"
]:
if
method
in
[
"
linear
"
,
"
time
"
]:
# in the case of linear interpolation, not much can go wrong/break so this conditional branch has efficient
# in the case of linear interpolation, not much can go wrong/break so this conditional branch has efficient
# finish by just calling pandas interpolation routine to fill the gaps remaining in the data:
# finish by just calling pandas interpolation routine to fill the gaps remaining in the data:
data
.
interpolate
(
method
=
method
,
inplace
=
True
,
limit_area
=
"
inside
"
)
data
.
interpolate
(
method
=
method
,
inplace
=
True
,
limit_area
=
limit_area
,
limit_direction
=
extrapolate
)
else
:
else
:
# if the method that is interpolated with depends on not only the left and right border points of any gap,
# if the method that is interpolated with
,
depends on not only the left and right border points of any gap,
# but includes more points, it has to be applied on any data chunk seperated by the too-big gaps individually.
# but includes more points, it has to be applied on any data chunk seperated by the too-big gaps individually.
# So we use the gap_mask to group the data into chunks and perform the interpolation on every chunk seperatly
# So we use the gap_mask to group the data into chunks and perform the interpolation on every chunk seperatly
# with the .transform method of the grouper.
# with the .transform method of the grouper.
gap_mask
=
(
~
gap_mask
).
cumsum
()[
data
.
index
]
gap_mask
=
(
~
gap_mask
).
cumsum
()[
data
.
index
]
data
=
data
.
groupby
(
by
=
gap_mask
).
transform
(
chunk_groups
=
data
.
groupby
(
by
=
gap_mask
)
_interpolWrapper
,
if
extrapolate
:
**
{
if
extrapolate
in
[
'
both
'
,
'
backward
'
]:
"
order
"
:
order
,
lead_idx
=
gap_mask
[
gap_mask
==
gap_mask
.
min
()].
index
"
method
"
:
method
,
data
[
lead_idx
]
=
_interpolWrapper
(
data
[
lead_idx
],
order
=
order
,
method
=
method
,
limit_area
=
limit_area
,
limit_direction
=
'
backward
'
)
"
downgrade_interpolation
"
:
downgrade_interpolation
,
if
extrapolate
in
[
'
both
'
,
'
forward
'
]:
},
trail_idx
=
gap_mask
[
gap_mask
==
gap_mask
.
max
()].
index
)
data
[
trail_idx
]
=
_interpolWrapper
(
data
[
lead_idx
],
order
=
order
,
method
=
method
,
limit_area
=
limit_area
,
limit_direction
=
'
forward
'
)
else
:
data
=
chunk_groups
.
groupby
(
by
=
gap_mask
).
transform
(
_interpolWrapper
,
**
{
"
order
"
:
order
,
"
method
"
:
method
,
},
)
# finally reinsert the dropped data gaps
# finally reinsert the dropped data gaps
data
=
data
.
reindex
(
pre_index
)
data
=
data
.
reindex
(
pre_index
)
return
data
return
data
...
@@ -617,10 +616,10 @@ def linearDriftModel(x, origin, target):
...
@@ -617,10 +616,10 @@ def linearDriftModel(x, origin, target):
def
linearInterpolation
(
data
,
inter_limit
=
2
):
def
linearInterpolation
(
data
,
inter_limit
=
2
):
return
interpolateNANs
(
data
,
"
time
"
,
inter
_limit
=
inter_limit
)
return
interpolateNANs
(
data
,
"
time
"
,
gap
_limit
=
inter_limit
)
def
polynomialInterpolation
(
data
,
inter_limit
=
2
,
inter_order
=
2
):
def
polynomialInterpolation
(
data
,
inter_limit
=
2
,
inter_order
=
2
):
return
interpolateNANs
(
return
interpolateNANs
(
data
,
"
polynomial
"
,
inter
_limit
=
inter_limit
,
order
=
inter_order
data
,
"
polynomial
"
,
gap
_limit
=
inter_limit
,
order
=
inter_order
)
)
This diff is collapsed.
Click to expand it.
tests/lib/test_ts_operators.py
+
1
−
1
View file @
dd787ea0
...
@@ -228,7 +228,7 @@ def test_rateOfChange(data, expected):
...
@@ -228,7 +228,7 @@ def test_rateOfChange(data, expected):
],
],
)
)
def
test_interpolatNANs
(
limit
,
data
,
expected
):
def
test_interpolatNANs
(
limit
,
data
,
expected
):
got
=
interpolateNANs
(
pd
.
Series
(
data
),
inter
_limit
=
limit
,
method
=
"
linear
"
)
got
=
interpolateNANs
(
pd
.
Series
(
data
),
gap
_limit
=
limit
,
method
=
"
linear
"
)
try
:
try
:
assert
got
.
equals
(
pd
.
Series
(
expected
,
dtype
=
float
))
assert
got
.
equals
(
pd
.
Series
(
expected
,
dtype
=
float
))
except
:
except
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment