Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SaQC
Manage
Activity
Members
Labels
Plan
Issues
36
Issue boards
Milestones
Wiki
Code
Merge requests
8
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
rdm-software
SaQC
Commits
364df8d4
Commit
364df8d4
authored
2 years ago
by
Bert Palm
🎇
Browse files
Options
Downloads
Patches
Plain Diff
added more ts_operator test, rm faulty functions with no usage, added docstrings
parent
5efdd533
No related branches found
No related tags found
1 merge request
!462
More tests
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
saqc/lib/ts_operators.py
+93
-23
93 additions, 23 deletions
saqc/lib/ts_operators.py
tests/lib/test_ts_operators.py
+118
-30
118 additions, 30 deletions
tests/lib/test_ts_operators.py
with
211 additions
and
53 deletions
saqc/lib/ts_operators.py
+
93
−
23
View file @
364df8d4
...
...
@@ -25,7 +25,19 @@ from saqc.lib.tools import getFreqDelta
def
identity
(
ts
):
# identity function
"""
Returns the input.
Parameters
----------
ts : pd.Series
A series with datetime index.
Returns
-------
ts: pd.Series
the original
"""
return
ts
...
...
@@ -36,36 +48,56 @@ def count(ts):
return
ts
.
count
()
def
first
(
ts
):
# first is a dummy to trigger the corresponding built-in `first` method of resamplers when
# passed to aggregate2freq. For consistency, it behaves the same way when
# applied directly:
return
ts
.
first
()
def
last
(
ts
):
# last is a dummy to trigger the corresponding built-in `last` method of resamplers when
# passed to aggregate2freq. For consistency, it behaves the same way when
# applied directly:
return
ts
.
last
()
def
zeroLog
(
ts
):
"""
Calculate log of values of series for (0, inf] and NaN for negative values (NOTE: a value of exactly 0 yields ``sys.float_info.min``, not NaN).
Parameters
----------
ts : pd.Series
A series with datetime index.
def
zeroLog
(
ts
):
# zero log returns np.nan instead of -np.inf, when passed 0. Useful, because
# in internal processing, you only have to check for nan values if you need to
# remove "invalidish" values from the data.
Returns
-------
pd.Series
"""
log_ts
=
np
.
log
(
ts
)
log_ts
[
log_ts
==
-
np
.
inf
]
=
sys
.
float_info
.
min
return
log_ts
def
derivative
(
ts
,
unit
=
"
1min
"
):
# calculates derivative of timeseries, expressed in slope per "unit"
return
ts
/
(
deltaT
(
ts
,
unit
=
unit
))
"""
Calculates derivative of timeseries, expressed in slope per `unit`.
Parameters
----------
ts : pd.Series
A series with datetime index.
unit : str
Datetime offset unit.
Returns
-------
pd.Series
"""
return
ts
/
deltaT
(
ts
,
unit
=
unit
)
def
deltaT
(
ts
,
unit
=
"
1min
"
):
# calculates series of time gaps in ts
"""
Calculate the time difference of subsequent index values, expressed in multiples of `unit`.
Parameters
----------
ts : pd.Series
A series with datetime index.
Returns
-------
pd.Series
"""
return
(
ts
.
index
.
to_series
().
diff
().
dt
.
total_seconds
()
/
pd
.
Timedelta
(
unit
).
total_seconds
()
...
...
@@ -73,11 +105,34 @@ def deltaT(ts, unit="1min"):
def
difference
(
ts
):
# NOTE: index of input series gets lost!
return
np
.
diff
(
ts
,
prepend
=
np
.
nan
)
"""
Calculate the difference of subsequent values in the series.
Parameters
----------
ts : pd.Series
A series with datetime index.
Returns
-------
pd.Series
"""
return
ts
.
diff
(
1
)
def
rateOfChange
(
ts
):
"""
Calculate the rate of change of the series values.
Parameters
----------
ts : pd.Series
A series with datetime index.
Returns
-------
pd.Series
"""
return
difference
(
ts
)
/
ts
...
...
@@ -89,7 +144,22 @@ def relativeDifference(ts):
def
scale
(
ts
,
target_range
=
1
,
projection_point
=
None
):
# scales input series to have values ranging from -target_range to +target_range
"""
Scales input series values to a given range.
Parameters
----------
ts : pd.Series
A series with datetime index.
target_range : int
The projection will range from ``[-target_range, target_range]``
Returns
-------
scaled: pd.Series
The scaled Series
"""
if
not
projection_point
:
projection_point
=
np
.
max
(
np
.
abs
(
ts
))
return
(
ts
/
projection_point
)
*
target_range
...
...
This diff is collapsed.
Click to expand it.
tests/lib/test_ts_operators.py
+
118
−
30
View file @
364df8d4
...
...
@@ -7,6 +7,7 @@ import pytest
import
saqc.lib.ts_operators
as
tsops
import
pandas
as
pd
from
pandas.testing
import
assert_series_equal
from
numpy.testing
import
assert_array_equal
,
assert_equal
def
test_butterFilter
():
...
...
@@ -54,54 +55,141 @@ def dtSeries(data, freq="1d"):
@pytest.mark.parametrize
(
"
func,data,expected
"
,
"
data
"
,
[
dtSeries
([
0
,
1
,
2
]),
dtSeries
([
0
,
np
.
nan
,
2
])],
)
def
test_identity
(
data
):
from
saqc.lib.ts_operators
import
identity
result
=
identity
(
data
)
assert
result
is
data
@pytest.mark.parametrize
(
"
data,expected
"
,
[
(
dtSeries
([
0
,
1
,
2
]),
3
),
(
dtSeries
([
0
,
np
.
nan
,
2
]),
2
),
],
)
def
test_count
(
data
,
expected
):
# count is labeled as a dummy function, this means
# we need to ensure it exists with a resampler object.
resampler
=
data
.
resample
(
"
2d
"
)
assert
hasattr
(
resampler
,
"
count
"
)
from
saqc.lib.ts_operators
import
count
result
=
count
(
data
)
assert
result
==
expected
@pytest.mark.parametrize
(
"
data,expected
"
,
[
(
"
identity
"
,
dtSeries
([
1
,
2
]),
dtSeries
([
1
,
2
])),
(
"
count
"
,
dtSeries
([
0
,
0
]),
dtSeries
([
2
])),
pytest
.
param
(
"
first
"
,
dtSeries
([
1
,
2
]),
dtSeries
([
1
,
1
]),
marks
=
pytest
.
mark
.
xfail
(
reason
=
"
BUG (the inner ts.first need an argument)
"
),
),
pytest
.
param
(
"
last
"
,
dtSeries
([
1
,
2
]),
dtSeries
([
1
,
1
]),
marks
=
pytest
.
mark
.
xfail
(
reason
=
"
BUG (the inner ts.last need an argument)
"
),
),
(
"
zeroLog
"
,
dtSeries
([
1
,
2
,
np
.
inf
,
np
.
nan
]),
dtSeries
([
np
.
log
(
1
),
np
.
log
(
2
),
np
.
inf
,
np
.
nan
]),
),
pytest
.
param
(
"
zeroLog
"
,
dtSeries
(
[
#
0,
0
,
-
2
,
-
1
,
-
np
.
inf
,
]
),
dtSeries
([
np
.
nan
,
np
.
nan
,
np
.
nan
]),
marks
=
pytest
.
mark
.
xfail
(
reason
=
"
zeroLog(0) did not return NaN
"
),
dtSeries
([
np
.
nan
,
np
.
nan
,
np
.
nan
,
np
.
nan
]),
marks
=
pytest
.
mark
.
xfail
(
reason
=
"
zeroLog(0) did not return NaN
for 0
"
),
),
],
)
def
test_tsop_functions
(
func
,
data
,
expected
):
f
=
getattr
(
tsops
,
func
)
def
test_zeroLog
(
data
,
expected
):
from
saqc.lib.ts_operators
import
zeroLog
result
=
zeroLog
(
data
)
assert_series_equal
(
result
,
expected
,
check_freq
=
False
,
check_names
=
False
)
resampler
=
data
.
resample
(
"
2d
"
)
result
=
resampler
.
apply
(
f
)
assert
isinstance
(
result
,
pd
.
Series
)
@pytest.mark.parametrize
(
"
data,expected
"
,
[
(
dtSeries
([
1
,
2
,
3
]),
dtSeries
([
np
.
nan
,
1440
,
1440
])),
(
pd
.
Series
(
[
1
,
2
,
3
],
index
=
pd
.
DatetimeIndex
([
"
2020-01-01
"
,
"
2020-01-03
"
,
"
2020-01-13
"
]),
),
dtSeries
([
np
.
nan
,
2880
,
14400
]),
),
],
)
def
test_deltaT
(
data
,
expected
):
from
saqc.lib.ts_operators
import
deltaT
print
()
print
(
result
)
print
()
print
(
expected
)
result
=
deltaT
(
data
)
assert_series_equal
(
result
,
expected
,
check_names
=
False
,
check_freq
=
False
,
check_dtype
=
False
result
,
expected
,
check_dtype
=
False
,
check_names
=
False
,
check_index
=
False
,
check_freq
=
False
,
)
@pytest.mark.parametrize
(
"
data,expected
"
,
[
pytest
.
param
(
pd
.
Series
(
# We use as values the time delta in minutes (the default `unit`) to the previous index value.
# This way the 'derivative' should be 1 for each result value.
[
1
,
2880
,
14400
],
index
=
pd
.
DatetimeIndex
([
"
2020-01-01
"
,
"
2020-01-03
"
,
"
2020-01-13
"
]),
),
pd
.
Series
(
[
np
.
nan
,
1
,
1
],
index
=
pd
.
DatetimeIndex
([
"
2020-01-01
"
,
"
2020-01-03
"
,
"
2020-01-13
"
]),
),
),
],
)
def
test_derivative
(
data
,
expected
):
from
saqc.lib.ts_operators
import
derivative
result
=
derivative
(
data
)
assert_series_equal
(
result
,
expected
,
check_dtype
=
False
,
check_names
=
False
)
@pytest.mark.parametrize
(
"
data,expected
"
,
[
(
dtSeries
([
1
,
1
,
1
]),
dtSeries
([
np
.
nan
,
0
,
0
])),
(
dtSeries
([
1
,
10
,
100
]),
dtSeries
([
np
.
nan
,
9
,
90
])),
(
dtSeries
([
-
np
.
inf
,
np
.
inf
,
0
]),
dtSeries
([
np
.
nan
,
np
.
inf
,
-
np
.
inf
])),
(
dtSeries
([
0
,
np
.
nan
,
0
]),
dtSeries
([
np
.
nan
,
np
.
nan
,
np
.
nan
])),
],
)
def
test_difference
(
data
,
expected
):
from
saqc.lib.ts_operators
import
difference
result
=
difference
(
data
)
assert_series_equal
(
result
,
expected
,
check_names
=
False
)
@pytest.mark.parametrize
(
"
data,expected
"
,
[
(
dtSeries
([
1
,
1
,
1
]),
dtSeries
([
np
.
nan
,
0
,
0
])),
(
dtSeries
([
1
,
10
,
100
]),
dtSeries
([
np
.
nan
,
0.9
,
0.9
])),
(
dtSeries
([
-
np
.
inf
,
np
.
inf
,
0
]),
dtSeries
([
np
.
nan
,
np
.
nan
,
-
np
.
inf
])),
(
dtSeries
([
0
,
np
.
nan
,
0
]),
dtSeries
([
np
.
nan
,
np
.
nan
,
np
.
nan
])),
],
)
def
test_rateOfChange
(
data
,
expected
):
from
saqc.lib.ts_operators
import
rateOfChange
result
=
rateOfChange
(
data
)
assert_series_equal
(
result
,
expected
,
check_names
=
False
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment