Skip to content
Snippets Groups Projects
Commit 364df8d4 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

added more ts_operator test, rm faulty functions with no usage, added docstrings

parent 5efdd533
No related branches found
No related tags found
1 merge request: !462 "More tests"
......@@ -25,7 +25,19 @@ from saqc.lib.tools import getFreqDelta
def identity(ts):
    """
    Return the input unchanged.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    ts : pd.Series
        The very same object that was passed in (no copy is made).
    """
    return ts
......@@ -36,36 +48,56 @@ def count(ts):
return ts.count()
def first(ts):
    """
    Return the first valid (non-NaN) value of a series.

    According to the original note, this function is a dummy that triggers
    the built-in ``first`` method of resamplers when passed to
    ``aggregate2freq``. For consistency it behaves like that built-in
    (which skips missing values) when applied to a series directly.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    scalar
        The first non-NaN value, or NaN if there is none.
    """
    # ``pd.Series.first`` requires an ``offset`` argument, so calling it bare
    # raises a TypeError. Select the first valid value explicitly instead.
    valid = ts.dropna()
    if valid.empty:
        return np.nan
    return valid.iloc[0]
def last(ts):
    """
    Return the last valid (non-NaN) value of a series.

    According to the original note, this function is a dummy that triggers
    the built-in ``last`` method of resamplers when passed to
    ``aggregate2freq``. For consistency it behaves like that built-in
    (which skips missing values) when applied to a series directly.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    scalar
        The last non-NaN value, or NaN if there is none.
    """
    # ``pd.Series.last`` requires an ``offset`` argument, so calling it bare
    # raises a TypeError. Select the last valid value explicitly instead.
    valid = ts.dropna()
    if valid.empty:
        return np.nan
    return valid.iloc[-1]
def zeroLog(ts):
    """
    Calculate log of values of series for (0, inf] and NaN otherwise.

    Returning NaN instead of ``-np.inf`` for an input of 0 is useful,
    because internal processing then only has to check for NaN values in
    order to remove "invalidish" values from the data.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    pd.Series
        The natural logarithm of `ts`. Zeros and negative values (including
        ``-np.inf``) map to NaN.
    """
    # np.log yields -inf for 0 and NaN for negative input and warns about
    # both; these cases are expected here, so the warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        log_ts = np.log(ts)
    # Replace log(0) == -inf by NaN, as documented (the former replacement
    # value ``sys.float_info.min`` is a valid, tiny positive number).
    log_ts[log_ts == -np.inf] = np.nan
    return log_ts
def derivative(ts, unit="1min"):
    """
    Calculate the derivative of a timeseries, expressed in slope per `unit`.

    Every value is divided by the time gap to its preceding timestamp, with
    the gap measured in multiples of `unit`. The values themselves are not
    differenced by this function.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    unit : str
        Datetime offset unit.

    Returns
    -------
    pd.Series
        The first element is NaN, as it has no preceding timestamp.
    """
    return ts / deltaT(ts, unit=unit)
def deltaT(ts, unit="1min"):
    """
    Calculate the time gaps between consecutive index values.

    The gaps are expressed in multiples of `unit` (minutes by default),
    not in seconds.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    unit : str
        Offset string understood by ``pd.Timedelta``; the time gaps are
        divided by the duration of this offset.

    Returns
    -------
    pd.Series
        Series of time gaps, indexed by the index of `ts`. The first element
        is NaN, as it has no predecessor.
    """
    return (
        ts.index.to_series().diff().dt.total_seconds()
        / pd.Timedelta(unit).total_seconds()
    )
......@@ -73,11 +105,34 @@ def deltaT(ts, unit="1min"):
def difference(ts):
    """
    Calculate the difference of subsequent values in the series.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    pd.Series
        ``ts[i] - ts[i - 1]`` for every position, carrying the index of
        `ts`. The first element is NaN.
    """
    # ``Series.diff`` keeps the index, whereas ``np.diff`` returns a bare
    # array and loses it.
    return ts.diff(1)
def rateOfChange(ts):
    """
    Calculate the rate of change of the series values.

    The change towards each value (see ``difference``) is divided by that
    value itself.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    Returns
    -------
    pd.Series
    """
    delta = difference(ts)
    return delta / ts
......@@ -89,7 +144,22 @@ def relativeDifference(ts):
def scale(ts, target_range=1, projection_point=None):
    """
    Scale input series values to a given range.

    Parameters
    ----------
    ts : pd.Series
        A series with datetime index.

    target_range : int
        The projection will range from ``[-target_range, target_range]``

    projection_point : float, optional
        The absolute value that is mapped onto `target_range`. If not given
        (or falsy, e.g. ``0``), the maximum absolute value of `ts` is used.

    Returns
    -------
    scaled: pd.Series
        The scaled Series
    """
    # A falsy projection point (None or 0) selects the data driven default.
    if not projection_point:
        projection_point = np.max(np.abs(ts))
    return (ts / projection_point) * target_range
......
......@@ -7,6 +7,7 @@ import pytest
import saqc.lib.ts_operators as tsops
import pandas as pd
from pandas.testing import assert_series_equal
from numpy.testing import assert_array_equal, assert_equal
def test_butterFilter():
......@@ -54,54 +55,141 @@ def dtSeries(data, freq="1d"):
@pytest.mark.parametrize(
    "data",
    [dtSeries([0, 1, 2]), dtSeries([0, np.nan, 2])],
)
def test_identity(data):
    """`identity` must hand back the very same object it received."""
    from saqc.lib.ts_operators import identity

    result = identity(data)
    assert result is data
@pytest.mark.parametrize(
    "data,expected",
    [
        (dtSeries([0, 1, 2]), 3),
        (dtSeries([0, np.nan, 2]), 2),
    ],
)
def test_count(data, expected):
    """`count` must exist on resamplers and count the non-NaN values."""
    # `count` is labeled as a dummy function, so a resampler object has to
    # offer a method of the same name.
    assert hasattr(data.resample("2d"), "count")

    from saqc.lib.ts_operators import count

    assert count(data) == expected
@pytest.mark.parametrize(
    "data,expected",
    [
        (
            dtSeries([1, 2, np.inf, np.nan]),
            dtSeries([np.log(1), np.log(2), np.inf, np.nan]),
        ),
        pytest.param(
            dtSeries([0, -2, -1, -np.inf]),
            dtSeries([np.nan, np.nan, np.nan, np.nan]),
            marks=pytest.mark.xfail(reason="zeroLog(0) did not return NaN for 0"),
        ),
    ],
)
def test_zeroLog(data, expected):
    """`zeroLog` must return the log for (0, inf] and NaN otherwise."""
    from saqc.lib.ts_operators import zeroLog

    result = zeroLog(data)
    assert_series_equal(result, expected, check_freq=False, check_names=False)
@pytest.mark.parametrize(
    "data,expected",
    [
        (dtSeries([1, 2, 3]), dtSeries([np.nan, 1440, 1440])),
        (
            pd.Series(
                [1, 2, 3],
                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
            ),
            dtSeries([np.nan, 2880, 14400]),
        ),
    ],
)
def test_deltaT(data, expected):
    """`deltaT` must return the index gaps in minutes (the default unit)."""
    from saqc.lib.ts_operators import deltaT

    result = deltaT(data)
    # Only the values are compared: the index of the second `expected`
    # series is not built from the irregular timestamps of `data`.
    assert_series_equal(
        result,
        expected,
        check_dtype=False,
        check_names=False,
        check_index=False,
        check_freq=False,
    )
@pytest.mark.parametrize(
    "data,expected",
    [
        pytest.param(
            # Each value equals the time gap (in minutes, the default unit)
            # to the preceding timestamp, hence every defined result value
            # of 'derivative' should be 1.
            pd.Series(
                [1, 2880, 14400],
                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
            ),
            pd.Series(
                [np.nan, 1, 1],
                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
            ),
        ),
    ],
)
def test_derivative(data, expected):
    """`derivative` must divide the values by the preceding time gap."""
    from saqc.lib.ts_operators import derivative

    assert_series_equal(
        derivative(data),
        expected,
        check_dtype=False,
        check_names=False,
    )
@pytest.mark.parametrize(
    "data,expected",
    [
        (dtSeries([1, 1, 1]), dtSeries([np.nan, 0, 0])),
        (dtSeries([1, 10, 100]), dtSeries([np.nan, 9, 90])),
        (dtSeries([-np.inf, np.inf, 0]), dtSeries([np.nan, np.inf, -np.inf])),
        (dtSeries([0, np.nan, 0]), dtSeries([np.nan, np.nan, np.nan])),
    ],
)
def test_difference(data, expected):
    """`difference` must return the change between subsequent values."""
    from saqc.lib.ts_operators import difference

    assert_series_equal(difference(data), expected, check_names=False)
@pytest.mark.parametrize(
    "data,expected",
    [
        (dtSeries([1, 1, 1]), dtSeries([np.nan, 0, 0])),
        (dtSeries([1, 10, 100]), dtSeries([np.nan, 0.9, 0.9])),
        (dtSeries([-np.inf, np.inf, 0]), dtSeries([np.nan, np.nan, -np.inf])),
        (dtSeries([0, np.nan, 0]), dtSeries([np.nan, np.nan, np.nan])),
    ],
)
def test_rateOfChange(data, expected):
    """`rateOfChange` must return the change relative to the current value."""
    from saqc.lib.ts_operators import rateOfChange

    assert_series_equal(rateOfChange(data), expected, check_names=False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment