diff --git a/saqc/lib/ts_operators.py b/saqc/lib/ts_operators.py
index 2c7254bb30b5bfe81524b19554560d036876ad1a..51ba2a6fdf7e431e2496b8683a810c6add218395 100644
--- a/saqc/lib/ts_operators.py
+++ b/saqc/lib/ts_operators.py
@@ -25,7 +25,19 @@ from saqc.lib.tools import getFreqDelta
 
 
 def identity(ts):
-    # identity function
+    """
+    Return the input unchanged.
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+
+    Returns
+    -------
+    ts : pd.Series
+        The unmodified input series.
+    """
     return ts
 
 
@@ -36,36 +48,56 @@
     return ts.count()
 
 
-def first(ts):
-    # first is a dummy to trigger according built in count method of resamplers when
-    # passed to aggregate2freq. For consistency reasons, it works accordingly when
-    # applied directly:
-    return ts.first()
-
-
-def last(ts):
-    # last is a dummy to trigger according built in count method of resamplers when
-    # passed to aggregate2freq. For consistency reasons, it works accordingly when
-    # applied directly:
-    return ts.last()
+def zeroLog(ts):
+    """
+    Calculate the logarithm of the series values; values outside (0, inf] yield NaN.
 
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
 
-def zeroLog(ts):
-    # zero log returns np.nan instead of -np.inf, when passed 0. Usefull, because
-    # in internal processing, you only have to check for nan values if you need to
-    # remove "invalidish" values from the data.
+    Returns
+    -------
+    pd.Series
+    """
     log_ts = np.log(ts)
     log_ts[log_ts == -np.inf] = sys.float_info.min
     return log_ts
 
 
 def derivative(ts, unit="1min"):
-    # calculates derivative of timeseries, expressed in slope per "unit"
-    return ts / (deltaT(ts, unit=unit))
+    """
+    Calculate the derivative of the timeseries, expressed as slope per `unit`.
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+
+    unit : str
+        Datetime offset unit the slope is expressed in.
+
+    Returns
+    -------
+    pd.Series
+    """
+    return ts / deltaT(ts, unit=unit)
 
 
 def deltaT(ts, unit="1min"):
-    # calculates series of time gaps in ts
+    """
+    Calculate the time differences of the index values, in multiples of `unit`.
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+
+    Returns
+    -------
+    pd.Series
+    """
     return (
         ts.index.to_series().diff().dt.total_seconds()
         / pd.Timedelta(unit).total_seconds()
@@ -73,11 +105,34 @@
 
 
 def difference(ts):
-    # NOTE: index of input series gets lost!
-    return np.diff(ts, prepend=np.nan)
+    """
+    Calculate the difference between subsequent values in the series.
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+
+    Returns
+    -------
+    pd.Series
+    """
+    return ts.diff(1)
 
 
 def rateOfChange(ts):
+    """
+    Calculate the rate of change of the series values.
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+
+    Returns
+    -------
+    pd.Series
+    """
     return difference(ts) / ts
 
 
@@ -89,7 +144,22 @@ def relativeDifference(ts):
 
 
 def scale(ts, target_range=1, projection_point=None):
-    # scales input series to have values ranging from - target_rang to + target_range
+    """
+    Scale the input series values to a given range.
+
+
+    Parameters
+    ----------
+    ts : pd.Series
+        A series with datetime index.
+    target_range : int
+        The projection will range over ``[-target_range, target_range]``.
+
+    Returns
+    -------
+    scaled : pd.Series
+        The scaled series.
+    """
     if not projection_point:
         projection_point = np.max(np.abs(ts))
     return (ts / projection_point) * target_range
diff --git a/tests/lib/test_ts_operators.py b/tests/lib/test_ts_operators.py
index 4b6afaa81914648017c20ccc410d6e36e3ec2642..ec1ff4eb13547129494ec90aa5ab1229e2699050 100644
--- a/tests/lib/test_ts_operators.py
+++ b/tests/lib/test_ts_operators.py
@@ -7,6 +7,7 @@ import pytest
 import saqc.lib.ts_operators as tsops
 import pandas as pd
 from pandas.testing import assert_series_equal
+from numpy.testing import assert_array_equal, assert_equal
 
 
 def test_butterFilter():
@@ -54,54 +55,141 @@ def dtSeries(data, freq="1d"):
 
 
 @pytest.mark.parametrize(
-    "func,data,expected",
+    "data",
+    [dtSeries([0, 1, 2]), dtSeries([0, np.nan, 2])],
+)
+def test_identity(data):
+    from saqc.lib.ts_operators import identity
+
+    result = identity(data)
+    assert result is data
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        (dtSeries([0, 1, 2]), 3),
+        (dtSeries([0, np.nan, 2]), 2),
+    ],
+)
+def test_count(data, expected):
+    # count is meant as a dummy that triggers the built-in resampler
+    # method of the same name, so make sure such a method exists.
+    resampler = data.resample("2d")
+    assert hasattr(resampler, "count")
+
+    from saqc.lib.ts_operators import count
+
+    result = count(data)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "data,expected",
     [
-        ("identity", dtSeries([1, 2]), dtSeries([1, 2])),
-        ("count", dtSeries([0, 0]), dtSeries([2])),
-        pytest.param(
-            "first",
-            dtSeries([1, 2]),
-            dtSeries([1, 1]),
-            marks=pytest.mark.xfail(reason="BUG (the inner ts.first need an argument)"),
-        ),
-        pytest.param(
-            "last",
-            dtSeries([1, 2]),
-            dtSeries([1, 1]),
-            marks=pytest.mark.xfail(reason="BUG (the inner ts.last need an argument)"),
-        ),
         (
-            "zeroLog",
             dtSeries([1, 2, np.inf, np.nan]),
             dtSeries([np.log(1), np.log(2), np.inf, np.nan]),
         ),
         pytest.param(
-            "zeroLog",
             dtSeries(
                 [
-                    # 0,
+                    0,
                     -2,
                     -1,
                     -np.inf,
                 ]
             ),
-            dtSeries([np.nan, np.nan, np.nan]),
-            marks=pytest.mark.xfail(reason="zeroLog(0) did not return NaN"),
+            dtSeries([np.nan, np.nan, np.nan, np.nan]),
+            marks=pytest.mark.xfail(reason="zeroLog does not return NaN for 0"),
         ),
     ],
 )
-def test_tsop_functions(func, data, expected):
-    f = getattr(tsops, func)
+def test_zeroLog(data, expected):
+    from saqc.lib.ts_operators import zeroLog
+
+    result = zeroLog(data)
+    assert_series_equal(result, expected, check_freq=False, check_names=False)
 
-    resampler = data.resample("2d")
-    result = resampler.apply(f)
-    assert isinstance(result, pd.Series)
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        (dtSeries([1, 2, 3]), dtSeries([np.nan, 1440, 1440])),
+        (
+            pd.Series(
+                [1, 2, 3],
+                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
+            ),
+            dtSeries([np.nan, 2880, 14400]),
+        ),
+    ],
+)
+def test_deltaT(data, expected):
+    from saqc.lib.ts_operators import deltaT
 
-    print()
-    print(result)
-    print()
-    print(expected)
+    result = deltaT(data)
     assert_series_equal(
-        result, expected, check_names=False, check_freq=False, check_dtype=False
+        result,
+        expected,
+        check_dtype=False,
+        check_names=False,
+        check_index=False,
+        check_freq=False,
     )
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        pytest.param(
+            pd.Series(
+                # The values equal the index gaps in minutes (the default unit),
+                # so the 'derivative' is expected to be 1 for every result value.
+                [1, 2880, 14400],
+                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
+            ),
+            pd.Series(
+                [np.nan, 1, 1],
+                index=pd.DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-13"]),
+            ),
+        ),
+    ],
+)
+def test_derivative(data, expected):
+    from saqc.lib.ts_operators import derivative
+
+    result = derivative(data)
+    assert_series_equal(result, expected, check_dtype=False, check_names=False)
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        (dtSeries([1, 1, 1]), dtSeries([np.nan, 0, 0])),
+        (dtSeries([1, 10, 100]), dtSeries([np.nan, 9, 90])),
+        (dtSeries([-np.inf, np.inf, 0]), dtSeries([np.nan, np.inf, -np.inf])),
+        (dtSeries([0, np.nan, 0]), dtSeries([np.nan, np.nan, np.nan])),
+    ],
+)
+def test_difference(data, expected):
+    from saqc.lib.ts_operators import difference
+
+    result = difference(data)
+    assert_series_equal(result, expected, check_names=False)
+
+
+@pytest.mark.parametrize(
+    "data,expected",
+    [
+        (dtSeries([1, 1, 1]), dtSeries([np.nan, 0, 0])),
+        (dtSeries([1, 10, 100]), dtSeries([np.nan, 0.9, 0.9])),
+        (dtSeries([-np.inf, np.inf, 0]), dtSeries([np.nan, np.nan, -np.inf])),
+        (dtSeries([0, np.nan, 0]), dtSeries([np.nan, np.nan, np.nan])),
+    ],
+)
+def test_rateOfChange(data, expected):
+    from saqc.lib.ts_operators import rateOfChange
+
+    result = rateOfChange(data)
+    assert_series_equal(result, expected, check_names=False)
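
Not part of the patch: a minimal usage sketch of the newly documented operators, mirroring the expectations encoded in test_deltaT, test_derivative, test_difference and test_rateOfChange above. It assumes saqc with this change applied is importable; the sample series is illustrative only.

import pandas as pd

from saqc.lib.ts_operators import deltaT, derivative, difference, rateOfChange

# Three daily values; the first index entry has no predecessor, so every
# operator yields NaN at that position.
ts = pd.Series(
    [1.0, 2.0, 4.0],
    index=pd.date_range("2020-01-01", periods=3, freq="1d"),
)

print(deltaT(ts))        # index gaps in multiples of the default unit "1min": NaN, 1440.0, 1440.0
print(derivative(ts))    # values divided by those gaps: NaN, 2/1440, 4/1440
print(difference(ts))    # ts.diff(1), index preserved: NaN, 1.0, 2.0
print(rateOfChange(ts))  # difference(ts) / ts: NaN, 0.5, 0.5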
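
Likewise not part of the patch: a short sketch of zeroLog and scale as documented above, including the zeroLog(0) caveat that the xfail in test_zeroLog records.

import numpy as np
import pandas as pd

from saqc.lib.ts_operators import scale, zeroLog

idx = pd.date_range("2020-01-01", periods=4, freq="1d")

# Log of positive values; negative inputs give NaN. As the xfail in
# test_zeroLog documents, an input of 0 currently yields
# sys.float_info.min (the replacement for -inf), not NaN.
print(zeroLog(pd.Series([1.0, 2.0, 0.0, -1.0], index=idx)))

# scale projects onto [-target_range, target_range]; with the default
# projection_point (the maximum absolute value, here 4.0) the result
# is [-0.5, 0.25, 1.0, -0.25].
print(scale(pd.Series([-2.0, 1.0, 4.0, -1.0], index=idx), target_range=1))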