import pytest import numpy as np import pandas as pd from dios import DictOfSeries # TODO: this is odd # Why not simple fixtures with talking-names, # that also take parameter, if needed @pytest.fixture def char_dict(): return { "raise": pd.DatetimeIndex([]), "drop": pd.DatetimeIndex([]), "peak": pd.DatetimeIndex([]), "return": pd.DatetimeIndex([]), "missing": pd.DatetimeIndex([]), } @pytest.fixture def course_1(char_dict): """ MONOTONOUSLY ASCENDING/DESCENDING values , that monotonously ascend towards a peak level, and thereafter do monotonously decrease the resulting drop/raise per value equals: (peak_level - initial_level) / (0.5*(periods-2)) periods number better be even! """ def fix_funk( freq="10min", periods=10, initial_level=0, peak_level=10, initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), char_dict=char_dict, name="data", ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) left = np.linspace(initial_level, peak_level, int(np.floor(len(t_index) / 2))) right = np.linspace(peak_level, initial_level, int(np.ceil(len(t_index) / 2))) s = pd.Series(np.append(left, right), index=t_index) char_dict["raise"] = s.index[1 : int(np.floor(len(t_index) / 2))] char_dict["drop"] = s.index[int(np.floor(len(t_index) / 2) + 1) :] char_dict["peak"] = s.index[ int(np.floor(len(t_index) / 2)) - 1 : int(np.floor(len(t_index) / 2)) + 1 ] data = DictOfSeries(data=s, columns=[name]) return data, char_dict return fix_funk @pytest.fixture def course_2(char_dict): """ SINGLE_SPIKE values , that linearly develop over the whole timeseries, from "initial_level" to "final_level", exhibiting one "anomalous" or "outlierish" value of magnitude "out_val" at position "periods/2" number of periods better be even! """ # SINGLE_SPIKE def fix_funk( freq="10min", periods=10, initial_level=0, final_level=2, out_val=5, initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) data = np.linspace(initial_level, final_level, int(np.floor(len(t_index)))) data = pd.Series(data=data, index=t_index) data.iloc[int(np.floor(periods / 2))] = out_val if out_val > data.iloc[int(np.floor(periods / 2) - 1)]: kind = "raise" else: kind = "drop" char_dict[kind] = data.index[int(np.floor(periods / 2))] char_dict["return"] = data.index[int(np.floor(len(t_index) / 2)) + 1] data = DictOfSeries(data=data, columns=["data"]) return data, char_dict return fix_funk @pytest.fixture def course_test(char_dict): """ Test function for pattern detection same as test pattern for first three values, than constant function """ def fix_funk( freq="1 D", initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), out_val=5, char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=100) data = pd.Series(data=0, index=t_index) data.iloc[2] = out_val data.iloc[3] = out_val data = DictOfSeries(data=data, columns=["data"]) return data, char_dict return fix_funk @pytest.fixture def course_3(char_dict): """ CROWD IN A PIT/CROWD ON A SUMMIT values , that linearly develop over the whole timeseries, from "initial_level" to "final_level", exhibiting a "crowd" of "anomalous" or "outlierish" values of magnitude "out_val". The "crowd/group" of anomalous values starts at position "periods/2" and continues with an additional amount of "crowd_size" values, that are each spaced "crowd_spacing" minutes from there predecessors. number of periods better be even! chrowd_size * crowd_spacing better be less then freq[minutes]. """ def fix_funk( freq="10min", periods=10, initial_level=0, final_level=2, out_val=-5, initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), char_dict=char_dict, crowd_size=5, crowd_spacing=1, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) data = np.linspace(initial_level, final_level, int(np.floor(len(t_index)))) data = pd.Series(data=data, index=t_index) ind1 = data.index[int(np.floor(periods / 2))] dates = [ ind1 + crowd_spacing * pd.Timedelta(f"{k}min") for k in range(1, crowd_size + 1) ] insertion_index = pd.DatetimeIndex(dates) data.iloc[int(np.floor(periods / 2))] = out_val data = data.append(pd.Series(data=out_val, index=insertion_index)) data.sort_index(inplace=True) anomaly_index = insertion_index.insert( 0, data.index[int(np.floor(periods / 2))] ) if out_val > data.iloc[int(np.floor(periods / 2) - 1)]: kind = "raise" else: kind = "drop" char_dict[kind] = anomaly_index char_dict["return"] = t_index[int(len(t_index) / 2) + 1] data = DictOfSeries(data=data, columns=["data"]) return data, char_dict return fix_funk @pytest.fixture def course_4(char_dict): """ TEETH (ROW OF SPIKES) values , that remain on value level "base_level" and than begin exposing an outlierish or spikey value of magnitude "out_val" every second timestep, starting at periods/2, with the first spike. number of periods better be even! """ def fix_funk( freq="10min", periods=10, base_level=0, out_val=5, initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) data = pd.Series(data=base_level, index=t_index) data[int(len(t_index) / 2) :: 2] = out_val char_dict["raise"] = t_index[int(len(t_index) / 2) :: 2] char_dict["return"] = t_index[int((len(t_index) / 2) + 1) :: 2] data = DictOfSeries(data=data, columns=["data"]) return data, char_dict return fix_funk @pytest.fixture def course_5(char_dict): """ NAN_holes values , that ascend from initial_level to final_level linearly and have missing data(=nan) at positions "nan_slice", (=a slice or a list, for iloc indexing) periods better be even! periods better be greater 5 """ def fix_funk( freq="10min", periods=10, nan_slice=slice(0, None, 5), initial_level=0, final_level=10, initial_index=pd.Timestamp(2000, 1, 1, 0, 0, 0), char_dict=char_dict, ): t_index = pd.date_range(initial_index, freq=freq, periods=periods) values = np.linspace(initial_level, final_level, periods) s = pd.Series(values, index=t_index) s.iloc[nan_slice] = np.nan char_dict["missing"] = s.iloc[nan_slice].index data = DictOfSeries(data=s, columns=["data"]) return data, char_dict return fix_funk