OutlierDetection.rst

>>> data_path = './ressources/data/incidentsLKG.csv'
>>> import pandas as pd
>>> data = pd.read_csv(data_path, index_col=0)
>>> data.index = pd.DatetimeIndex(data.index)
>>> import saqc
>>> qc = saqc.SaQC(data)
>>> qc.plot('incidents') # doctest: +SKIP
>>> import numpy as np
>>> qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D')
>>> qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D')
>>> qc = qc.fitPolynomial(field='incidents', target='incidents_polynomial', order=2, window='13D')
>>> qc = qc.processGeneric(field='incidents', target='incidents_lowPass', func=lambda x: butterFilter(x, cutoff=0.1, nyq=0.5, filter_order=2))
>>> data = qc.data
>>> data.plot()
<AxesSubplot:>
>>> qc = qc.processGeneric(['incidents', 'incidents_mean'], target='incidents_residues', func=lambda x, y: x - y)
>>> z_score = lambda D: abs((D[14] - np.mean(D)) / np.std(D))
>>> qc = qc.roll(field='incidents_residues', target='incidents_scores', func=z_score, window='27D')
>>> qc = qc.roll(field='incidents_residues', target='residues_mean', window='27D', func=np.mean)
>>> qc = qc.roll(field='incidents_residues', target='residues_std', window='27D', func=np.std)
>>> qc = qc.processGeneric(field=['incidents_scores', "residues_mean", "residues_std"], target="residues_norm", func=lambda this, mean, std: (this - mean) / std)
>>> qc = qc.processGeneric(field=['incidents_residues','incidents_mean','incidents_median'], target='incidents_scores', func=lambda x,y,z: abs((x-y) / z))
>>> qc.plot('incidents_scores') # doctest:+SKIP
>>> qc = qc.flagRange('incidents_scores', max=3)
>>> qc = qc.flagGeneric(field=['incidents_scores'], target='incidents', func=lambda x: isflagged(x))
>>> qc = qc.flagGeneric(field=['incidents_scores'], target='incidents', func=lambda x: x > 3)
>>> qc.plot('incidents') # doctest: +SKIP
>>> qc = qc.flagGeneric(field=['incidents','incidents_residues'], target="incidents", func=lambda x,y: isflagged(x) & (y < 50), flag=-np.inf)
>>> qc.plot("incidents") # doctest:+SKIP
>>> qc = qc.flagGeneric(field=['incidents_scores', 'incidents_residues'], target='incidents', func=lambda x, y: (x > 3) & (y > 20))
>>> qc.plot("incidents") # doctest: +SKIP