From db38d1c737800d15eae9ad4fdd76cb9b4512caac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Sch=C3=A4fer?= <david.schaefer@ufz.de> Date: Mon, 15 Apr 2024 13:07:33 +0200 Subject: [PATCH] remove code deprecated with version 2.4 --- CHANGELOG.md | 7 + README.md | 4 +- docs/cookbooks/ResidualOutlierDetection.rst | 28 +- docs/funcs/filling.rst | 1 - docs/funcs/genericWrapper.rst | 2 +- docs/funcs/multivariateAnalysis.rst | 1 - docs/funcs/samplingAlignment.rst | 3 - saqc/core/flags.py | 18 -- saqc/funcs/flagtools.py | 2 +- saqc/funcs/interpolation.py | 322 -------------------- saqc/funcs/noise.py | 3 + saqc/funcs/outliers.py | 102 +------ saqc/funcs/resampling.py | 88 +----- saqc/funcs/rolling.py | 62 ---- saqc/funcs/tools.py | 30 -- tests/core/test_flags.py | 9 - tests/funcs/test_proc_functions.py | 24 -- tests/funcs/test_tools.py | 2 - 18 files changed, 45 insertions(+), 663 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5b6195f1..ab8692604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,12 @@ SPDX-License-Identifier: GPL-3.0-or-later ### Changed - `flagPattern` uses *fastdtw* package now to compute timeseries distances ### Removed +- `SaQC` methods deprecated in version 2.4: `interpolate`, `interpolateIndex`, `interpolateInvalid`, `roll`, `linear`,`shift`, `flagCrossStatistics` +- `Flags` method deprecated in version 2.4: `toDios` +- `DictOfSeries` method deprecated in version 2.4: `index_of` +- Option `"complete"` for parameter `history` of method `plot` +- Option `"cycleskip"` for parameter `ax_kwargs` of method `plot` +- Parameter `phaseplot` from method `plot` ### Fixed - `flagConstants`: fixed flagging of rolling ramps - `Flags`: add meta entry to imported flags @@ -36,6 +42,7 @@ SPDX-License-Identifier: GPL-3.0-or-later - `flagRaise` with delegation to better replacements `flagZScore`, `flagUniLOF`, `flagJumps` or `flagOffset` - `flagByGrubbs` with delegation to better replacements `flagZScore`, `flagUniLOF`s - `flagMVScore` with delegation to manual application of the steps + ## [2.5.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22 [List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.1...v2.5.0) ### Added diff --git a/README.md b/README.md index 6fb0bf122..22c853333 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ could look like [this](https://git.ufz.de/rdm-software/saqc/raw/develop/docs/res ``` varname ; test #----------; --------------------------------------------------------------------- -SM2 ; shift(freq="15Min") +SM2 ; align(freq="15Min") 'SM(1|2)+' ; flagMissing() SM1 ; flagRange(min=10, max=60) SM2 ; flagRange(min=10, max=40) @@ -103,7 +103,7 @@ data = pd.read_csv( qc = SaQC(data=data) qc = (qc - .shift("SM2", freq="15Min") + .align("SM2", freq="15Min") .flagMissing("SM(1|2)+", regex=True) .flagRange("SM1", min=10, max=60) .flagRange("SM2", min=10, max=40) diff --git a/docs/cookbooks/ResidualOutlierDetection.rst b/docs/cookbooks/ResidualOutlierDetection.rst index 834b5b003..289d12873 100644 --- a/docs/cookbooks/ResidualOutlierDetection.rst +++ b/docs/cookbooks/ResidualOutlierDetection.rst @@ -147,19 +147,19 @@ Rolling Mean ^^^^^^^^^^^^ Easiest thing to do, would be, to apply some rolling mean -model via the method :py:meth:`saqc.SaQC.roll`. +model via the method :py:meth:`saqc.SaQC.rolling`. .. doctest:: exampleOD >>> import numpy as np - >>> qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D') + >>> qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D') .. plot:: :context: :include-source: False import numpy as np - qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D') + qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D') The ``field`` parameter is passed the variable name, we want to calculate the rolling mean of. The ``target`` parameter holds the name, we want to store the results of the calculation to. @@ -174,13 +174,13 @@ under the name ``np.median``. We just calculate another model curve for the ``"i .. doctest:: exampleOD - >>> qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D') + >>> qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D') .. plot:: :context: :include-source: False - qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D') + qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D') We chose another :py:attr:`target` value for the rolling *median* calculation, in order to not override our results from the previous rolling *mean* calculation. @@ -318,18 +318,18 @@ for the point lying in the center of every window, we would define our function z_score = lambda D: abs((D[14] - np.mean(D)) / np.std(D)) -And subsequently, use the :py:meth:`~saqc.SaQC.roll` method to make a rolling window application with the scoring +And subsequently, use the :py:meth:`~saqc.SaQC.rolling` method to make a rolling window application with the scoring function: .. doctest:: exampleOD - >>> qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27) + >>> qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27) .. plot:: :context: close-figs :include-source: False - qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27) + qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27) Optimization by Decomposition ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -347,13 +347,13 @@ So the attempt works fine, only because our data set is small and strictly regul Meaning that it has constant temporal distances between subsequent meassurements. In order to tweak our calculations and make them much more stable, it might be useful to decompose the scoring -into seperate calls to the :py:meth:`~saqc.SaQC.roll` function, by calculating the series of the +into seperate calls to the :py:meth:`~saqc.SaQC.rolling` function, by calculating the series of the residuals *mean* and *standard deviation* seperately: .. doctest:: exampleOD - >>> qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) - >>> qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) + >>> qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) + >>> qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) >>> qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", ... func=lambda this, mean, std: (this - mean) / std) @@ -362,15 +362,15 @@ residuals *mean* and *standard deviation* seperately: :context: close-figs :include-source: False - qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) - qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) + qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) + qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", func=lambda this, mean, std: (this - mean) / std) With huge datasets, this will be noticably faster, compared to the method presented :ref:`initially <cookbooks/ResidualOutlierDetection:Scores>`\ , because ``saqc`` dispatches the rolling with the basic numpy statistic methods to an optimized pandas built-in. -Also, as a result of the :py:meth:`~saqc.SaQC.roll` assigning its results to the center of every window, +Also, as a result of the :py:meth:`~saqc.SaQC.rolling` assigning its results to the center of every window, all the values are centered and we dont have to care about window center indices when we are generating the *Z*\ -Scores from the two series. diff --git a/docs/funcs/filling.rst b/docs/funcs/filling.rst index d158f8ff1..388820942 100644 --- a/docs/funcs/filling.rst +++ b/docs/funcs/filling.rst @@ -11,4 +11,3 @@ Gap filling :nosignatures: ~SaQC.interpolateByRolling - ~SaQC.interpolate diff --git a/docs/funcs/genericWrapper.rst b/docs/funcs/genericWrapper.rst index 5e487b1d7..4c87c212d 100644 --- a/docs/funcs/genericWrapper.rst +++ b/docs/funcs/genericWrapper.rst @@ -13,6 +13,6 @@ Generic Functions ~SaQC.processGeneric ~SaQC.flagGeneric - ~SaQC.roll + ~SaQC.rolling ~SaQC.transform ~SaQC.resample diff --git a/docs/funcs/multivariateAnalysis.rst b/docs/funcs/multivariateAnalysis.rst index 38fe8f504..159d37619 100644 --- a/docs/funcs/multivariateAnalysis.rst +++ b/docs/funcs/multivariateAnalysis.rst @@ -12,7 +12,6 @@ Multivariate outlier detection. .. autosummary:: :nosignatures: - ~SaQC.flagCrossStatistics ~SaQC.flagLOF ~SaQC.flagZScore diff --git a/docs/funcs/samplingAlignment.rst b/docs/funcs/samplingAlignment.rst index 05b876249..660bd1844 100644 --- a/docs/funcs/samplingAlignment.rst +++ b/docs/funcs/samplingAlignment.rst @@ -10,10 +10,7 @@ Sampling Alignment .. autosummary:: :nosignatures: - ~SaQC.linear - ~SaQC.shift ~SaQC.align ~SaQC.concatFlags - ~SaQC.interpolateIndex ~SaQC.resample ~SaQC.reindex diff --git a/saqc/core/flags.py b/saqc/core/flags.py index 1009f540d..e597e0ee1 100644 --- a/saqc/core/flags.py +++ b/saqc/core/flags.py @@ -474,24 +474,6 @@ class Flags: # ---------------------------------------------------------------------- # transformation and representation - - def toDios(self) -> DictOfSeries: - """ - Transform the flags container to a ``DictOfSeries``. - - .. deprecated:: 2.4 - use `saqc.DictOfSeries(obj)` instead. - - Returns - ------- - DictOfSeries - """ - warnings.warn( - "toDios is deprecated, use `saqc.DictOfSeries(obj)` instead.", - category=DeprecationWarning, - ) - return DictOfSeries(self).copy() - def toFrame(self) -> pd.DataFrame: """ Transform the flags container to a ``pd.DataFrame``. diff --git a/saqc/funcs/flagtools.py b/saqc/funcs/flagtools.py index 19862377f..51b7c5f47 100644 --- a/saqc/funcs/flagtools.py +++ b/saqc/funcs/flagtools.py @@ -299,7 +299,7 @@ class FlagtoolsMixin: dtype: bool """ warnings.warn( - "`flagManual` is deprecated and will be removed in version 2.9 of saqc. " + "`flagManual` is deprecated and will be removed in version 2.8 of saqc. " "Please use `setFlags` for similar functionality.", DeprecationWarning, ) diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index 1e7ed3add..481c87d72 100644 --- a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -34,25 +34,6 @@ if TYPE_CHECKING: from saqc import SaQC -# TODO: remove, when `interpolateIndex` and `interpolateInvalid are removed` -INTERPOLATION_METHODS = Literal[ - "linear", - "time", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "spline", - "barycentric", - "polynomial", - "krogh", - "piecewise_polynomial", - "spline", - "pchip", - "akima", -] - DATA_REINDEXER = {"fshift": "last", "bshift": "first", "nshift": "first"} @@ -176,184 +157,6 @@ class InterpolationMixin: return self - @register( - mask=["field"], - demask=[], - squeeze=[], # func handles history by itself - ) - def interpolate( - self: "SaQC", - field: str, - method: INTERPOLATION_METHODS = "time", - order: int = 2, - limit: int | str | None = None, - extrapolate: Literal["forward", "backward", "both"] | None = None, - flag: float = UNFLAGGED, - **kwargs, - ) -> "SaQC": - """ - Fill NaN and flagged values using an interpolation method. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - method : - Interpolation technique to use. One of: - - * ‘linear’: Ignore the index and treat the values as equally spaced. - * ‘time’: Works on daily and higher resolution data to interpolate given length of interval. - * ‘index’, ‘values’: Use the actual numerical values of the index. - * ‘pad’: Fill in NaNs using existing values. - * ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘spline’, ‘barycentric’, ‘polynomial’: - Passed to scipy.interpolate.interp1d. These methods use the numerical values of the index. - Both ‘polynomial’ and ‘spline’ require that you also specify an order (int), e.g. - ``qc.interpolate(method='polynomial', order=5)``. - * ‘krogh’, ‘spline’, ‘pchip’, ‘akima’, ‘cubicspline’: - Wrappers around the SciPy interpolation methods of similar names. - * ‘from_derivatives’: Refers to scipy.interpolate.BPoly.from_derivatives - - order : - Order of the interpolation method, ignored if not supported - by the chosen ``method`` - - limit : - Maximum number of missing values to interpolate. Only gaps - smaller than ``limit`` will be filled. The gap size can be - given as a number of values (integer) or a temporal extensions - (offset string). With ``None``, all missing values will be - interpolated. - - extrapolate : - Use parameter to perform extrapolation instead of interpolation - onto the trailing and/or leading chunks of NaN values in data series. - - * 'None' (default) - perform interpolation - * 'forward'/'backward' - perform forward/backward extrapolation - * 'both' - perform forward and backward extrapolation - - Examples - -------- - See some examples of the keyword interplay below: - - Lets generate some dummy data: - - .. doctest:: interpolate - - >>> data = pd.DataFrame({'data':np.array([np.nan, 0, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan, 8, 9, np.nan, np.nan])}, index=pd.date_range('2000',freq='1H', periods=13)) - >>> data - data - 2000-01-01 00:00:00 NaN - 2000-01-01 01:00:00 0.0 - 2000-01-01 02:00:00 NaN - 2000-01-01 03:00:00 NaN - 2000-01-01 04:00:00 NaN - 2000-01-01 05:00:00 4.0 - 2000-01-01 06:00:00 5.0 - 2000-01-01 07:00:00 NaN - 2000-01-01 08:00:00 NaN - 2000-01-01 09:00:00 8.0 - 2000-01-01 10:00:00 9.0 - 2000-01-01 11:00:00 NaN - 2000-01-01 12:00:00 NaN - - Use :py:meth:`~saqc.SaQC.interpolate` to do linear interpolation - of up to 2 consecutive missing values: - - .. doctest:: interpolate - - >>> qc = saqc.SaQC(data) - >>> qc = qc.interpolate("data", limit=3, method='time') - >>> qc.data # doctest:+NORMALIZE_WHITESPACE - data | - ======================== | - 2000-01-01 00:00:00 NaN | - 2000-01-01 01:00:00 0.0 | - 2000-01-01 02:00:00 NaN | - 2000-01-01 03:00:00 NaN | - 2000-01-01 04:00:00 NaN | - 2000-01-01 05:00:00 4.0 | - 2000-01-01 06:00:00 5.0 | - 2000-01-01 07:00:00 6.0 | - 2000-01-01 08:00:00 7.0 | - 2000-01-01 09:00:00 8.0 | - 2000-01-01 10:00:00 9.0 | - 2000-01-01 11:00:00 NaN | - 2000-01-01 12:00:00 NaN | - <BLANKLINE> - - - Use :py:meth:`~saqc.SaQC.interpolate` to do linear extrapolaiton - of up to 1 consecutive missing values: - - .. doctest:: interpolate - - >>> qc = saqc.SaQC(data) - >>> qc = qc.interpolate("data", limit=2, method='time', extrapolate='both') - >>> qc.data # doctest:+NORMALIZE_WHITESPACE - data | - ======================== | - 2000-01-01 00:00:00 0.0 | - 2000-01-01 01:00:00 0.0 | - 2000-01-01 02:00:00 NaN | - 2000-01-01 03:00:00 NaN | - 2000-01-01 04:00:00 NaN | - 2000-01-01 05:00:00 4.0 | - 2000-01-01 06:00:00 5.0 | - 2000-01-01 07:00:00 NaN | - 2000-01-01 08:00:00 NaN | - 2000-01-01 09:00:00 8.0 | - 2000-01-01 10:00:00 9.0 | - 2000-01-01 11:00:00 NaN | - 2000-01-01 12:00:00 NaN | - <BLANKLINE> - """ - if limit is not None: - validateWindow(limit, "limit") - - validateValueBounds(order, "order", left=0, strict_int=True) - validateChoice( - extrapolate, "extrapolate", ["forward", "backward", "both", None] - ) - - if "freq" in kwargs: - # the old interpolate version - warnings.warn( - f"The method `interpolate` is deprecated and will be removed " - f"in version 2.7 of saqc. To achieve the same behaviour " - f"please use: `qc.align(field={field}, freq={kwargs['freq']}, " - f"method={method}, order={order}, flag={flag})`", - DeprecationWarning, - ) - return self.align( - field=field, - freq=kwargs.pop("freq", method), - method=method, - order=order, - flag=flag, - **kwargs, - ) - - inter_data = interpolateNANs( - self._data[field], - method, - order=order, - gap_limit=limit, - extrapolate=extrapolate, - ) - - interpolated = self._data[field].isna() & inter_data.notna() - self._data[field] = inter_data - new_col = pd.Series(np.nan, index=self._flags[field].index) - new_col.loc[interpolated] = np.nan if flag is None else flag - - # todo kwargs must have all passed args except data,field,flags - self._flags.history[field].append( - new_col, {"func": "interpolateInvalid", "args": (), "kwargs": kwargs} - ) - return self - @register(mask=["field"], demask=[], squeeze=[]) def align( self: "SaQC", @@ -436,131 +239,6 @@ class InterpolationMixin: ) return self - # ============================================================ - ### Deprecated functions - # ============================================================ - - @register(mask=["field"], demask=[], squeeze=[]) - def interpolateIndex( - self: "SaQC", - field: str, - freq: str, - method: INTERPOLATION_METHODS, - order: int = 2, - limit: int | None = 2, - extrapolate: Literal["forward", "backward", "both"] = None, - **kwargs, - ) -> "SaQC": - """ - Function to interpolate the data at regular (equidistant) - timestamps also known as or grid points. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - freq : - An Offset String, interpreted as the frequency of - the grid you want to interpolate your data to. - - method : - The interpolation method you want to apply. - - order : - If your selected interpolation method can be performed at - different 'orders' - here you pass the desired order. - - limit : - Upper limit of missing index values (with respect to ``freq``) - to fill. The limit can either be expressed as the number of - consecutive missing values (integer) or temporal extension - of the gaps to be filled (Offset String). If ``None`` is passed, - no limit is set. - - extrapolate : - Use parameter to perform extrapolation instead of interpolation - onto the trailing and/or leading chunks of NaN values in data - series. - - * ``None`` (default) - perform interpolation - * ``'forward'``/``'backward'`` - perform forward/backward extrapolation - * ``'both'`` - perform forward and backward extrapolation - """ - call = ( - f'qc.align(field="{field}", freq="{freq}", method="{method}", ' - f'order={order}, extrapolate="{extrapolate}")' - ) - if limit != 2: - call = ( - f'qc.interpolate(field="{field}", method="{method}", ' - f'order="{order}", limit="{limit}", extrapolate="{extrapolate}")' - ) - warnings.warn( - f"The method interpolateIndex is deprectated and will be removed with SaQC==3.0. Use `{call}` instead", - DeprecationWarning, - ) - - # HINT: checking is delegated to called functions - - out = self.align( - field=field, - freq=freq, - method=method, - order=order, - extrapolate=extrapolate, - **kwargs, - ) - if limit != 2: - out = out.interpolate( - field=field, - freq=freq, - method=method, - order=order, - limit=limit, - extrapolate=extrapolate, - **kwargs, - ) - return out - - @register( - mask=["field"], - demask=["field"], - squeeze=[], # func handles history by itself - ) - def interpolateInvalid( - self: "SaQC", - field: str, - method: INTERPOLATION_METHODS, - order: int = 2, - limit: int | None = None, - extrapolate: Literal["forward", "backward", "both"] | None = None, - flag: float = UNFLAGGED, - **kwargs, - ) -> "SaQC": - """ - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.interpolate` instead. - """ - warnings.warn( - "The method `intepolateInvalid` is deprecated and will be removed " - "with version 2.7 of saqc. To achieve the same behavior, please " - f"use `qc.interpolate(field={field}, method={method}, order={order}, " - f"limit={limit}, extrapolate={extrapolate}, flag={flag})`", - DeprecationWarning, - ) - - # HINT: checking is delegated to called function - return self.interpolate( - field=field, - method=method, - order=order, - limit=limit, - extrapolate=extrapolate, - flag=flag, - **kwargs, - ) - def _shift( saqc: "SaQC", diff --git a/saqc/funcs/noise.py b/saqc/funcs/noise.py index 4afc99f33..7e18add8f 100644 --- a/saqc/funcs/noise.py +++ b/saqc/funcs/noise.py @@ -54,6 +54,9 @@ class NoiseMixin: 2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``, exceed ``sub_thresh`` with regard to ``func`` + .. deprecated:: 2.5.0 + Deprecated Function. See :py:meth:`~saqc.SaQC.flagByScatterLowpass`. + Parameters ---------- func : diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py index 0ff5a8c52..cc6bfbc08 100644 --- a/saqc/funcs/outliers.py +++ b/saqc/funcs/outliers.py @@ -579,6 +579,9 @@ class OutliersMixin: hydrological data. See the notes section for an overview over the algorithms basic steps. + .. deprecated:: 2.6.0 + Deprecated Function. Please refer to :py:meth:`~saqc.SaQC.flagByStray`. + Parameters ---------- trafo : @@ -725,9 +728,9 @@ class OutliersMixin: warnings.warn( """ - FlagMVScores is deprecated and will be removed with Version 2.8. - To replicate the function, transform the different fields involved - via explicit applications of some transformations, than calculate the + flagMVScores is deprecated and will be removed with Version 2.8. + To replicate the function, transform the different fields involved + via explicit applications of some transformations, than calculate the kNN scores via `saqc.SaQC.assignkNScores` and finally assign the STRAY algorithm via `saqc.SaQC.flagByStray`. """, @@ -861,10 +864,10 @@ class OutliersMixin: warnings.warn( "The function flagRaise is deprecated with no 100% exact replacement function." - "When looking for changes in the value course, the use of flagraise can be replicated and more easily aimed " - "for, via the method flagJump.\n" + "When looking for changes in the value course, the use of flagRaise can be replicated and more " + "easily aimed for, via the method flagJump.\n" "When looking for raises to outliers or plateaus, use one of: " - "flagZScore(outliers), flagUniLOF (outliers and small plateaus) or flagOffset(Plateaus)", + "flagZScore (outliers), flagUniLOF (outliers and small plateaus) or flagOffset (plateaus)", DeprecationWarning, ) @@ -971,6 +974,10 @@ class OutliersMixin: See references [1] for more details on the algorithm. + .. deprecated:: 2.6.0 + Deprecated Function. Please refer to :py:meth:`~saqc.SaQC.flagZScore`. + + Note ---- Data needs to be sampled at a regular equidistant time grid. @@ -1262,8 +1269,8 @@ class OutliersMixin: """ warnings.warn( - "The function flagGrubbs is deprecated due to its inferior performance, with no 100% exact replacement function." - "When looking for outliers use one of: " + "The function flagByGrubbs is deprecated due to its inferior performance, with " + "no 100% exact replacement function. When looking for outliers use one of: " "flagZScore, flagUniLOF", DeprecationWarning, ) @@ -1325,85 +1332,6 @@ class OutliersMixin: self._flags[to_flag, field] = flag return self - @register( - mask=["field"], - demask=["field"], - squeeze=["field"], - multivariate=True, - handles_target=False, - docstring={"field": DOC_TEMPLATES["field"]}, - ) - def flagCrossStatistics( - self: "SaQC", - field: Sequence[str], - thresh: float, - method: Literal["modZscore", "Zscore"] = "modZscore", - flag: float = BAD, - **kwargs, - ) -> "SaQC": - """ - Function checks for outliers relatively to the "horizontal" input data axis. - - Notes - ----- - The input variables dont necessarily have to be aligned. If the variables are unaligned, scoring - and flagging will only be performed on the subset of indices shared among all input variables. - - For :py:attr:`field` :math:`=[f_1,f_2,...,f_N]` and timestamps :math:`[t_1,t_2,...,t_K]`, - the following steps are taken for outlier detection: - - 1. All timestamps :math:`t_i`, where there is one :math:`f_k`, with :math:`data[f_K]` having no - entry at :math:`t_i`, are excluded from the following process (inner join of the :math:`f_i` fields.) - 2. for every :math:`0 <= i <= K`, the value - :math:`m_j = median(\\{data[f_1][t_i], data[f_2][t_i], ..., data[f_N][t_i]\\})` is calculated - 3. for every :math:`0 <= i <= K`, the set - :math:`\\{data[f_1][t_i] - m_j, data[f_2][t_i] - m_j, ..., data[f_N][t_i] - m_j\\}` is tested for - outliers with the specified algorithm (:py:attr:`method` parameter). - - Parameters - ---------- - thresh : - Threshold which the outlier score of an value must exceed, for being flagged an outlier. - - method : - Method used for calculating the outlier scores. - - * ``'modZscore'``: Median based "sigma"-ish approach. See References [1]. - * ``'Zscore'``: Score values by how many times the standard deviation they differ from the - median. See References [1]. - - - References - ---------- - [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm - """ - new_method_string = { - "modZscore": "modified", - "Zscore": "standard", - np.mean: "standard", - np.median: "modified", - } - call = ( - f"qc.flagZScore(field={field}, window=1, " - f"method={new_method_string[method]}, " - f"thresh={thresh}, axis=1)" - ) - warnings.warn( - f"The method `flagCrossStatistics` is deprecated and will " - f"be removed in verion 2.7 of saqc. To achieve the same behavior " - f"use:`{call}`", - DeprecationWarning, - ) - - return self.flagZScore( - field=field, - window=1, - method=new_method_string[method], - thresh=thresh, - axis=1, - flag=flag, - ) - @register( mask=["field"], demask=["field"], diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py index cb2840f76..168d16fc0 100644 --- a/saqc/funcs/resampling.py +++ b/saqc/funcs/resampling.py @@ -321,90 +321,6 @@ class ResamplingMixin: ) return idx, idx_source, datcol - @register(mask=["field"], demask=[], squeeze=[]) - def linear( - self: "SaQC", - field: str, - freq: str, - **kwargs, - ) -> "SaQC": - """ - A method to "regularize" data by interpolating linearly the data - at regular timestamp. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` with ``method="linear"`` - instead. - - A series of data is considered "regular", if it is sampled regularly - (= having uniform sampling rate). Interpolated values will get - assigned the worst flag within freq-range. Note, that the data - only gets interpolated at those (regular) timestamps, that have - a valid (existing and not-na) datapoint preceeding them and one - succeeding them within freq range. Regular timestamp that do - not suffice this condition get nan assigned AND The associated - flag will be of value ``UNFLAGGED``. - - Parameters - ---------- - freq : - An offset string. The frequency of the grid you want to interpolate - your data at. - """ - warnings.warn( - f""" - The method `shift` is deprecated and will be removed with version 2.6 of saqc. - To achieve the same behavior please use: - `qc.align(field={field}, freq={freq}. method="linear")` - """, - DeprecationWarning, - ) - reserved = ["method", "order", "limit", "downgrade"] - kwargs = filterKwargs(kwargs, reserved) - return self.interpolateIndex(field, freq, "time", **kwargs) - - @register(mask=["field"], demask=[], squeeze=[]) - def shift( - self: "SaQC", - field: str, - freq: str, - method: Literal["fshift", "bshift", "nshift"] = "nshift", - **kwargs, - ) -> "SaQC": - """ - Shift data points and flags to a regular frequency grid. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - freq : - Offset string. Sampling rate of the target frequency. - - method : - Method to propagate values: - - * 'nshift' : shift grid points to the nearest time stamp in the range = +/- 0.5 * ``freq`` - * 'bshift' : shift grid points to the first succeeding time stamp (if any) - * 'fshift' : shift grid points to the last preceeding time stamp (if any) - """ - warnings.warn( - f""" - The method `shift` is deprecated and will be removed with version 2.6 of saqc. - To achieve the same behavior please use: `qc.align(field={field}, freq={freq}. method={method})` - """, - DeprecationWarning, - ) - validateChoice(method, "method", ["fshift", "bshift", "nshift"]) - return self.reindex( - field=field, - index=freq, - method=method, - data_aggregation=DATA_REINDEXER[method], - **kwargs, - ) - @register(mask=["field"], demask=[], squeeze=[]) def resample( self: "SaQC", @@ -984,7 +900,7 @@ class ResamplingMixin: if method.split("_")[0] == "inverse": warnings.warn( f""" Referring to a method that would invert a method 'A` via 'inverse_A' is deprecated and will - be removed in a future release. Please use method={method.split('_')[-1]} together + be removed in version 2.7. Please use method={method.split('_')[-1]} together with invert=True. """, DeprecationWarning, @@ -995,7 +911,7 @@ class ResamplingMixin: if method == "match": warnings.warn( f"The method 'match' is deprecated and will be removed " - f"in version 2.8 of SaQC. Please use `SaQC.transferFlags(field={field}, " + f"in version 2.7 of SaQC. Please use `SaQC.transferFlags(field={field}, " f"target={target}, squeeze={squeeze}, overwrite={override})` instead", DeprecationWarning, ) diff --git a/saqc/funcs/rolling.py b/saqc/funcs/rolling.py index 6e74a69a8..2a1b95ef2 100644 --- a/saqc/funcs/rolling.py +++ b/saqc/funcs/rolling.py @@ -72,68 +72,6 @@ class RollingMixin: ) return self - @register(mask=["field"], demask=[], squeeze=[]) - def roll( - self: "SaQC", - field: str, - window: Union[str, int], - func: Callable[[pd.Series], np.ndarray] = np.mean, - min_periods: int = 0, - center: bool = True, - **kwargs, - ) -> "SaQC": - """ - Calculate a rolling-window function on the data. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.rolling` instead. - - Note, that the data gets assigned the worst flag present in the original data. - - Parameters - ---------- - window : - The size of the window you want to roll with. If an integer is passed, the size - refers to the number of periods for every fitting window. If an offset string - is passed, the size refers to the total temporal extension. For regularly - sampled timeseries, the period number will be casted down to an odd number if - ``center=True``. - - func : default mean - Function to roll with. - - min_periods : - The minimum number of periods to get a valid value - - center : - If True, center the rolling window. - """ - import warnings - - warnings.warn( - """The function `roll` was renamed to `rolling` and will be removed with version 3.0 of saqc - Please use `SaQC.rolling` with the same arguments, instead - """, - DeprecationWarning, - ) - - validateFuncSelection(func, allow_operator_str=True) - validateWindow(window) - validateMinPeriods(min_periods) - - # HINT: checking in _roll - self._data, self._flags = _roll( - data=self._data, - field=field, - flags=self._flags, - window=window, - func=func, - min_periods=min_periods, - center=center, - **kwargs, - ) - return self - def _roll( data: DictOfSeries, diff --git a/saqc/funcs/tools.py b/saqc/funcs/tools.py index 54cff17af..7348b9b78 100644 --- a/saqc/funcs/tools.py +++ b/saqc/funcs/tools.py @@ -475,36 +475,6 @@ class ToolsMixin: * Check/modify the module parameter `saqc.lib.plotting.SCATTER_KWARGS` to see/modify global marker defaults * Check/modify the module parameter `saqc.lib.plotting.PLOT_KWARGS` to see/modify global plot line defaults """ - if history == "complete": - warnings.warn( - "Plotting with history='complete' is deprecated and will be removed in a future release (2.5)." - "To get access to an saqc variables complete flagging history and analyze or plot it in detail, use flags" - "history acces via `qc._flags.history[variable_name].hist` and a plotting library, such as pyplot.\n" - "Minimal Pseudo example, having a saqc.SaQC instance `qc`, holding a variable `'data1'`, " - "and having matplotlib.pyplot imported as `plt`:\n\n" - "plt.plot(data)\n" - "for f in qc._flags.history['data1'].hist \n" - " markers = qc._flags.history['data1'].hist[f] > level \n" - " markers=data[markers] \n" - " plt.scatter(markers.index, markers.values) \n", - DeprecationWarning, - ) - - if "phaseplot" in kwargs: - warnings.warn( - 'Parameter "phaseplot" is deprecated and will be removed in a future release (2.5). Assign to parameter "mode" instead. (plot(field, mode=phaseplot))', - DeprecationWarning, - ) - mode = kwargs["phaseplot"] - - if "cycleskip" in (ax_kwargs or {}): - warnings.warn( - 'Passing "cycleskip" option with the "ax_kwargs" parameter is deprecated and will be removed in a future release (2.5). ' - 'The option now has to be passed with the "marker_kwargs" parameter', - DeprecationWarning, - ) - marker_kwargs["cycleskip"] = ax_kwargs.pop("cycleskip") - data, flags = self._data.copy(), self._flags.copy() level = kwargs.get("flag", UNFLAGGED) diff --git a/tests/core/test_flags.py b/tests/core/test_flags.py index c9628d49c..9e6765188 100644 --- a/tests/core/test_flags.py +++ b/tests/core/test_flags.py @@ -295,15 +295,6 @@ def _validate_flags_equals_frame(flags, df): assert df[c].equals(flags[c]) # respects nan's -@pytest.mark.parametrize("data", testdata) -def test_to_dios(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): - flags = Flags(data) - with pytest.deprecated_call(): - result = flags.toDios() - assert isinstance(result, DictOfSeries) - _validate_flags_equals_frame(flags, result) - - @pytest.mark.parametrize("data", testdata) def test_toFrame(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) diff --git a/tests/funcs/test_proc_functions.py b/tests/funcs/test_proc_functions.py index f90507697..c47d974da 100644 --- a/tests/funcs/test_proc_functions.py +++ b/tests/funcs/test_proc_functions.py @@ -45,30 +45,6 @@ def test_rollingInterpolateMissing(course_5): assert qc.data[field][characteristics["missing"]].isna().all() -def test_interpolate(course_5): - data, characteristics = course_5(periods=10, nan_slice=[5]) - field = data.columns[0] - data = DictOfSeries(data) - flags = initFlagsLike(data) - qc = SaQC(data, flags) - - qc_lin = qc.interpolate(field, method="linear") - qc_poly = qc.interpolate(field, method="polynomial") - assert qc_lin.data[field][characteristics["missing"]].notna().all() - assert qc_poly.data[field][characteristics["missing"]].notna().all() - - data, characteristics = course_5(periods=10, nan_slice=[5, 6, 7]) - - qc = SaQC(data, flags) - qc_lin_1 = qc.interpolate(field, method="linear", limit=2) - qc_lin_2 = qc.interpolate(field, method="linear", limit=3) - qc_lin_3 = qc.interpolate(field, method="linear", limit=4) - - assert qc_lin_1.data[field][characteristics["missing"]].isna().all() - assert qc_lin_2.data[field][characteristics["missing"]].isna().all() - assert qc_lin_3.data[field][characteristics["missing"]].notna().all() - - def test_transform(course_5): data, characteristics = course_5(periods=10, nan_slice=[5, 6]) field = data.columns[0] diff --git a/tests/funcs/test_tools.py b/tests/funcs/test_tools.py index 231ab5fae..78340fe0c 100644 --- a/tests/funcs/test_tools.py +++ b/tests/funcs/test_tools.py @@ -35,8 +35,6 @@ def test_makeFig(tmp_path): d_saqc = d_saqc.plot( field="data", path=outfile, history="valid", yscope=[(-50, 1000)] ) - with pytest.deprecated_call(): - d_saqc = d_saqc.plot(field="data", path=outfile, history="complete") d_saqc = d_saqc.plot( field="data", -- GitLab