diff --git a/CHANGELOG.md b/CHANGELOG.md
index e5b6195f1481e19d55222a97509c7e28e1d04974..ab86926043fdee4a4d0f63776c8c79d263c175e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,12 @@ SPDX-License-Identifier: GPL-3.0-or-later
 ### Changed
 - `flagPattern` uses *fastdtw* package now to compute timeseries distances
 ### Removed
+- `SaQC` methods deprecated in version 2.4: `interpolate`, `interpolateIndex`, `interpolateInvalid`, `roll`, `linear`, `shift`, `flagCrossStatistics`
+- `Flags` method deprecated in version 2.4: `toDios`
+- `DictOfSeries` method deprecated in version 2.4: `index_of`
+- Option `"complete"` for parameter `history` of method `plot`
+- Option `"cycleskip"` for parameter `ax_kwargs` of method `plot`
+- Parameter `phaseplot` from method `plot`
 ### Fixed
 - `flagConstants`: fixed flagging of rolling ramps
 - `Flags`: add meta entry to imported flags
@@ -36,6 +42,7 @@ SPDX-License-Identifier: GPL-3.0-or-later
 - `flagRaise` with delegation to better replacements `flagZScore`, `flagUniLOF`, `flagJumps` or `flagOffset`
 - `flagByGrubbs` with delegation to better replacements `flagZScore`, `flagUniLOF`s
 - `flagMVScore` with delegation to manual application of the steps
+
 ## [2.5.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22
 [List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.1...v2.5.0)
 ### Added
diff --git a/README.md b/README.md
index 6fb0bf122ecbade708efee04053209f54b94a00f..22c8533330fb53bbfac50df3e0b9dcc44fb621e9 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ could look like [this](https://git.ufz.de/rdm-software/saqc/raw/develop/docs/res
 ```
 varname    ; test
 #----------; ---------------------------------------------------------------------
-SM2        ; shift(freq="15Min")
+SM2        ; align(freq="15Min")
 'SM(1|2)+' ; flagMissing()
 SM1        ; flagRange(min=10, max=60)
 SM2        ; flagRange(min=10, max=40)
@@ -103,7 +103,7 @@ data = pd.read_csv(
 
 qc = SaQC(data=data)
 qc = (qc
-    .shift("SM2", freq="15Min")
+    .align("SM2", freq="15Min")
     .flagMissing("SM(1|2)+", regex=True)
     .flagRange("SM1", min=10, max=60)
     .flagRange("SM2", min=10, max=40)
diff --git a/docs/cookbooks/ResidualOutlierDetection.rst b/docs/cookbooks/ResidualOutlierDetection.rst
index 834b5b003b8e7ce31ef84577465d6da7546de7c9..289d1287381818c57767b28c5fd8f731ce3b406a 100644
--- a/docs/cookbooks/ResidualOutlierDetection.rst
+++ b/docs/cookbooks/ResidualOutlierDetection.rst
@@ -147,19 +147,19 @@ Rolling Mean
 ^^^^^^^^^^^^
 
 Easiest thing to do, would be, to apply some rolling mean
-model via the method :py:meth:`saqc.SaQC.roll`.
+model via the method :py:meth:`saqc.SaQC.rolling`.
 
 .. doctest:: exampleOD
 
    >>> import numpy as np
-   >>> qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D')
+   >>> qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D')
 
 .. plot::
    :context:
    :include-source: False
 
    import numpy as np
-   qc = qc.roll(field='incidents', target='incidents_mean', func=np.mean, window='13D')
+   qc = qc.rolling(field='incidents', target='incidents_mean', func=np.mean, window='13D')
 
 The ``field`` parameter is passed the variable name, we want to calculate the rolling mean of.
 The ``target`` parameter holds the name, we want to store the results of the calculation to.
@@ -174,13 +174,13 @@ under the name ``np.median``. We just calculate another model curve for the ``"i
 
 .. doctest:: exampleOD
 
-   >>> qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D')
+   >>> qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D')
 
 .. plot::
    :context:
    :include-source: False
 
-   qc = qc.roll(field='incidents', target='incidents_median', func=np.median, window='13D')
+   qc = qc.rolling(field='incidents', target='incidents_median', func=np.median, window='13D')
 
 We chose another :py:attr:`target` value for the rolling *median* calculation, in order to not override our
 results from the previous rolling *mean* calculation.
@@ -318,18 +318,18 @@ for the point lying in the center of every window, we would define our function
 
    z_score = lambda D: abs((D[14] - np.mean(D)) / np.std(D))
 
-And subsequently, use the :py:meth:`~saqc.SaQC.roll` method to make a rolling window application with the scoring
+And subsequently, use the :py:meth:`~saqc.SaQC.rolling` method to make a rolling window application with the scoring
 function:
 
 .. doctest:: exampleOD
 
-   >>> qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
+   >>> qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
 
 .. plot::
    :context: close-figs
    :include-source: False
 
-   qc = qc.roll(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
+   qc = qc.rolling(field='incidents_residuals', target='incidents_scores', func=z_score, window='27D', min_periods=27)
 
 Optimization by Decomposition
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -347,13 +347,13 @@ So the attempt works fine, only because our data set is small and strictly regular.
 Meaning that it has constant temporal distances between subsequent meassurements.
 
 In order to tweak our calculations and make them much more stable, it might be useful to decompose the scoring
-into seperate calls to the :py:meth:`~saqc.SaQC.roll` function, by calculating the series of the
+into separate calls to the :py:meth:`~saqc.SaQC.rolling` function, by calculating the series of the
 residuals *mean* and *standard deviation* seperately:
 
 .. doctest:: exampleOD
 
-   >>> qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
-   >>> qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
+   >>> qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean)
+   >>> qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std)
    >>> qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm",
   ...
func=lambda this, mean, std: (this - mean) / std) @@ -362,15 +362,15 @@ residuals *mean* and *standard deviation* seperately: :context: close-figs :include-source: False - qc = qc.roll(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) - qc = qc.roll(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) + qc = qc.rolling(field='incidents_residuals', target='residuals_mean', window='27D', func=np.mean) + qc = qc.rolling(field='incidents_residuals', target='residuals_std', window='27D', func=np.std) qc = qc.processGeneric(field=['incidents_scores', "residuals_mean", "residuals_std"], target="residuals_norm", func=lambda this, mean, std: (this - mean) / std) With huge datasets, this will be noticably faster, compared to the method presented :ref:`initially <cookbooks/ResidualOutlierDetection:Scores>`\ , because ``saqc`` dispatches the rolling with the basic numpy statistic methods to an optimized pandas built-in. -Also, as a result of the :py:meth:`~saqc.SaQC.roll` assigning its results to the center of every window, +Also, as a result of the :py:meth:`~saqc.SaQC.rolling` assigning its results to the center of every window, all the values are centered and we dont have to care about window center indices when we are generating the *Z*\ -Scores from the two series. diff --git a/docs/funcs/filling.rst b/docs/funcs/filling.rst index d158f8ff1c38a6947b7d8ad46ae32ce222eff569..38882094267fe80ce4a75d0a09a0b755c1e818d9 100644 --- a/docs/funcs/filling.rst +++ b/docs/funcs/filling.rst @@ -11,4 +11,3 @@ Gap filling :nosignatures: ~SaQC.interpolateByRolling - ~SaQC.interpolate diff --git a/docs/funcs/genericWrapper.rst b/docs/funcs/genericWrapper.rst index 5e487b1d7ce8ea1968288bb6c91069231ff4118b..4c87c212d10d9a74859f6755220960555c68a3fb 100644 --- a/docs/funcs/genericWrapper.rst +++ b/docs/funcs/genericWrapper.rst @@ -13,6 +13,6 @@ Generic Functions ~SaQC.processGeneric ~SaQC.flagGeneric - ~SaQC.roll + ~SaQC.rolling ~SaQC.transform ~SaQC.resample diff --git a/docs/funcs/multivariateAnalysis.rst b/docs/funcs/multivariateAnalysis.rst index 38fe8f5043afc352546837e59755dce96be3d74d..159d37619bf93051fe2dfb1218d230d37b30cf69 100644 --- a/docs/funcs/multivariateAnalysis.rst +++ b/docs/funcs/multivariateAnalysis.rst @@ -12,7 +12,6 @@ Multivariate outlier detection. .. autosummary:: :nosignatures: - ~SaQC.flagCrossStatistics ~SaQC.flagLOF ~SaQC.flagZScore diff --git a/docs/funcs/samplingAlignment.rst b/docs/funcs/samplingAlignment.rst index 05b8762495e9fba7f8bac6416d7d50c8936ee75a..660bd1844dec872072465faa3df17d3df96b1931 100644 --- a/docs/funcs/samplingAlignment.rst +++ b/docs/funcs/samplingAlignment.rst @@ -10,10 +10,7 @@ Sampling Alignment .. autosummary:: :nosignatures: - ~SaQC.linear - ~SaQC.shift ~SaQC.align ~SaQC.concatFlags - ~SaQC.interpolateIndex ~SaQC.resample ~SaQC.reindex diff --git a/saqc/core/flags.py b/saqc/core/flags.py index 1009f540d15fa8f1975d18978c4464ae0f79b1c4..e597e0ee184483abf593c815cd375897a29e0fe4 100644 --- a/saqc/core/flags.py +++ b/saqc/core/flags.py @@ -474,24 +474,6 @@ class Flags: # ---------------------------------------------------------------------- # transformation and representation - - def toDios(self) -> DictOfSeries: - """ - Transform the flags container to a ``DictOfSeries``. - - .. deprecated:: 2.4 - use `saqc.DictOfSeries(obj)` instead. 
- - Returns - ------- - DictOfSeries - """ - warnings.warn( - "toDios is deprecated, use `saqc.DictOfSeries(obj)` instead.", - category=DeprecationWarning, - ) - return DictOfSeries(self).copy() - def toFrame(self) -> pd.DataFrame: """ Transform the flags container to a ``pd.DataFrame``. diff --git a/saqc/funcs/flagtools.py b/saqc/funcs/flagtools.py index 19862377fd47e4e8e11ebd534450bd3b39413717..51b7c5f4751f1c5949461664c2d460401496597a 100644 --- a/saqc/funcs/flagtools.py +++ b/saqc/funcs/flagtools.py @@ -299,7 +299,7 @@ class FlagtoolsMixin: dtype: bool """ warnings.warn( - "`flagManual` is deprecated and will be removed in version 2.9 of saqc. " + "`flagManual` is deprecated and will be removed in version 2.8 of saqc. " "Please use `setFlags` for similar functionality.", DeprecationWarning, ) diff --git a/saqc/funcs/interpolation.py b/saqc/funcs/interpolation.py index 1e7ed3add6b85f234fb3ceed5ef5a8b569a5b373..481c87d725d55acad95ca63b91413453953fc31a 100644 --- a/saqc/funcs/interpolation.py +++ b/saqc/funcs/interpolation.py @@ -34,25 +34,6 @@ if TYPE_CHECKING: from saqc import SaQC -# TODO: remove, when `interpolateIndex` and `interpolateInvalid are removed` -INTERPOLATION_METHODS = Literal[ - "linear", - "time", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "spline", - "barycentric", - "polynomial", - "krogh", - "piecewise_polynomial", - "spline", - "pchip", - "akima", -] - DATA_REINDEXER = {"fshift": "last", "bshift": "first", "nshift": "first"} @@ -176,184 +157,6 @@ class InterpolationMixin: return self - @register( - mask=["field"], - demask=[], - squeeze=[], # func handles history by itself - ) - def interpolate( - self: "SaQC", - field: str, - method: INTERPOLATION_METHODS = "time", - order: int = 2, - limit: int | str | None = None, - extrapolate: Literal["forward", "backward", "both"] | None = None, - flag: float = UNFLAGGED, - **kwargs, - ) -> "SaQC": - """ - Fill NaN and flagged values using an interpolation method. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - method : - Interpolation technique to use. One of: - - * ‘linear’: Ignore the index and treat the values as equally spaced. - * ‘time’: Works on daily and higher resolution data to interpolate given length of interval. - * ‘index’, ‘values’: Use the actual numerical values of the index. - * ‘pad’: Fill in NaNs using existing values. - * ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘spline’, ‘barycentric’, ‘polynomial’: - Passed to scipy.interpolate.interp1d. These methods use the numerical values of the index. - Both ‘polynomial’ and ‘spline’ require that you also specify an order (int), e.g. - ``qc.interpolate(method='polynomial', order=5)``. - * ‘krogh’, ‘spline’, ‘pchip’, ‘akima’, ‘cubicspline’: - Wrappers around the SciPy interpolation methods of similar names. - * ‘from_derivatives’: Refers to scipy.interpolate.BPoly.from_derivatives - - order : - Order of the interpolation method, ignored if not supported - by the chosen ``method`` - - limit : - Maximum number of missing values to interpolate. Only gaps - smaller than ``limit`` will be filled. The gap size can be - given as a number of values (integer) or a temporal extensions - (offset string). With ``None``, all missing values will be - interpolated. - - extrapolate : - Use parameter to perform extrapolation instead of interpolation - onto the trailing and/or leading chunks of NaN values in data series. 
- - * 'None' (default) - perform interpolation - * 'forward'/'backward' - perform forward/backward extrapolation - * 'both' - perform forward and backward extrapolation - - Examples - -------- - See some examples of the keyword interplay below: - - Lets generate some dummy data: - - .. doctest:: interpolate - - >>> data = pd.DataFrame({'data':np.array([np.nan, 0, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan, 8, 9, np.nan, np.nan])}, index=pd.date_range('2000',freq='1H', periods=13)) - >>> data - data - 2000-01-01 00:00:00 NaN - 2000-01-01 01:00:00 0.0 - 2000-01-01 02:00:00 NaN - 2000-01-01 03:00:00 NaN - 2000-01-01 04:00:00 NaN - 2000-01-01 05:00:00 4.0 - 2000-01-01 06:00:00 5.0 - 2000-01-01 07:00:00 NaN - 2000-01-01 08:00:00 NaN - 2000-01-01 09:00:00 8.0 - 2000-01-01 10:00:00 9.0 - 2000-01-01 11:00:00 NaN - 2000-01-01 12:00:00 NaN - - Use :py:meth:`~saqc.SaQC.interpolate` to do linear interpolation - of up to 2 consecutive missing values: - - .. doctest:: interpolate - - >>> qc = saqc.SaQC(data) - >>> qc = qc.interpolate("data", limit=3, method='time') - >>> qc.data # doctest:+NORMALIZE_WHITESPACE - data | - ======================== | - 2000-01-01 00:00:00 NaN | - 2000-01-01 01:00:00 0.0 | - 2000-01-01 02:00:00 NaN | - 2000-01-01 03:00:00 NaN | - 2000-01-01 04:00:00 NaN | - 2000-01-01 05:00:00 4.0 | - 2000-01-01 06:00:00 5.0 | - 2000-01-01 07:00:00 6.0 | - 2000-01-01 08:00:00 7.0 | - 2000-01-01 09:00:00 8.0 | - 2000-01-01 10:00:00 9.0 | - 2000-01-01 11:00:00 NaN | - 2000-01-01 12:00:00 NaN | - <BLANKLINE> - - - Use :py:meth:`~saqc.SaQC.interpolate` to do linear extrapolaiton - of up to 1 consecutive missing values: - - .. doctest:: interpolate - - >>> qc = saqc.SaQC(data) - >>> qc = qc.interpolate("data", limit=2, method='time', extrapolate='both') - >>> qc.data # doctest:+NORMALIZE_WHITESPACE - data | - ======================== | - 2000-01-01 00:00:00 0.0 | - 2000-01-01 01:00:00 0.0 | - 2000-01-01 02:00:00 NaN | - 2000-01-01 03:00:00 NaN | - 2000-01-01 04:00:00 NaN | - 2000-01-01 05:00:00 4.0 | - 2000-01-01 06:00:00 5.0 | - 2000-01-01 07:00:00 NaN | - 2000-01-01 08:00:00 NaN | - 2000-01-01 09:00:00 8.0 | - 2000-01-01 10:00:00 9.0 | - 2000-01-01 11:00:00 NaN | - 2000-01-01 12:00:00 NaN | - <BLANKLINE> - """ - if limit is not None: - validateWindow(limit, "limit") - - validateValueBounds(order, "order", left=0, strict_int=True) - validateChoice( - extrapolate, "extrapolate", ["forward", "backward", "both", None] - ) - - if "freq" in kwargs: - # the old interpolate version - warnings.warn( - f"The method `interpolate` is deprecated and will be removed " - f"in version 2.7 of saqc. 
To achieve the same behaviour " - f"please use: `qc.align(field={field}, freq={kwargs['freq']}, " - f"method={method}, order={order}, flag={flag})`", - DeprecationWarning, - ) - return self.align( - field=field, - freq=kwargs.pop("freq", method), - method=method, - order=order, - flag=flag, - **kwargs, - ) - - inter_data = interpolateNANs( - self._data[field], - method, - order=order, - gap_limit=limit, - extrapolate=extrapolate, - ) - - interpolated = self._data[field].isna() & inter_data.notna() - self._data[field] = inter_data - new_col = pd.Series(np.nan, index=self._flags[field].index) - new_col.loc[interpolated] = np.nan if flag is None else flag - - # todo kwargs must have all passed args except data,field,flags - self._flags.history[field].append( - new_col, {"func": "interpolateInvalid", "args": (), "kwargs": kwargs} - ) - return self - @register(mask=["field"], demask=[], squeeze=[]) def align( self: "SaQC", @@ -436,131 +239,6 @@ class InterpolationMixin: ) return self - # ============================================================ - ### Deprecated functions - # ============================================================ - - @register(mask=["field"], demask=[], squeeze=[]) - def interpolateIndex( - self: "SaQC", - field: str, - freq: str, - method: INTERPOLATION_METHODS, - order: int = 2, - limit: int | None = 2, - extrapolate: Literal["forward", "backward", "both"] = None, - **kwargs, - ) -> "SaQC": - """ - Function to interpolate the data at regular (equidistant) - timestamps also known as or grid points. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - freq : - An Offset String, interpreted as the frequency of - the grid you want to interpolate your data to. - - method : - The interpolation method you want to apply. - - order : - If your selected interpolation method can be performed at - different 'orders' - here you pass the desired order. - - limit : - Upper limit of missing index values (with respect to ``freq``) - to fill. The limit can either be expressed as the number of - consecutive missing values (integer) or temporal extension - of the gaps to be filled (Offset String). If ``None`` is passed, - no limit is set. - - extrapolate : - Use parameter to perform extrapolation instead of interpolation - onto the trailing and/or leading chunks of NaN values in data - series. - - * ``None`` (default) - perform interpolation - * ``'forward'``/``'backward'`` - perform forward/backward extrapolation - * ``'both'`` - perform forward and backward extrapolation - """ - call = ( - f'qc.align(field="{field}", freq="{freq}", method="{method}", ' - f'order={order}, extrapolate="{extrapolate}")' - ) - if limit != 2: - call = ( - f'qc.interpolate(field="{field}", method="{method}", ' - f'order="{order}", limit="{limit}", extrapolate="{extrapolate}")' - ) - warnings.warn( - f"The method interpolateIndex is deprectated and will be removed with SaQC==3.0. 
Use `{call}` instead",
-            DeprecationWarning,
-        )
-
-        # HINT: checking is delegated to called functions
-
-        out = self.align(
-            field=field,
-            freq=freq,
-            method=method,
-            order=order,
-            extrapolate=extrapolate,
-            **kwargs,
-        )
-        if limit != 2:
-            out = out.interpolate(
-                field=field,
-                freq=freq,
-                method=method,
-                order=order,
-                limit=limit,
-                extrapolate=extrapolate,
-                **kwargs,
-            )
-        return out
-
-    @register(
-        mask=["field"],
-        demask=["field"],
-        squeeze=[],  # func handles history by itself
-    )
-    def interpolateInvalid(
-        self: "SaQC",
-        field: str,
-        method: INTERPOLATION_METHODS,
-        order: int = 2,
-        limit: int | None = None,
-        extrapolate: Literal["forward", "backward", "both"] | None = None,
-        flag: float = UNFLAGGED,
-        **kwargs,
-    ) -> "SaQC":
-        """
-        .. deprecated:: 2.4.0
-           Use :py:meth:`~saqc.SaQC.interpolate` instead.
-        """
-        warnings.warn(
-            "The method `intepolateInvalid` is deprecated and will be removed "
-            "with version 2.7 of saqc. To achieve the same behavior, please "
-            f"use `qc.interpolate(field={field}, method={method}, order={order}, "
-            f"limit={limit}, extrapolate={extrapolate}, flag={flag})`",
-            DeprecationWarning,
-        )
-
-        # HINT: checking is delegated to called function
-        return self.interpolate(
-            field=field,
-            method=method,
-            order=order,
-            limit=limit,
-            extrapolate=extrapolate,
-            flag=flag,
-            **kwargs,
-        )
-

 def _shift(
     saqc: "SaQC",
diff --git a/saqc/funcs/noise.py b/saqc/funcs/noise.py
index 4afc99f338b9705a75a8f5b27f5969d4b1e8d234..7e18add8f6477fb6e57f4f3614331a402ead144d 100644
--- a/saqc/funcs/noise.py
+++ b/saqc/funcs/noise.py
@@ -54,6 +54,9 @@ class NoiseMixin:
            2. all (maybe overlapping) sub-chunks of the data chunks with length ``sub_window``,
               exceed ``sub_thresh`` with regard to ``func``
 
+        .. deprecated:: 2.5.0
+           Deprecated Function. See :py:meth:`~saqc.SaQC.flagByScatterLowpass`.
+
         Parameters
         ----------
         func :
diff --git a/saqc/funcs/outliers.py b/saqc/funcs/outliers.py
index 0ff5a8c52c21004f90313d42e17bdc4db12c47ad..cc6bfbc08a05a307d2efe9c3d70e445dd379adec 100644
--- a/saqc/funcs/outliers.py
+++ b/saqc/funcs/outliers.py
@@ -579,6 +579,9 @@ class OutliersMixin:
         hydrological data. See the notes section for an overview over the algorithms
         basic steps.
 
+        .. deprecated:: 2.6.0
+           Deprecated Function. Please refer to :py:meth:`~saqc.SaQC.flagByStray`.
+
         Parameters
         ----------
         trafo :
@@ -725,9 +728,9 @@ class OutliersMixin:
         warnings.warn(
             """
-            FlagMVScores is deprecated and will be removed with Version 2.8.
-            To replicate the function, transform the different fields involved
-            via explicit applications of some transformations, than calculate the
+            flagMVScores is deprecated and will be removed with Version 2.8.
+            To replicate the function, transform the different fields involved
+            via explicit applications of some transformations, then calculate the
             kNN scores via `saqc.SaQC.assignkNScores` and finally assign the STRAY
             algorithm via `saqc.SaQC.flagByStray`.
             """,
             DeprecationWarning,
@@ -861,10 +864,10 @@ class OutliersMixin:
 
         warnings.warn(
             "The function flagRaise is deprecated with no 100% exact replacement function."
- "When looking for changes in the value course, the use of flagraise can be replicated and more easily aimed " - "for, via the method flagJump.\n" + "When looking for changes in the value course, the use of flagRaise can be replicated and more " + "easily aimed for, via the method flagJump.\n" "When looking for raises to outliers or plateaus, use one of: " - "flagZScore(outliers), flagUniLOF (outliers and small plateaus) or flagOffset(Plateaus)", + "flagZScore (outliers), flagUniLOF (outliers and small plateaus) or flagOffset (plateaus)", DeprecationWarning, ) @@ -971,6 +974,10 @@ class OutliersMixin: See references [1] for more details on the algorithm. + .. deprecated:: 2.6.0 + Deprecated Function. Please refer to :py:meth:`~saqc.SaQC.flagZScore`. + + Note ---- Data needs to be sampled at a regular equidistant time grid. @@ -1262,8 +1269,8 @@ class OutliersMixin: """ warnings.warn( - "The function flagGrubbs is deprecated due to its inferior performance, with no 100% exact replacement function." - "When looking for outliers use one of: " + "The function flagByGrubbs is deprecated due to its inferior performance, with " + "no 100% exact replacement function. When looking for outliers use one of: " "flagZScore, flagUniLOF", DeprecationWarning, ) @@ -1325,85 +1332,6 @@ class OutliersMixin: self._flags[to_flag, field] = flag return self - @register( - mask=["field"], - demask=["field"], - squeeze=["field"], - multivariate=True, - handles_target=False, - docstring={"field": DOC_TEMPLATES["field"]}, - ) - def flagCrossStatistics( - self: "SaQC", - field: Sequence[str], - thresh: float, - method: Literal["modZscore", "Zscore"] = "modZscore", - flag: float = BAD, - **kwargs, - ) -> "SaQC": - """ - Function checks for outliers relatively to the "horizontal" input data axis. - - Notes - ----- - The input variables dont necessarily have to be aligned. If the variables are unaligned, scoring - and flagging will only be performed on the subset of indices shared among all input variables. - - For :py:attr:`field` :math:`=[f_1,f_2,...,f_N]` and timestamps :math:`[t_1,t_2,...,t_K]`, - the following steps are taken for outlier detection: - - 1. All timestamps :math:`t_i`, where there is one :math:`f_k`, with :math:`data[f_K]` having no - entry at :math:`t_i`, are excluded from the following process (inner join of the :math:`f_i` fields.) - 2. for every :math:`0 <= i <= K`, the value - :math:`m_j = median(\\{data[f_1][t_i], data[f_2][t_i], ..., data[f_N][t_i]\\})` is calculated - 3. for every :math:`0 <= i <= K`, the set - :math:`\\{data[f_1][t_i] - m_j, data[f_2][t_i] - m_j, ..., data[f_N][t_i] - m_j\\}` is tested for - outliers with the specified algorithm (:py:attr:`method` parameter). - - Parameters - ---------- - thresh : - Threshold which the outlier score of an value must exceed, for being flagged an outlier. - - method : - Method used for calculating the outlier scores. - - * ``'modZscore'``: Median based "sigma"-ish approach. See References [1]. - * ``'Zscore'``: Score values by how many times the standard deviation they differ from the - median. See References [1]. 
- - - References - ---------- - [1] https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm - """ - new_method_string = { - "modZscore": "modified", - "Zscore": "standard", - np.mean: "standard", - np.median: "modified", - } - call = ( - f"qc.flagZScore(field={field}, window=1, " - f"method={new_method_string[method]}, " - f"thresh={thresh}, axis=1)" - ) - warnings.warn( - f"The method `flagCrossStatistics` is deprecated and will " - f"be removed in verion 2.7 of saqc. To achieve the same behavior " - f"use:`{call}`", - DeprecationWarning, - ) - - return self.flagZScore( - field=field, - window=1, - method=new_method_string[method], - thresh=thresh, - axis=1, - flag=flag, - ) - @register( mask=["field"], demask=["field"], diff --git a/saqc/funcs/resampling.py b/saqc/funcs/resampling.py index cb2840f767dc42e8d8e274efacf6913b840a861e..168d16fc0f41b803c281974e2b9d26b38185cb2d 100644 --- a/saqc/funcs/resampling.py +++ b/saqc/funcs/resampling.py @@ -321,90 +321,6 @@ class ResamplingMixin: ) return idx, idx_source, datcol - @register(mask=["field"], demask=[], squeeze=[]) - def linear( - self: "SaQC", - field: str, - freq: str, - **kwargs, - ) -> "SaQC": - """ - A method to "regularize" data by interpolating linearly the data - at regular timestamp. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` with ``method="linear"`` - instead. - - A series of data is considered "regular", if it is sampled regularly - (= having uniform sampling rate). Interpolated values will get - assigned the worst flag within freq-range. Note, that the data - only gets interpolated at those (regular) timestamps, that have - a valid (existing and not-na) datapoint preceeding them and one - succeeding them within freq range. Regular timestamp that do - not suffice this condition get nan assigned AND The associated - flag will be of value ``UNFLAGGED``. - - Parameters - ---------- - freq : - An offset string. The frequency of the grid you want to interpolate - your data at. - """ - warnings.warn( - f""" - The method `shift` is deprecated and will be removed with version 2.6 of saqc. - To achieve the same behavior please use: - `qc.align(field={field}, freq={freq}. method="linear")` - """, - DeprecationWarning, - ) - reserved = ["method", "order", "limit", "downgrade"] - kwargs = filterKwargs(kwargs, reserved) - return self.interpolateIndex(field, freq, "time", **kwargs) - - @register(mask=["field"], demask=[], squeeze=[]) - def shift( - self: "SaQC", - field: str, - freq: str, - method: Literal["fshift", "bshift", "nshift"] = "nshift", - **kwargs, - ) -> "SaQC": - """ - Shift data points and flags to a regular frequency grid. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.align` instead. - - Parameters - ---------- - freq : - Offset string. Sampling rate of the target frequency. - - method : - Method to propagate values: - - * 'nshift' : shift grid points to the nearest time stamp in the range = +/- 0.5 * ``freq`` - * 'bshift' : shift grid points to the first succeeding time stamp (if any) - * 'fshift' : shift grid points to the last preceeding time stamp (if any) - """ - warnings.warn( - f""" - The method `shift` is deprecated and will be removed with version 2.6 of saqc. - To achieve the same behavior please use: `qc.align(field={field}, freq={freq}. 
method={method})` - """, - DeprecationWarning, - ) - validateChoice(method, "method", ["fshift", "bshift", "nshift"]) - return self.reindex( - field=field, - index=freq, - method=method, - data_aggregation=DATA_REINDEXER[method], - **kwargs, - ) - @register(mask=["field"], demask=[], squeeze=[]) def resample( self: "SaQC", @@ -984,7 +900,7 @@ class ResamplingMixin: if method.split("_")[0] == "inverse": warnings.warn( f""" Referring to a method that would invert a method 'A` via 'inverse_A' is deprecated and will - be removed in a future release. Please use method={method.split('_')[-1]} together + be removed in version 2.7. Please use method={method.split('_')[-1]} together with invert=True. """, DeprecationWarning, @@ -995,7 +911,7 @@ class ResamplingMixin: if method == "match": warnings.warn( f"The method 'match' is deprecated and will be removed " - f"in version 2.8 of SaQC. Please use `SaQC.transferFlags(field={field}, " + f"in version 2.7 of SaQC. Please use `SaQC.transferFlags(field={field}, " f"target={target}, squeeze={squeeze}, overwrite={override})` instead", DeprecationWarning, ) diff --git a/saqc/funcs/rolling.py b/saqc/funcs/rolling.py index 6e74a69a823bc97f428defeadc417f7b3c0a6ab3..2a1b95ef203131aa564c59575f0bb413bf4c8fc2 100644 --- a/saqc/funcs/rolling.py +++ b/saqc/funcs/rolling.py @@ -72,68 +72,6 @@ class RollingMixin: ) return self - @register(mask=["field"], demask=[], squeeze=[]) - def roll( - self: "SaQC", - field: str, - window: Union[str, int], - func: Callable[[pd.Series], np.ndarray] = np.mean, - min_periods: int = 0, - center: bool = True, - **kwargs, - ) -> "SaQC": - """ - Calculate a rolling-window function on the data. - - .. deprecated:: 2.4.0 - Use :py:meth:`~saqc.SaQC.rolling` instead. - - Note, that the data gets assigned the worst flag present in the original data. - - Parameters - ---------- - window : - The size of the window you want to roll with. If an integer is passed, the size - refers to the number of periods for every fitting window. If an offset string - is passed, the size refers to the total temporal extension. For regularly - sampled timeseries, the period number will be casted down to an odd number if - ``center=True``. - - func : default mean - Function to roll with. - - min_periods : - The minimum number of periods to get a valid value - - center : - If True, center the rolling window. 
- """ - import warnings - - warnings.warn( - """The function `roll` was renamed to `rolling` and will be removed with version 3.0 of saqc - Please use `SaQC.rolling` with the same arguments, instead - """, - DeprecationWarning, - ) - - validateFuncSelection(func, allow_operator_str=True) - validateWindow(window) - validateMinPeriods(min_periods) - - # HINT: checking in _roll - self._data, self._flags = _roll( - data=self._data, - field=field, - flags=self._flags, - window=window, - func=func, - min_periods=min_periods, - center=center, - **kwargs, - ) - return self - def _roll( data: DictOfSeries, diff --git a/saqc/funcs/tools.py b/saqc/funcs/tools.py index 54cff17af101e8b99eec64c232a16b3107e9441e..7348b9b78585b465388941a5f9b8edd1f320d168 100644 --- a/saqc/funcs/tools.py +++ b/saqc/funcs/tools.py @@ -475,36 +475,6 @@ class ToolsMixin: * Check/modify the module parameter `saqc.lib.plotting.SCATTER_KWARGS` to see/modify global marker defaults * Check/modify the module parameter `saqc.lib.plotting.PLOT_KWARGS` to see/modify global plot line defaults """ - if history == "complete": - warnings.warn( - "Plotting with history='complete' is deprecated and will be removed in a future release (2.5)." - "To get access to an saqc variables complete flagging history and analyze or plot it in detail, use flags" - "history acces via `qc._flags.history[variable_name].hist` and a plotting library, such as pyplot.\n" - "Minimal Pseudo example, having a saqc.SaQC instance `qc`, holding a variable `'data1'`, " - "and having matplotlib.pyplot imported as `plt`:\n\n" - "plt.plot(data)\n" - "for f in qc._flags.history['data1'].hist \n" - " markers = qc._flags.history['data1'].hist[f] > level \n" - " markers=data[markers] \n" - " plt.scatter(markers.index, markers.values) \n", - DeprecationWarning, - ) - - if "phaseplot" in kwargs: - warnings.warn( - 'Parameter "phaseplot" is deprecated and will be removed in a future release (2.5). Assign to parameter "mode" instead. (plot(field, mode=phaseplot))', - DeprecationWarning, - ) - mode = kwargs["phaseplot"] - - if "cycleskip" in (ax_kwargs or {}): - warnings.warn( - 'Passing "cycleskip" option with the "ax_kwargs" parameter is deprecated and will be removed in a future release (2.5). 
' - 'The option now has to be passed with the "marker_kwargs" parameter', - DeprecationWarning, - ) - marker_kwargs["cycleskip"] = ax_kwargs.pop("cycleskip") - data, flags = self._data.copy(), self._flags.copy() level = kwargs.get("flag", UNFLAGGED) diff --git a/tests/core/test_flags.py b/tests/core/test_flags.py index c9628d49cd7f333e9fc7527dd05db48ddaf0cffc..9e67651880f5981349d70444049b0164085a1e77 100644 --- a/tests/core/test_flags.py +++ b/tests/core/test_flags.py @@ -295,15 +295,6 @@ def _validate_flags_equals_frame(flags, df): assert df[c].equals(flags[c]) # respects nan's -@pytest.mark.parametrize("data", testdata) -def test_to_dios(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): - flags = Flags(data) - with pytest.deprecated_call(): - result = flags.toDios() - assert isinstance(result, DictOfSeries) - _validate_flags_equals_frame(flags, result) - - @pytest.mark.parametrize("data", testdata) def test_toFrame(data: Union[pd.DataFrame, DictOfSeries, Dict[str, pd.Series]]): flags = Flags(data) diff --git a/tests/funcs/test_proc_functions.py b/tests/funcs/test_proc_functions.py index f905076975929e173315fdc7616c6ee6c79d7f61..c47d974daf1b347d702be183039dfca1cc444cf4 100644 --- a/tests/funcs/test_proc_functions.py +++ b/tests/funcs/test_proc_functions.py @@ -45,30 +45,6 @@ def test_rollingInterpolateMissing(course_5): assert qc.data[field][characteristics["missing"]].isna().all() -def test_interpolate(course_5): - data, characteristics = course_5(periods=10, nan_slice=[5]) - field = data.columns[0] - data = DictOfSeries(data) - flags = initFlagsLike(data) - qc = SaQC(data, flags) - - qc_lin = qc.interpolate(field, method="linear") - qc_poly = qc.interpolate(field, method="polynomial") - assert qc_lin.data[field][characteristics["missing"]].notna().all() - assert qc_poly.data[field][characteristics["missing"]].notna().all() - - data, characteristics = course_5(periods=10, nan_slice=[5, 6, 7]) - - qc = SaQC(data, flags) - qc_lin_1 = qc.interpolate(field, method="linear", limit=2) - qc_lin_2 = qc.interpolate(field, method="linear", limit=3) - qc_lin_3 = qc.interpolate(field, method="linear", limit=4) - - assert qc_lin_1.data[field][characteristics["missing"]].isna().all() - assert qc_lin_2.data[field][characteristics["missing"]].isna().all() - assert qc_lin_3.data[field][characteristics["missing"]].notna().all() - - def test_transform(course_5): data, characteristics = course_5(periods=10, nan_slice=[5, 6]) field = data.columns[0] diff --git a/tests/funcs/test_tools.py b/tests/funcs/test_tools.py index 231ab5faef59e1b9aa1111bd8bce4803acc3430d..78340fe0c3055be2b2da8d30ea989947945e3b31 100644 --- a/tests/funcs/test_tools.py +++ b/tests/funcs/test_tools.py @@ -35,8 +35,6 @@ def test_makeFig(tmp_path): d_saqc = d_saqc.plot( field="data", path=outfile, history="valid", yscope=[(-50, 1000)] ) - with pytest.deprecated_call(): - d_saqc = d_saqc.plot(field="data", path=outfile, history="complete") d_saqc = d_saqc.plot( field="data",
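For readers porting scripts or configs away from the methods this changeset removes, the replacement calls are already spelled out in the deprecation warnings quoted in the hunks above. The following is a minimal, hypothetical migration sketch pulling those together; the variable names, toy data, frequencies and thresholds are invented for illustration, and only the method names and keyword arguments are taken from the warnings above.

```python
# Hypothetical migration sketch -- names and data are illustrative only.
import numpy as np
import pandas as pd
import saqc

data = pd.DataFrame(
    {"SM1": np.random.rand(100), "SM2": np.random.rand(100)},
    index=pd.date_range("2020-01-01", periods=100, freq="10min"),
)
qc = saqc.SaQC(data=data)

# roll(...) was renamed: call rolling(...) with the same arguments.
qc = qc.rolling("SM1", target="SM1_mean", func=np.mean, window="1h")

# shift(freq=...), linear(freq=...) and interpolateIndex(freq=...) are gone:
# regularize with align(...), picking the method the old call implied.
qc = qc.align("SM2", freq="15Min", method="nshift")

# flagCrossStatistics(thresh=..., method="modZscore") maps onto horizontal
# (axis=1) z-scoring with window=1, as its deprecation message states.
qc = qc.flagZScore(["SM1", "SM2"], window=1, method="modified", thresh=3.5, axis=1)
```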