Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • berntm/saqc
  • rdm-software/saqc
  • schueler/saqc
3 results
Show changes
Commits on Source (84)
Showing
with 199 additions and 235 deletions
......@@ -5,3 +5,4 @@
*.feather filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
resources/machine_learning/data/soil_moisture_mwe.feather filter=lfs diff=lfs merge=lfs -text
saqc/_version.py export-subst
......@@ -29,7 +29,7 @@ jobs:
fail-fast: false
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]
defaults:
run:
# somehow this also works for windows O.o ??
......
......@@ -2,6 +2,20 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
# ===========================================================
# Hints
# ===========================================================
# $PYPI_PKG_NAME
# The variable PYPI_PKG_NAME is used in setup.py to determine
# how to name the tarball package. If not set the package is
# named 'saqc'.
# $TESTPYPI_TOKEN
# The upload token used for testpypi; set it on the gitlab
# page and enable masking to prevent it from being revealed
# ===========================================================
# preparation
# ===========================================================
......@@ -75,9 +89,9 @@ coverage:
path: coverage.xml
python38:
python39:
stage: test
image: python:3.8
image: python:3.9
script:
- pytest tests -Werror --junitxml=report.xml
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
......@@ -87,9 +101,9 @@ python38:
junit: report.xml
python39:
python310:
stage: test
image: python:3.9
image: python:3.10
script:
- pytest tests -Werror --junitxml=report.xml
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
......@@ -98,10 +112,9 @@ python39:
reports:
junit: report.xml
python310:
python311:
stage: test
image: python:3.10
image: python:3.11
script:
- pytest tests -Werror --junitxml=report.xml
- python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
......@@ -110,17 +123,6 @@ python310:
reports:
junit: report.xml
# python311:
# stage: test
# image: python:3.11
# script:
# - pytest tests -Werror --junitxml=report.xml
# - python -m saqc --config docs/resources/data/config.csv --data docs/resources/data/data.csv --outfile /tmp/test.csv
# artifacts:
# when: always
# reports:
# junit: report.xml
doctest:
stage: test
script:
......@@ -135,50 +137,61 @@ doctest:
# ===========================================================
# check if we are able to build a wheel
# and if the import works
wheel38:
stage: build
image: python:3.8
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
wheel39:
stage: build
image: python:3.9
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
wheel310:
stage: build
image: python:3.10
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
wheel311:
stage: build
image: python:3.11
variables:
PYPI_PKG_NAME: "saqc-dev"
script:
- pip install wheel
- pip wheel .
- pip install .
- python -c 'import saqc; print(f"{saqc.__version__=}")'
docs:
stage: build
script:
- cd docs
- pip install -r requirements.txt
- make doc
# ===========================================================
# Extra Pipeline (run with a successful run of all other jobs on develop)
# ===========================================================
upload_testpypi:
stage: deploy
only:
- develop
except:
- schedules
variables:
PYPI_PKG_NAME: "saqc-dev"
TWINE_USERNAME: __token__
TWINE_PASSWORD: $TESTPYPI_TOKEN
script:
- pip install build twine
- python -m build
- twine check --strict dist/*
- twine upload -r testpypi dist/*
# make html docu with sphinx
pages:
stage: deploy
......
......@@ -5,19 +5,48 @@ SPDX-License-Identifier: GPL-3.0-or-later
-->
# Changelog
## Unreleased
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.5.0...develop)
### Added
### Changed
- pin pandas to versions >= 2.0
### Removed
- removed deprecated `DictOfSeries.to_df`
### Fixed
## [2.5.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.5.0) - 2023-06-22
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.1...v2.5.0)
### Added
- `SaQC.plot`:
- enable multivariate plots
- keyword `plot_kwargs` to pass matplotlib related arguments
- CLI:
- `--version` to print the SaQC version
- `-ll` as a shorthand for `--log-level`
- `--json-field` to use a non-root element of a json file.
- basic json support for CLI config files, which are detected by `.json`-extension.
- `SaQC.flagByScatterLowpass`: option to select function based on string names.
- Checks and unified error message for common function inputs.
### Changed
- Require pandas >= 2.0
- `SaQC.flagUniLOF` and `SaQC.assignUniLOF`: changed parameter `fill_na` to type `bool`.
- `SaQC.plot`:
- changed default color for single variables to `black` with `80% transparency`
- added separate legend for flags
### Removed
- `SaQC.plot`: option to plot with complete history (`history="complete"`)
- Support for Python 3.8
### Fixed
- `SaQC.assignChangePointCluster` and `SaQC.flagChangePoints`: A tuple passed to `min_period`
was only recognised if `window` was also a tuple.
- `SaQC.propagateFlags` was overwriting existing flags
### Deprecated
- `SaQC.andGroup` and `SaQC.orGroup`: option to pass dictionaries to `group`.
- `SaQC.plot`:
- `phaseplot` in favor of usage with `mode="biplot"`
- `cyclestart` in favor of usage with `marker_kwargs`
- `SaQC.flagByStatLowPass` in favor of `SaQC.flagByScatterLowpass`
## [2.4.1](https://git.ufz.de/rdm-software/saqc/-/tags/v2.4.1) - 2023-06-22
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...develop)
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.4.0...v2.4.1)
### Added
### Changed
- pin pandas to versions >= 2.0
......@@ -44,14 +73,13 @@ SPDX-License-Identifier: GPL-3.0-or-later
- `func` arguments in text configurations were not parsed correctly
- fail on duplicated arguments to test methods
- `resample` was not writing meta entries
- `flagByStatLowPass` was overwriting existing flags
- `flagByScatterLowpass` was overwriting existing flags
- `flagUniLOF` and `flagLOF` were overwriting existing flags
### Deprecated
- Deprecate `flagMVScore` parameters: `partition` in favor of `window`, `partition_min` in favor of `min_periods`, `min_periods` in favor of `min_periods_r`
- Deprecate `interpolate`, `linear` and `shift` in favor of `align`
- Deprecate `roll` in favor of `rolling`
- Deprecate `DictOfSeries.to_df` in favor of `DictOfSeries.to_pandas`
## [2.3.0](https://git.ufz.de/rdm-software/saqc/-/tags/v2.3.0) - 2023-01-17
[List of commits](https://git.ufz.de/rdm-software/saqc/-/compare/v2.2.1...v2.3.0)
### Added
......
......@@ -59,7 +59,7 @@ It is not a shame to name a parameter just `n` or `alpha` etc., if, for example,
### Test Functions
- testnames: [testmodule_]flagTestName
- testnames: flagTestName
## Formatting
We use [black](https://black.readthedocs.io/en/stable/) in its default settings.
......@@ -70,13 +70,17 @@ Only absolute imports are accepted.
# Development Workflow
## Releases
Every release is planned by an associated Milestone. This milestone should have an end date, usually the first of the month in which the next release is planned, and contain all issues/merge requests to include.
## Repository Structure
- `master` - branch:
- `main` - branch:
+ Stable and usually protected.
+ Regular merges from `develop`; these merges are tagged and increase at least the minor version.
+ Irregular merges from `develop` in case of critical bugs. Such merges increase at least the patch level.
+ Merges into `master` usually lead to a PyPI release.
+ Merges into `main` usually lead to a PyPI release.
- `develop` - branch:
+ The main development branch, no hard stability requirements/guarantees.
+ Merges into `develop` should mostly follow a [Merge Request Workflow](#merge-request-workflow), minor changes can however be committed directly. Such minor changes include:
......@@ -105,6 +109,6 @@ Only absolute imports are accepted.
release date. Commits to `develop` after the merge window of a release closes need to be integrated during the subsequent release
cycle
- The release cycle is organized by Gitlab Milestones, the expiration date of a certain milestone indicates the end of the
related merge window, the actual merge into `master` and the accompanying release is scheduled for the week after the
related merge window, the actual merge into `main` and the accompanying release is scheduled for the week after the
milestone's expiration date.
- Issues and Merge Requests can and should be associated with these milestones, as this helps in the organization of review activities.
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
......@@ -14,30 +14,39 @@ SPDX-License-Identifier: GPL-3.0-or-later
# SaQC: System for automated Quality Control
Anomalies and errors are the rule not the exception when working with
time series data. This is especially true, if such data originates
from in-situ measurements of environmental properties.
Almost all applications, however, implicitly rely on data, that complies
with some definition of 'correct'.
In order to infer reliable data products and tools, there is no alternative
to quality control. SaQC provides all the building blocks to comfortably
bridge the gap between 'usually faulty' and 'expected to be corrected' in
an accessible, consistent, objective and reproducible way.
`SaQC` is a tool/framework/application to quality control time series data.
It provides
a growing collection of algorithms and methods to analyze, annotate and
process timeseries data. It supports the end to end enrichment of metadata
and provides various user interfaces: 1) a Python API, 2) a command line interface
with a text based configuration system and a
[web based user interface](https://webapp.ufz.de/saqc-config-app/).
`SaQC` is designed with a particular focus on the needs of active data professionals,
including sensor hardware-oriented engineers, domain experts, and data scientists,
all of whom can benefit from its capabilities to improve the quality standards of given data products.
For a (continuously improving) overview of features, typical usage patterns,
the specific system components and how to customize `SaQC` to your specific
the specific system components and how to customize `SaQC` to your own
needs, please refer to our
[online documentation](https://rdm-software.pages.ufz.de/saqc/index.html).
## Installation
SaQC is available on the Python Package Index ([PyPI](https://pypi.org/)) and
`SaQC` is available on the Python Package Index ([PyPI](https://pypi.org/)) and
can be installed using [pip](https://pip.pypa.io/en/stable/):
```sh
python -m pip install saqc
```
For a more detailed installion guide, see the [installation guide](https://rdm-software.pages.ufz.de/saqc/gettingstarted/InstallationGuide.html).
Additionally `SaQC` is available via conda and can be installed with:
```sh
conda create -c conda-forge -n saqc saqc
```
For more details, see the [installation guide](https://rdm-software.pages.ufz.de/saqc/gettingstarted/InstallationGuide.html).
## Usage
......@@ -57,7 +66,7 @@ SM2 ; shift(freq="15Min")
'SM(1|2)+' ; flagMissing()
SM1 ; flagRange(min=10, max=60)
SM2 ; flagRange(min=10, max=40)
SM2 ; flagMAD(window="30d", z=3.5)
SM2 ; flagZScore(window="30d", thresh=3.5, method='modified', center=False)
Dummy ; flagGeneric(field=["SM1", "SM2"], func=(isflagged(x) | isflagged(y)))
```
......@@ -92,30 +101,27 @@ data = pd.read_csv(
index_col=0, parse_dates=True,
)
saqc = SaQC(data=data)
saqc = (saqc
.shift("SM2", freq="15Min")
.flagMissing("SM(1|2)+", regex=True)
.flagRange("SM1", min=10, max=60)
.flagRange("SM2", min=10, max=40)
.flagMAD("SM2", window="30d", z=3.5)
.flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y))))
qc = SaQC(data=data)
qc = (qc
.shift("SM2", freq="15Min")
.flagMissing("SM(1|2)+", regex=True)
.flagRange("SM1", min=10, max=60)
.flagRange("SM2", min=10, max=40)
.flagZScore("SM2", window="30d", thresh=3.5, method='modified', center=False)
.flagGeneric(field=["SM1", "SM2"], target="Dummy", func=lambda x, y: (isflagged(x) | isflagged(y))))
```
A more detailed description of the Python API is available in the
A more detailed description of the Python API is available in the
[respective section](https://rdm-software.pages.ufz.de/saqc/gettingstarted/TutorialAPI.html)
of the documentation.
## Changelog
All notable changes to this project will be documented in [CHANGELOG.md](CHANGELOG.md).
## Get involved
### Contributing
You found a bug or you want to suggest some cool features? Please refer to our [contributing guidelines](CONTRIBUTING.md) to see how you can contribute to SaQC.
You found a bug or you want to suggest new features? Please refer to our [contributing guidelines](CONTRIBUTING.md) to see how you can contribute to SaQC.
### User support
If you need help or have a question, you can use the SaQC user support mailing list: [saqc-support@ufz.de](mailto:saqc-support@ufz.de)
If you need help or have questions, send us an email to [saqc-support@ufz.de](mailto:saqc-support@ufz.de)
## Copyright and License
Copyright(c) 2021, [Helmholtz-Zentrum für Umweltforschung GmbH -- UFZ](https://www.ufz.de). All rights reserved.
......@@ -125,17 +131,18 @@ Copyright(c) 2021, [Helmholtz-Zentrum für Umweltforschung GmbH -- UFZ](https://
For full details, see [LICENSE](LICENSE.md).
## Acknowledgements
...
## Publications
coming soon...
> Lennart Schmidt, David Schäfer, Juliane Geller, Peter Lünenschloss, Bert Palm, Karsten Rinke, Corinna Rebmann, Michael Rode, Jan Bumberger, System for automated Quality Control (SaQC) to enable traceable and reproducible data streams in environmental science, Environmental Modelling & Software, 2023, 105809, ISSN 1364-8152, https://doi.org/10.1016/j.envsoft.2023.105809. (https://www.sciencedirect.com/science/article/pii/S1364815223001950)
## How to cite SaQC
If SaQC is advancing your research, please cite as:
> Schäfer, David, Palm, Bert, Lünenschloß, Peter, Schmidt, Lennart, & Bumberger, Jan. (2023). System for automated Quality Control - SaQC (2.3.0). Zenodo. https://doi.org/10.5281/zenodo.5888547
or
> Lennart Schmidt, David Schäfer, Juliane Geller, Peter Lünenschloss, Bert Palm, Karsten Rinke, Corinna Rebmann, Michael Rode, Jan Bumberger, System for automated Quality Control (SaQC) to enable traceable and reproducible data streams in environmental science, Environmental Modelling & Software, 2023, 105809, ISSN 1364-8152, https://doi.org/10.1016/j.envsoft.2023.105809. (https://www.sciencedirect.com/science/article/pii/S1364815223001950)
-----------------
<a href="https://www.ufz.de/index.php?en=33573">
......
......@@ -24,11 +24,14 @@ package_path = os.path.abspath("..")
os.environ["PYTHONPATH"] = ":".join((package_path, os.environ.get("PYTHONPATH", "")))
# ---------- Version string --------------------------------------------------
# read the version string without importing it
vdict = {}
with open("../saqc/version.py") as f:
exec(f.read(), vdict)
version = vdict["__version__"]
# TODO: what do we need the `version` and `release` variables for?
# import saqc for versioning, but prevent plots from popping up
# by setting mpl backend to non-interactive
import saqc.funcs
version = saqc.__version__
saqc.funcs.tools._MPL_DEFAULT_BACKEND = "Agg"
# -- Customize logging -------------------------------------------------------
......
......@@ -100,18 +100,22 @@ Example Data Import
import pandas as pd
data = pd.read_csv('../resources/data/tempSensorGroup.csv', index_col=0)
data.index = pd.DatetimeIndex(data.index)
variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
qc = saqc.SaQC(data)
We load the example `data set <https://git.ufz.de/rdm-software/saqc/-/blob/develop/docs/resources/data/tempSensorGroup.csv>`_
from the *saqc* repository using the `pandas <https://pandas.pydata.org/>`_ csv
file reader. Subsequently, we cast the index of the imported data to `DatetimeIndex`,
and use the dataframe's `plot` method, to inspect the imported data:
instantiate a saqc object and plot the data:
.. doctest:: flagDriftFromNorm
>>> import saqc
>>> data = pd.read_csv('./resources/data/tempSensorGroup.csv', index_col=0)
>>> data.index = pd.DatetimeIndex(data.index)
>>> data.plot() # doctest: +SKIP
>>> variables = ['temp1 [degC]', 'temp2 [degC]', 'temp3 [degC]', 'temp4 [degC]', 'temp5 [degC]']
>>> qc = saqc.SaQC(data)
>>> qc.plot(variables) # doctest: +SKIP
.. plot::
......@@ -119,22 +123,13 @@ and use the dataframe's `plot` method, to inspect the imported data:
:include-source: False
:class: center
data.plot()
qc.plot(variables)
Example Algorithm Application
-----------------------------
Looking at our example data set more closely, we see that 2 of the 5 variables start to drift away.
.. plot::
:context: close-figs
:include-source: False
:class: center
:caption: 2 variables start departing the majority group of variables (the group containing more than ``frac`` variables) around july.
data['2017-05':'2017-11'].plot()
Looking at the example data set more closely, we see that 2 of the 5 variables start to drift away.
.. plot::
:context: close-figs
......@@ -142,17 +137,9 @@ Looking at our example data set more closely, we see that 2 of the 5 variables s
:class: center
:caption: 2 variables are departed from the majority group of variables (the group containing more than ``frac`` variables) by the end of the year.
data['2017-09':'2018-01'].plot()
Let's try to detect those drifts via saqc. Therefore, we import the *saqc* package and instantiate a :py:class:`saqc.SaQC`
object with the data:
.. doctest:: flagDriftFromNorm
>>> import saqc
>>> qc = saqc.SaQC(data)
qc.plot(variables, xscope=slice('2017-05', '2017-11'))
The changes we observe in the data seem to develop significantly only in temporal spans over a month,
Let's try to detect those drifts via saqc. The changes we observe in the data seem to develop significantly only in temporal spans over a month,
so we go for ``"1M"`` as value for the
``window`` parameter. We identified the majority group as a group containing three variables, whereby two variables
seem to be scattered away, so that we can leave the ``frac`` value at its default ``.5`` level.
......@@ -179,55 +166,12 @@ Lets check the results:
.. doctest:: flagDriftFromNorm
>>> qc.plot('temp1 [degC]') # doctest: +SKIP
>>> qc.plot(variables, marker_kwargs={'alpha':.3, 's': 1, 'color': 'red', 'edgecolor': 'face'}) # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot('temp1 [degC]')
.. doctest:: flagDriftFromNorm
>>> qc.plot('temp2 [degC]') # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot('temp2 [degC]')
.. doctest:: flagDriftFromNorm
>>> qc.plot('temp3 [degC]') # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot('temp3 [degC]')
.. doctest:: flagDriftFromNorm
>>> qc.plot('temp4 [degC]') # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot('temp4 [degC]')
.. doctest:: flagDriftFromNorm
>>> qc.plot('temp5 [degC]') # doctest: +SKIP
.. plot::
:context: close-figs
:include-source: False
:class: center
qc.plot(variables, marker_kwargs={'alpha':.3, 's': 1, 'color': 'red', 'edgecolor': 'face'})
qc.plot('temp5 [degC]')
\ No newline at end of file
......@@ -246,17 +246,14 @@ Check out the results for the year *2016*
.. doctest:: exampleMV
>>> plt.plot(qc.data['sac254_raw']['2016'], alpha=.5, color='black', label='original') # doctest:+SKIP
>>> plt.plot(qc.data['sac254_corrected']['2016'], color='black', label='corrected') # doctest:+SKIP
>>> qc.plot(['sac254_raw','sac254_corrected'], xscope='2016', plot_kwargs={'color':['black', 'black'], 'alpha':[.5, 1], 'label':['original', 'corrected']}) # doctest:+SKIP
.. plot::
:context:
:include-source: False
plt.figure(figsize=(16,9))
plt.plot(qc.data['sac254_raw']['2016'], alpha=.5, color='black', label='original')
plt.plot(qc.data['sac254_corrected']['2016'], color='black', label='corrected')
plt.legend()
>>> qc.plot(['sac254_raw','sac254_corrected'], xscope='2016', plot_kwargs={'color':['black', 'black'], 'alpha':[.5, 1], 'label':['original', 'corrected']})
Multivariate Flagging Procedure
-------------------------------
......@@ -345,7 +342,7 @@ correlated with relatively high *kNNscores*, we could try to calculate a thresho
`STRAY <https://arxiv.org/pdf/1908.04000.pdf>`_ algorithm, which is available as the method:
:py:meth:`~saqc.SaQC.flagByStray`. This method will mark some samples of the `kNNscore` variable as anomaly.
Subsequently we project these marks (or *flags*) on to the *sac* variable with a call to
:py:meth:`~saqc.SaQC.concatFlags`. For the sake of demonstration, we also project the flags
:py:meth:`~saqc.SaQC.transferFlags`. For the sake of demonstration, we also project the flags
on the normalized *sac* and plot the flagged values in the *sac254_norm* - *level_norm* feature space.
......@@ -353,8 +350,8 @@ on the normalized *sac* and plot the flagged values in the *sac254_norm* - *leve
.. doctest:: exampleMV
>>> qc = qc.flagByStray(field='kNNscores', freq='30D', alpha=.3)
>>> qc = qc.concatFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
>>> qc = qc.concatFlags(field='kNNscores', target='sac254_norm', label='STRAY')
>>> qc = qc.transferFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
>>> qc = qc.transferFlags(field='kNNscores', target='sac254_norm', label='STRAY')
>>> qc.plot('sac254_corrected', xscope='2016-11') # doctest:+SKIP
>>> qc.plot('sac254_norm', phaseplot='level_norm', xscope='2016-11') # doctest:+SKIP
......@@ -363,8 +360,8 @@ on the normalized *sac* and plot the flagged values in the *sac254_norm* - *leve
:include-source: False
qc = qc.flagByStray(field='kNNscores', freq='30D', alpha=.3)
qc = qc.concatFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
qc = qc.concatFlags(field='kNNscores', target='sac254_norm', label='STRAY')
qc = qc.transferFlags(field='kNNscores', target='sac254_corrected', label='STRAY')
qc = qc.transferFlags(field='kNNscores', target='sac254_norm', label='STRAY')
.. plot::
:context: close-figs
......@@ -393,4 +390,4 @@ Config
To configure `saqc` to execute the above data processing and flagging steps, the config file would have to look
as follows:
.. literalinclude:: ../resources/data/hydro_config.csv
\ No newline at end of file
.. literalinclude:: ../resources/data/hydro_config.csv
......@@ -255,25 +255,11 @@ This function object, we can pass on to the :py:meth:`~saqc.SaQC.processGeneric`
Visualisation
-------------
We can obtain those updated informations by generating a `pandas dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
representation of it, with the :py:attr:`data <saqc.core.core.SaQC.data>` method:
To see all the results obtained so far, plotted in one figure window, we make use of the :py:meth:`~saqc.SaQC.plot` method.
.. doctest:: exampleOD
>>> data = qc.data
.. plot::
:context:
:include-source: False
data = qc.data
To see all the results obtained so far, plotted in one figure window, we make use of the dataframes `plot <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html>`_ method.
.. doctest:: exampleOD
>>> data.to_pandas().plot()
<Axes...>
>>> qc.plot(".", regex=True) # doctest: +SKIP
.. plot::
:context:
......@@ -281,7 +267,7 @@ To see all the results obtained so far, plotted in one figure window, we make us
:width: 80 %
:class: center
data.to_pandas().plot()
qc.plot(".", regex=True)
Residuals and Scores
......
......@@ -51,8 +51,7 @@ dummy dataset, to lead us through the following code snippets:
.. testsetup:: python
from saqc import fromConfig
from tests.common import writeIO
from saqc.parsing.reader import _ConfigReader as ConfigReader
.. testcode:: python
......@@ -116,16 +115,13 @@ Simple constraints
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------
... x ; flagGeneric(func=x < 30)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc1.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -177,16 +173,13 @@ Cross variable constraints
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------
... x ; flagGeneric(field="y", func=y > 30)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc2.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -241,16 +234,13 @@ need to be put in parentheses.
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;--------------------------------------------------------
... x ; flagGeneric(field=["y", "z"], func=(y > 30) & (z < 50))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc3.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -293,16 +283,13 @@ Arithmetics
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;-------------------------------------------------------
... x ; flagGeneric(field=["x", "y", "z"], func=x > (y + z)/2)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc4.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -351,16 +338,13 @@ Special functions
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;---------------------------------------------------
... x ; flagGeneric(field=["x", "z"], func=x > std(z) * 2)
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc5.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -402,17 +386,14 @@ Special functions
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------------
... y ; flagRange(min=10, max=60)
... x ; flagGeneric(field="y", func=isflagged(y))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc6.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -481,16 +462,13 @@ Let's consider the following dataset:
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;---------------------------------------------------------------
... meas ; flagGeneric(field=["fan", "volt"], func=(x == 0) | (y < 12.0))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc7.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -533,8 +511,7 @@ But we could also quality check our independent variables first and than leverag
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;--------------------------------------------------------------------------
......@@ -543,9 +520,7 @@ But we could also quality check our independent variables first and than leverag
... volt ; flagGeneric(func=volt < 12.0)
... meas ; flagGeneric(field=["fan", "volt"], func=isflagged(fan) | isflagged(volt))
... """
... ),
... data
... )
... ).run()
>>> tmp.flags == qc8.flags #doctest:+NORMALIZE_WHITESPACE
True
......@@ -634,16 +609,13 @@ variables in a given dataset. We start with dummy data again:
.. doctest:: python
:hide:
>>> tmp = fromConfig(
... writeIO(
>>> tmp = ConfigReader(data).readString(
... """
... varname ; test
... #-------;------------------------------------------------------
... mean ; processGeneric(field=["x", "y", "z"], func=(x+y+z)/2)
... """
... ),
... data
... )
... ).run()
>>> tmp.data == qc1.data #doctest:+NORMALIZE_WHITESPACE
True
......
......@@ -37,7 +37,6 @@ Example Data
:context: close-figs
:include-source: False
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import saqc
......
......@@ -13,4 +13,4 @@ Change Points and Noise
~SaQC.flagChangePoints
~SaQC.assignChangePointCluster
~SaQC.flagByStatLowPass
~SaQC.flagByScatterLowpass
......@@ -78,7 +78,7 @@ or
.. code-block:: sh
pip install git+https://git.ufz.de/rdm-software/saqc@master
pip install git+https://git.ufz.de/rdm-software/saqc@main
If you feel more adventurous, feel free to use the latest development version from our
......
......@@ -4,11 +4,11 @@
recommonmark==0.7.1
sphinx==6.2.1
sphinx-automodapi==0.15.0
sphinx-automodapi==0.16.0
sphinxcontrib-fulltoc==1.2.0
sphinx-markdown-tables==0.0.17
jupyter-sphinx==0.4.0
sphinx_autodoc_typehints==1.23
sphinx-tabs==3.4.1
sphinx-design==0.4.1
sphinx-design==0.5.0
pydata-sphinx-theme==0.13.3
......@@ -3,4 +3,4 @@ varname ; test
SM2 ; align(freq="15Min", method="nshift")
SM2 ; flagMissing()
'SM(1|2)+' ; flagRange(min=10, max=60)
SM2 ; flagMAD(window="30d", z=3.5)
SM2 ; flagZScore(window="30d", thresh=3.5, method='modified', center=False)
......@@ -3,5 +3,5 @@ SM2;align(freq="15Min", method="nshift");False
'.*';flagRange(min=10, max=60);False
SM2;flagMissing();False
SM2;flagRange(min=10, max=60);False
SM2;flagMAD(window="30d", z=3.5);False
SM2;flagZScore(window="30d", thresh=3.5, method='modified', center=False);False
Dummy;flag(func=(isflagged(SM1) | isflagged(SM2)))
......@@ -16,6 +16,6 @@ water_z ; transform(field=['water_temp_raw'], func=zScore(x), fr
sac_z ; transform(field=['sac254_raw'], func=zScore(x), freq='20D')
kNN_scores ; assignKNNScore(field=['level_z', 'water_z', 'sac_z'], freq='20D')
kNN_scores ; flagByStray(freq='20D')
level_raw ; concatFlags(field=['kNN_scores'], label='STRAY')
sac254_corr ; concatFlags(field=['kNN_scores'], label='STRAY')
water_temp_raw ; concatFlags(field=['kNN_scores'], label='STRAY')
level_raw ; transferFlags(field=['kNN_scores'], label='STRAY')
sac254_corr ; transferFlags(field=['kNN_scores'], label='STRAY')
water_temp_raw ; transferFlags(field=['kNN_scores'], label='STRAY')
varname;test
#------;--------------------------
SM2 ;flagRange(min=10, max=60)
SM2 ;flagMAD(window="30d", z=3.5)
SM2 ;flagZScore(window="30d", thresh=3.5, method="modified", center=False)
SM2 ;plot()
\ No newline at end of file