diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 73f3f7e26c008d7e69d562ec114fd2ce117df1f1..490a4cf65ce118a0b322e7b436de134272ccc87e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,7 +25,7 @@ python37: image: python:3.7 script: - pytest tests/core tests/flagger tests/funcs -# - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv + - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv # test saqc with python 3.8 @@ -35,7 +35,7 @@ python38: - schedules script: - pytest tests/core tests/flagger tests/funcs -# - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv + - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv # make (visual) coverage in gitlab merge request diff's @@ -85,7 +85,6 @@ fuzzy: stage: test only: - schedules - allow_failure: true script: - pytest tests/fuzzy diff --git a/saqc/__main__.py b/saqc/__main__.py index 806377faa01a955c8105ace70425a706ae5ebdbc..b878c823788e383f710d7ce7e2fbdd21b4fab523 100644 --- a/saqc/__main__.py +++ b/saqc/__main__.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import logging +import warnings from functools import partial from pathlib import Path @@ -11,18 +12,18 @@ import numpy as np import pandas as pd import pyarrow as pa +from saqc.constants import * from saqc.core import SaQC -from saqc.flagger import CategoricalFlagger -from saqc.flagger.dmpflagger import DmpFlagger logger = logging.getLogger("SaQC") -FLAGGERS = { - "numeric": CategoricalFlagger([-1, 0, 1]), - "category": CategoricalFlagger(["NIL", "OK", "BAD"]), - "dmp": DmpFlagger(), +SCHEMES = { + None: None, + "numeric": NotImplemented, + "category": NotImplemented, + "dmp": NotImplemented, } @@ -72,7 +73,7 @@ def writeData(writer_dict, df, fname): ) @click.option("-o", "--outfile", type=click.Path(exists=False), help="path to the 
output file") @click.option( - "--flagger", default="category", type=click.Choice(FLAGGERS.keys()), help="the flagging scheme to use", + "--flagger", default=None, type=click.Choice(SCHEMES.keys()), help="the flagging scheme to use", ) @click.option("--nodata", default=np.nan, help="nodata value") @click.option( @@ -81,27 +82,25 @@ def writeData(writer_dict, df, fname): @click.option("--fail/--no-fail", default=True, help="whether to stop the program run on errors") def main(config, data, flagger, outfile, nodata, log_level, fail): + if SCHEMES[flagger] is NotImplemented: + warnings.warn("flagger is currently not supported") + _setup_logging(log_level) reader, writer = setupIO(nodata) data = readData(reader, data) - saqc = SaQC(flagger=FLAGGERS[flagger], data=data, nodata=nodata, error_policy="raise" if fail else "warn",) + saqc = SaQC(data=data, nodata=nodata, error_policy="raise" if fail else "warn",) data_result, flagger_result = saqc.readConfig(config).getResult(raw=True) if outfile: data_result = data_result.to_df() - flags = flagger_result.flags.to_df() - flags_flagged = flagger_result.isFlagged().to_df() - - flags_out = flags.where((flags.isnull() | flags_flagged), flagger_result.GOOD) - fields = {"data": data_result, "flags": flags_out} + flags = flagger_result.toFrame() + unflagged = (flags == UNFLAGGED) | flags.isna() + flags[unflagged] = GOOD - if isinstance(flagger_result, DmpFlagger): - fields["quality_flag"] = fields.pop("flags") - fields["quality_comment"] = flagger_result.comments.to_df() - fields["quality_cause"] = flagger_result.causes.to_df() + fields = {"data": data_result, "flags": flags} out = ( pd.concat(fields.values(), axis=1, keys=fields.keys()) diff --git a/saqc/flagger/flags.py b/saqc/flagger/flags.py index d40544a9586714a4b70e7104d888d49ab35f4ac1..3b59e65adda67f946790ee04cabd427c45432e77 100644 --- a/saqc/flagger/flags.py +++ b/saqc/flagger/flags.py @@ -424,5 +424,6 @@ def appendHistory(flags: Flags, column, append_hist): 
flags.history[column] = new_history return flags + # for now we keep this name Flagger = Flags
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py
new file mode 100644
index 0000000000000000000000000000000000000000..e88a90ab308f2d166d4741b43b299f2a5282129a
--- /dev/null
+++ b/tests/integration/test_integration.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""Integration test that runs the saqc command line entry point end to end."""
+import os
+import tempfile
+
+from click.testing import CliRunner
+
+
+def test__main__py():
+    """Invoke ``saqc.__main__.main`` on the CI sample files and expect exit code 0."""
+    import saqc.__main__
+
+    # Resolve the sample files relative to the saqc package directory, so the
+    # test also works if not run from project root.
+    projpath = os.path.join(os.path.dirname(saqc.__file__), os.pardir)
+    resources = os.path.join(projpath, "ressources", "data")
+
+    runner = CliRunner()
+    # Write into a throw-away directory instead of a fixed /tmp path: that
+    # location is not available on every platform and is shared between runs.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        result = runner.invoke(
+            saqc.__main__.main,
+            [
+                "--config", os.path.join(resources, "config_ci.csv"),
+                "--data", os.path.join(resources, "data.csv"),
+                "--outfile", os.path.join(tmpdir, "test.csv"),
+            ],
+        )
+    assert result.exit_code == 0, result.output