fixed __main__.py, and .gitlab-ci.yml, added new integration test

fixed __main__.py, and .gitlab-ci.yml, added new integration test
851e5cb8 · Bert Palm · f06eaead · 851e5cb8 · 851e5cb8 · 851e5cb8
Commit 851e5cb8 authored 4 years ago by Bert Palm 🎇
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+# ===========================================================
+# preparation
+# ===========================================================
+
 variables:
  GIT_SUBMODULE_STRATEGY: recursive

-
 default:
  image: python:3.8
+  before_script:
+    - pip install --upgrade pip
+    - pip install pytest
+    - pip install -r requirements.txt


-before_script:
-  - pip install --upgrade pip
-  - pip install pytest
-  - pip install -r requirements.txt
-
+# ===========================================================
+# normal jobs (non scheduled)
+# ===========================================================

 # test saqc with python 3.7
 python37:
  stage: test
+  except:
+    - schedules
  image: python:3.7
  script:
    - pytest tests/core tests/flagger tests/funcs
@@ -24,31 +31,19 @@ python37:
 # test saqc with python 3.8
 python38:
  stage: test
+  except:
+    - schedules
  script:
    - pytest tests/core tests/flagger tests/funcs
    - python -m saqc --config ressources/data/config_ci.csv --data ressources/data/data.csv --outfile /tmp/test.csv


-# test lib saqc
-testLib:
-  stage: test
-  script:
-    - pytest tests/lib
-
-
-# fuzzy testing saqc
-fuzzy:
-  allow_failure: true
-  stage: test
-  script:
-    - pytest tests/fuzzy
-
-
 # make (visual) coverage in gitlab merge request diffs
 coverage:
-  allow_failure: true
  stage: test
-
+  except:
+    - schedules
+  allow_failure: true
  script:
    - pip install pytest-cov coverage
    - pytest --cov=saqc tests/core tests/flagger tests/funcs
@@ -67,6 +62,10 @@ coverage:
 # make html docu with sphinx
 pages:
  stage: deploy
+  only:
+    - develop
+  except:
+    - schedules
  script:
    - cd sphinx-doc/
    - pip install -r requirements_sphinx.txt
@@ -75,5 +74,26 @@ pages:
  artifacts:
    paths:
      - public
+
+
+# ===========================================================
+# scheduled jobs
+# ===========================================================
+
+# fuzzy testing saqc
+fuzzy:
+  stage: test
  only:
-    - develop
+    - schedules
+  script:
+    - pytest tests/fuzzy
+
+
+# test lib saqc
+testLib:
+  stage: test
+  only:
+    - schedules
+  script:
+    - pytest tests/lib
+
--- a/saqc/__main__.py
+++ b/saqc/__main__.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import logging
+import warnings
 from functools import partial
 from pathlib import Path

@@ -11,18 +12,18 @@ import numpy as np
 import pandas as pd
 import pyarrow as pa

+from saqc.constants import *
 from saqc.core import SaQC
-from saqc.flagger import CategoricalFlagger
-from saqc.flagger.dmpflagger import DmpFlagger


 logger = logging.getLogger("SaQC")


-FLAGGERS = {
-    "numeric": CategoricalFlagger([-1, 0, 1]),
-    "category": CategoricalFlagger(["NIL", "OK", "BAD"]),
-    "dmp": DmpFlagger(),
+SCHEMES = {
+    None: None,
+    "numeric": NotImplemented,
+    "category": NotImplemented,
+    "dmp": NotImplemented,
 }


@@ -72,7 +73,7 @@ def writeData(writer_dict, df, fname):
 )
 @click.option("-o", "--outfile", type=click.Path(exists=False), help="path to the output file")
 @click.option(
-    "--flagger", default="category", type=click.Choice(FLAGGERS.keys()), help="the flagging scheme to use",
+    "--flagger", default=None, type=click.Choice(SCHEMES.keys()), help="the flagging scheme to use",
 )
 @click.option("--nodata", default=np.nan, help="nodata value")
 @click.option(
@@ -81,27 +82,25 @@ def writeData(writer_dict, df, fname):
 @click.option("--fail/--no-fail", default=True, help="whether to stop the program run on errors")
 def main(config, data, flagger, outfile, nodata, log_level, fail):

+    if SCHEMES[flagger] is NotImplemented:
+        warnings.warn("flagger is currently not supported")
+
    _setup_logging(log_level)
    reader, writer = setupIO(nodata)

    data = readData(reader, data)

-    saqc = SaQC(flagger=FLAGGERS[flagger], data=data, nodata=nodata, error_policy="raise" if fail else "warn",)
+    saqc = SaQC(data=data, nodata=nodata, error_policy="raise" if fail else "warn",)

    data_result, flagger_result = saqc.readConfig(config).getResult(raw=True)

    if outfile:
        data_result = data_result.to_df()
-        flags = flagger_result.flags.to_df()
-        flags_flagged = flagger_result.isFlagged().to_df()
-
-        flags_out = flags.where((flags.isnull() | flags_flagged), flagger_result.GOOD)
-        fields = {"data": data_result, "flags": flags_out}
+        flags = flagger_result.toFrame()
+        unflagged = (flags == UNFLAGGED) | flags.isna()
+        flags[unflagged] = GOOD

-        if isinstance(flagger_result, DmpFlagger):
-            fields["quality_flag"] = fields.pop("flags")
-            fields["quality_comment"] = flagger_result.comments.to_df()
-            fields["quality_cause"] = flagger_result.causes.to_df()
+        fields = {"data": data_result, "flags": flags}

        out = (
            pd.concat(fields.values(), axis=1, keys=fields.keys())

--- a/saqc/flagger/flags.py
+++ b/saqc/flagger/flags.py
@@ -424,5 +424,6 @@ def appendHistory(flags: Flags, column, append_hist):
    flags.history[column] = new_history
    return flags

+
 # for now we keep this name
 Flagger = Flags
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
--- a/tests/integration/test_integration.py
+++ b/tests/integration/test_integration.py
+#!/usr/bin/env python
+from click.testing import CliRunner
+import os
+
+
+def test__main__py():
+    import saqc.__main__
+
+    # if not run from project root
+    projpath = os.path.dirname(saqc.__file__) + '/../'
+
+    runner = CliRunner()
+    result = runner.invoke(
+        saqc.__main__.main, [
+            '--config', projpath + 'ressources/data/config_ci.csv',
+            '--data', projpath + 'ressources/data/data.csv',
+            '--outfile', '/tmp/test.csv',  # the filesystem temp dir
+        ])
+    assert result.exit_code == 0, result.output