Skip to content
Snippets Groups Projects
Commit 17c19ed7 authored by David Schäfer's avatar David Schäfer
Browse files

added linenumber to the read-in meta

parent b2cc6c5c
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@ class Fields:
ASSIGN = "assign"
TESTS = "test*"
PLOT = "plot"
LINENUMBER = "line"
class Params:
......
......@@ -88,18 +88,31 @@ def runner(metafname, flagger, data, flags=None, nodata=np.nan):
def readMeta(fname):
    """Read the comma-separated meta (configuration) file into a DataFrame.

    Parameters
    ----------
    fname : str or file-like
        Path or open text buffer, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, including rows that start with ``#``.
    """
    # Deliberately no `comment="#"`: commented rows must survive the read so
    # that line numbers assigned afterwards still match the positions in the
    # file. Comment rows are filtered out later, after numbering.
    return pd.read_csv(fname, delimiter=",")
def prepareMeta(meta, data):
# NOTE: an option needed to only pass tests within a file and deduce
# everything else from data
if Fields.VARNAME not in meta or meta[Fields.VARNAME].isna().any():
raise TypeError(f"columns {Fields.VARNAME} is needed")
tests = meta.filter(regex=Fields.TESTS)
if tests.empty or tests.isna().all(axis=1).any():
raise TypeError("at least one test must be given")
# add line numbers and remove comments
meta[Fields.LINENUMBER] = np.arange(len(meta)) + 1
comment_mask = ~meta.iloc[:, 0].str.startswith("#")
meta = meta[comment_mask]
# no dates given, fall back to the available index range
for field in [Fields.VARNAME, Fields.TESTS, Fields.START, Fields.END, Fields.ASSIGN, Fields.PLOT]:
if field not in meta:
meta = meta.assign(**{field: np.nan})
# fill with default values
meta = meta.fillna({
Fields.VARNAME: np.nan,
Fields.TESTS: np.nan,
......@@ -109,13 +122,6 @@ def prepareMeta(meta, data):
Fields.PLOT: False,
})
if meta[Fields.VARNAME].isna().any():
raise TypeError(f"columns {Fields.VARNAME} is needed")
tests = meta.filter(regex=Fields.TESTS)
if tests.isna().all(axis=1).any():
raise TypeError("at least one test must be given")
dtype = np.datetime64 if isinstance(data.index, pd.DatetimeIndex) else int
meta[Fields.START] = meta[Fields.START].astype(dtype)
......
......@@ -26,7 +26,7 @@ def initData(cols=2, start_date="2017-01-01", end_date="2017-12-31", freq="1h"):
return pd.DataFrame(data, index=dates)
def initMeta(metastring, data):
def initMetaString(metastring, data):
cleaned = re.sub(r"\s*,\s*", r",",
re.sub(r"\|", r",",
re.sub(r"\n[ \t]+", r"\n",
......@@ -40,6 +40,6 @@ def initMeta(metastring, data):
def initMetaDict(metadict, data):
    """Build a prepared meta frame from a list of dicts, plus a CSV buffer of it.

    Parameters
    ----------
    metadict : list of dict
        One dict per config row; handed to ``pandas.DataFrame``.
    data : pandas.DataFrame
        Data set forwarded to ``prepareMeta``.

    Returns
    -------
    tuple
        ``(fobj, meta)``: an ``io.StringIO`` rewound to position 0 that holds
        the CSV dump of ``meta``, and the prepared frame itself.
    """
    meta = prepareMeta(pd.DataFrame(metadict), data)
    fobj = io.StringIO()
    # Serialise exactly once and without the index. A second dump would be
    # appended to the same buffer, and an index column would become the first
    # CSV column when the buffer is read back.
    # NOTE(review): the reader appears to expect the variable name there - confirm.
    meta.to_csv(fobj, index=False)
    fobj.seek(0)
    return fobj, meta
......@@ -8,7 +8,7 @@ from saqc.funcs import register, flagRange
from saqc.core.core import runner
from saqc.core.config import Fields as F
from saqc.lib.plotting import plot
from test.common import initData, initMetaDict, TESTFLAGGER
from test.common import initData, initMetaDict, initMetaString, TESTFLAGGER
@pytest.fixture
......@@ -35,11 +35,10 @@ def test_temporalPartitioning(data, flagger):
metadict = [
{F.VARNAME: var1, F.TESTS: "flagAll()"},
{F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_date},
{F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_date},
# {F.VARNAME: var2, F.TESTS: "flagAll()", F.END: split_date},
# {F.VARNAME: var3, F.TESTS: "flagAll()", F.START: split_date},
]
meta_file, meta_frame = initMetaDict(metadict, data)
pdata, pflags = runner(meta_file, flagger, data)
fields = [F.VARNAME, F.START, F.END]
......@@ -172,7 +171,7 @@ def test_plotting(data, flagger):
plot(data, flagged, mask, field, flagger, interactive_backend=False)
def test_configReader(data):
def test_configPreparation(data):
var1, var2, var3, *_ = data.columns
date = data.index[len(data.index)//2]
......@@ -185,28 +184,45 @@ def test_configReader(data):
defaults = {
F.START: data.index.min(), F.END: data.index.max(),
F.ASSIGN: False, F.PLOT: False
F.ASSIGN: False, F.PLOT: False, F.LINENUMBER: 1
}
for test in tests:
for i, test in enumerate(tests):
_, meta_frame = initMetaDict([test], data)
result = dict(zip(meta_frame.columns, meta_frame.iloc[0]))
expected = {**defaults, **test}
assert result == expected
def test_configReaderExcpetion(data):
def test_configPreparationExcpetion(data):
var1, var2, var3, *_ = data.columns
date = data.index[len(data.index)//2]
tests = [
{},
{F.TESTS: "flagAll()"},
{F.VARNAME: var2},
{F.VARNAME: var3, F.END: date, F.ASSIGN: True},
# {F.TESTS: "flagAll()"},
# {F.VARNAME: var2},
# {F.VARNAME: var3, F.END: date, F.ASSIGN: True},
]
for test in tests:
with pytest.raises(TypeError):
initMetaDict([test], data)
def test_configReaderLineNumbers(data):
    """Line numbers in the prepared meta must skip commented-out config rows.

    The config below has eight rows after the header; rows 1 and 7 start with
    ``#``, so the remaining rows are expected to carry 2, 3, 4, 5, 6 and 8.
    """
    # The literal is kept flush-left so the text passed on is unchanged.
    config = f"""
{F.VARNAME}|{F.TESTS}
#temp1|flagAll()
temp1|flagAll()
temp2|flagAll()
pre1|flagAll()
pre2|flagAll()
SM|flagAll()
#SM|flagAll()
SM1|flagAll()
"""
    _, prepared = initMetaString(config, data)
    line_numbers = prepared[F.LINENUMBER].tolist()
    assert line_numbers == [2, 3, 4, 5, 6, 8]
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import initData, initMeta
from ..common import initData
from saqc.flagger import BaseFlagger
import numpy as np
......
......@@ -4,7 +4,7 @@
import json
import pandas as pd
from ..common import initData, initMeta, initMetaDict
from ..common import initData, initMetaDict
from saqc.core.core import runner
from saqc.flagger.dmpflagger import DmpFlagger
from saqc.flagger.dmpflagger import FlagFields as F
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment