Commit df6466ab authored by David Schäfer
Browse files

bump saqc

parent 437afbe8
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import re
import json
from datetime import datetime
import click
......@@ -9,7 +10,7 @@ import requests
import numpy as np
import pandas as pd
from saqc import SaQC, DmpFlagger, register
from saqc import fromConfig, DmpTranslator, flagging, processing, Flags, UNFLAGGED
from pipetools.lib import splitData, mergeData, writeParquet
......@@ -80,21 +81,21 @@ def extractNM(data):
return nm
@register()
def calcSoilMositure(
data: pd.DataFrame, field: str, flagger: DmpFlagger,
@processing()
def calcSoilMoisture(
data: pd.DataFrame, field: str, flags: Flags,
neutrons_field: str, n0: int,
lattice_water: float, bulk_density: float, soil_org_carbon: float,
# basically constants
a0: float = 0.0808, a1: float = 0.372, a2: float = 0.115,
**kwargs
) -> (pd.DataFrame, DmpFlagger):
) -> (pd.DataFrame, Flags):
neutrons = data[neutrons_field]
sm = (a0 / (neutrons / n0 - a1) - a2 - lattice_water - soil_org_carbon * 0.556) * bulk_density
data[field] = sm
flagger = flagger.merge(flagger.initFlags(data[field]))
return data, flagger
flags[field] = pd.Series(UNFLAGGED, index=data[field].index)
return data, flags
def getManFlags(fname, field):
......@@ -110,33 +111,31 @@ def getManFlags(fname, field):
return out
@register(masking="field")
def flagManual(data, field, flagger, fname, **kwargs):
@flagging(masking="field")
def flagManual(data, field, flags, fname, **kwargs):
mflags = getManFlags(fname, field)
for _, (start, end, flag) in mflags.iterrows():
mask = pd.Series(data=0, index=data[field].index, dtype=bool)
mask.loc[start:end] = True
flagger = flagger.setFlags(field, loc=mask, flag=flag, **kwargs)
return data, flagger
flags[mask, field] = flag
return data, flags
@click.command()
@click.option("-i", "--infile", type=click.Path(exists=True), required=True)
@click.option("-o", "--outfile", type=click.Path(), required=True)
@click.option("-c", "--configfile", type=click.Path(exists=True), required=True)
@click.option("-m", "--manfile", type=click.Path(exists=True), required=True)
def main(infile, outfile, configfile, manfile):
def main(infile, outfile, configfile):
data = pd.read_parquet(infile)
data.loc[:, "NM"] = extractNM(data)
flagger = DmpFlagger()
saqc = SaQC(flagger, data=data, error_policy="raise")
data, flags = (saqc
.flagManual(".*", regex=True, fname=manfile)
.readConfig(configfile).getResult())
saqc = fromConfig(fname=configfile, data=data, scheme=DmpTranslator(), error_policy="raise")
data, flags = saqc.getResult()
# we currently don't write 'real' metadata into the DMP
flags.loc[:, (slice(None), "quality_comment")] = json.dumps({"comment":"", "test": ""})
data = data.round(8)
df_out = mergeData(data, flags)
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ;flagManual(fname="manual/saqc-flags-1488.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -48,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=2054, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=2054, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1490.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -39,7 +40,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=2250, lattice_water=0.02, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=2250, lattice_water=0.02, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1499.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -39,7 +40,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=1830, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=1830, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1501.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -48,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=828, lattice_water=0.05, bulk_density=1.05, soil_org_carbon=0.1)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=828, lattice_water=0.05, bulk_density=1.05, soil_org_carbon=0.1)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1504.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -48,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=945, lattice_water=0.05, bulk_density=1.05, soil_org_carbon=0.1)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=945, lattice_water=0.05, bulk_density=1.05, soil_org_carbon=0.1)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -2,7 +2,9 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1505.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -47,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=1010, lattice_water=0.01, bulk_density=1.4, soil_org_carbon=0.04)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=1010, lattice_water=0.01, bulk_density=1.4, soil_org_carbon=0.04)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -2,7 +2,9 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1507.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -47,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=800, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.000)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=800, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.000)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-1567.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -39,7 +40,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_inc)
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=5100, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=5100, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# flag everything
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-956.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -48,7 +49,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=1000, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=1000, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
......@@ -3,7 +3,8 @@
varname ; test
# ----- ; ----
'.*' ; breaks.flagMissing(nodata=NAN)
'.*' ; flagManual(fname="manual/saqc-flags-960.csv")
'.*' ; breaks.flagMissing()
# quality control
# ---------------
......@@ -46,7 +47,7 @@ N_corrected ; generic.process(func=N * correct_p * correct_h * correct_i
# soil moisture calculation
# -------------------------
SM ; calcSoilMositure(neutrons_field="N_corrected", n0=1000, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; calcSoilMoisture(neutrons_field="N_corrected", n0=1000, lattice_water=0.0043, bulk_density=1.6, soil_org_carbon=0.0050)
SM ; outliers.flagRange(min=0, max=0.7)
# rename variables to match the DMP names
......
Subproject commit 434175916026cb0f5da1428c6d8d0d740bfc5d7e
Subproject commit cfab0bfbe2ef34e51c9b773931966c98546ee87f
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment