Skip to content
Snippets Groups Projects
Commit ab4c7489 authored by Bert Palm 🎇
Browse files

`assign` becomes its own Field in meta; all variables are introduced in a pre-run pass

parent 08437754
No related branches found
No related tags found
No related merge requests found
...@@ -9,6 +9,7 @@ class Fields: ...@@ -9,6 +9,7 @@ class Fields:
VARNAME = "headerout" VARNAME = "headerout"
STARTDATE = "date start" STARTDATE = "date start"
ENDDATE = "date end" ENDDATE = "date end"
ASSIGN = "assign"
FLAGS = "Flag*" FLAGS = "Flag*"
......
...@@ -51,7 +51,15 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan): ...@@ -51,7 +51,15 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
raise TypeError("cannot infer time frequency from dataset") raise TypeError("cannot infer time frequency from dataset")
# the required meta data columns # the required meta data columns
fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE] fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE, Fields.ASSIGN]
# get to know every variable from meta
for idx, configrow in meta.iterrows():
varname, _, _, assign = configrow[fields]
if varname not in flags and (varname in data or varname not in data and assign):
col_flags = flagger.initFlags(pd.DataFrame(index=data.index, columns=[varname]))
flags = col_flags if flags.empty else flags.join(col_flags)
print(flags.columns.values)
# NOTE: # NOTE:
# the outer loop runs over the flag tests, the inner one over the # the outer loop runs over the flag tests, the inner one over the
...@@ -70,29 +78,16 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan): ...@@ -70,29 +78,16 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
if pd.isnull(flag_test): if pd.isnull(flag_test):
continue continue
varname, start_date, end_date = configrow[fields] varname, start_date, end_date, _ = configrow[fields]
func_name, flag_params = parseFlag(flag_test) func_name, flag_params = parseFlag(flag_test)
# NOTE: if varname not in data and varname not in flags:
# create a flag column if this is explicitly stated
# or if a variable is checked but no corresponding
# flag column exists
if flag_params.get(FlagParams.ASSIGN) or \
(varname in data and varname not in flags):
col_flags = flagger.initFlags(
pd.DataFrame(index=data.index, columns=[varname]))
flags = col_flags if flags.empty else flags.join(col_flags)
elif varname not in data and varname not in flags:
continue continue
dchunk = data.loc[start_date:end_date] dchunk = data.loc[start_date:end_date]
if dchunk.empty: if dchunk.empty:
continue continue
# NOTE:
# within the activation period of a variable, the flag will
# be initialized if necessary
fchunk = flags.loc[start_date:end_date] fchunk = flags.loc[start_date:end_date]
try: try:
...@@ -139,6 +134,9 @@ def prepareMeta(meta, data): ...@@ -139,6 +134,9 @@ def prepareMeta(meta, data):
{Fields.ENDDATE: data.index.max(), {Fields.ENDDATE: data.index.max(),
Fields.STARTDATE: data.index.min()}) Fields.STARTDATE: data.index.min()})
if Fields.ASSIGN not in meta:
meta = meta.assign(**{Fields.ASSIGN: False})
# rows without a variables name don't help much # rows without a variables name don't help much
meta = meta.dropna(subset=[Fields.VARNAME]) meta = meta.dropna(subset=[Fields.VARNAME])
......
...@@ -95,8 +95,9 @@ def test_assignVariable(flagger): ...@@ -95,8 +95,9 @@ def test_assignVariable(flagger):
meta = prepareMeta( meta = prepareMeta(
pd.DataFrame( pd.DataFrame(
{Fields.VARNAME: [var1, var2], {Fields.VARNAME: [var1, var2],
Fields.ASSIGN: [False, True],
Fields.FLAGS: ["range, {min: 99999, max: -99999}", Fields.FLAGS: ["range, {min: 99999, max: -99999}",
f"generic, {{func: isflagged({var1}), assign: True}}"]}), f"generic, {{func: isflagged({var1})}}"]}),
data) data)
pdata, pflags = runner(meta, flagger, data) pdata, pflags = runner(meta, flagger, data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment