From ab4c7489844df9b154ed211848118c9b857e211a Mon Sep 17 00:00:00 2001 From: Bert Palm <bert.palm@ufz.de> Date: Wed, 24 Apr 2019 14:17:43 +0200 Subject: [PATCH] assign becomes own Field in meta, pre-run introducing of all variables --- config.py | 1 + core.py | 30 ++++++++++++++---------------- test/test_core.py | 3 ++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/config.py b/config.py index d9066f72b..340db537d 100644 --- a/config.py +++ b/config.py @@ -9,6 +9,7 @@ class Fields: VARNAME = "headerout" STARTDATE = "date start" ENDDATE = "date end" + ASSIGN = "assign" FLAGS = "Flag*" diff --git a/core.py b/core.py index 84afc00b9..56e0fc681 100644 --- a/core.py +++ b/core.py @@ -51,7 +51,15 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan): raise TypeError("cannot infer time frequency from dataset") # the required meta data columns - fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE] + fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE, Fields.ASSIGN] + + # get to know every variable from meta + for idx, configrow in meta.iterrows(): + varname, _, _, assign = configrow[fields] + if varname not in flags and (varname in data or varname not in data and assign): + col_flags = flagger.initFlags(pd.DataFrame(index=data.index, columns=[varname])) + flags = col_flags if flags.empty else flags.join(col_flags) + print(flags.columns.values) # NOTE: # the outer loop runs over the flag tests, the inner one over the @@ -70,29 +78,16 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan): if pd.isnull(flag_test): continue - varname, start_date, end_date = configrow[fields] + varname, start_date, end_date, _ = configrow[fields] func_name, flag_params = parseFlag(flag_test) - # NOTE: - # create a flag column if this is explicitly stated - # or if a variable is checked but no corresponding - # flag column exists - if flag_params.get(FlagParams.ASSIGN) or \ - (varname in data and varname not in flags): - col_flags = flagger.initFlags( - pd.DataFrame(index=data.index, columns=[varname])) - flags = col_flags if flags.empty else flags.join(col_flags) - - elif varname not in data and varname not in flags: + if varname not in data and varname not in flags: continue dchunk = data.loc[start_date:end_date] if dchunk.empty: continue - # NOTE: - # within the activation period of a variable, the flag will - # be initialized if necessary fchunk = flags.loc[start_date:end_date] try: @@ -139,6 +134,9 @@ def prepareMeta(meta, data): {Fields.ENDDATE: data.index.max(), Fields.STARTDATE: data.index.min()}) + if Fields.ASSIGN not in meta: + meta = meta.assign(**{Fields.ASSIGN: False}) + # rows without a variables name don't help much meta = meta.dropna(subset=[Fields.VARNAME]) diff --git a/test/test_core.py b/test/test_core.py index b51c7b7d9..de8023372 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -95,8 +95,9 @@ def test_assignVariable(flagger): meta = prepareMeta( pd.DataFrame( {Fields.VARNAME: [var1, var2], + Fields.ASSIGN: [False, True], Fields.FLAGS: ["range, {min: 99999, max: -99999}", - f"generic, {{func: isflagged({var1}), assign: True}}"]}), + f"generic, {{func: isflagged({var1})}}"]}), data) pdata, pflags = runner(meta, flagger, data) -- GitLab