From ab4c7489844df9b154ed211848118c9b857e211a Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Wed, 24 Apr 2019 14:17:43 +0200
Subject: [PATCH] assign becomes its own Field in meta; all variables are
 introduced in a pre-run pass

---
 config.py         |  1 +
 core.py           | 30 ++++++++++++++----------------
 test/test_core.py |  3 ++-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/config.py b/config.py
index d9066f72b..340db537d 100644
--- a/config.py
+++ b/config.py
@@ -9,6 +9,7 @@ class Fields:
     VARNAME = "headerout"
     STARTDATE = "date start"
     ENDDATE = "date end"
+    ASSIGN = "assign"
     FLAGS = "Flag*"
 
 
diff --git a/core.py b/core.py
index 84afc00b9..56e0fc681 100644
--- a/core.py
+++ b/core.py
@@ -51,7 +51,15 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
         raise TypeError("cannot infer time frequency from dataset")
 
     # the required meta data columns
-    fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE]
+    fields = [Fields.VARNAME, Fields.STARTDATE, Fields.ENDDATE, Fields.ASSIGN]
+
+    # initialize flags up front for each meta variable that is in data or assigned
+    for idx, configrow in meta.iterrows():
+        varname, _, _, assign = configrow[fields]
+        if varname not in flags and (varname in data or varname not in data and assign):
+            col_flags = flagger.initFlags(pd.DataFrame(index=data.index, columns=[varname]))
+            flags = col_flags if flags.empty else flags.join(col_flags)
+    print(flags.columns.values)
 
     # NOTE:
     # the outer loop runs over the flag tests, the inner one over the
@@ -70,29 +78,16 @@ def runner(meta, flagger, data, flags=None, nodata=np.nan):
             if pd.isnull(flag_test):
                 continue
 
-            varname, start_date, end_date = configrow[fields]
+            varname, start_date, end_date, _ = configrow[fields]
             func_name, flag_params = parseFlag(flag_test)
 
-            # NOTE:
-            # create a flag column if this is explicitly stated
-            # or if a variable is checked but no corresponding
-            # flag column exists
-            if flag_params.get(FlagParams.ASSIGN) or \
-                    (varname in data and varname not in flags):
-                col_flags = flagger.initFlags(
-                    pd.DataFrame(index=data.index, columns=[varname]))
-                flags = col_flags if flags.empty else flags.join(col_flags)
-
-            elif varname not in data and varname not in flags:
+            if varname not in data and varname not in flags:
                 continue
 
             dchunk = data.loc[start_date:end_date]
             if dchunk.empty:
                 continue
 
-            # NOTE:
-            # within the activation period of a variable, the flag will
-            # be initialized if necessary
             fchunk = flags.loc[start_date:end_date]
 
             try:
@@ -139,6 +134,9 @@ def prepareMeta(meta, data):
         {Fields.ENDDATE: data.index.max(),
          Fields.STARTDATE: data.index.min()})
 
+    if Fields.ASSIGN not in meta:
+        meta = meta.assign(**{Fields.ASSIGN: False})
+
     # rows without a variables name don't help much
     meta = meta.dropna(subset=[Fields.VARNAME])
 
diff --git a/test/test_core.py b/test/test_core.py
index b51c7b7d9..de8023372 100644
--- a/test/test_core.py
+++ b/test/test_core.py
@@ -95,8 +95,9 @@ def test_assignVariable(flagger):
     meta = prepareMeta(
         pd.DataFrame(
             {Fields.VARNAME: [var1, var2],
+             Fields.ASSIGN: [False, True],
              Fields.FLAGS: ["range, {min: 99999, max: -99999}",
-                            f"generic, {{func: isflagged({var1}), assign: True}}"]}),
+                            f"generic, {{func: isflagged({var1})}}"]}),
         data)
 
     pdata, pflags = runner(meta, flagger, data)
-- 
GitLab