Skip to content
Snippets Groups Projects

Dendro

Merged Alexander Hinz requested to merge hinza/data_progs:dendro into meteo
All threads resolved!
1 file
+ 35
51
Compare changes
  • Side-by-side
  • Inline
@@ -57,14 +57,11 @@ def dendroCalculation(df, dendro, first_value):
c0 = math.pi * 2 * r0
v0 = df[dendro][0]
cwire = c0 + 2 * ((dini + r0 - v0) ** 2 - r0 ** 2) ** 0.5 - 2 * r0 * math.acos(r0 / (dini + r0 - v0))
for num, val in enumerate(df['rad'][1:], 1):
df['rad'][num] = (cwire
- 2 * ((dini + df['rad'].iloc[num - 1] - df[dendro].iloc[num]) ** 2
- df['rad'].iloc[num - 1] ** 2) ** 0.5
+ 2 * df['rad'].iloc[num - 1]
* math.acos(df['rad'].iloc[num - 1] / (dini + df['rad'].iloc[num - 1]
- df[dendro].iloc[num]))) / math.pi / 2
return df['rad'] * 2
df['rad_2'] = df['rad'].shift(1)
df['rad'] = (cwire
- 2 * ((dini + df['rad_2'] - df[dendro]) ** 2 - df['rad_2'] ** 2) ** .5
+ 2 * df['rad_2'] * np.arccos(df['rad_2'] / (dini + df['rad_2'] - df[dendro]))) / math.pi
return df['rad']
def dendroInterpolation(df, dendro):
@@ -76,72 +73,56 @@ def dendroInterpolation(df, dendro):
def dendroProcess(logger, start_date, end_date, data, manflags):
final_df = pd.DataFrame()
final = []
for dendro in data:
if data[dendro].isnull().all():
continue # table is empty
dropped_data = data[dendro].dropna()
dropped_manflags = manflags.loc[dendro].dropna(how='all')
dropped_manflags.index = dropped_manflags.index.round('10min')
dropped_manflags.index = dropped_manflags.index.ceil('10min')
concat_df = pd.concat([dropped_data, dropped_manflags], axis=1)
concat_df = concat_df.loc[start_date:end_date]
first_value = data.loc[data[dendro].first_valid_index(), dendro]
index_list = concat_df['d_ini'].dropna().index
temporary_df = pd.DataFrame()
stop_list = list(index_list.copy())
stop_list.pop(0)
stop_list.append(pd.Timestamp.today())
for start, stop in zip(index_list, stop_list):
d_ini_index = concat_df['d_ini'].dropna().index
temporary = []
stop_list = d_ini_index[1:].tolist() + [pd.Timestamp.today()]
for start, stop in zip(d_ini_index, stop_list):
part_concat_df = concat_df.loc[start:stop][:-1]
if part_concat_df.empty:
continue
value_df = dendroCalculation(part_concat_df, dendro, first_value)
temporary_df = pd.concat([temporary_df, value_df], axis=0)
try:
temporary_df_regex = r'[0-9]+'
temporary_df.columns = [f'bhd_auto_{re.findall(temporary_df_regex, dendro)[0]} [mm]']
except ValueError:
continue # temporary_df is empty, because no d_ini value in this period of time
temporary.append(value_df)
temporary_df = pd.concat(temporary, axis=0)
temporary_df_regex = r'[0-9]+'
temporary_df.name = f'bhd_auto_{re.findall(temporary_df_regex, dendro)[0]} [mm]'
final.append(temporary_df)
# interpolation Column
df = manflags['dbh'].loc[dendro]
df_reindexed = dendroInterpolation(df, dendro)
final.append(df_reindexed * 10)
# Flags Column
flag = temporary_df.copy()
name_regex = r'[0-9]+'
name = f'bhd_{re.findall(name_regex, dendro)[0]}_f'
flag.columns = [name]
flag[name] = 9
flag_name = f'bhd_{re.findall(name_regex, dendro)[0]}_f'
flag.name = flag_name
flag[:] = 9
final.append(flag)
final_df = pd.concat([final_df, temporary_df, df_reindexed * 10, flag], axis=1)
final_df = pd.concat(final, axis=1)
final_df.index.name = 'Date Time'
return final_df
def writeData(data, device):
    """Write the processed dendrometer table to the level2 CSV files.

    Merges *data* with any previously written combined file (so existing
    columns and rows are preserved), fills unset values and flags with
    sentinel codes, then writes one combined CSV plus one CSV per year.

    Parameters
    ----------
    data : pd.DataFrame
        Output of the dendrometer processing; columns come in triples
        (two value columns followed by a flag column) — inferred from the
        grouper(3, ...) loop below, TODO confirm.
    device : object
        Provides ``station_key`` and ``station_path`` attributes used to
        build the output file names.
    """
    # index of dataframe data does not have datetime as format
    data.index = pd.to_datetime(data.index)
    # NOTE(review): the combined file path is hard-coded to "HohesHolz",
    # while the per-year files below use device.station_path — confirm
    # this asymmetry is intended.
    fname = Path(
        f"{ROOT}/HohesHolz/derived/dendrometer/",
        f"{device.station_key}_bhd_trees_10min.level2.csv")
    if Path(fname).is_file():
        # Merge with the already-written file: keep its column order first,
        # append any columns new to this run, and let the fresh data take
        # precedence over the stored values (combine_first fills only gaps).
        old = pd.read_csv(fname, index_col=0, parse_dates=True)
        resulting_list = list(old.columns)
        resulting_list.extend(x for x in data.columns if x not in resulting_list)
        data = data.combine_first(old)[resulting_list]
    # corrections in not set flags
    # Columns are walked in triples: the first two get the -9999 missing-value
    # sentinel, the third (flag column) gets flag code 92.
    for a, b, c in grouper(3, data):
        data[a] = data[a].replace(np.nan, -9999)
        data[b] = data[b].replace(np.nan, -9999)
        data[c] = data[c].replace(np.nan, 92)
    data.to_csv(fname)
    # Additionally split the output into one file per year
    # ('AS' = year-start frequency, right-closed bins).
    for time, values in data.groupby(by=pd.Grouper(freq='AS', closed="right")):
        values.to_csv(f"{ROOT}/{device.station_path}/derived/dendrometer/{device.station_key}_bhd_trees_10min.level2_{time.year}.csv")
@@ -162,0+143,0 @@
logger.debug("processing")
data, flags = splitTable(df, index_level="varnames")
data = filter(data, flags)
# In case an error in the pipeline has produced a level2 dataframe containing only -9999, the device is skipped here
if data.isna().values.all():
return
manflags.index = manflags.index.str.split(' ').str[0]
manflags = manflags.set_index('end', append=True)
manflags = manflags.drop(['start', 'flag', 'comment'], axis=1)
@@ -184,18 +168,18 @@ def main(station, device, start_date, end_date, debug):
with exceptionLogged(logger, f"{device}: failed", fail=debug):
final_out_list.append(procDevice(logger, device))
logger.debug("writing")
final = pd.concat(final_out_list, axis=1)
final.index.name = 'Date Time'
logger.debug("writing")
final = pd.concat(final_out_list, axis=1)
final.index.name = 'Date Time'
# corrections in index order and not set flags
final = final.sort_index()
for a, b, c in grouper(3, final):
final[a] = final[a].replace(np.nan, -9999)
final[b] = final[b].replace(np.nan, -9999)
final[c] = final[c].replace(np.nan, 92)
# corrections in index order and not set flags
final = final.sort_index()
for a, b, c in grouper(3, final):
final[a] = final[a].replace(np.nan, -9999)
final[b] = final[b].replace(np.nan, -9999)
final[c] = final[c].replace(np.nan, 92)
writeData(final, device)
writeData(final, device)
if __name__ == "__main__":
Loading