From 8144f3c4dcf0f3ce7e3117467743593ad864292c Mon Sep 17 00:00:00 2001
From: Bert Palm <bert.palm@ufz.de>
Date: Thu, 13 Feb 2020 16:56:55 +0100
Subject: [PATCH] dos->dios

---
 profiling/generate_testsets.py | 21 ++++++++-------------
 profiling/memory.py            | 24 ++++++++++++------------
 profiling/performance.py       | 22 +++++++++++-----------
 3 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/profiling/generate_testsets.py b/profiling/generate_testsets.py
index df2d97e..9ec68ba 100644
--- a/profiling/generate_testsets.py
+++ b/profiling/generate_testsets.py
@@ -44,14 +44,9 @@ def _gen_testset(rowsz, colsz, freq='1min', disalign=True, randstart=True):
     return df, dos
 
 
-def _gen_df(rowsz, colsz, freq='1min', disalign=True, randstart=True):
-    df, _ = _gen_testset(rowsz=rowsz, colsz=colsz, freq=freq, disalign=disalign, randstart=randstart)
-    return df
-
-
-def gen_dos(rowsz, colsz, freq='1min', disalign=True, randstart=True):
-    _, dos = _gen_testset(rowsz=rowsz, colsz=colsz, freq=freq, disalign=disalign, randstart=randstart)
-    return dos
+def get_random_df_and_dios(rowsz, colsz, freq='1min', disalign=True, randstart=True):
+    df, _, _, dios, *_ = get_testset(rowsz, colsz, freq=freq, disalign=disalign, randstart=randstart)
+    return df, dios
 
 
 def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir='testsets', noresult=False):
@@ -63,11 +58,11 @@ def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir
                 return
             tup = pickle.load(fh)
     except (pickle.UnpicklingError, FileNotFoundError):
-        df_, dos_ = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart)
-        df_ = df_.sort_index(axis=0, level=0)
-        a_ = df_.copy().stack(dropna=False).sort_index(axis=0, level=0).copy()
-        b_ = df_.copy().unstack().sort_index(axis=0, level=0).copy()
-        tup = df_, a_, b_, dos_
+        df, dios = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart)
+        df = df.sort_index(axis=0, level=0)
+        df_type_a = df.copy().stack(dropna=False).sort_index(axis=0, level=0).copy()
+        df_type_b = df.copy().unstack().sort_index(axis=0, level=0).copy()
+        tup = df, df_type_a, df_type_b, dios
         with open(fpath, 'wb') as fh:
             pickle.dump(tup, fh)
 
diff --git a/profiling/memory.py b/profiling/memory.py
index 81f7f00..d577464 100644
--- a/profiling/memory.py
+++ b/profiling/memory.py
@@ -1,5 +1,5 @@
 import gc
-from profiling import get_testset, _gen_testset
+from profiling.generate_testsets import get_random_df_and_dios
 
 
 def calc_mem(rows, cols, shifted=False, dtypesz=(64 / 8)):
@@ -36,7 +36,7 @@ def rows_by_time(nsec, mdays):
 
 if __name__ == '__main__':
 
-    # dos      - linear in rows and colums, same size for r=10,c=100 or r=100,c=10
+    # dios      - linear in rows and columns, same size for r=10,c=100 or r=100,c=10
     do_real_check = True
     cols = 10
     rows = 100000
@@ -45,14 +45,14 @@ if __name__ == '__main__':
     mem = calc_mem(rows, cols, shifted=False)
     memsh = calc_mem(rows, cols, shifted=True)
 
-    df, _, _, dos = get_testset(rows, cols, disalign=False, randstart=True)
-    dos_mem = dos.memory_usage()
-    print(f"dos:\n-----------")
-    print("mem: ", *bytes2hread(dos_mem))
-    print("entries:", sum([len(dos[e]) for e in dos]))
+    df, dios = get_random_df_and_dios(rows, cols, disalign=False, randstart=True)
+    dios_mem = dios.memory_usage()
+    print(f"dios:\n-----------")
+    print("mem: ", *bytes2hread(dios_mem))
+    print("entries:", sum([len(dios[e]) for e in dios]))
     print()
 
-    ratio = (1 / (memsh - mem) ) * dos_mem
+    ratio = (1 / (memsh - mem) ) * dios_mem
 
     mem = bytes2hread(mem)
     memsh = bytes2hread(memsh)
@@ -66,7 +66,7 @@ if __name__ == '__main__':
     print("entries:", rows * cols)
 
     print()
-    print(f"dfbest, dos, dfworst: 0%, {round(ratio, 4)*100}%, 100% ")
+    print(f"dfbest, dios, dfworst: 0%, {round(ratio, 4)*100}%, 100% ")
 
     if not do_real_check:
         exit(0)
@@ -77,7 +77,7 @@ if __name__ == '__main__':
         # best case
         print()
         print('best case proove')
-        dfb, _ = _gen_testset(rows, cols, disalign=False, randstart=False)
+        dfb, _ = get_random_df_and_dios(rows, cols, disalign=False, randstart=False)
         dfb.info(memory_usage='deep', verbose=False)
 
     print()
@@ -87,7 +87,7 @@ if __name__ == '__main__':
 
     print()
     print('rand start, rand freq')
-    df, _ = get_testset(rows, cols, disalign='random', randstart=True)
+    df, _ = get_random_df_and_dios(rows, cols, disalign='random', randstart=True)
     df.info(memory_usage='deep', verbose=False)
     print("entries:", sum([len(df[e]) for e in df]))
 
@@ -95,7 +95,7 @@ if __name__ == '__main__':
         # worst case
         print()
         print('worst case proove')
-        df, _ = _gen_testset(rows, cols, disalign=True, randstart=False)
+        df, _ = get_random_df_and_dios(rows, cols, disalign=True, randstart=False)
         df.info(memory_usage='deep', verbose=False)
 
     gc.collect()
diff --git a/profiling/performance.py b/profiling/performance.py
index eb5c95a..1be82e8 100644
--- a/profiling/performance.py
+++ b/profiling/performance.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import numpy as np
 import time
-from profiling import get_testset, var_prefix
+from profiling.generate_testsets import get_testset, var_prefix
 
 profile_assignment = False
 
@@ -61,20 +61,20 @@ def b_timings(df, t0, t1, v1, v2):
     return a, b, df
 
 
-def dos_timings(dos, t0, t1, v1, v2):
+def dios_timings(dios, t0, t1, v1, v2):
     _t0 = time.time()
-    a = dos[t0:t1, :]
+    a = dios[t0:t1, :]
     _t1 = time.time()
-    b = dos[:, v1]
+    b = dios[:, v1]
     _t2 = time.time()
     if profile_assignment:
-        dos[t0:t1, v1] = dos[t0:t1, v1] * 1111
+        dios[t0:t1, v1] = dios[t0:t1, v1] * 1111
     _t3 = time.time()
 
     timingsdf.at[rows, ('ts', 'dios')] += _t1 - _t0
     timingsdf.at[rows, ('var', 'dios')] += _t2 - _t1
     timingsdf.at[rows, ('ass', 'dios')] += _t3 - _t2
-    return a, b, dos
+    return a, b, dios
 
 
 def gen_random_timestamps(m, M):
@@ -116,7 +116,7 @@ if __name__ == '__main__':
     use_df = True
     use_a = False
     use_b = False
-    use_dos = True
+    use_dios = True
 
     # plot options
     normalize_to_df = False
@@ -132,7 +132,7 @@ if __name__ == '__main__':
 
         timingsdf.loc[rows] = (0,) * len(timingsdf.columns)
 
-        df, a, b, dos = get_testset(rows, cols)
+        df, a, b, dios = get_testset(rows, cols)
         t0, t4 = find_index_range(df)
 
         if use_df or normalize_to_df:
@@ -153,11 +153,11 @@ if __name__ == '__main__':
                 vr1 = var_prefix + str(np.random.randint(0, cols))
                 b_timings(b, t1, t2, vr1, None)
 
-        if use_dos:
+        if use_dios:
             for r in range(runs):
                 t1, t2 = gen_random_timestamps(t0, t4)
                 vr1 = var_prefix + str(np.random.randint(0, cols))
-                dos_timings(dos, t1, t2, vr1, None)
+                dios_timings(dios, t1, t2, vr1, None)
 
     # calc the average
     timingsdf /= runs
@@ -198,7 +198,7 @@ if __name__ == '__main__':
         a.plot(logy=plot_ylog, logx=plot_xlog, linestyle='--', ax=ax)
     if use_b:
         b.plot(logy=plot_ylog, logx=plot_xlog, linestyle=':', ax=ax)
-    if use_dos:
+    if use_dios:
         dios.plot(logy=plot_ylog, logx=plot_xlog, linestyle='-.', ax=ax)
 
     plt.show()
-- 
GitLab