Skip to content
Snippets Groups Projects
Commit 8144f3c4 authored by Bert Palm 🎇
Browse files

dos->dios

parent 200b7aec
No related branches found
No related tags found
2 merge requests: !2 "Develop", !1 "complete rework"
...@@ -44,14 +44,9 @@ def _gen_testset(rowsz, colsz, freq='1min', disalign=True, randstart=True): ...@@ -44,14 +44,9 @@ def _gen_testset(rowsz, colsz, freq='1min', disalign=True, randstart=True):
return df, dos return df, dos
def _gen_df(rowsz, colsz, freq='1min', disalign=True, randstart=True): def get_random_df_and_dios(rowsz, colsz, freq='1min', disalign=True, randstart=True):
df, _ = _gen_testset(rowsz=rowsz, colsz=colsz, freq=freq, disalign=disalign, randstart=randstart) df, _, _, dios, *_ = get_testset(rowsz, colsz, freq=freq, disalign=disalign, randstart=randstart)
return df return df, dios
def gen_dos(rowsz, colsz, freq='1min', disalign=True, randstart=True):
_, dos = _gen_testset(rowsz=rowsz, colsz=colsz, freq=freq, disalign=disalign, randstart=randstart)
return dos
def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir='testsets', noresult=False): def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir='testsets', noresult=False):
...@@ -63,11 +58,11 @@ def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir ...@@ -63,11 +58,11 @@ def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir
return return
tup = pickle.load(fh) tup = pickle.load(fh)
except (pickle.UnpicklingError, FileNotFoundError): except (pickle.UnpicklingError, FileNotFoundError):
df_, dos_ = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart) df, dios = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart)
df_ = df_.sort_index(axis=0, level=0) df = df.sort_index(axis=0, level=0)
a_ = df_.copy().stack(dropna=False).sort_index(axis=0, level=0).copy() df_type_a = df.copy().stack(dropna=False).sort_index(axis=0, level=0).copy()
b_ = df_.copy().unstack().sort_index(axis=0, level=0).copy() df_type_b = df.copy().unstack().sort_index(axis=0, level=0).copy()
tup = df_, a_, b_, dos_ tup = df, df_type_a, df_type_b, dios
with open(fpath, 'wb') as fh: with open(fpath, 'wb') as fh:
pickle.dump(tup, fh) pickle.dump(tup, fh)
......
import gc import gc
from profiling import get_testset, _gen_testset from profiling.generate_testsets import get_random_df_and_dios
def calc_mem(rows, cols, shifted=False, dtypesz=(64 / 8)): def calc_mem(rows, cols, shifted=False, dtypesz=(64 / 8)):
...@@ -36,7 +36,7 @@ def rows_by_time(nsec, mdays): ...@@ -36,7 +36,7 @@ def rows_by_time(nsec, mdays):
if __name__ == '__main__': if __name__ == '__main__':
# dos - linear in rows and colums, same size for r=10,c=100 or r=100,c=10 # dios - linear in rows and colums, same size for r=10,c=100 or r=100,c=10
do_real_check = True do_real_check = True
cols = 10 cols = 10
rows = 100000 rows = 100000
...@@ -45,14 +45,14 @@ if __name__ == '__main__': ...@@ -45,14 +45,14 @@ if __name__ == '__main__':
mem = calc_mem(rows, cols, shifted=False) mem = calc_mem(rows, cols, shifted=False)
memsh = calc_mem(rows, cols, shifted=True) memsh = calc_mem(rows, cols, shifted=True)
df, _, _, dos = get_testset(rows, cols, disalign=False, randstart=True) df, dios = get_random_df_and_dios(rows, cols, disalign=False, randstart=True)
dos_mem = dos.memory_usage() dios_mem = dios.memory_usage()
print(f"dos:\n-----------") print(f"dios:\n-----------")
print("mem: ", *bytes2hread(dos_mem)) print("mem: ", *bytes2hread(dios_mem))
print("entries:", sum([len(dos[e]) for e in dos])) print("entries:", sum([len(dios[e]) for e in dios]))
print() print()
ratio = (1 / (memsh - mem) ) * dos_mem ratio = (1 / (memsh - mem) ) * dios_mem
mem = bytes2hread(mem) mem = bytes2hread(mem)
memsh = bytes2hread(memsh) memsh = bytes2hread(memsh)
...@@ -66,7 +66,7 @@ if __name__ == '__main__': ...@@ -66,7 +66,7 @@ if __name__ == '__main__':
print("entries:", rows * cols) print("entries:", rows * cols)
print() print()
print(f"dfbest, dos, dfworst: 0%, {round(ratio, 4)*100}%, 100% ") print(f"dfbest, dios, dfworst: 0%, {round(ratio, 4)*100}%, 100% ")
if not do_real_check: if not do_real_check:
exit(0) exit(0)
...@@ -77,7 +77,7 @@ if __name__ == '__main__': ...@@ -77,7 +77,7 @@ if __name__ == '__main__':
# best case # best case
print() print()
print('best case proove') print('best case proove')
dfb, _ = _gen_testset(rows, cols, disalign=False, randstart=False) dfb, _ = get_random_df_and_dios(rows, cols, disalign=False, randstart=False)
dfb.info(memory_usage='deep', verbose=False) dfb.info(memory_usage='deep', verbose=False)
print() print()
...@@ -87,7 +87,7 @@ if __name__ == '__main__': ...@@ -87,7 +87,7 @@ if __name__ == '__main__':
print() print()
print('rand start, rand freq') print('rand start, rand freq')
df, _ = get_testset(rows, cols, disalign='random', randstart=True) df, _ = get_random_df_and_dios(rows, cols, disalign='random', randstart=True)
df.info(memory_usage='deep', verbose=False) df.info(memory_usage='deep', verbose=False)
print("entries:", sum([len(df[e]) for e in df])) print("entries:", sum([len(df[e]) for e in df]))
...@@ -95,7 +95,7 @@ if __name__ == '__main__': ...@@ -95,7 +95,7 @@ if __name__ == '__main__':
# worst case # worst case
print() print()
print('worst case proove') print('worst case proove')
df, _ = _gen_testset(rows, cols, disalign=True, randstart=False) df, _ = get_random_df_and_dios(rows, cols, disalign=True, randstart=False)
df.info(memory_usage='deep', verbose=False) df.info(memory_usage='deep', verbose=False)
gc.collect() gc.collect()
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import time import time
from profiling import get_testset, var_prefix from profiling.generate_testsets import get_testset, var_prefix
profile_assignment = False profile_assignment = False
...@@ -61,20 +61,20 @@ def b_timings(df, t0, t1, v1, v2): ...@@ -61,20 +61,20 @@ def b_timings(df, t0, t1, v1, v2):
return a, b, df return a, b, df
def dos_timings(dos, t0, t1, v1, v2): def dios_timings(dios, t0, t1, v1, v2):
_t0 = time.time() _t0 = time.time()
a = dos[t0:t1, :] a = dios[t0:t1, :]
_t1 = time.time() _t1 = time.time()
b = dos[:, v1] b = dios[:, v1]
_t2 = time.time() _t2 = time.time()
if profile_assignment: if profile_assignment:
dos[t0:t1, v1] = dos[t0:t1, v1] * 1111 dios[t0:t1, v1] = dios[t0:t1, v1] * 1111
_t3 = time.time() _t3 = time.time()
timingsdf.at[rows, ('ts', 'dios')] += _t1 - _t0 timingsdf.at[rows, ('ts', 'dios')] += _t1 - _t0
timingsdf.at[rows, ('var', 'dios')] += _t2 - _t1 timingsdf.at[rows, ('var', 'dios')] += _t2 - _t1
timingsdf.at[rows, ('ass', 'dios')] += _t3 - _t2 timingsdf.at[rows, ('ass', 'dios')] += _t3 - _t2
return a, b, dos return a, b, dios
def gen_random_timestamps(m, M): def gen_random_timestamps(m, M):
...@@ -116,7 +116,7 @@ if __name__ == '__main__': ...@@ -116,7 +116,7 @@ if __name__ == '__main__':
use_df = True use_df = True
use_a = False use_a = False
use_b = False use_b = False
use_dos = True use_dios = True
# plot options # plot options
normalize_to_df = False normalize_to_df = False
...@@ -132,7 +132,7 @@ if __name__ == '__main__': ...@@ -132,7 +132,7 @@ if __name__ == '__main__':
timingsdf.loc[rows] = (0,) * len(timingsdf.columns) timingsdf.loc[rows] = (0,) * len(timingsdf.columns)
df, a, b, dos = get_testset(rows, cols) df, a, b, dios = get_testset(rows, cols)
t0, t4 = find_index_range(df) t0, t4 = find_index_range(df)
if use_df or normalize_to_df: if use_df or normalize_to_df:
...@@ -153,11 +153,11 @@ if __name__ == '__main__': ...@@ -153,11 +153,11 @@ if __name__ == '__main__':
vr1 = var_prefix + str(np.random.randint(0, cols)) vr1 = var_prefix + str(np.random.randint(0, cols))
b_timings(b, t1, t2, vr1, None) b_timings(b, t1, t2, vr1, None)
if use_dos: if use_dios:
for r in range(runs): for r in range(runs):
t1, t2 = gen_random_timestamps(t0, t4) t1, t2 = gen_random_timestamps(t0, t4)
vr1 = var_prefix + str(np.random.randint(0, cols)) vr1 = var_prefix + str(np.random.randint(0, cols))
dos_timings(dos, t1, t2, vr1, None) dios_timings(dios, t1, t2, vr1, None)
# calc the average # calc the average
timingsdf /= runs timingsdf /= runs
...@@ -198,7 +198,7 @@ if __name__ == '__main__': ...@@ -198,7 +198,7 @@ if __name__ == '__main__':
a.plot(logy=plot_ylog, logx=plot_xlog, linestyle='--', ax=ax) a.plot(logy=plot_ylog, logx=plot_xlog, linestyle='--', ax=ax)
if use_b: if use_b:
b.plot(logy=plot_ylog, logx=plot_xlog, linestyle=':', ax=ax) b.plot(logy=plot_ylog, logx=plot_xlog, linestyle=':', ax=ax)
if use_dos: if use_dios:
dios.plot(logy=plot_ylog, logx=plot_xlog, linestyle='-.', ax=ax) dios.plot(logy=plot_ylog, logx=plot_xlog, linestyle='-.', ax=ax)
plt.show() plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment