Skip to content
Snippets Groups Projects
Commit 8144f3c4 authored by Bert Palm's avatar Bert Palm 🎇
Browse files

dos->dios

parent 200b7aec
No related branches found
No related tags found
2 merge requests!2Develop,!1complete rework
......@@ -44,14 +44,9 @@ def _gen_testset(rowsz, colsz, freq='1min', disalign=True, randstart=True):
return df, dos
def _gen_df(rowsz, colsz, freq='1min', disalign=True, randstart=True):
    """Generate a test set and return only its first item.

    Thin wrapper around ``_gen_testset``: every argument is forwarded
    unchanged, the returned pair is unpacked and the second item is
    discarded.

    :param rowsz: forwarded to ``_gen_testset`` as ``rowsz``
    :param colsz: forwarded to ``_gen_testset`` as ``colsz``
    :param freq: forwarded to ``_gen_testset`` (default ``'1min'``)
    :param disalign: forwarded to ``_gen_testset``
    :param randstart: forwarded to ``_gen_testset``
    :return: the first item of the pair returned by ``_gen_testset``
        (presumably a pandas DataFrame -- confirm against ``_gen_testset``)
    """
    frame, _unused = _gen_testset(
        rowsz, colsz, freq=freq, disalign=disalign, randstart=randstart,
    )
    return frame
def gen_dos(rowsz, colsz, freq='1min', disalign=True, randstart=True):
    """Generate a test set and return only its second item.

    Thin wrapper around ``_gen_testset``: every argument is forwarded
    unchanged, the returned pair is unpacked and the first item is
    discarded.

    :param rowsz: forwarded to ``_gen_testset`` as ``rowsz``
    :param colsz: forwarded to ``_gen_testset`` as ``colsz``
    :param freq: forwarded to ``_gen_testset`` (default ``'1min'``)
    :param disalign: forwarded to ``_gen_testset``
    :param randstart: forwarded to ``_gen_testset``
    :return: the second item of the pair returned by ``_gen_testset``
        (the ``dos`` object; its type is not visible here)
    """
    _unused, container = _gen_testset(
        rowsz, colsz, freq=freq, disalign=disalign, randstart=randstart,
    )
    return container
def get_random_df_and_dios(rowsz, colsz, freq='1min', disalign=True, randstart=True):
    """Return the first and fourth items of a test set from ``get_testset``.

    All arguments are forwarded to ``get_testset`` (``rowsz`` and ``colsz``
    positionally, the rest by keyword).  The result is unpacked; the second
    and third items, and anything after the fourth, are dropped.

    :param rowsz: passed as the first positional argument of ``get_testset``
    :param colsz: passed as the second positional argument of ``get_testset``
    :param freq: forwarded to ``get_testset`` (default ``'1min'``)
    :param disalign: forwarded to ``get_testset``
    :param randstart: forwarded to ``get_testset``
    :return: tuple ``(df, dios)`` -- items one and four of the test set
    """
    testset = get_testset(
        rowsz, colsz, freq=freq, disalign=disalign, randstart=randstart,
    )
    # Only positions 0 and 3 are needed; the starred target absorbs any tail.
    frame, _, _, dios_obj, *_rest = testset
    return frame, dios_obj
def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir='testsets', noresult=False):
......@@ -63,11 +58,11 @@ def get_testset(rows, cols, freq='1s', disalign=True, randstart=True, storagedir
return
tup = pickle.load(fh)
except (pickle.UnpicklingError, FileNotFoundError):
df_, dos_ = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart)
df_ = df_.sort_index(axis=0, level=0)
a_ = df_.copy().stack(dropna=False).sort_index(axis=0, level=0).copy()
b_ = df_.copy().unstack().sort_index(axis=0, level=0).copy()
tup = df_, a_, b_, dos_
df, dios = _gen_testset(rowsz=rows, colsz=cols, freq=freq, disalign=disalign, randstart=randstart)
df = df.sort_index(axis=0, level=0)
df_type_a = df.copy().stack(dropna=False).sort_index(axis=0, level=0).copy()
df_type_b = df.copy().unstack().sort_index(axis=0, level=0).copy()
tup = df, df_type_a, df_type_b, dios
with open(fpath, 'wb') as fh:
pickle.dump(tup, fh)
......
import gc
from profiling import get_testset, _gen_testset
from profiling.generate_testsets import get_random_df_and_dios
def calc_mem(rows, cols, shifted=False, dtypesz=(64 / 8)):
......@@ -36,7 +36,7 @@ def rows_by_time(nsec, mdays):
if __name__ == '__main__':
# dos - linear in rows and columns, same size for r=10,c=100 or r=100,c=10
# dios - linear in rows and columns, same size for r=10,c=100 or r=100,c=10
do_real_check = True
cols = 10
rows = 100000
......@@ -45,14 +45,14 @@ if __name__ == '__main__':
mem = calc_mem(rows, cols, shifted=False)
memsh = calc_mem(rows, cols, shifted=True)
df, _, _, dos = get_testset(rows, cols, disalign=False, randstart=True)
dos_mem = dos.memory_usage()
print(f"dos:\n-----------")
print("mem: ", *bytes2hread(dos_mem))
print("entries:", sum([len(dos[e]) for e in dos]))
df, dios = get_random_df_and_dios(rows, cols, disalign=False, randstart=True)
dios_mem = dios.memory_usage()
print(f"dios:\n-----------")
print("mem: ", *bytes2hread(dios_mem))
print("entries:", sum([len(dios[e]) for e in dios]))
print()
ratio = (1 / (memsh - mem) ) * dos_mem
ratio = (1 / (memsh - mem) ) * dios_mem
mem = bytes2hread(mem)
memsh = bytes2hread(memsh)
......@@ -66,7 +66,7 @@ if __name__ == '__main__':
print("entries:", rows * cols)
print()
print(f"dfbest, dos, dfworst: 0%, {round(ratio, 4)*100}%, 100% ")
print(f"dfbest, dios, dfworst: 0%, {round(ratio, 4)*100}%, 100% ")
if not do_real_check:
exit(0)
......@@ -77,7 +77,7 @@ if __name__ == '__main__':
# best case
print()
print('best case proove')
dfb, _ = _gen_testset(rows, cols, disalign=False, randstart=False)
dfb, _ = get_random_df_and_dios(rows, cols, disalign=False, randstart=False)
dfb.info(memory_usage='deep', verbose=False)
print()
......@@ -87,7 +87,7 @@ if __name__ == '__main__':
print()
print('rand start, rand freq')
df, _ = get_testset(rows, cols, disalign='random', randstart=True)
df, _ = get_random_df_and_dios(rows, cols, disalign='random', randstart=True)
df.info(memory_usage='deep', verbose=False)
print("entries:", sum([len(df[e]) for e in df]))
......@@ -95,7 +95,7 @@ if __name__ == '__main__':
# worst case
print()
print('worst case proove')
df, _ = _gen_testset(rows, cols, disalign=True, randstart=False)
df, _ = get_random_df_and_dios(rows, cols, disalign=True, randstart=False)
df.info(memory_usage='deep', verbose=False)
gc.collect()
import pandas as pd
import numpy as np
import time
from profiling import get_testset, var_prefix
from profiling.generate_testsets import get_testset, var_prefix
profile_assignment = False
......@@ -61,20 +61,20 @@ def b_timings(df, t0, t1, v1, v2):
return a, b, df
def dos_timings(dos, t0, t1, v1, v2):
def dios_timings(dios, t0, t1, v1, v2):
_t0 = time.time()
a = dos[t0:t1, :]
a = dios[t0:t1, :]
_t1 = time.time()
b = dos[:, v1]
b = dios[:, v1]
_t2 = time.time()
if profile_assignment:
dos[t0:t1, v1] = dos[t0:t1, v1] * 1111
dios[t0:t1, v1] = dios[t0:t1, v1] * 1111
_t3 = time.time()
timingsdf.at[rows, ('ts', 'dios')] += _t1 - _t0
timingsdf.at[rows, ('var', 'dios')] += _t2 - _t1
timingsdf.at[rows, ('ass', 'dios')] += _t3 - _t2
return a, b, dos
return a, b, dios
def gen_random_timestamps(m, M):
......@@ -116,7 +116,7 @@ if __name__ == '__main__':
use_df = True
use_a = False
use_b = False
use_dos = True
use_dios = True
# plot options
normalize_to_df = False
......@@ -132,7 +132,7 @@ if __name__ == '__main__':
timingsdf.loc[rows] = (0,) * len(timingsdf.columns)
df, a, b, dos = get_testset(rows, cols)
df, a, b, dios = get_testset(rows, cols)
t0, t4 = find_index_range(df)
if use_df or normalize_to_df:
......@@ -153,11 +153,11 @@ if __name__ == '__main__':
vr1 = var_prefix + str(np.random.randint(0, cols))
b_timings(b, t1, t2, vr1, None)
if use_dos:
if use_dios:
for r in range(runs):
t1, t2 = gen_random_timestamps(t0, t4)
vr1 = var_prefix + str(np.random.randint(0, cols))
dos_timings(dos, t1, t2, vr1, None)
dios_timings(dios, t1, t2, vr1, None)
# calc the average
timingsdf /= runs
......@@ -198,7 +198,7 @@ if __name__ == '__main__':
a.plot(logy=plot_ylog, logx=plot_xlog, linestyle='--', ax=ax)
if use_b:
b.plot(logy=plot_ylog, logx=plot_xlog, linestyle=':', ax=ax)
if use_dos:
if use_dios:
dios.plot(logy=plot_ylog, logx=plot_xlog, linestyle='-.', ax=ax)
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment