Commit bfddf831 authored by Martin Lange

Merge branch 'benchmark-base-run' into 'main'

Benchmark full run, profiling

See merge request !221
parents eb5bfd41 ec7c5199
Pipeline #132928 passed with stages in 15 minutes and 19 seconds
.gitignore:

@@ -50,6 +50,7 @@ coverage.xml
 *.cover
 .hypothesis/
 /bench/
+/prof/

 # Translations
 *.mo
...
.gitlab-ci.yml:

@@ -40,6 +40,19 @@ benchmark:
     paths:
       - bench

+profile:
+  stage: test
+  before_script:
+    - apt-get update -y
+    - apt-get install -y graphviz
+    - pip3 install graphviz gprof2dot
+  script:
+    - pip3 install --editable .[test]
+    - ./benchmarks/run_profiling.sh
+  artifacts:
+    paths:
+      - prof
+
 doctest:
   stage: test
   script:
...
benchmarks/README.md:

# FINAM benchmarks

Micro-benchmarks and profiling for important FINAM functions and full runs.

Note that plot panels have different units!
`ms` is milliseconds (1/1,000 second), `us` is microseconds (1/1,000,000 second).
## Full runs

**Profiling data** for full runs can be found in the latest [job artifacts](https://git.ufz.de/FINAM/finam/-/jobs/artifacts/main/browse/prof?job=profile).
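Beyond the rendered call graphs, the downloaded `.pstats` files can be inspected with Python's built-in `pstats` module. A minimal sketch (the file name matches the output of `run_profiling.sh` below):

```python
import pstats

# Load the cProfile output from the downloaded artifacts
stats = pstats.Stats("prof/simple_run.pstats")
# Print the 20 entries with the largest cumulative time
stats.sort_stats("cumulative").print_stats(20)
```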
### Simple link, 365 steps

A simple run over one year, with two coupled components using a daily time step.
Left: without units conversion; right: with units conversion.
The source component assigns time to a data array in each step.
The remaining time is spent on data exchange and scheduling (negligible).

![run-sim benchmarks](https://git.ufz.de/FINAM/finam/-/jobs/artifacts/main/raw/bench/bench-run-sim.svg?job=benchmark)
## SDK

### Push & pull

...
"""Simple coupling setup for profiling.
Two components, coupled via a single link.
Simulation runs for 1 year with a daily step in both components.
Components exchange a 128x64 uniform grid.
"""
import datetime as dt
import finam as fm
def run_model():
start_time = dt.datetime(2000, 1, 1)
end_time = dt.datetime(2000, 12, 31)
counter = 0
size = (128, 64)
info1 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m")
info2 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m")
data = [
fm.data.full(0.0, "input", info1, start_time),
fm.data.full(0.0, "input", info1, start_time),
]
def gen_data(t):
nonlocal counter
d = data[counter % 2]
counter += 1
d = fm.data.assign_time(d, t)
return d
source = fm.modules.CallbackGenerator(
callbacks={"Out": (gen_data, info1.copy())},
start=start_time,
step=dt.timedelta(days=1),
)
sink = fm.modules.DebugConsumer(
inputs={
"In": info2.copy(),
},
start=start_time,
step=dt.timedelta(days=1),
)
composition = fm.Composition([source, sink])
composition.initialize()
source["Out"] >> sink["In"]
composition.run(end_time=end_time)
if __name__ == "__main__":
for i in range(10):
run_model()
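As an alternative to calling this script through `run_profiling.sh` (below), the same profiling can be done directly from Python with the standard `cProfile` and `pstats` modules. A minimal sketch, assuming it is executed from `benchmarks/profiling/` so that `simple_run` is importable:

```python
import cProfile
import pstats

from simple_run import run_model

# Profile a single run and dump the raw statistics to a file
cProfile.run("run_model()", "simple_run.pstats")
# Show the ten functions with the largest total (self) time
pstats.Stats("simple_run.pstats").sort_stats("tottime").print_stats(10)
```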
benchmarks/pstats_to_csv.py:

import io
import os
import pstats


def _convert_to_csv(in_path, out_path):
    # Render the pstats report to a string
    result = io.StringIO()
    pstats.Stats(in_path, stream=result).print_stats()
    result = result.getvalue()
    # Cut off the report preamble; keep everything from the column header on
    result = "ncalls" + result.split("ncalls")[-1]
    # Convert the whitespace-separated columns to CSV (6 columns per row)
    result = "\n".join(
        ",".join(line.rstrip().split(None, 5)) for line in result.split("\n")
    )
    with open(out_path, "w") as f:
        f.write(result)


if __name__ == "__main__":
    path = "prof/"
    for file in os.listdir(path):
        if file.endswith(".pstats"):
            in_file = os.path.join(path, file)
            out_file = os.path.join(path, file.replace(".pstats", ".csv"))
            _convert_to_csv(in_file, out_file)
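The resulting CSV files keep pstats' column header (`ncalls,tottime,percall,cumtime,percall,filename:lineno(function)`). A minimal sketch for reading one of them, assuming the `prof/simple_run.csv` file produced by `run_profiling.sh`:

```python
import csv

with open("prof/simple_run.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        if len(row) == 6:  # skip blank or malformed lines
            ncalls, cumtime, func = row[0], row[3], row[5]
            print(ncalls, cumtime, func)
```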
Benchmark test module:

import datetime as dt
import unittest

import pytest

import finam as fm


class SimpleRunBase(unittest.TestCase):
    def setup(self, benchmark):
        self.benchmark = benchmark
        self.start_time = dt.datetime(2000, 1, 1)
        self.end_time = dt.datetime(2000, 12, 31)
        self.counter = 0

    def gen_data(self, t):
        # Alternate between the two pre-allocated arrays, only re-stamping time
        d = self.data[self.counter % 2]
        self.counter += 1
        d = fm.data.assign_time(d, t)
        return d

    def run_simulation(self):
        source = fm.modules.CallbackGenerator(
            callbacks={"Out": (self.gen_data, self.info1.copy())},
            start=self.start_time,
            step=dt.timedelta(days=1),
        )
        sink = fm.modules.DebugConsumer(
            inputs={
                "In": self.info2.copy(),
            },
            start=self.start_time,
            step=dt.timedelta(days=1),
        )

        self.composition = fm.Composition([source, sink])
        self.composition.initialize()

        source["Out"] >> sink["In"]

        self.composition.run(end_time=self.end_time)

    def run_test(self, sx, sy):
        self.setup_data(size=(sx, sy))
        self.benchmark(self.run_simulation)


class TestSimpleRun(SimpleRunBase):
    @pytest.fixture(autouse=True)
    def setupBenchmark(self, benchmark):
        self.setup(benchmark)

    def setup_data(self, size):
        # Identical units on both ends: no conversion during data exchange
        self.info1 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m")
        self.info2 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m")
        self.data = [
            fm.data.full(0.0, "input", self.info1, self.start_time),
            fm.data.full(0.0, "input", self.info1, self.start_time),
        ]

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_01_2x1(self):
        self.run_test(2, 1)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_02_32x16(self):
        self.run_test(32, 16)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_03_64x32(self):
        self.run_test(64, 32)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_04_128x64(self):
        self.run_test(128, 64)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_05_256x128(self):
        self.run_test(256, 128)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_06_512x256(self):
        self.run_test(512, 256)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_07_1024x512(self):
        self.run_test(1024, 512)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_simple_08_2048x1024(self):
        self.run_test(2048, 1024)


class TestSimpleRunUnits(SimpleRunBase):
    @pytest.fixture(autouse=True)
    def setupBenchmark(self, benchmark):
        self.setup(benchmark)

    def setup_data(self, size):
        # Differing units (m -> km): forces a units conversion on each exchange
        self.info1 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m")
        self.info2 = fm.Info(time=None, grid=fm.UniformGrid(size), units="km")
        self.data = [
            fm.data.full(0.0, "input", self.info1, self.start_time),
            fm.data.full(0.0, "input", self.info1, self.start_time),
        ]

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_01_2x1(self):
        self.run_test(2, 1)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_02_32x16(self):
        self.run_test(32, 16)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_03_64x32(self):
        self.run_test(64, 32)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_04_128x64(self):
        self.run_test(128, 64)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_05_256x128(self):
        self.run_test(256, 128)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_06_512x256(self):
        self.run_test(512, 256)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_07_1024x512(self):
        self.run_test(1024, 512)

    @pytest.mark.benchmark(group="run-sim")
    def test_run_units_08_2048x1024(self):
        self.run_test(2048, 1024)
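The `benchmark` fixture and `@pytest.mark.benchmark` markers used above come from the pytest-benchmark plugin (an assumption based on the fixture name; presumably pulled in via the `.[test]` extra). A minimal, self-contained example of the same mechanism:

```python
import pytest


@pytest.mark.benchmark(group="example")
def test_sum(benchmark):
    # benchmark() runs the callable repeatedly and records timing statistics
    result = benchmark(sum, range(100))
    assert result == 4950
```

Run with, e.g., `pytest --benchmark-only` to execute only the benchmarks.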
benchmarks/run_profiling.sh:

echo Profiling...

mkdir -p prof

# Profile the full example run with cProfile
python -m cProfile -o prof/simple_run.pstats benchmarks/profiling/simple_run.py

# Render call graphs (SVG and PNG) from the profiling output
gprof2dot --colour-nodes-by-selftime -f pstats prof/simple_run.pstats > prof/simple_run.dot
dot -Tsvg -o prof/simple_run.svg prof/simple_run.dot
dot -Tpng -o prof/simple_run.png prof/simple_run.dot

# Convert the raw profiling output to CSV tables
python benchmarks/pstats_to_csv.py
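The script expects Graphviz (for `dot`) and the `gprof2dot` package on the path; in CI, both are installed in the `profile` job's `before_script` shown above, and all outputs land in `prof/`, which the job exports as artifacts.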