diff --git a/benchmarks/data/test_tools.py b/benchmarks/data/test_tools.py index 0912b7a11141d2a46a72e1f41b44179cbe90eb89..c9baf1a880818e171c2064993d9e0cb3df0ac6d5 100644 --- a/benchmarks/data/test_tools.py +++ b/benchmarks/data/test_tools.py @@ -4,15 +4,18 @@ import unittest import pytest import finam as fm -from finam.data import ( +from finam.data.tools import ( assign_time, check, + compatible_units, + equivalent_units, full, full_like, get_magnitude, get_time, get_units, has_time, + is_quantified, strip_data, strip_time, to_units, @@ -188,6 +191,13 @@ class TestTimeTools(unittest.TestCase): xdata = full(0.0, "test", info, time) _result = self.benchmark(get_time, xdata=xdata) + @pytest.mark.benchmark(group="data-tools") + def test_get_time_neg(self): + time = dt.datetime(1800, 1, 1) + info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="m") + xdata = full(0.0, "test", info, time) + _result = self.benchmark(get_time, xdata=xdata) + @pytest.mark.benchmark(group="data-tools") def test_has_time(self): time = dt.datetime(2000, 1, 1) @@ -208,6 +218,36 @@ class TestUnitsTools(unittest.TestCase): xdata = full(0.0, "test", info, time) _result = self.benchmark(get_units, xdata=xdata) + @pytest.mark.benchmark(group="data-tools") + def test_is_quantified(self): + time = dt.datetime(2000, 1, 1) + info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="m") + xdata = full(0.0, "test", info, time) + _result = self.benchmark(is_quantified, xdata=xdata) + + @pytest.mark.benchmark(group="data-tools") + def test_equivalent_units_true(self): + time = dt.datetime(2000, 1, 1) + info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="mm") + xdata = full(0.0, "test", info, time) + result = self.benchmark(equivalent_units, unit1=xdata, unit2="L/m^2") + self.assertTrue(result) + + @pytest.mark.benchmark(group="data-tools") + def test_equivalent_units_False(self): + time = dt.datetime(2000, 1, 1) + info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="mm") + xdata = full(0.0, "test", info, time) + result = self.benchmark(equivalent_units, unit1=xdata, unit2="m") + self.assertFalse(result) + + @pytest.mark.benchmark(group="data-tools") + def test_compatible_units(self): + time = dt.datetime(2000, 1, 1) + info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="mm") + xdata = full(0.0, "test", info, time) + _result = self.benchmark(compatible_units, unit1=xdata, unit2="km") + @pytest.mark.benchmark(group="data-tools-slow") def test_to_units_01_2x1(self): time = dt.datetime(2000, 1, 1) @@ -229,21 +269,21 @@ class TestUnitsTools(unittest.TestCase): xdata = full(0.0, "test", info, time) _result = self.benchmark(to_units, xdata=xdata, units="in") - @pytest.mark.benchmark(group="data-tools-slow") + @pytest.mark.benchmark(group="data-tools") def test_to_units_noop_01_2x1(self): time = dt.datetime(2000, 1, 1) info = fm.Info(time=time, grid=fm.UniformGrid((2, 1)), units="m") xdata = full(0.0, "test", info, time) _result = self.benchmark(to_units, xdata=xdata, units="m") - @pytest.mark.benchmark(group="data-tools-slow") + @pytest.mark.benchmark(group="data-tools") def test_to_units_noop_02_512x256(self): time = dt.datetime(2000, 1, 1) info = fm.Info(time=time, grid=fm.UniformGrid((512, 256)), units="m") xdata = full(0.0, "test", info, time) _result = self.benchmark(to_units, xdata=xdata, units="m") - @pytest.mark.benchmark(group="data-tools-slow") + @pytest.mark.benchmark(group="data-tools") def test_to_units_noop_03_2048x1024(self): time = dt.datetime(2000, 1, 1) info = fm.Info(time=time, grid=fm.UniformGrid((2048, 1024)), units="m") diff --git a/benchmarks/profiling/simple_numpy.py b/benchmarks/profiling/simple_numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..4a6a4d9869470466db8c91686efb5edd415fee0a --- /dev/null +++ b/benchmarks/profiling/simple_numpy.py @@ -0,0 +1,57 @@ +"""Simple coupling setup for profiling, using numpy arrays. + +Two components, coupled via a single link. + +Simulation runs for 1 year with a daily step in both components. +Components exchange a 128x64 uniform grid. +""" +import datetime as dt + +import finam as fm + + +def run_model(): + start_time = dt.datetime(2000, 1, 1) + end_time = dt.datetime(2000, 12, 31) + + counter = 0 + + size = (128, 64) + + info1 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m") + info2 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m") + data = [ + fm.data.strip_data(fm.data.full(0.0, "input", info1, start_time)), + fm.data.strip_data(fm.data.full(0.0, "input", info1, start_time)), + ] + + def gen_data(t): + nonlocal counter + d = data[counter % 2] + counter += 1 + return d + + source = fm.modules.CallbackGenerator( + callbacks={"Out": (gen_data, info1.copy())}, + start=start_time, + step=dt.timedelta(days=1), + ) + sink = fm.modules.DebugConsumer( + inputs={ + "In": info2.copy(), + }, + start=start_time, + step=dt.timedelta(days=1), + ) + + composition = fm.Composition([source, sink]) + composition.initialize() + + source["Out"] >> sink["In"] + + composition.run(end_time=end_time) + + +if __name__ == "__main__": + for i in range(10): + run_model() diff --git a/benchmarks/profiling/simple_run.py b/benchmarks/profiling/simple_xarray.py similarity index 100% rename from benchmarks/profiling/simple_run.py rename to benchmarks/profiling/simple_xarray.py diff --git a/benchmarks/run/test_run.py b/benchmarks/run/test_run.py index d5391bc4c8388e4a488c6bbc4f0bef78e273eaf3..86a633be3a37a9d2d605c7479c60b479752a8265 100644 --- a/benchmarks/run/test_run.py +++ b/benchmarks/run/test_run.py @@ -135,3 +135,54 @@ class TestSimpleRunUnits(SimpleRunBase): @pytest.mark.benchmark(group="run-sim") def test_run_units_08_2048x1024(self): self.run_test(2048, 1024) + + +class TestSimpleRunNumpy(SimpleRunBase): + @pytest.fixture(autouse=True) + def setupBenchmark(self, benchmark): + self.setup(benchmark) + + def setup_data(self, size): + self.info1 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m") + self.info2 = fm.Info(time=None, grid=fm.UniformGrid(size), units="m") + self.data = [ + fm.data.strip_data(fm.data.full(0.0, "input", self.info1, self.start_time)), + fm.data.strip_data(fm.data.full(0.0, "input", self.info1, self.start_time)), + ] + + def gen_data(self, t): + d = self.data[self.counter % 2] + self.counter += 1 + return d + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_01_2x1(self): + self.run_test(2, 1) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_02_32x16(self): + self.run_test(32, 16) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_03_64x32(self): + self.run_test(64, 32) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_04_128x64(self): + self.run_test(128, 64) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_05_256x128(self): + self.run_test(256, 128) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_06_512x256(self): + self.run_test(512, 256) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_07_1024x512(self): + self.run_test(1024, 512) + + @pytest.mark.benchmark(group="run-sim") + def test_run_numpy_08_2048x1024(self): + self.run_test(2048, 1024) diff --git a/benchmarks/run_profiling.sh b/benchmarks/run_profiling.sh index 3e978005bfd317fcb4fe072247ee28329b23e391..7e4aa2ba51edb1930471fc83250c5ec1c4558054 100755 --- a/benchmarks/run_profiling.sh +++ b/benchmarks/run_profiling.sh @@ -1,10 +1,16 @@ +#!/bin/bash echo Profiling... mkdir -p prof -python -m cProfile -o prof/simple_run.pstats benchmarks/profiling/simple_run.py -gprof2dot --colour-nodes-by-selftime -f pstats prof/simple_run.pstats > prof/simple_run.dot -dot -Tsvg -o prof/simple_run.svg prof/simple_run.dot -dot -Tpng -o prof/simple_run.png prof/simple_run.dot +for filename in benchmarks/profiling/*.py; do + fn=$(basename -- "$filename") + fn="${fn%.*}" + echo "$fn" + python -m cProfile -o prof/"$fn".pstats benchmarks/profiling/"$fn".py + gprof2dot --colour-nodes-by-selftime -f pstats prof/"$fn".pstats > prof/"$fn".dot + dot -Tsvg -o prof/"$fn".svg prof/"$fn".dot + dot -Tpng -o prof/"$fn".png prof/"$fn".dot +done python benchmarks/pstats_to_csv.py diff --git a/src/finam/data/tools.py b/src/finam/data/tools.py index 3850516ed18c4a5c453fc53942d1f7849ce6ef4b..9614d5c653b506887d5ba226f914c68741039c3b 100644 --- a/src/finam/data/tools.py +++ b/src/finam/data/tools.py @@ -26,18 +26,6 @@ pint_xarray.unit_registry.default_format = "cf" UNITS = pint_xarray.unit_registry -def _extract_units(xdata): - """ - extract the units of an array - - If ``xdata.data`` is not a quantity, the units are ``None`` - """ - try: - return xdata.data.units - except AttributeError: - return None - - def _gen_dims(ndim, info): """ Generate dimension names. @@ -204,7 +192,6 @@ def has_time_axis(xdata): bool Whether the data has a time axis. """ - check_quantified(xdata, "has_time") return "time" in xdata.coords @@ -248,7 +235,8 @@ def assign_time(xdata, time): if isinstance(time, datetime.datetime): time = [time] if has_time_axis(xdata): - return xdata.assign_coords(dict(time=[pd.Timestamp(t) for t in time])) + xdata.coords["time"].data[:] = [pd.Timestamp(t) for t in time] + return xdata return xdata.expand_dims(dim="time", axis=0).assign_coords( dict(time=[pd.Timestamp(t) for t in time]) @@ -272,11 +260,29 @@ def get_time(xdata): if has_time_axis(xdata): time = xdata["time"] if time.size == 1: - time = [time.item()] - return list(pd.to_datetime(time).to_pydatetime()) + return [to_datetime(time.data[0])] + return [to_datetime(t) for t in time.data] return None +_BASE_DATETIME = datetime.datetime(1970, 1, 1) +_BASE_TIME = np.datetime64("1970-01-01T00:00:00") +_BASE_DELTA = np.timedelta64(1, "s") + + +def to_datetime(date): + """Converts a numpy datetime64 object to a python datetime object""" + if np.isnan(date): + return pd.NaT + + timestamp = (date - _BASE_TIME) / _BASE_DELTA + + if timestamp < 0: + return _BASE_DATETIME + datetime.timedelta(seconds=timestamp) + + return datetime.datetime.utcfromtimestamp(timestamp) + + def get_magnitude(xdata): """ Get magnitude of given data. @@ -368,7 +374,6 @@ def get_units(xdata): pint.Unit Units of the data. """ - check_quantified(xdata, "get_units") return xdata.pint.units @@ -407,7 +412,10 @@ def to_units(xdata, units): Converted data. """ check_quantified(xdata, "to_units") - return xdata.pint.to(pint.Unit(units)) + units = UNITS.Unit(units) + if units == xdata.pint.units: + return xdata + return xdata.pint.to(units) def full_like(xdata, value): @@ -591,7 +599,7 @@ def is_quantified(xdata): bool Wether the data is a quantified DataArray. """ - return isinstance(xdata, xr.DataArray) and _extract_units(xdata) is not None + return isinstance(xdata, xr.DataArray) and xdata.pint.units is not None def quantify(xdata): @@ -634,7 +642,11 @@ def check_quantified(xdata, routine="check_quantified"): def _get_pint_units(var): if var is None: raise FinamDataError("Can't extract units from 'None'.") - return get_units(var) if is_quantified(var) else UNITS.Unit(var) + + if isinstance(var, xr.DataArray): + return var.pint.units or UNITS.dimensionless + + return UNITS.Unit(var) def compatible_units(unit1, unit2): @@ -674,8 +686,10 @@ def equivalent_units(unit1, unit2): Unit equivalence. """ unit1, unit2 = _get_pint_units(unit1), _get_pint_units(unit2) - ratio = ((1 * unit1) / (1 * unit2)).to_base_units() - return ratio.dimensionless and np.isclose(ratio.magnitude, 1) + try: + return np.isclose((1.0 * unit1).to(unit2).magnitude, 1.0) + except pint.errors.DimensionalityError: + return False def assert_type(cls, slot, obj, types): diff --git a/src/finam/tools/connect_helper.py b/src/finam/tools/connect_helper.py index 9a6906800d100307ebaebd443ac9d9ffbdc5f385..024c18433055e92fa05ebcdff4e508b5e3f8d65a 100644 --- a/src/finam/tools/connect_helper.py +++ b/src/finam/tools/connect_helper.py @@ -526,10 +526,10 @@ class ConnectHelper(Loggable): out.push_data(data, None) elif info_time != time: if isinstance(data, xr.DataArray): - data_1 = assign_time(data, time) + data_1 = assign_time(data.copy(), time) out.push_data(data_1, time) data_2 = assign_time(data, info_time) - out.push_data(data_2.copy(), info_time) + out.push_data(data_2, info_time) else: out.push_data(data, time) out.push_data(copy.copy(data), info_time) diff --git a/tests/core/test_schedule.py b/tests/core/test_schedule.py index b71a8fdaae5fdb642d138e1a373ce84bc43ac465..2fc16202e22559e9e6bc58971ae7c09af7e15db8 100644 --- a/tests/core/test_schedule.py +++ b/tests/core/test_schedule.py @@ -963,7 +963,7 @@ class TestComposition(unittest.TestCase): return t.day def lambda_component(inp, t): - return {"Out": fm.data.assign_time(inp["In"], t)} + return {"Out": fm.data.assign_time(inp["In"].copy(), t)} def lambda_debugger(name, data, t): updates[name].append(t.day) diff --git a/tests/data/test_tools.py b/tests/data/test_tools.py index b628f04a41b0c42f66beb5f53e10fa00471471b1..a41ac033cf2a5a7098ee9f79cb3384f6094c93ad 100644 --- a/tests/data/test_tools.py +++ b/tests/data/test_tools.py @@ -147,6 +147,7 @@ class TestDataTools(unittest.TestCase): self.assertEqual(xdata.shape, (1,)) self.assertTrue(finam.data.has_time_axis(xdata)) self.assertTrue(finam.data.has_time(xdata)) + self.assertEqual(finam.data.get_time(xdata), [time]) xdata = xr.concat( [ @@ -162,6 +163,7 @@ class TestDataTools(unittest.TestCase): ) self.assertTrue(finam.data.has_time_axis(xdata)) self.assertTrue(finam.data.has_time(xdata)) + self.assertEqual(finam.data.get_time(xdata), [time, dt(2000, 1, 2)]) xdata = finam.data.to_xarray(1.0, "data", finam.Info(None, grid=finam.NoGrid())) self.assertEqual(xdata.shape, (1,))