From 7270be1065af7b0b8b9b499c07e5f8b181db2a33 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Thu, 9 Nov 2023 09:56:39 +0100 Subject: [PATCH 1/2] Fix several performance issues when creating cfuncs from very large expressions. --- heyoka/_test_cfunc.py | 49 +++++++++++++++++++++++++++++++++++++++++-- heyoka/cfunc.cpp | 36 ++++++++++++------------------- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/heyoka/_test_cfunc.py b/heyoka/_test_cfunc.py index 161fcb14..ff914fbc 100644 --- a/heyoka/_test_cfunc.py +++ b/heyoka/_test_cfunc.py @@ -12,7 +12,7 @@ class cfunc_test_case(_ut.TestCase): def test_basic(self): - from . import make_cfunc, make_vars, cfunc_dbl, core + from . import make_cfunc, make_vars, cfunc_dbl, core, par, time import pickle from copy import copy, deepcopy @@ -34,7 +34,7 @@ def test_basic(self): "Cannot invoke a default-constructed compiled function" in str(cm.exception) ) - x, y, z = make_vars("x", "y", "z") + x, y, z, s = make_vars("x", "y", "z", "s") cf = make_cfunc([y * (x + z)]) self.assertFalse(cf.llvm_state_scalar.force_avx512) @@ -84,6 +84,51 @@ def test_basic(self): self.assertTrue(cf.llvm_state_batch.force_avx512) self.assertTrue(cf.llvm_state_batch.slp_vectorize) + # Tests for correct detection of number of params, time dependency + # and list of variables. + cf = make_cfunc( + [y * (x + z), x], vars=[y, z, x] + ) + self.assertEqual(cf.param_size, 0) + cf = make_cfunc( + [y * (x + z), par[0]], vars=[y, z, x] + ) + self.assertEqual(cf.param_size, 1) + cf = make_cfunc( + [y * (x + z) - par[89], par[0]], vars=[y, z, x] + ) + self.assertEqual(cf.param_size, 90) + + cf = make_cfunc( + [y * (x + z), x], vars=[y, z, x] + ) + self.assertFalse(cf.is_time_dependent) + cf = make_cfunc( + [y * (x + z) + time, x], vars=[y, z, x] + ) + self.assertTrue(cf.is_time_dependent) + cf = make_cfunc( + [y * (x + z), x + time], vars=[y, z, x] + ) + self.assertTrue(cf.is_time_dependent) + + cf = make_cfunc( + [y * (x + z), x + time] + ) + self.assertEqual(cf.list_var, [x, y, z]) + cf = make_cfunc( + [y * (x + z), x + time], vars=[y, z, x] + ) + self.assertEqual(cf.list_var, [y, z, x]) + cf = make_cfunc( + [y * (x + z), x + time], vars=[y, z, x, s] + ) + self.assertEqual(cf.list_var, [y, z, x, s]) + cf = make_cfunc( + [y * (x + z), x + time], vars=[s, y, z, x] + ) + self.assertEqual(cf.list_var, [s, y, z, x]) + # NOTE: test for a bug in the multiprecision # implementation where the precision is not # correctly copied. diff --git a/heyoka/cfunc.cpp b/heyoka/cfunc.cpp index 3c440d86..ceebacd0 100644 --- a/heyoka/cfunc.cpp +++ b/heyoka/cfunc.cpp @@ -14,10 +14,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -685,6 +683,9 @@ void expose_add_cfunc_impl(py::module &m, const char *suffix) cfunc_inst.def(py::init<>()); cfunc_inst.def("__call__", &cfunc::operator(), "inputs"_a, "outputs"_a = py::none{}, "pars"_a = py::none{}, "time"_a = py::none{}); + cfunc_inst.def_readonly("param_size", &cfunc::nparams); + cfunc_inst.def_readonly("is_time_dependent", &cfunc::is_time_dependent); + // NOTE: these can be probably simplified into def_readonly(). cfunc_inst.def_property_readonly("llvm_state_scalar", [](const cfunc &cf) -> const hey::llvm_state & { return cf.s_scal; }); cfunc_inst.def_property_readonly("llvm_state_batch", @@ -780,12 +781,8 @@ void expose_add_cfunc_impl(py::module &m, const char *suffix) } // Let's figure out if fn contains params and if it is time-dependent. - std::uint32_t nparams = 0; - bool is_time_dependent = false; - for (const auto &ex : fn) { - nparams = std::max(nparams, hey::get_param_size(ex)); - is_time_dependent = is_time_dependent || hey::is_time_dependent(ex); - } + const auto nparams = hey::get_param_size(fn); + const auto is_time_dependent = hey::is_time_dependent(fn); // Cache the number of variables and outputs. // NOTE: static casts are fine, because add_cfunc() @@ -803,21 +800,14 @@ void expose_add_cfunc_impl(py::module &m, const char *suffix) list_var = std::move(*vars); } else { - // NOTE: this is a bit of repetition from add_cfunc(). - // If this becomes an issue, we can consider in the - // future changing add_cfunc() to return also the number - // of detected variables. - std::set dvars; - for (const auto &ex : fn) { - for (const auto &var : hey::get_variables(ex)) { - dvars.emplace(var); - } - } - - nvars = static_cast(dvars.size()); - - std::transform(dvars.begin(), dvars.end(), std::back_inserter(list_var), - [](const auto &str) { return hey::expression{str}; }); + // NOTE: get_variables() returns an ordered list of strings, + // we need to convert it into a list of expressions. + const auto var_slist = hey::get_variables(fn); + list_var.reserve(var_slist.size()); + std::transform(var_slist.begin(), var_slist.end(), std::back_inserter(list_var), + [](const auto &name) { return hey::expression{name}; }); + + nvars = static_cast(list_var.size()); } // Prepare local buffers to store inputs, outputs, pars and time From 38806af355b2ae6cd7f515992e10b1c399ae2227 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Thu, 9 Nov 2023 10:41:09 +0100 Subject: [PATCH 2/2] Update changelog. --- doc/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index 722b6978..953d4cec 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -36,6 +36,8 @@ Changes Fix ~~~ +- Fix slow performance when creating very large compiled functions + (`#144 `__). - Fix building against Python 3.12 (`#139 `__).