From baa540ec34a66cd5c865bbf973dc03b8f945b673 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 18 Jun 2024 16:54:38 -0400 Subject: [PATCH] fix: support free-threaded CPython with GIL disabled (#5148) * Support free-threaded CPython (PEP 703) Some additional locking is added in the free-threaded build when `Py_GIL_DISABLED` is defined: - Most accesses to internals are protected by a single mutex - The registered_instances uses a striped lock to improve concurrency Pybind11 modules can indicate support for running with the GIL disabled by calling `set_gil_not_used()`. * refactor: use PYBIND11_MODULE (#11) Signed-off-by: Henry Schreiner * Address code review * Suppress MSVC warning * Changes from review * style: pre-commit fixes * `py::mod_gil_not_used()` suggestion. * Update include/pybind11/pybind11.h --------- Signed-off-by: Henry Schreiner Co-authored-by: Henry Schreiner Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ralf W. Grosse-Kunstleve --- include/pybind11/detail/class.h | 116 +++++++------ include/pybind11/detail/common.h | 7 +- include/pybind11/detail/internals.h | 152 ++++++++++++++++-- include/pybind11/detail/type_caster_base.h | 48 +++--- include/pybind11/numpy.h | 4 +- include/pybind11/pybind11.h | 131 ++++++++++----- include/pybind11/pytypes.h | 17 ++ tests/cross_module_gil_utils.cpp | 3 + ...s_module_interleaved_error_already_set.cpp | 3 + tests/eigen_tensor_avoid_stl_array.cpp | 4 +- tests/env.py | 2 + tests/pybind11_cross_module_tests.cpp | 2 +- tests/pybind11_tests.cpp | 4 +- tests/test_cmake_build/main.cpp | 2 +- tests/test_embed/external_module.cpp | 2 +- 15 files changed, 356 insertions(+), 141 deletions(-) diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h index 73f30477fb..01f56abb29 100644 --- a/include/pybind11/detail/class.h +++ b/include/pybind11/detail/class.h @@ -205,39 +205,40 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P /// Cleanup the type-info for a pybind11-registered type. extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { - auto *type = (PyTypeObject *) obj; - auto &internals = get_internals(); - - // A pybind11-registered type will: - // 1) be found in internals.registered_types_py - // 2) have exactly one associated `detail::type_info` - auto found_type = internals.registered_types_py.find(type); - if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1 - && found_type->second[0]->type == type) { - - auto *tinfo = found_type->second[0]; - auto tindex = std::type_index(*tinfo->cpptype); - internals.direct_conversions.erase(tindex); - - if (tinfo->module_local) { - get_local_internals().registered_types_cpp.erase(tindex); - } else { - internals.registered_types_cpp.erase(tindex); - } - internals.registered_types_py.erase(tinfo->type); - - // Actually just `std::erase_if`, but that's only available in C++20 - auto &cache = internals.inactive_override_cache; - for (auto it = cache.begin(), last = cache.end(); it != last;) { - if (it->first == (PyObject *) tinfo->type) { - it = cache.erase(it); + with_internals([obj](internals &internals) { + auto *type = (PyTypeObject *) obj; + + // A pybind11-registered type will: + // 1) be found in internals.registered_types_py + // 2) have exactly one associated `detail::type_info` + auto found_type = internals.registered_types_py.find(type); + if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1 + && found_type->second[0]->type == type) { + + auto *tinfo = found_type->second[0]; + auto tindex = std::type_index(*tinfo->cpptype); + internals.direct_conversions.erase(tindex); + + if (tinfo->module_local) { + get_local_internals().registered_types_cpp.erase(tindex); } else { - ++it; + internals.registered_types_cpp.erase(tindex); + } + internals.registered_types_py.erase(tinfo->type); + + // Actually just `std::erase_if`, but that's only available in C++20 + auto &cache = internals.inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last;) { + if (it->first == (PyObject *) tinfo->type) { + it = cache.erase(it); + } else { + ++it; + } } - } - delete tinfo; - } + delete tinfo; + } + }); PyType_Type.tp_dealloc(obj); } @@ -310,19 +311,20 @@ inline void traverse_offset_bases(void *valueptr, } inline bool register_instance_impl(void *ptr, instance *self) { - get_internals().registered_instances.emplace(ptr, self); + with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); }); return true; // unused, but gives the same signature as the deregister func } inline bool deregister_instance_impl(void *ptr, instance *self) { - auto ®istered_instances = get_internals().registered_instances; - auto range = registered_instances.equal_range(ptr); - for (auto it = range.first; it != range.second; ++it) { - if (self == it->second) { - registered_instances.erase(it); - return true; + return with_instance_map(ptr, [&](instance_map &instances) { + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + if (self == it->second) { + instances.erase(it); + return true; + } } - } - return false; + return false; + }); } inline void register_instance(instance *self, void *valptr, const type_info *tinfo) { @@ -377,27 +379,32 @@ extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject } inline void add_patient(PyObject *nurse, PyObject *patient) { - auto &internals = get_internals(); auto *instance = reinterpret_cast(nurse); instance->has_patients = true; Py_INCREF(patient); - internals.patients[nurse].push_back(patient); + + with_internals([&](internals &internals) { internals.patients[nurse].push_back(patient); }); } inline void clear_patients(PyObject *self) { auto *instance = reinterpret_cast(self); - auto &internals = get_internals(); - auto pos = internals.patients.find(self); + std::vector patients; - if (pos == internals.patients.end()) { - pybind11_fail("FATAL: Internal consistency check failed: Invalid clear_patients() call."); - } + with_internals([&](internals &internals) { + auto pos = internals.patients.find(self); + + if (pos == internals.patients.end()) { + pybind11_fail( + "FATAL: Internal consistency check failed: Invalid clear_patients() call."); + } + + // Clearing the patients can cause more Python code to run, which + // can invalidate the iterator. Extract the vector of patients + // from the unordered_map first. + patients = std::move(pos->second); + internals.patients.erase(pos); + }); - // Clearing the patients can cause more Python code to run, which - // can invalidate the iterator. Extract the vector of patients - // from the unordered_map first. - auto patients = std::move(pos->second); - internals.patients.erase(pos); instance->has_patients = false; for (PyObject *&patient : patients) { Py_CLEAR(patient); @@ -662,10 +669,13 @@ inline PyObject *make_new_python_type(const type_record &rec) { char *tp_doc = nullptr; if (rec.doc && options::show_user_defined_docstrings()) { - /* Allocate memory for docstring (using PyObject_MALLOC, since - Python will free this later on) */ + /* Allocate memory for docstring (Python will free this later on) */ size_t size = std::strlen(rec.doc) + 1; +#if PY_VERSION_HEX >= 0x030D0000 + tp_doc = (char *) PyMem_MALLOC(size); +#else tp_doc = (char *) PyObject_MALLOC(size); +#endif std::memcpy((void *) tp_doc, rec.doc, size); } diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 22416b0368..f0f3c50d3d 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -464,7 +464,7 @@ PYBIND11_WARNING_POP }); } \endrst */ -#define PYBIND11_MODULE(name, variable) \ +#define PYBIND11_MODULE(name, variable, ...) \ static ::pybind11::module_::module_def PYBIND11_CONCAT(pybind11_module_def_, name) \ PYBIND11_MAYBE_UNUSED; \ PYBIND11_MAYBE_UNUSED \ @@ -473,7 +473,10 @@ PYBIND11_WARNING_POP PYBIND11_CHECK_PYTHON_VERSION \ PYBIND11_ENSURE_INTERNALS_READY \ auto m = ::pybind11::module_::create_extension_module( \ - PYBIND11_TOSTRING(name), nullptr, &PYBIND11_CONCAT(pybind11_module_def_, name)); \ + PYBIND11_TOSTRING(name), \ + nullptr, \ + &PYBIND11_CONCAT(pybind11_module_def_, name), \ + ##__VA_ARGS__); \ try { \ PYBIND11_CONCAT(pybind11_init_, name)(m); \ return m.ptr(); \ diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index f1b14bc34f..c7ceda2f9e 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -18,6 +18,8 @@ #include "../pytypes.h" #include +#include +#include /// Tracks the `internals` and `type_info` ABI version independent of the main library version. /// @@ -168,15 +170,37 @@ struct override_hash { } }; +using instance_map = std::unordered_multimap; + +// ignore: structure was padded due to alignment specifier +PYBIND11_WARNING_PUSH +PYBIND11_WARNING_DISABLE_MSVC(4324) + +// Instance map shards are used to reduce mutex contention in free-threaded Python. +struct alignas(64) instance_map_shard { + std::mutex mutex; + instance_map registered_instances; +}; + +PYBIND11_WARNING_POP + /// Internal data structure used to track registered instances and types. /// Whenever binary incompatible changes are made to this structure, /// `PYBIND11_INTERNALS_VERSION` must be incremented. struct internals { +#ifdef Py_GIL_DISABLED + std::mutex mutex; +#endif // std::type_index -> pybind11's type information type_map registered_types_cpp; // PyTypeObject* -> base type_info(s) std::unordered_map> registered_types_py; - std::unordered_multimap registered_instances; // void * -> instance* +#ifdef Py_GIL_DISABLED + std::unique_ptr instance_shards; // void * -> instance* + size_t instance_shards_mask; +#else + instance_map registered_instances; // void * -> instance* +#endif std::unordered_set, override_hash> inactive_override_cache; type_map> direct_conversions; @@ -462,7 +486,8 @@ inline object get_python_state_dict() { } inline object get_internals_obj_from_state_dict(handle state_dict) { - return reinterpret_borrow(dict_getitemstring(state_dict.ptr(), PYBIND11_INTERNALS_ID)); + return reinterpret_steal( + dict_getitemstringref(state_dict.ptr(), PYBIND11_INTERNALS_ID)); } inline internals **get_internals_pp_from_capsule(handle obj) { @@ -474,6 +499,20 @@ inline internals **get_internals_pp_from_capsule(handle obj) { return static_cast(raw_ptr); } +inline uint64_t round_up_to_next_pow2(uint64_t x) { + // Round-up to the next power of two. + // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + x--; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + x++; + return x; +} + /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { auto **&internals_pp = get_internals_pp(); @@ -542,6 +581,16 @@ PYBIND11_NOINLINE internals &get_internals() { internals_ptr->static_property_type = make_static_property_type(); internals_ptr->default_metaclass = make_default_metaclass(); internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); +#ifdef Py_GIL_DISABLED + // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. + auto num_shards + = static_cast(round_up_to_next_pow2(2 * std::thread::hardware_concurrency())); + if (num_shards == 0) { + num_shards = 1; + } + internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]); + internals_ptr->instance_shards_mask = num_shards - 1; +#endif // Py_GIL_DISABLED } return **internals_pp; } @@ -602,13 +651,80 @@ inline local_internals &get_local_internals() { return *locals; } +#ifdef Py_GIL_DISABLED +# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) +#else +# define PYBIND11_LOCK_INTERNALS(internals) +#endif + +template +inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) { + auto &internals = get_internals(); + PYBIND11_LOCK_INTERNALS(internals); + return cb(internals); +} + +inline std::uint64_t mix64(std::uint64_t z) { + // David Stafford's variant 13 of the MurmurHash3 finalizer popularized + // by the SplitMix PRNG. + // https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + +template +inline auto with_instance_map(const void *ptr, + const F &cb) -> decltype(cb(std::declval())) { + auto &internals = get_internals(); + +#ifdef Py_GIL_DISABLED + // Hash address to compute shard, but ignore low bits. We'd like allocations + // from the same thread/core to map to the same shard and allocations from + // other threads/cores to map to other shards. Using the high bits is a good + // heuristic because memory allocators often have a per-thread + // arena/superblock/segment from which smaller allocations are served. + auto addr = reinterpret_cast(ptr); + auto hash = mix64(static_cast(addr >> 20)); + auto idx = static_cast(hash & internals.instance_shards_mask); + + auto &shard = internals.instance_shards[idx]; + std::unique_lock lock(shard.mutex); + return cb(shard.registered_instances); +#else + (void) ptr; + return cb(internals.registered_instances); +#endif +} + +// Returns the number of registered instances for testing purposes. The result may not be +// consistent if other threads are registering or unregistering instances concurrently. +inline size_t num_registered_instances() { + auto &internals = get_internals(); +#ifdef Py_GIL_DISABLED + size_t count = 0; + for (size_t i = 0; i <= internals.instance_shards_mask; ++i) { + auto &shard = internals.instance_shards[i]; + std::unique_lock lock(shard.mutex); + count += shard.registered_instances.size(); + } + return count; +#else + return internals.registered_instances.size(); +#endif +} + /// Constructs a std::string with the given arguments, stores it in `internals`, and returns its /// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only /// cleared when the program exits or after interpreter shutdown (when embedding), and so are /// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name). template const char *c_str(Args &&...args) { - auto &strings = get_internals().static_strings; + // GCC 4.8 doesn't like parameter unpack within lambda capture, so use + // PYBIND11_LOCK_INTERNALS. + auto &internals = get_internals(); + PYBIND11_LOCK_INTERNALS(internals); + auto &strings = internals.static_strings; strings.emplace_front(std::forward(args)...); return strings.front().c_str(); } @@ -638,15 +754,18 @@ PYBIND11_NAMESPACE_END(detail) /// pybind11 version) running in the current interpreter. Names starting with underscores /// are reserved for internal usage. Returns `nullptr` if no matching entry was found. PYBIND11_NOINLINE void *get_shared_data(const std::string &name) { - auto &internals = detail::get_internals(); - auto it = internals.shared_data.find(name); - return it != internals.shared_data.end() ? it->second : nullptr; + return detail::with_internals([&](detail::internals &internals) { + auto it = internals.shared_data.find(name); + return it != internals.shared_data.end() ? it->second : nullptr; + }); } /// Set the shared data that can be later recovered by `get_shared_data()`. PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { - detail::get_internals().shared_data[name] = data; - return data; + return detail::with_internals([&](detail::internals &internals) { + internals.shared_data[name] = data; + return data; + }); } /// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if @@ -654,14 +773,15 @@ PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { /// added to the shared data under the given name and a reference to it is returned. template T &get_or_create_shared_data(const std::string &name) { - auto &internals = detail::get_internals(); - auto it = internals.shared_data.find(name); - T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); - if (!ptr) { - ptr = new T(); - internals.shared_data[name] = ptr; - } - return *ptr; + return *detail::with_internals([&](detail::internals &internals) { + auto it = internals.shared_data.find(name); + T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); + if (!ptr) { + ptr = new T(); + internals.shared_data[name] = ptr; + } + return ptr; + }); } PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 037c430a0e..832d849bf1 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -217,12 +217,15 @@ inline detail::type_info *get_local_type_info(const std::type_index &tp) { } inline detail::type_info *get_global_type_info(const std::type_index &tp) { - auto &types = get_internals().registered_types_cpp; - auto it = types.find(tp); - if (it != types.end()) { - return it->second; - } - return nullptr; + return with_internals([&](internals &internals) { + detail::type_info *type_info = nullptr; + auto &types = internals.registered_types_cpp; + auto it = types.find(tp); + if (it != types.end()) { + type_info = it->second; + } + return type_info; + }); } /// Return the type info for a given C++ type; on lookup failure can either throw or return @@ -253,15 +256,17 @@ PYBIND11_NOINLINE handle get_type_handle(const std::type_info &tp, bool throw_if // Searches the inheritance graph for a registered Python instance, using all_type_info(). PYBIND11_NOINLINE handle find_registered_python_instance(void *src, const detail::type_info *tinfo) { - auto it_instances = get_internals().registered_instances.equal_range(src); - for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { - for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { - if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) { - return handle((PyObject *) it_i->second).inc_ref(); + return with_instance_map(src, [&](instance_map &instances) { + auto it_instances = instances.equal_range(src); + for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { + for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { + if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) { + return handle((PyObject *) it_i->second).inc_ref(); + } } } - } - return handle(); + return handle(); + }); } struct value_and_holder { @@ -506,16 +511,17 @@ PYBIND11_NOINLINE bool isinstance_generic(handle obj, const std::type_info &tp) } PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_info *type) { - auto &instances = get_internals().registered_instances; - auto range = instances.equal_range(ptr); - for (auto it = range.first; it != range.second; ++it) { - for (const auto &vh : values_and_holders(it->second)) { - if (vh.type == type) { - return handle((PyObject *) it->second); + return with_instance_map(ptr, [&](instance_map &instances) { + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + for (const auto &vh : values_and_holders(it->second)) { + if (vh.type == type) { + return handle((PyObject *) it->second); + } } } - } - return handle(); + return handle(); + }); } inline PyThreadState *get_thread_state_unchecked() { diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 03abc8e778..05ef3918b1 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -1553,7 +1553,9 @@ PYBIND11_NOINLINE void register_structured_dtype(any_container auto tindex = std::type_index(tinfo); numpy_internals.registered_dtypes[tindex] = {dtype_ptr, std::move(format_str)}; - get_internals().direct_conversions[tindex].push_back(direct_converter); + with_internals([tindex, &direct_converter](internals &internals) { + internals.direct_conversions[tindex].push_back(direct_converter); + }); } template diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index b3fdb7fce3..74919a7d52 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1054,13 +1054,20 @@ class cpp_function : public function { - delegate translation to the next translator by throwing a new type of exception. */ - auto &local_exception_translators - = get_local_internals().registered_exception_translators; - if (detail::apply_exception_translators(local_exception_translators)) { - return nullptr; - } - auto &exception_translators = get_internals().registered_exception_translators; - if (detail::apply_exception_translators(exception_translators)) { + bool handled = with_internals([&](internals &internals) { + auto &local_exception_translators + = get_local_internals().registered_exception_translators; + if (detail::apply_exception_translators(local_exception_translators)) { + return true; + } + auto &exception_translators = internals.registered_exception_translators; + if (detail::apply_exception_translators(exception_translators)) { + return true; + } + return false; + }); + + if (handled) { return nullptr; } @@ -1199,6 +1206,16 @@ struct handle_type_name { PYBIND11_NAMESPACE_END(detail) +// Use to activate Py_MOD_GIL_NOT_USED. +class mod_gil_not_used { +public: + explicit mod_gil_not_used(bool flag = true) : flag_(flag) {} + bool flag() const { return flag_; } + +private: + bool flag_; +}; + /// Wrapper for Python extension modules class module_ : public object { public: @@ -1299,7 +1316,11 @@ class module_ : public object { ``def`` should point to a statically allocated module_def. \endrst */ - static module_ create_extension_module(const char *name, const char *doc, module_def *def) { + static module_ create_extension_module(const char *name, + const char *doc, + module_def *def, + mod_gil_not_used gil_not_used + = mod_gil_not_used(false)) { // module_def is PyModuleDef // Placement new (not an allocation). def = new (def) @@ -1319,6 +1340,11 @@ class module_ : public object { } pybind11_fail("Internal error in module_::create_extension_module()"); } + if (gil_not_used.flag()) { +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif + } // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when // returned from PyInit_... // For Python 2, reinterpret_borrow was correct. @@ -1397,15 +1423,16 @@ class generic_type : public object { tinfo->default_holder = rec.default_holder; tinfo->module_local = rec.module_local; - auto &internals = get_internals(); - auto tindex = std::type_index(*rec.type); - tinfo->direct_conversions = &internals.direct_conversions[tindex]; - if (rec.module_local) { - get_local_internals().registered_types_cpp[tindex] = tinfo; - } else { - internals.registered_types_cpp[tindex] = tinfo; - } - internals.registered_types_py[(PyTypeObject *) m_ptr] = {tinfo}; + with_internals([&](internals &internals) { + auto tindex = std::type_index(*rec.type); + tinfo->direct_conversions = &internals.direct_conversions[tindex]; + if (rec.module_local) { + get_local_internals().registered_types_cpp[tindex] = tinfo; + } else { + internals.registered_types_cpp[tindex] = tinfo; + } + internals.registered_types_py[(PyTypeObject *) m_ptr] = {tinfo}; + }); if (rec.bases.size() > 1 || rec.multiple_inheritance) { mark_parents_nonsimple(tinfo->type); @@ -1618,10 +1645,12 @@ class class_ : public detail::generic_type { generic_type::initialize(record); if (has_alias) { - auto &instances = record.module_local ? get_local_internals().registered_types_cpp - : get_internals().registered_types_cpp; - instances[std::type_index(typeid(type_alias))] - = instances[std::type_index(typeid(type))]; + with_internals([&](internals &internals) { + auto &instances = record.module_local ? get_local_internals().registered_types_cpp + : internals.registered_types_cpp; + instances[std::type_index(typeid(type_alias))] + = instances[std::type_index(typeid(type))]; + }); } } @@ -2336,28 +2365,32 @@ keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) { inline std::pair all_type_info_get_cache(PyTypeObject *type) { - auto res = get_internals() - .registered_types_py + auto res = with_internals([type](internals &internals) { + return internals + .registered_types_py #ifdef __cpp_lib_unordered_map_try_emplace - .try_emplace(type); + .try_emplace(type); #else - .emplace(type, std::vector()); + .emplace(type, std::vector()); #endif + }); if (res.second) { // New cache entry created; set up a weak reference to automatically remove it if the type // gets destroyed: weakref((PyObject *) type, cpp_function([type](handle wr) { - get_internals().registered_types_py.erase(type); - - // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h - auto &cache = get_internals().inactive_override_cache; - for (auto it = cache.begin(), last = cache.end(); it != last;) { - if (it->first == reinterpret_cast(type)) { - it = cache.erase(it); - } else { - ++it; + with_internals([type](internals &internals) { + internals.registered_types_py.erase(type); + + // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h + auto &cache = internals.inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last;) { + if (it->first == reinterpret_cast(type)) { + it = cache.erase(it); + } else { + ++it; + } } - } + }); wr.dec_ref(); })) @@ -2562,7 +2595,11 @@ void implicitly_convertible() { ~set_flag() { flag = false; } }; auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * { +#ifdef Py_GIL_DISABLED + thread_local bool currently_used = false; +#else static bool currently_used = false; +#endif if (currently_used) { // implicit conversions are non-reentrant return nullptr; } @@ -2587,8 +2624,10 @@ void implicitly_convertible() { } inline void register_exception_translator(ExceptionTranslator &&translator) { - detail::get_internals().registered_exception_translators.push_front( - std::forward(translator)); + detail::with_internals([&](detail::internals &internals) { + internals.registered_exception_translators.push_front( + std::forward(translator)); + }); } /** @@ -2598,8 +2637,11 @@ inline void register_exception_translator(ExceptionTranslator &&translator) { * the exception. */ inline void register_local_exception_translator(ExceptionTranslator &&translator) { - detail::get_local_internals().registered_exception_translators.push_front( - std::forward(translator)); + detail::with_internals([&](detail::internals &internals) { + (void) internals; + detail::get_local_internals().registered_exception_translators.push_front( + std::forward(translator)); + }); } /** @@ -2756,14 +2798,19 @@ get_type_override(const void *this_ptr, const type_info *this_type, const char * /* Cache functions that aren't overridden in Python to avoid many costly Python dictionary lookups below */ - auto &cache = get_internals().inactive_override_cache; - if (cache.find(key) != cache.end()) { + bool not_overridden = with_internals([&key](internals &internals) { + auto &cache = internals.inactive_override_cache; + return cache.find(key) != cache.end(); + }); + if (not_overridden) { return function(); } function override = getattr(self, name, function()); if (override.is_cpp_function()) { - cache.insert(std::move(key)); + with_internals([&](internals &internals) { + internals.inactive_override_cache.insert(std::move(key)); + }); return function(); } diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 3d2d0c2dac..f26c307a87 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -980,6 +980,23 @@ inline PyObject *dict_getitem(PyObject *v, PyObject *key) { return rv; } +inline PyObject *dict_getitemstringref(PyObject *v, const char *key) { +#if PY_VERSION_HEX >= 0x030D0000 + PyObject *rv; + if (PyDict_GetItemStringRef(v, key, &rv) < 0) { + throw error_already_set(); + } + return rv; +#else + PyObject *rv = dict_getitemstring(v, key); + if (rv == nullptr && PyErr_Occurred()) { + throw error_already_set(); + } + Py_XINCREF(rv); + return rv; +#endif +} + // Helper aliases/functions to support implicit casting of values given to python // accessors/methods. When given a pyobject, this simply returns the pyobject as-is; for other C++ // type, the value goes through pybind11::cast(obj) to convert it to an `object`. diff --git a/tests/cross_module_gil_utils.cpp b/tests/cross_module_gil_utils.cpp index 7c20849dd9..39112668a4 100644 --- a/tests/cross_module_gil_utils.cpp +++ b/tests/cross_module_gil_utils.cpp @@ -92,6 +92,9 @@ extern "C" PYBIND11_EXPORT PyObject *PyInit_cross_module_gil_utils() { if (m != nullptr) { static_assert(sizeof(&gil_acquire) == sizeof(void *), "Function pointer must have the same size as void*"); +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif ADD_FUNCTION("gil_acquire_funcaddr", gil_acquire) ADD_FUNCTION("gil_multi_acquire_release_funcaddr", gil_multi_acquire_release) ADD_FUNCTION("gil_acquire_inner_custom_funcaddr", diff --git a/tests/cross_module_interleaved_error_already_set.cpp b/tests/cross_module_interleaved_error_already_set.cpp index 3493a7e615..65a4463b8c 100644 --- a/tests/cross_module_interleaved_error_already_set.cpp +++ b/tests/cross_module_interleaved_error_already_set.cpp @@ -42,6 +42,9 @@ extern "C" PYBIND11_EXPORT PyObject *PyInit_cross_module_interleaved_error_alrea if (m != nullptr) { static_assert(sizeof(&interleaved_error_already_set) == sizeof(void *), "Function pointer must have the same size as void *"); +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif PyModule_AddObject( m, "funcaddr", diff --git a/tests/eigen_tensor_avoid_stl_array.cpp b/tests/eigen_tensor_avoid_stl_array.cpp index eacc9e9bd6..5aca66c5ee 100644 --- a/tests/eigen_tensor_avoid_stl_array.cpp +++ b/tests/eigen_tensor_avoid_stl_array.cpp @@ -11,4 +11,6 @@ #include "test_eigen_tensor.inl" -PYBIND11_MODULE(eigen_tensor_avoid_stl_array, m) { eigen_tensor_test::test_module(m); } +PYBIND11_MODULE(eigen_tensor_avoid_stl_array, m, pybind11::mod_gil_not_used()) { + eigen_tensor_test::test_module(m); +} diff --git a/tests/env.py b/tests/env.py index 7eea5a3b3b..4c60f4bbeb 100644 --- a/tests/env.py +++ b/tests/env.py @@ -1,5 +1,6 @@ import platform import sys +import sysconfig import pytest @@ -9,6 +10,7 @@ CPYTHON = platform.python_implementation() == "CPython" PYPY = platform.python_implementation() == "PyPy" +PY_GIL_DISABLED = bool(sysconfig.get_config_var("Py_GIL_DISABLED")) def deprecated_call(): diff --git a/tests/pybind11_cross_module_tests.cpp b/tests/pybind11_cross_module_tests.cpp index ad68e9a54f..76f40bfa97 100644 --- a/tests/pybind11_cross_module_tests.cpp +++ b/tests/pybind11_cross_module_tests.cpp @@ -16,7 +16,7 @@ #include #include -PYBIND11_MODULE(pybind11_cross_module_tests, m) { +PYBIND11_MODULE(pybind11_cross_module_tests, m, py::mod_gil_not_used()) { m.doc() = "pybind11 cross-module test module"; // test_local_bindings.py tests: diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp index 15cbf74545..3d2d84e77a 100644 --- a/tests/pybind11_tests.cpp +++ b/tests/pybind11_tests.cpp @@ -58,7 +58,7 @@ void bind_ConstructorStats(py::module_ &m) { // registered instances to allow instance cleanup checks (invokes a GC first) .def_static("detail_reg_inst", []() { ConstructorStats::gc(); - return py::detail::get_internals().registered_instances.size(); + return py::detail::num_registered_instances(); }); } @@ -75,7 +75,7 @@ const char *cpp_std() { #endif } -PYBIND11_MODULE(pybind11_tests, m) { +PYBIND11_MODULE(pybind11_tests, m, py::mod_gil_not_used()) { m.doc() = "pybind11 test module"; // Intentionally kept minimal to not create a maintenance chore diff --git a/tests/test_cmake_build/main.cpp b/tests/test_cmake_build/main.cpp index e30f2c4b9a..640449c31b 100644 --- a/tests/test_cmake_build/main.cpp +++ b/tests/test_cmake_build/main.cpp @@ -1,6 +1,6 @@ #include namespace py = pybind11; -PYBIND11_MODULE(test_cmake_build, m) { +PYBIND11_MODULE(test_cmake_build, m, py::mod_gil_not_used()) { m.def("add", [](int i, int j) { return i + j; }); } diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp index 5c482fe061..6564ecbef5 100644 --- a/tests/test_embed/external_module.cpp +++ b/tests/test_embed/external_module.cpp @@ -6,7 +6,7 @@ namespace py = pybind11; * modules aren't preserved over a finalize/initialize. */ -PYBIND11_MODULE(external_module, m) { +PYBIND11_MODULE(external_module, m, py::mod_gil_not_used()) { class A { public: explicit A(int value) : v{value} {};