diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8dcf48928e..45f8675889 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -485,6 +485,11 @@ jobs: steps: - uses: actions/checkout@v6 + - name: Clean out unused stuff to save space + run: | + sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc /opt/hostedtoolcache/CodeQL + sudo apt-get clean + - name: Add NVHPC Repo run: | echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | \ @@ -492,10 +497,11 @@ jobs: - name: Install 🐍 3 & NVHPC run: | - sudo apt-get update -y && \ - sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy && \ - sudo apt-get install -y --no-install-recommends nvhpc-25-11 && \ + sudo apt-get update -y + sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy + sudo apt-get install -y --no-install-recommends nvhpc-25-11 sudo rm -rf /var/lib/apt/lists/* + apt-cache depends nvhpc-25-11 python3 -m pip install --upgrade pip python3 -m pip install --upgrade pytest diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h index 21e966cfea..480c369aa6 100644 --- a/include/pybind11/detail/class.h +++ b/include/pybind11/detail/class.h @@ -207,7 +207,7 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P /// Cleanup the type-info for a pybind11-registered type. extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { - with_internals([obj](internals &internals) { + with_internals_if_internals([obj](internals &internals) { auto *type = (PyTypeObject *) obj; // A pybind11-registered type will: diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index a92f196b1f..d66cf72cc7 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -103,7 +103,7 @@ class thread_specific_storage { // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call // requires a living Python interpreter. #ifdef GRAALVM_PYTHON - if (!Py_IsInitialized() || _Py_IsFinalizing()) { + if (Py_IsInitialized() == 0 || _Py_IsFinalizing() != 0) { return; } #endif @@ -195,6 +195,14 @@ struct override_hash { using instance_map = std::unordered_multimap; +inline bool is_interpreter_alive() { +#if PY_VERSION_HEX < 0x030D0000 + return Py_IsInitialized() != 0 || _Py_IsFinalizing() != 0; +#else + return Py_IsInitialized() != 0 || Py_IsFinalizing() != 0; +#endif +} + #ifdef Py_GIL_DISABLED // Wrapper around PyMutex to provide BasicLockable semantics class pymutex { @@ -308,7 +316,17 @@ struct internals { internals(internals &&other) = delete; internals &operator=(const internals &other) = delete; internals &operator=(internals &&other) = delete; - ~internals() = default; + ~internals() { + // Normally this destructor runs during interpreter finalization and it may DECREF things. + // In odd finalization scenarios it might end up running after the interpreter has + // completely shut down, In that case, we should not decref these objects because pymalloc + // is gone. + if (is_interpreter_alive()) { + Py_CLEAR(instance_base); + Py_CLEAR(default_metaclass); + Py_CLEAR(static_property_type); + } + } }; // the internals struct (above) is shared between all the modules. local_internals are only @@ -325,6 +343,16 @@ struct local_internals { std::forward_list registered_exception_translators; PyTypeObject *function_record_py_type = nullptr; + + ~local_internals() { + // Normally this destructor runs during interpreter finalization and it may DECREF things. + // In odd finalization scenarios it might end up running after the interpreter has + // completely shut down, In that case, we should not decref these objects because pymalloc + // is gone. + if (is_interpreter_alive()) { + Py_CLEAR(function_record_py_type); + } + } }; enum class holder_enum_t : uint8_t { @@ -569,7 +597,7 @@ inline object get_python_state_dict() { // The bool follows std::map::insert convention: true = created, false = existed. template std::pair atomic_get_or_create_in_state_dict(const char *key, - bool clear_destructor = false) { + void (*dtor)(PyObject *) = nullptr) { error_scope err_scope; // preserve any existing Python error states auto state_dict = reinterpret_borrow(get_python_state_dict()); @@ -586,16 +614,13 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, // Use unique_ptr for exception safety: if capsule creation throws, the storage is // automatically deleted. auto storage_ptr = std::unique_ptr(new Payload{}); - // Create capsule with destructor to clean up when the interpreter shuts down. - auto new_capsule = capsule( - storage_ptr.get(), - // The destructor will be called when the capsule is GC'ed. - // - If our capsule is inserted into the dict below, it will be kept alive until - // interpreter shutdown, so the destructor will be called at that time. - // - If our capsule is NOT inserted (another thread inserted first), it will be - // destructed when going out of scope here, so the destructor will be called - // immediately, which will also free the storage. - /*destructor=*/[](void *ptr) -> void { delete static_cast(ptr); }); + auto new_capsule + = capsule(storage_ptr.get(), + // The destructor will be called when the capsule is GC'ed. + // If the insert below fails (entry already in the dict), then this + // destructor will be called on the newly created capsule at the end of this + // function, and we want to just release this memory. + /*destructor=*/[](void *v) { delete static_cast(v); }); // At this point, the capsule object is created successfully. // Release the unique_ptr and let the capsule object own the storage to avoid double-free. (void) storage_ptr.release(); @@ -613,17 +638,16 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, throw error_already_set(); } created = (capsule_obj == new_capsule.ptr()); - if (clear_destructor && created) { - // Our capsule was inserted. - // Remove the destructor to leak the storage on interpreter shutdown. - if (PyCapsule_SetDestructor(capsule_obj, nullptr) < 0) { + // - If key already existed, our `new_capsule` is not inserted, it will be destructed when + // going out of scope here, and will call the destructor set above. + // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state + // dict will incref it. We need to set the caller's destructor on it, which will be + // called when the interpreter shuts down. + if (created && dtor) { + if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) { throw error_already_set(); } } - // - If key already existed, our `new_capsule` is not inserted, it will be destructed when - // going out of scope here, which will also free the storage. - // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state - // dict will incref it. } // Get the storage pointer from the capsule. @@ -707,14 +731,27 @@ class internals_pp_manager { internals_pp_manager(char const *id, on_fetch_function *on_fetch) : holder_id_(id), on_fetch_(on_fetch) {} + static void internals_shutdown(PyObject *capsule) { + auto *pp = static_cast *>( + PyCapsule_GetPointer(capsule, nullptr)); + if (pp) { + pp->reset(); + } + // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here. + // The pp_manager in this module (and possibly other modules sharing internals) holds + // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now. + // + // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is + // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the + // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension + // loaded into an external interpreter), destroy() is never called and the unique_ptr + // shell (8 bytes, not its contents) is leaked. + // (See PR #5958 for ideas to eliminate this leak.) + } + std::unique_ptr *get_or_create_pp_in_state_dict() { - // The `unique_ptr` output is leaked on interpreter shutdown. Once an - // instance is created, it will never be deleted until the process exits (compare to - // interpreter shutdown in multiple-interpreter scenarios). - // Because we cannot guarantee the order of destruction of capsules in the interpreter - // state dict, leaking avoids potential use-after-free issues during interpreter shutdown. auto result = atomic_get_or_create_in_state_dict>( - holder_id_, /*clear_destructor=*/true); + holder_id_, &internals_shutdown); auto *pp = result.first; bool created = result.second; // Only call on_fetch_ when fetching existing internals, not when creating new ones. @@ -834,6 +871,17 @@ inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) { return cb(internals); } +template +inline void with_internals_if_internals(const F &cb) { + auto &ppmgr = get_internals_pp_manager(); + auto &internals_ptr = *ppmgr.get_pp(); + if (internals_ptr) { + auto &internals = *internals_ptr; + PYBIND11_LOCK_INTERNALS(internals); + cb(internals); + } +} + template inline auto with_exception_translators(const F &cb) -> decltype(cb(get_internals().registered_exception_translators,