From 705efccecdf8632612dfb538008efa02c862d615 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 26 Mar 2024 23:20:11 +0100 Subject: [PATCH] feat: make `numpy.h` compatible with both NumPy 1.x and 2.x (#5050) * API: Make `numpy.h` compatible with both NumPy 1.x and 2.x * TST: Update numpy dtype flags test to not covert flags to char * API: Add `numpy2.h` instead and make `numpy.h` safe This means that users of `numpy.h` cannot be broken, but need to update to `numpy2.h` if they want to compile for NumPy 2. Using Macros simply and didn't bother to try to remove unnecessary code paths. * API: Rather than `numpy2.h` use a define for the user. * Thread `PYBIND11_NUMPY2_SUPPORT` through things and try to adept test matrix * Small fixups (shouldn't matter)? * Fixup. Does upgrading scipy help? (it shouldn't?) (Some other small fixup) * Use NumPy 2 nightlies for ubuntu-latest job also * BUG: Fix numpy.bool check * TST: Fix complexwarning * BUG: Fix the fact that only the 50 slot is filled with the copy alias (There were 3 functions all doing the same, only this slot survived 2.x) * TST: One more test tweak * TST: Use "long" name for long, since it changed on windows * TST: Apparently we didn't always have ulong, so just use `L` * TST: Enforce dtype='l' for test as default isn't long anymore on windows * Rename macro and invert logic to PYBIND11_NUMPY_1_ONLY * PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED * Test and code comment expansion * CI: Use pre-releases of numpy/scipy from pip via explicit version * CI: NumPy 2 only available on almalinux (as it is Python >=3.9) * MAINT: Match name more exactly and adopt error phrasing * MAINT: Pushed early, move helper to be private member * fix error message compilation when using NumPy 1.x-only backcompat * silence name shadowing warning * chore: minor optimization Signed-off-by: Henry Schreiner --------- Signed-off-by: Henry Schreiner Co-authored-by: Ralf W. Grosse-Kunstleve Co-authored-by: Henry Schreiner --- .github/workflows/ci.yml | 13 +++- CMakeLists.txt | 5 ++ include/pybind11/cast.h | 14 +++- include/pybind11/detail/common.h | 4 + include/pybind11/numpy.h | 125 ++++++++++++++++++++++++++++--- tests/conftest.py | 1 + tests/pybind11_tests.cpp | 6 ++ tests/test_eigen_matrix.py | 4 +- tests/test_numpy_array.py | 10 ++- tests/test_numpy_dtypes.cpp | 31 +++++++- tests/test_numpy_dtypes.py | 14 +++- 11 files changed, 206 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cf03eebfdf..fd44c1faaf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -108,12 +108,14 @@ jobs: run: python -m pip install pytest-github-actions-annotate-failures # First build - C++11 mode and inplace - # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here. + # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here + # (same for PYBIND11_NUMPY_1_ONLY, but requires a NumPy 1.x at runtime). - name: Configure C++11 ${{ matrix.args }} run: > cmake -S . -B . -DPYBIND11_WERROR=ON -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON + -DPYBIND11_NUMPY_1_ONLY=ON -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DCMAKE_CXX_STANDARD=11 @@ -138,11 +140,13 @@ jobs: # Second build - C++17 mode and in a build directory # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF here. + # (same for PYBIND11_NUMPY_1_ONLY, but requires a NumPy 1.x at runtime). - name: Configure C++17 run: > cmake -S . -B build2 -DPYBIND11_WERROR=ON -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF + -DPYBIND11_NUMPY_1_ONLY=ON -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DCMAKE_CXX_STANDARD=17 @@ -660,6 +664,11 @@ jobs: run: | python3 -m pip install cmake -r tests/requirements.txt + - name: Ensure NumPy 2 is used (required Python >= 3.9) + if: matrix.container == 'almalinux:9' + run: | + python3 -m pip install 'numpy>=2.0.0b1' 'scipy>=1.13.0rc1' + - name: Configure shell: bash run: > @@ -895,8 +904,10 @@ jobs: python-version: ${{ matrix.python }} - name: Prepare env + # Ensure use of NumPy 2 (via NumPy nightlies but can be changed soon) run: | python3 -m pip install -r tests/requirements.txt + python3 -m pip install 'numpy>=2.0.0b1' 'scipy>=1.13.0rc1' - name: Update CMake uses: jwlawson/actions-setup-cmake@v2.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e75e99eb9..890440dc17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,6 +109,8 @@ option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_NOPYTHON "Disable search for Python" OFF) option(PYBIND11_SIMPLE_GIL_MANAGEMENT "Use simpler GIL management logic that does not support disassociation" OFF) +option(PYBIND11_NUMPY_1_ONLY + "Disable NumPy 2 support to avoid changes to previous pybind11 versions." OFF) set(PYBIND11_INTERNALS_VERSION "" CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") @@ -116,6 +118,9 @@ set(PYBIND11_INTERNALS_VERSION if(PYBIND11_SIMPLE_GIL_MANAGEMENT) add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT) endif() +if(PYBIND11_NUMPY_1_ONLY) + add_compile_definitions(PYBIND11_NUMPY_1_ONLY) +endif() cmake_dependent_option( USE_PYTHON_INCLUDE_DIR diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 8b5beb0ef6..3f3f966d08 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -327,8 +327,9 @@ class type_caster { value = false; return true; } - if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) { - // (allow non-implicit conversion for numpy booleans) + if (convert || is_numpy_bool(src)) { + // (allow non-implicit conversion for numpy booleans), use strncmp + // since NumPy 1.x had an additional trailing underscore. Py_ssize_t res = -1; if (src.is_none()) { @@ -360,6 +361,15 @@ class type_caster { return handle(src ? Py_True : Py_False).inc_ref(); } PYBIND11_TYPE_CASTER(bool, const_name("bool")); + +private: + // Test if an object is a NumPy boolean (without fetching the type). + static inline bool is_numpy_bool(handle object) { + const char *type_name = Py_TYPE(object.ptr())->tp_name; + // Name changed to `numpy.bool` in NumPy 2, `numpy.bool_` is needed for 1.x support + return std::strcmp("numpy.bool", type_name) == 0 + || std::strcmp("numpy.bool_", type_name) == 0; + } }; // Helper class for UTF-{8,16,32} C++ stl strings: diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 83800e960b..9f8e13e24f 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -296,6 +296,10 @@ PYBIND11_WARNING_DISABLE_MSVC(4505) # undef copysign #endif +#if defined(PYBIND11_NUMPY_1_ONLY) +# define PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED +#endif + #if defined(PYPY_VERSION) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) # define PYBIND11_SIMPLE_GIL_MANAGEMENT #endif diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 8551aa2648..12a0490e29 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -29,10 +29,15 @@ #include #include +#if defined(PYBIND11_NUMPY_1_ONLY) && !defined(PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED) +# error PYBIND11_NUMPY_1_ONLY must be defined before any pybind11 header is included. +#endif + /* This will be true on all flat address space platforms and allows us to reduce the whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size and dimension types (e.g. shape, strides, indexing), instead of inflicting this - upon the library user. */ + upon the library user. + Note that NumPy 2 now uses ssize_t for `npy_intp` to simplify this. */ static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t"); static_assert(std::is_signed::value, "Py_intptr_t must be signed"); // We now can reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy cares) @@ -53,7 +58,8 @@ struct handle_type_name { template struct npy_format_descriptor; -struct PyArrayDescr_Proxy { +/* NumPy 1 proxy (always includes legacy fields) */ +struct PyArrayDescr1_Proxy { PyObject_HEAD PyObject *typeobj; char kind; @@ -68,6 +74,43 @@ struct PyArrayDescr_Proxy { PyObject *names; }; +#ifndef PYBIND11_NUMPY_1_ONLY +struct PyArrayDescr_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char _former_flags; + int type_num; + /* Additional fields are NumPy version specific. */ +}; +#else +/* NumPy 1.x only, we can expose all fields */ +using PyArrayDescr_Proxy = PyArrayDescr1_Proxy; +#endif + +/* NumPy 2 proxy, including legacy fields */ +struct PyArrayDescr2_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char _former_flags; + int type_num; + std::uint64_t flags; + ssize_t elsize; + ssize_t alignment; + PyObject *metadata; + Py_hash_t hash; + void *reserved_null[2]; + /* The following fields only exist if 0 <= type_num < 2056 */ + char *subarray; + PyObject *fields; + PyObject *names; +}; + struct PyArray_Proxy { PyObject_HEAD char *data; @@ -131,6 +174,14 @@ PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name object numpy_version = numpy_lib.attr("NumpyVersion")(version_string); int major_version = numpy_version.attr("major").cast(); +#ifdef PYBIND11_NUMPY_1_ONLY + if (major_version >= 2) { + throw std::runtime_error( + "This extension was built with PYBIND11_NUMPY_1_ONLY defined, " + "but NumPy 2 is used in this process. For NumPy2 compatibility, " + "this extension needs to be rebuilt without the PYBIND11_NUMPY_1_ONLY define."); + } +#endif /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially became a private module. */ std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core"; @@ -203,6 +254,8 @@ struct npy_api { NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_), }; + unsigned int PyArray_RUNTIME_VERSION_; + struct PyArray_Dims { Py_intptr_t *ptr; int len; @@ -241,6 +294,7 @@ struct npy_api { PyObject *(*PyArray_FromAny_)(PyObject *, PyObject *, int, int, int, PyObject *); int (*PyArray_DescrConverter_)(PyObject *, PyObject **); bool (*PyArray_EquivTypes_)(PyObject *, PyObject *); +#ifdef PYBIND11_NUMPY_1_ONLY int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, @@ -249,6 +303,7 @@ struct npy_api { Py_intptr_t *, PyObject **, PyObject *); +#endif PyObject *(*PyArray_Squeeze_)(PyObject *); // Unused. Not removed because that affects ABI of the class. int (*PyArray_SetBaseObject_)(PyObject *, PyObject *); @@ -266,7 +321,8 @@ struct npy_api { API_PyArray_DescrFromScalar = 57, API_PyArray_FromAny = 69, API_PyArray_Resize = 80, - API_PyArray_CopyInto = 82, + // CopyInto was slot 82 and 50 was effectively an alias. NumPy 2 removed 82. + API_PyArray_CopyInto = 50, API_PyArray_NewCopy = 85, API_PyArray_NewFromDescr = 94, API_PyArray_DescrNewFromType = 96, @@ -275,7 +331,9 @@ struct npy_api { API_PyArray_View = 137, API_PyArray_DescrConverter = 174, API_PyArray_EquivTypes = 182, +#ifdef PYBIND11_NUMPY_1_ONLY API_PyArray_GetArrayParamsFromObject = 278, +#endif API_PyArray_SetBaseObject = 282 }; @@ -290,7 +348,8 @@ struct npy_api { npy_api api; #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); - if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) { + api.PyArray_RUNTIME_VERSION_ = api.PyArray_GetNDArrayCFeatureVersion_(); + if (api.PyArray_RUNTIME_VERSION_ < 0x7) { pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0"); } DECL_NPY_API(PyArray_Type); @@ -309,7 +368,9 @@ struct npy_api { DECL_NPY_API(PyArray_View); DECL_NPY_API(PyArray_DescrConverter); DECL_NPY_API(PyArray_EquivTypes); +#ifdef PYBIND11_NUMPY_1_ONLY DECL_NPY_API(PyArray_GetArrayParamsFromObject); +#endif DECL_NPY_API(PyArray_SetBaseObject); #undef DECL_NPY_API @@ -331,6 +392,14 @@ inline const PyArrayDescr_Proxy *array_descriptor_proxy(const PyObject *ptr) { return reinterpret_cast(ptr); } +inline const PyArrayDescr1_Proxy *array_descriptor1_proxy(const PyObject *ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArrayDescr2_Proxy *array_descriptor2_proxy(const PyObject *ptr) { + return reinterpret_cast(ptr); +} + inline bool check_flags(const void *ptr, int flag) { return (flag == (array_proxy(ptr)->flags & flag)); } @@ -610,10 +679,32 @@ class dtype : public object { } /// Size of the data type in bytes. +#ifdef PYBIND11_NUMPY_1_ONLY ssize_t itemsize() const { return detail::array_descriptor_proxy(m_ptr)->elsize; } +#else + ssize_t itemsize() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->elsize; + } + return detail::array_descriptor2_proxy(m_ptr)->elsize; + } +#endif /// Returns true for structured data types. +#ifdef PYBIND11_NUMPY_1_ONLY bool has_fields() const { return detail::array_descriptor_proxy(m_ptr)->names != nullptr; } +#else + bool has_fields() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->names != nullptr; + } + const auto *proxy = detail::array_descriptor2_proxy(m_ptr); + if (proxy->type_num < 0 || proxy->type_num >= 2056) { + return false; + } + return proxy->names != nullptr; + } +#endif /// Single-character code for dtype's kind. /// For example, floating point types are 'f' and integral types are 'i'. @@ -639,11 +730,29 @@ class dtype : public object { /// Single character for byteorder char byteorder() const { return detail::array_descriptor_proxy(m_ptr)->byteorder; } - /// Alignment of the data type +/// Alignment of the data type +#ifdef PYBIND11_NUMPY_1_ONLY int alignment() const { return detail::array_descriptor_proxy(m_ptr)->alignment; } +#else + ssize_t alignment() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->alignment; + } + return detail::array_descriptor2_proxy(m_ptr)->alignment; + } +#endif - /// Flags for the array descriptor +/// Flags for the array descriptor +#ifdef PYBIND11_NUMPY_1_ONLY char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; } +#else + std::uint64_t flags() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return (unsigned char) detail::array_descriptor1_proxy(m_ptr)->flags; + } + return detail::array_descriptor2_proxy(m_ptr)->flags; + } +#endif private: static object &_dtype_from_pep3118() { @@ -810,9 +919,7 @@ class array : public buffer { } /// Byte size of a single element - ssize_t itemsize() const { - return detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize; - } + ssize_t itemsize() const { return dtype().itemsize(); } /// Total number of bytes ssize_t nbytes() const { return size() * itemsize(); } diff --git a/tests/conftest.py b/tests/conftest.py index ad5b47b4b3..8ebc702224 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -218,4 +218,5 @@ def pytest_report_header(config): f" {pybind11_tests.cpp_std}" f" {pybind11_tests.PYBIND11_INTERNALS_ID}" f" PYBIND11_SIMPLE_GIL_MANAGEMENT={pybind11_tests.PYBIND11_SIMPLE_GIL_MANAGEMENT}" + f" PYBIND11_NUMPY_1_ONLY={pybind11_tests.PYBIND11_NUMPY_1_ONLY}" ) diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp index 4578e4b7fa..81869ebe21 100644 --- a/tests/pybind11_tests.cpp +++ b/tests/pybind11_tests.cpp @@ -95,6 +95,12 @@ PYBIND11_MODULE(pybind11_tests, m) { #else false; #endif + m.attr("PYBIND11_NUMPY_1_ONLY") = +#if defined(PYBIND11_NUMPY_1_ONLY) + true; +#else + false; +#endif bind_ConstructorStats(m); diff --git a/tests/test_eigen_matrix.py b/tests/test_eigen_matrix.py index a486c2f93b..9a2cafc0a6 100644 --- a/tests/test_eigen_matrix.py +++ b/tests/test_eigen_matrix.py @@ -608,7 +608,9 @@ def test_both_ref_mutators(): def test_nocopy_wrapper(): # get_elem requires a column-contiguous matrix reference, but should be # callable with other types of matrix (via copying): - int_matrix_colmajor = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], order="F") + int_matrix_colmajor = np.array( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="l", order="F" + ) dbl_matrix_colmajor = np.array( int_matrix_colmajor, dtype="double", order="F", copy=True ) diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py index 0697daf3ef..25ad09ec3c 100644 --- a/tests/test_numpy_array.py +++ b/tests/test_numpy_array.py @@ -536,7 +536,12 @@ def test_format_descriptors_for_floating_point_types(test_func): @pytest.mark.parametrize("contiguity", [None, "C", "F"]) @pytest.mark.parametrize("noconvert", [False, True]) @pytest.mark.filterwarnings( - "ignore:Casting complex values to real discards the imaginary part:numpy.ComplexWarning" + "ignore:Casting complex values to real discards the imaginary part:" + + ( + "numpy.exceptions.ComplexWarning" + if hasattr(np, "exceptions") + else "numpy.ComplexWarning" + ) ) def test_argument_conversions(forcecast, contiguity, noconvert): function_name = "accept_double" @@ -583,7 +588,8 @@ def test_argument_conversions(forcecast, contiguity, noconvert): def test_dtype_refcount_leak(): from sys import getrefcount - dtype = np.dtype(np.float_) + # Was np.float_ but that alias for float64 was removed in NumPy 2. + dtype = np.dtype(np.float64) a = np.array([1], dtype=dtype) before = getrefcount(dtype) m.ndim(a) diff --git a/tests/test_numpy_dtypes.cpp b/tests/test_numpy_dtypes.cpp index 053004a22f..519b69f400 100644 --- a/tests/test_numpy_dtypes.cpp +++ b/tests/test_numpy_dtypes.cpp @@ -405,10 +405,35 @@ TEST_SUBMODULE(numpy_dtypes, m) { }); // test_dtype + // Below we use `L` for unsigned long as unfortunately the only name that + // works reliably on Both NumPy 2.x and old NumPy 1.x. std::vector dtype_names{ - "byte", "short", "intc", "int_", "longlong", "ubyte", "ushort", - "uintc", "uint", "ulonglong", "half", "single", "double", "longdouble", - "csingle", "cdouble", "clongdouble", "bool_", "datetime64", "timedelta64", "object_"}; + "byte", + "short", + "intc", + "long", + "longlong", + "ubyte", + "ushort", + "uintc", + "L", + "ulonglong", + "half", + "single", + "double", + "longdouble", + "csingle", + "cdouble", + "clongdouble", + "bool_", + "datetime64", + "timedelta64", + "object_", + // platform dependent aliases (int_ and uint are also NumPy version dependent on windows) + "int_", + "uint", + "intp", + "uintp"}; m.def("print_dtypes", []() { py::list l; diff --git a/tests/test_numpy_dtypes.py b/tests/test_numpy_dtypes.py index d10457eeb2..e7854df4a6 100644 --- a/tests/test_numpy_dtypes.py +++ b/tests/test_numpy_dtypes.py @@ -3,6 +3,7 @@ import pytest import env # noqa: F401 +from pybind11_tests import PYBIND11_NUMPY_1_ONLY from pybind11_tests import numpy_dtypes as m np = pytest.importorskip("numpy") @@ -172,13 +173,20 @@ def test_dtype(simple_dtype): np.zeros(1, m.trailing_padding_dtype()) ) - expected_chars = "bhilqBHILQefdgFDG?MmO" - assert m.test_dtype_kind() == list("iiiiiuuuuuffffcccbMmO") + expected_chars = list("bhilqBHILQefdgFDG?MmO") + # Note that int_ and uint size and mapping is NumPy version dependent: + expected_chars += [np.dtype(_).char for _ in ("int_", "uint", "intp", "uintp")] + assert m.test_dtype_kind() == list("iiiiiuuuuuffffcccbMmOiuiu") assert m.test_dtype_char_() == list(expected_chars) assert m.test_dtype_num() == [np.dtype(ch).num for ch in expected_chars] assert m.test_dtype_byteorder() == [np.dtype(ch).byteorder for ch in expected_chars] assert m.test_dtype_alignment() == [np.dtype(ch).alignment for ch in expected_chars] - assert m.test_dtype_flags() == [chr(np.dtype(ch).flags) for ch in expected_chars] + if not PYBIND11_NUMPY_1_ONLY: + assert m.test_dtype_flags() == [np.dtype(ch).flags for ch in expected_chars] + else: + assert m.test_dtype_flags() == [ + chr(np.dtype(ch).flags) for ch in expected_chars + ] def test_recarray(simple_dtype, packed_dtype):