diff --git a/libcudacxx/include/cuda/__container/buffer.h b/libcudacxx/include/cuda/__container/buffer.h index 61dd1f279c9..4a8dda97d7b 100644 --- a/libcudacxx/include/cuda/__container/buffer.h +++ b/libcudacxx/include/cuda/__container/buffer.h @@ -4,7 +4,7 @@ // under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// @@ -724,7 +724,7 @@ _CCCL_TEMPLATE(class _Tp, _CCCL_REQUIRES( ::cuda::mr::synchronous_resource_with<::cuda::std::decay_t<_Resource>, _FirstProperty, _RestProperties...> _CCCL_AND __buffer_compatible_env<_Env>) -buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( +_CCCL_HOST_API buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( stream_ref __stream, _Resource&& __mr, const buffer<_Tp, _SourceProperties...>& __source, const _Env& __env = {}) { buffer<_Tp, _FirstProperty, _RestProperties...> __res{ @@ -742,13 +742,13 @@ buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( //! @param __env The environment providing additional configuration. # ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer( +_CCCL_HOST_API auto make_buffer( stream_ref __stream, _Resource&& __mr, const buffer<_Tp, _SourceProperties...>& __source, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class... 
_SourceProperties, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES(::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries<::cuda::std::decay_t<_Resource>>) -auto make_buffer( +_CCCL_HOST_API auto make_buffer( stream_ref __stream, _Resource&& __mr, const buffer<_Tp, _SourceProperties...>& __source, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; @@ -778,13 +778,13 @@ make_buffer(stream_ref __stream, _Resource&& __mr, const _Env& __env = {}) //! @param __env The environment providing additional configuration. # ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer(stream_ref __stream, _Resource&& __mr, const _Env& __env = {}); +_CCCL_HOST_API auto make_buffer(stream_ref __stream, _Resource&& __mr, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES(::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries<::cuda::std::decay_t<_Resource>> _CCCL_AND __buffer_compatible_env<_Env>) -auto make_buffer(stream_ref __stream, _Resource&& __mr, const _Env& __env = {}) +_CCCL_HOST_API auto make_buffer(stream_ref __stream, _Resource&& __mr, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; return __buffer_type{__stream, ::cuda::std::forward<_Resource>(__mr), __env}; @@ -799,7 +799,7 @@ _CCCL_TEMPLATE( _CCCL_REQUIRES( ::cuda::mr::synchronous_resource_with<::cuda::std::decay_t<_Resource>, _FirstProperty, _RestProperties...> _CCCL_AND __buffer_compatible_env<_Env>) -buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( +_CCCL_HOST_API buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( stream_ref __stream, _Resource&& __mr, size_t 
__size, const _Tp& __value, [[maybe_unused]] const _Env& __env = {}) { auto __res = @@ -817,12 +817,13 @@ buffer<_Tp, _FirstProperty, _RestProperties...> make_buffer( //! @param __env The environment providing additional configuration. # ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, const _Tp& __value, const _Env& __env = {}); +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, const _Tp& __value, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES(::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries<::cuda::std::decay_t<_Resource>>) -auto make_buffer( +_CCCL_HOST_API auto make_buffer( stream_ref __stream, _Resource&& __mr, size_t __size, const _Tp& __value, [[maybe_unused]] const _Env& __env = {}) { using __default_queries = typename ::cuda::std::decay_t<_Resource>::default_queries; @@ -856,12 +857,14 @@ make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, ::cuda::no_ini //! @param __env The environment providing additional configuration. 
# ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, ::cuda::no_init_t, const _Env& __env = {}); +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, ::cuda::no_init_t, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES(::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> - _CCCL_AND ::cuda::mr::__has_default_queries<_Resource>) -auto make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, ::cuda::no_init_t, const _Env& __env = {}) + _CCCL_AND ::cuda::mr::__has_default_queries<::cuda::std::decay_t<_Resource>>) +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, size_t __size, ::cuda::no_init_t, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; return __buffer_type{__stream, ::cuda::std::forward<_Resource>(__mr), __size, ::cuda::no_init, __env}; @@ -893,13 +896,15 @@ make_buffer(stream_ref __stream, _Resource&& __mr, _Iter __first, _Iter __last, //! @param __env The environment providing additional configuration. 
# ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer(stream_ref __stream, _Resource&& __mr, _Iter __first, _Iter __last, const _Env& __env = {}); +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, _Iter __first, _Iter __last, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Iter, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES( - ::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries<_Resource> - _CCCL_AND ::cuda::std::__has_forward_traversal<_Iter>) -auto make_buffer(stream_ref __stream, _Resource&& __mr, _Iter __first, _Iter __last, const _Env& __env = {}) + ::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries< + ::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::std::__has_forward_traversal<_Iter>) +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, _Iter __first, _Iter __last, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; return __buffer_type{__stream, ::cuda::std::forward<_Resource>(__mr), __first, __last, __env}; @@ -926,14 +931,14 @@ make_buffer(stream_ref __stream, _Resource&& __mr, ::cuda::std::initializer_list //! @param __env The environment providing additional configuration. 
# ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer( - stream_ref __stream, _Resource&& __mr, ::cuda::std::initializer_list<_Tp> __ilist, const _Env& __env = {}); +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, ::cuda::std::initializer_list<_Tp> __ilist, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES(::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries<::cuda::std::decay_t<_Resource>>) -auto make_buffer( - stream_ref __stream, _Resource&& __mr, ::cuda::std::initializer_list<_Tp> __ilist, const _Env& __env = {}) +_CCCL_HOST_API auto +make_buffer(stream_ref __stream, _Resource&& __mr, ::cuda::std::initializer_list<_Tp> __ilist, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; return __buffer_type{__stream, ::cuda::std::forward<_Resource>(__mr), __ilist, __env}; @@ -964,13 +969,13 @@ make_buffer(stream_ref __stream, _Resource&& __mr, _Range&& __range, const _Env& //! @param __env The environment providing additional configuration. 
# ifdef _CCCL_DOXYGEN_INVOKED template > -auto make_buffer(stream_ref __stream, _Resource&& __mr, _Range&& __range, const _Env& __env = {}); +_CCCL_HOST_API auto make_buffer(stream_ref __stream, _Resource&& __mr, _Range&& __range, const _Env& __env = {}); # else // ^^^ _CCCL_DOXYGEN_INVOKED ^^^ / vvv !_CCCL_DOXYGEN_INVOKED vvv _CCCL_TEMPLATE(class _Tp, class _Resource, class _Range, class _Env = ::cuda::std::execution::env<>) _CCCL_REQUIRES( ::cuda::mr::synchronous_resource<::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::mr::__has_default_queries< ::cuda::std::decay_t<_Resource>> _CCCL_AND ::cuda::std::ranges::forward_range<_Range>) -auto make_buffer(stream_ref __stream, _Resource&& __mr, _Range&& __range, const _Env& __env = {}) +_CCCL_HOST_API auto make_buffer(stream_ref __stream, _Resource&& __mr, _Range&& __range, const _Env& __env = {}) { using __buffer_type = __buffer_type_for_props<_Tp, typename ::cuda::std::decay_t<_Resource>::default_queries>; return __buffer_type{__stream, ::cuda::std::forward<_Resource>(__mr), ::cuda::std::forward<_Range>(__range), __env}; diff --git a/libcudacxx/include/cuda/__container/make_buffer_with_pool.h b/libcudacxx/include/cuda/__container/make_buffer_with_pool.h new file mode 100644 index 00000000000..42e653664dc --- /dev/null +++ b/libcudacxx/include/cuda/__container/make_buffer_with_pool.h @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___CONTAINER_MAKE_BUFFER_WITH_POOL_H +#define _CUDA___CONTAINER_MAKE_BUFFER_WITH_POOL_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if _CCCL_HAS_CTK() + +# include +# include + +# if _CCCL_CTK_AT_LEAST(12, 9) +# include +# endif // _CCCL_CTK_AT_LEAST(12, 9) + +# if _CCCL_CTK_AT_LEAST(13, 0) +# include +# endif // _CCCL_CTK_AT_LEAST(13, 0) + +# include + +_CCCL_BEGIN_NAMESPACE_CUDA + +//! @brief Creates a buffer backed by the default device memory pool. +//! @param __stream The stream used for allocation. +//! @param __device The device whose default memory pool will be used. +//! @param __args Remaining arguments forwarded to `make_buffer`. +//! @see make_buffer for the full set of supported argument combinations. +template +_CCCL_HOST_API auto make_device_buffer(stream_ref __stream, device_ref __device, _Args&&... __args) +{ + return ::cuda::make_buffer<_Tp>( + __stream, ::cuda::device_default_memory_pool(__device), ::cuda::std::forward<_Args>(__args)...); +} + +# if _CCCL_CTK_AT_LEAST(12, 9) + +//! @brief Creates a buffer backed by the default pinned memory pool. +//! @param __stream The stream used for allocation. +//! @param __args Remaining arguments forwarded to `make_buffer`. +//! @see make_buffer for the full set of supported argument combinations. +template +_CCCL_HOST_API auto make_pinned_buffer(stream_ref __stream, _Args&&... __args) +{ + return ::cuda::make_buffer<_Tp>( + __stream, ::cuda::pinned_default_memory_pool(), ::cuda::std::forward<_Args>(__args)...); +} + +# endif // _CCCL_CTK_AT_LEAST(12, 9) + +# if _CCCL_CTK_AT_LEAST(13, 0) + +//! @brief Creates a buffer backed by the default managed memory pool. +//! 
@param __stream The stream used for allocation. +//! @param __args Remaining arguments forwarded to `make_buffer`. +//! @see make_buffer for the full set of supported argument combinations. +template +_CCCL_HOST_API auto make_managed_buffer(stream_ref __stream, _Args&&... __args) +{ + return ::cuda::make_buffer<_Tp>( + __stream, ::cuda::managed_default_memory_pool(), ::cuda::std::forward<_Args>(__args)...); +} + +# endif // _CCCL_CTK_AT_LEAST(13, 0) + +_CCCL_END_NAMESPACE_CUDA + +# include + +#endif // _CCCL_HAS_CTK() + +#endif // _CUDA___CONTAINER_MAKE_BUFFER_WITH_POOL_H diff --git a/libcudacxx/include/cuda/buffer b/libcudacxx/include/cuda/buffer index a999eaa109e..379f2b237d1 100644 --- a/libcudacxx/include/cuda/buffer +++ b/libcudacxx/include/cuda/buffer @@ -4,7 +4,7 @@ // under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// @@ -23,5 +23,6 @@ #include #include +#include #endif // _CUDA_BUFFER diff --git a/libcudacxx/test/libcudacxx/cuda/containers/buffer/constructor.cu b/libcudacxx/test/libcudacxx/cuda/containers/buffer/constructor.cu index 661f176ce45..1d18d5ceee1 100644 --- a/libcudacxx/test/libcudacxx/cuda/containers/buffer/constructor.cu +++ b/libcudacxx/test/libcudacxx/cuda/containers/buffer/constructor.cu @@ -4,7 +4,7 @@ // under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
// //===----------------------------------------------------------------------===// @@ -513,3 +513,162 @@ C2H_CCCLRT_TEST("cuda::make_buffer with shared_resource", "[container][buffer]") static_assert(decltype(buf)::properties_list::has_property(cuda::mr::device_accessible{})); static_assert(!decltype(buf)::properties_list::has_property(cuda::mr::host_accessible{})); } + +// ── make_device_buffer ────────────────────────────────────────────────────── + +C2H_CCCLRT_TEST("cuda::make_device_buffer", "[container][buffer]") +{ + cuda::stream stream{cuda::device_ref{0}}; + cuda::device_ref dev{0}; + + SECTION("empty") + { + auto buf = cuda::make_device_buffer(stream, dev); + CCCLRT_CHECK(buf.empty()); + CCCLRT_CHECK(buf.data() == nullptr); + STATIC_CHECK(decltype(buf)::properties_list::has_property(cuda::mr::device_accessible{})); + STATIC_CHECK(!decltype(buf)::properties_list::has_property(cuda::mr::host_accessible{})); + } + + SECTION("size with value") + { + auto buf = cuda::make_device_buffer(stream, dev, 5, 42); + CCCLRT_CHECK(buf.size() == 5); + CCCLRT_CHECK(equal_size_value(buf, 5, 42)); + } + + SECTION("size with no_init") + { + auto buf = cuda::make_device_buffer(stream, dev, 10, cuda::no_init); + CCCLRT_CHECK(buf.size() == 10); + CCCLRT_CHECK(buf.data() != nullptr); + } + + SECTION("iterator range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_device_buffer(stream, dev, input.begin(), input.end()); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + SECTION("range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_device_buffer(stream, dev, input); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + stream.sync(); +} + +// ── make_pinned_buffer ────────────────────────────────────────────────────── + +#if _CCCL_CTK_AT_LEAST(12, 9) +C2H_CCCLRT_TEST("cuda::make_pinned_buffer", "[container][buffer]") +{ + if (!cuda::__is_host_memory_pool_supported()) + { + 
return; + } + + cuda::stream stream{cuda::device_ref{0}}; + + SECTION("empty") + { + auto buf = cuda::make_pinned_buffer(stream); + CCCLRT_CHECK(buf.empty()); + CCCLRT_CHECK(buf.data() == nullptr); + STATIC_CHECK(decltype(buf)::properties_list::has_property(cuda::mr::host_accessible{})); + STATIC_CHECK(decltype(buf)::properties_list::has_property(cuda::mr::device_accessible{})); + } + + SECTION("size with value") + { + auto buf = cuda::make_pinned_buffer(stream, 5, 42); + CCCLRT_CHECK(buf.size() == 5); + CCCLRT_CHECK(equal_size_value(buf, 5, 42)); + } + + SECTION("size with no_init") + { + auto buf = cuda::make_pinned_buffer(stream, 10, cuda::no_init); + CCCLRT_CHECK(buf.size() == 10); + CCCLRT_CHECK(buf.data() != nullptr); + } + + SECTION("iterator range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_pinned_buffer(stream, input.begin(), input.end()); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + SECTION("range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_pinned_buffer(stream, input); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + stream.sync(); +} +#endif // _CCCL_CTK_AT_LEAST(12, 9) + +// ── make_managed_buffer ───────────────────────────────────────────────────── + +#if _CCCL_CTK_AT_LEAST(13, 0) +C2H_CCCLRT_TEST("cuda::make_managed_buffer", "[container][buffer]") +{ + if (!cuda::device_attributes::concurrent_managed_access(cuda::device_ref{0})) + { + return; + } + + cuda::stream stream{cuda::device_ref{0}}; + + SECTION("empty") + { + auto buf = cuda::make_managed_buffer(stream); + CCCLRT_CHECK(buf.empty()); + CCCLRT_CHECK(buf.data() == nullptr); + STATIC_CHECK(decltype(buf)::properties_list::has_property(cuda::mr::host_accessible{})); + STATIC_CHECK(decltype(buf)::properties_list::has_property(cuda::mr::device_accessible{})); + } + + SECTION("size with value") + { + auto buf = cuda::make_managed_buffer(stream, 5, 42); + 
CCCLRT_CHECK(buf.size() == 5); + CCCLRT_CHECK(equal_size_value(buf, 5, 42)); + } + + SECTION("size with no_init") + { + auto buf = cuda::make_managed_buffer(stream, 10, cuda::no_init); + CCCLRT_CHECK(buf.size() == 10); + CCCLRT_CHECK(buf.data() != nullptr); + } + + SECTION("iterator range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_managed_buffer(stream, input.begin(), input.end()); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + SECTION("range") + { + cuda::std::array input{1, 42, 1337, 0, 12, -1}; + auto buf = cuda::make_managed_buffer(stream, input); + CCCLRT_CHECK(buf.size() == 6); + CCCLRT_CHECK(equal_range(buf)); + } + + stream.sync(); +} +#endif // _CCCL_CTK_AT_LEAST(13, 0)