From bae5f3d40f5d43eca678c7da65200b53060cc274 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 4 Jun 2025 09:11:08 -0700 Subject: [PATCH 01/10] Clean up DPCTL_TARGET_CUDA handling --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a446c19a9..6edb371e15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,10 +55,6 @@ set(_dpctl_amd_targets) if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") if (DPCTL_TARGET_CUDA) set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") - else() - if (DEFINED ENV{DPCTL_TARGET_CUDA}) - set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") - endif() endif() if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x") set(_dpctl_amd_targets ${DPCTL_TARGET_HIP}) From de83f29010649e5805478bc8f474b97af503448e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Jun 2025 03:35:05 -0700 Subject: [PATCH 02/10] Ver 1: Add sm_* offload arch support to DPCTL_TARGET_CUDA --- CMakeLists.txt | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6edb371e15..993fb0e883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,9 +25,10 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS "Build dpctl pybind11 offloading extensions with coverage instrumentation" OFF ) -option(DPCTL_TARGET_CUDA - "Build DPCTL to target CUDA devices" - OFF +set(DPCTL_TARGET_CUDA + "" + CACHE STRING + "Build DPCTL to target CUDA devices. Set to ON to use default architecture (sm_50), or to a specific architecture like sm_80." 
) set(DPCTL_TARGET_HIP "" @@ -51,9 +52,20 @@ set(_dpctl_sycl_target_compile_options) set(_dpctl_sycl_target_link_options) set(_dpctl_sycl_targets) +set(_dpctl_cuda_arch) set(_dpctl_amd_targets) if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") - if (DPCTL_TARGET_CUDA) + if (NOT "x${DPCTL_TARGET_CUDA}" STREQUAL "x") + if (DPCTL_TARGET_CUDA STREQUAL "ON") + set(_dpctl_cuda_arch "sm_50") + elseif(DPCTL_TARGET_CUDA MATCHES "^sm_") + set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA}) + else() + message(FATAL_ERROR + "Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". " + "Expected 'ON' or an architecture like 'sm_80'." + ) + endif() set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") endif() if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x") @@ -88,6 +100,10 @@ if (_dpctl_sycl_targets) message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}") list(APPEND _dpctl_sycl_target_compile_options -fsycl-targets=${_dpctl_sycl_targets}) list(APPEND _dpctl_sycl_target_link_options -fsycl-targets=${_dpctl_sycl_targets}) + if (_dpctl_cuda_arch) + list(APPEND _dpctl_sycl_target_compile_options -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${_dpctl_cuda_arch}) + list(APPEND _dpctl_sycl_target_link_options -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${_dpctl_cuda_arch}) + endif() if(_dpctl_amd_targets) list(APPEND _dpctl_sycl_target_compile_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpctl_amd_targets}) list(APPEND _dpctl_sycl_target_link_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpctl_amd_targets}) From 9751a71bcb854b0dd00055f71041dd9072b94fe3 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Jun 2025 05:06:03 -0700 Subject: [PATCH 03/10] Ver 2: Use nvidia_gpu_sm_* alias in -fsycl-targets for CUDA --- CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 993fb0e883..f11cb95c6b 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -66,7 +66,7 @@ if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") "Expected 'ON' or an architecture like 'sm_80'." ) endif() - set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown") endif() if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x") set(_dpctl_amd_targets ${DPCTL_TARGET_HIP}) @@ -100,10 +100,6 @@ if (_dpctl_sycl_targets) message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}") list(APPEND _dpctl_sycl_target_compile_options -fsycl-targets=${_dpctl_sycl_targets}) list(APPEND _dpctl_sycl_target_link_options -fsycl-targets=${_dpctl_sycl_targets}) - if (_dpctl_cuda_arch) - list(APPEND _dpctl_sycl_target_compile_options -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${_dpctl_cuda_arch}) - list(APPEND _dpctl_sycl_target_link_options -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${_dpctl_cuda_arch}) - endif() if(_dpctl_amd_targets) list(APPEND _dpctl_sycl_target_compile_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpctl_amd_targets}) list(APPEND _dpctl_sycl_target_link_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpctl_amd_targets}) From 4a3ecf85ab0e29365e10154f2b3c7f8dc84abd26 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Jun 2025 05:49:16 -0700 Subject: [PATCH 04/10] Update CUDA build docs --- .../beginners_guides/installation.rst | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/doc_sources/beginners_guides/installation.rst b/docs/doc_sources/beginners_guides/installation.rst index afb9c639a9..7bc8410058 100644 --- a/docs/doc_sources/beginners_guides/installation.rst +++ b/docs/doc_sources/beginners_guides/installation.rst @@ -159,13 +159,31 @@ The following plugins from CodePlay are supported: .. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/ .. 
_codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/ -``dpctl`` can be built for CUDA devices as follows: +``dpctl`` can be built for CUDA devices using the ``DPCTL_TARGET_CUDA`` CMake option, +which accepts a specific compute architecture string: + +.. code-block:: bash + + python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80" + +To use the default architecture (``sm_50``), use: .. code-block:: bash python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=ON" -And for AMD devices +Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider +range of architectures, but limiting the usage of more recent CUDA features. + +For reference, compute architecture strings like ``sm_80`` are based on +CUDA Compute Capability. A complete mapping between NVIDIA GPU models and their +respective ``sm_XX`` values can be found in the official +`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_. + +A full list of available SYCL alias targets is available in the +`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_. + +To build for AMD devices, use: .. 
code-block:: bash From 80f7bc70a91b9f9ae2c39a30fcaadc376e274ecc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Jun 2025 11:35:52 -0700 Subject: [PATCH 05/10] Improve robustness of DPCTL_TARGET_CUDA handling --- CMakeLists.txt | 10 ++++++---- docs/doc_sources/beginners_guides/installation.rst | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f11cb95c6b..08411e6ddf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,20 +54,22 @@ set(_dpctl_sycl_target_link_options) set(_dpctl_sycl_targets) set(_dpctl_cuda_arch) set(_dpctl_amd_targets) + if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") if (NOT "x${DPCTL_TARGET_CUDA}" STREQUAL "x") - if (DPCTL_TARGET_CUDA STREQUAL "ON") - set(_dpctl_cuda_arch "sm_50") - elseif(DPCTL_TARGET_CUDA MATCHES "^sm_") + if(DPCTL_TARGET_CUDA MATCHES "^sm_") set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA}) + elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") + set(_dpctl_cuda_arch "sm_50") else() message(FATAL_ERROR "Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". " - "Expected 'ON' or an architecture like 'sm_80'." + "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'." 
) endif() set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown") endif() + if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x") set(_dpctl_amd_targets ${DPCTL_TARGET_HIP}) if(_dpctl_sycl_targets) diff --git a/docs/doc_sources/beginners_guides/installation.rst b/docs/doc_sources/beginners_guides/installation.rst index 7bc8410058..74aeae6cf0 100644 --- a/docs/doc_sources/beginners_guides/installation.rst +++ b/docs/doc_sources/beginners_guides/installation.rst @@ -166,7 +166,8 @@ which accepts a specific compute architecture string: python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80" -To use the default architecture (``sm_50``), use: +To use the default architecture (``sm_50``), +set ``DPCTL_TARGET_CUDA`` to a value such as ``ON``, ``TRUE``, ``YES``, ``Y``, or ``1``: .. code-block:: bash From d395910a1af13dc94faaac81ee2c53bcf3b00160 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Jun 2025 11:43:04 -0700 Subject: [PATCH 06/10] Update DPCTL_TARGET_CUDA option description --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 08411e6ddf..6d697b5344 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS set(DPCTL_TARGET_CUDA "" CACHE STRING - "Build DPCTL to target CUDA devices. Set to ON to use default architecture (sm_50), or to a specific architecture like sm_80." + "Build DPCTL to target CUDA devices. Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), or to a specific architecture like sm_80." 
) set(DPCTL_TARGET_HIP "" From 758e00f4dc29e01899a6c1d18572f13f2d15aed7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Jun 2025 04:57:17 -0700 Subject: [PATCH 07/10] Apply remarks --- CMakeLists.txt | 4 +++- docs/doc_sources/beginners_guides/installation.rst | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d697b5344..9d4fe731c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,9 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS set(DPCTL_TARGET_CUDA "" CACHE STRING - "Build DPCTL to target CUDA devices. Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), or to a specific architecture like sm_80." + "Build DPCTL to target CUDA device. " + "Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), " + "or to a specific architecture like sm_80." ) set(DPCTL_TARGET_HIP "" diff --git a/docs/doc_sources/beginners_guides/installation.rst b/docs/doc_sources/beginners_guides/installation.rst index 74aeae6cf0..44d46797ba 100644 --- a/docs/doc_sources/beginners_guides/installation.rst +++ b/docs/doc_sources/beginners_guides/installation.rst @@ -176,10 +176,11 @@ set ``DPCTL_TARGET_CUDA`` to a value such as ``ON``, ``TRUE``, ``YES``, ``Y``, o Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider range of architectures, but limiting the usage of more recent CUDA features. -For reference, compute architecture strings like ``sm_80`` are based on -CUDA Compute Capability. A complete mapping between NVIDIA GPU models and their -respective ``sm_XX`` values can be found in the official -`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_. +For reference, compute architecture strings like ``sm_80`` correspond to specific +CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``). 
+A complete mapping between NVIDIA GPU models and their respective +Compute Capabilities can be found in the official +`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation. A full list of available SYCL alias targets is available in the `DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_. From feee9480e7b3be359023acab51937b0145ab501f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Jun 2025 07:25:50 -0700 Subject: [PATCH 08/10] Use string(CONCAT) for multi-line DPCTL_TARGET_CUDA description --- CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d4fe731c2..3aac1d2e74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,13 +25,16 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS "Build dpctl pybind11 offloading extensions with coverage instrumentation" OFF ) -set(DPCTL_TARGET_CUDA - "" - CACHE STRING +string(CONCAT _desc_target_cuda "Build DPCTL to target CUDA device. " "Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), " "or to a specific architecture like sm_80." 
) +set(DPCTL_TARGET_CUDA + "" + CACHE STRING + "${_desc_target_cuda}" +) set(DPCTL_TARGET_HIP "" CACHE STRING From 44f80a2ca3f3a7c5172cd4cc3d02674043029c42 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 05:46:07 -0700 Subject: [PATCH 09/10] Enable false value support for DPCTL_TARGET_CUDA --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3aac1d2e74..393d2be0c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ set(_dpctl_cuda_arch) set(_dpctl_amd_targets) if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") - if (NOT "x${DPCTL_TARGET_CUDA}" STREQUAL "x") + if (DPCTL_TARGET_CUDA) if(DPCTL_TARGET_CUDA MATCHES "^sm_") set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA}) elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") From 2843aaa2290c56582525a7bad4eca2cbf18bd8c6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 10 Jun 2025 05:52:21 -0700 Subject: [PATCH 10/10] Use \ instead of string(CONCAT) --- CMakeLists.txt | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 393d2be0c8..125621d94d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,15 +25,13 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS "Build dpctl pybind11 offloading extensions with coverage instrumentation" OFF ) -string(CONCAT _desc_target_cuda - "Build DPCTL to target CUDA device. " - "Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), " - "or to a specific architecture like sm_80." -) + set(DPCTL_TARGET_CUDA "" CACHE STRING - "${_desc_target_cuda}" + "Build DPCTL to target CUDA device. \ +Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \ +or to a specific architecture like sm_80." ) set(DPCTL_TARGET_HIP ""