From 812b41b9b4de505fc2a816882ee2a8d0325fad4b Mon Sep 17 00:00:00 2001 From: Eyal Rozenberg Date: Sun, 22 Sep 2024 01:17:58 +0300 Subject: [PATCH] Regards #681: Added a fatbin builder based on NVIDIA's libnvfatbin and bumped version to 0.8.0; but... * not tested it yet * need to add the nvFatbin documentation example program, rewritten to use the APIs --- .github/action-scripts/install-cuda-ubuntu.sh | 40 +-- .../action-scripts/install-cuda-windows.ps1 | 9 +- CMakeLists.txt | 30 +- README.md | 3 +- examples/CMakeLists.txt | 2 +- examples/other/new_cpp_standard/main.cpp | 4 + src/cuda/api/types.hpp | 5 + src/cuda/api/versions.hpp | 24 ++ src/cuda/fatbin.hpp | 20 ++ src/cuda/fatbin/builder.hpp | 262 ++++++++++++++++++ src/cuda/fatbin/builder_options.hpp | 121 ++++++++ src/cuda/fatbin/error.hpp | 190 +++++++++++++ src/cuda/fatbin/types.hpp | 29 ++ src/cuda/fatbin/versions.hpp | 45 +++ 14 files changed, 753 insertions(+), 31 deletions(-) create mode 100644 src/cuda/fatbin.hpp create mode 100644 src/cuda/fatbin/builder.hpp create mode 100644 src/cuda/fatbin/builder_options.hpp create mode 100644 src/cuda/fatbin/error.hpp create mode 100644 src/cuda/fatbin/types.hpp create mode 100644 src/cuda/fatbin/versions.hpp diff --git a/.github/action-scripts/install-cuda-ubuntu.sh b/.github/action-scripts/install-cuda-ubuntu.sh index 728f4fba..d81a32f2 100755 --- a/.github/action-scripts/install-cuda-ubuntu.sh +++ b/.github/action-scripts/install-cuda-ubuntu.sh @@ -11,27 +11,10 @@ # -## ------------------- -## Constants -## ------------------- - # @todo - apt repos/known supported versions? # @todo - GCC support matrix? -# List of sub-packages to install. -# @todo - pass this in from outside the script? -# @todo - check the specified subpackages exist via apt pre-install? apt-rdepends cuda-9-0 | grep "^cuda-"? 
- -# Ideally choose from the list of meta-packages to minimise variance between cuda versions (although it does change too) -CUDA_PACKAGES_IN=( - "command-line-tools" - "nvrtc-dev" - "cudart-dev" - "nvcc" - "profiler-api" -) - ## ------------------- ## Bash functions ## ------------------- @@ -56,6 +39,7 @@ function version_lt() { [ "$1" = "$2" ] && return 1 || version_le $1 $2 } + ## ------------------- ## Select CUDA version ## ------------------- @@ -93,6 +77,27 @@ if [ -z ${UBUNTU_VERSION} ]; then exit 1 fi +## ------------------- +## List of sub-packages to install +## ------------------- + +# @todo: Use pairs of package name and minimum version (or version range) + +# Ideally choose from the list of meta-packages to minimise variance between cuda versions (although it does change too) +CUDA_PACKAGES_IN=( + "command-line-tools" + "nvrtc-dev" + "cudart-dev" + "nvcc" + "profiler-api" +) + +if version_ge "$CUDA_VERSION_MAJOR_MINOR" "12.4"; then + CUDA_PACKAGES_IN+=( + "libnvfatbin" + "libnvfatbin-dev" + ) +fi ## --------------------------- ## GCC studio support check? @@ -105,6 +110,7 @@ fi ## ------------------------------- CUDA_PACKAGES="" + for package in "${CUDA_PACKAGES_IN[@]}" do : if [[ "${package}" == "profiler-api" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "11.8" ; then diff --git a/.github/action-scripts/install-cuda-windows.ps1 b/.github/action-scripts/install-cuda-windows.ps1 index 35188f3e..05304adc 100755 --- a/.github/action-scripts/install-cuda-windows.ps1 +++ b/.github/action-scripts/install-cuda-windows.ps1 @@ -120,12 +120,9 @@ $VISUAL_STUDIO_YEAR = $VISUAL_STUDIO.Substring($VISUAL_STUDIO.Length-4) $CUDA_PACKAGES = "" -# for CUDA >= 11 cudart is a required package. 
-# if([version]$CUDA_VERSION_FULL -ge [version]"11.0") { -# if(-not $CUDA_PACKAGES_IN -contains "cudart") { -# $CUDA_PACKAGES_IN += 'cudart' -# } -# } +if([version]$CUDA_VERSION_FULL -ge [version]"12.4") { + $CUDA_PACKAGES_IN += "nvfatbin" +} Foreach ($package in $CUDA_PACKAGES_IN) { # Make sure the correct package name is used for nvcc. diff --git a/CMakeLists.txt b/CMakeLists.txt index 02c548e6..43be50b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.25 FATAL_ERROR) PROJECT(cuda-api-wrappers - VERSION 0.7.2 + VERSION 0.8.0 DESCRIPTION "Thin C++-flavored wrappers for the CUDA Runtime API" HOMEPAGE_URL https://github.com/eyalroz/cuda-api-wrappers ) @@ -25,8 +25,8 @@ if(libm_exists) set(c_math_lib m) endif() -if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1) - foreach(tgt in nvptxcompiler nvptxcompiler_static) +if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4) + foreach(tgt in nvfatbin nvfatbin_static) if (NOT TARGET ${tgt}) _CUDAToolkit_find_and_add_import_lib(${tgt}) endif() @@ -35,7 +35,7 @@ endif() set(CMAKE_THREAD_PREFER_PTHREAD TRUE) -set(targets runtime-and-driver nvtx rtc) +set(targets runtime-and-driver nvtx rtc fatbin) set(prefixed-targets "") set(caw_namespace "cuda-api-wrappers") @@ -79,13 +79,29 @@ add_library("${caw_namespace}::driver-and-runtime" ALIAS caw_runtime-and-driver) target_link_libraries(caw_rtc INTERFACE cuda-api-wrappers::runtime-and-driver CUDA::nvrtc) if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1) if (TARGET CUDA::nvptxcompiler) + target_link_libraries(caw_rtc INTERFACE CUDA::nvptxcompiler) set(ptx_compiler_target nvptxcompiler) + elseif (TARGET CUDA::nvptxcompiler_static) + target_link_libraries(caw_rtc INTERFACE CUDA::nvptxcompiler_static) + else() + message(WARNING "No valid NVIDIA PTX Compiler target is available") + endif() +endif() +if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4) + if (TARGET CUDA::nvfatbin) + target_link_libraries(caw_fatbin INTERFACE CUDA::nvfatbin) + 
elseif (TARGET CUDA::nvfatbin_static) + target_link_libraries(caw_fatbin INTERFACE CUDA::nvfatbin_static) + elseif(EXISTS "${CUDA_nvfatbin_LIBRARY}") + target_link_libraries(caw_fatbin INTERFACE "${CUDA_nvfatbin_LIBRARY}") + elseif(EXISTS "${CUDA_nvfatbin_static_LIBRARY}") + target_link_libraries(caw_fatbin INTERFACE "${CUDA_nvfatbin_static_LIBRARY}") else() - set(ptx_compiler_target nvptxcompiler_static) + message(WARNING "Could not locate a valid NVIDIA fatbin creator target or library file") endif() - target_link_libraries(caw_rtc INTERFACE CUDA::${ptx_compiler_target}) endif() +target_link_libraries(caw_fatbin INTERFACE cuda-api-wrappers::runtime-and-driver) target_link_libraries(caw_nvtx INTERFACE cuda-api-wrappers::runtime-and-driver) if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.0) @@ -99,8 +115,10 @@ endif() # but for two settings I won't bother creating one of those if(DEFINED CMAKE_USE_PTHREADS_INIT) target_compile_definitions(caw_nvtx INTERFACE "" "CUDA_API_WRAPPERS_USE_PTHREADS") + target_compile_definitions(caw_fatbin INTERFACE "" "CUDA_API_WRAPPERS_USE_PTHREADS") elseif(DEFINED CMAKE_USE_WIN32_THREADS_INIT) target_compile_definitions(caw_nvtx INTERFACE "" "CUDA_API_WRAPPERS_USE_WIN32_THREADS") + target_compile_definitions(caw_fatbin INTERFACE "" "CUDA_API_WRAPPERS_USE_WIN32_THREADS") endif() # -------- diff --git a/README.md b/README.md index 27d29a0c..c3e504aa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Thin C++-flavored wrappers for the CUDA APIs:
Runtime, Driver, NVRTC, PTX compiler and NVTX +# Thin C++-flavored wrappers for the CUDA APIs:
Runtime, Driver, NVRTC and more @@ -16,6 +16,7 @@ This is a header-only library of integrated wrappers around the core parts of NV * The slightly higher-level CUDA [Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) * NVIDIA's dynamic CUDA code compilation library, [NVRTC](http://docs.nvidia.com/cuda/nvrtc/index.html) * NVIDIA's out-of-driver, full-featured [PTX compiler library](https://docs.nvidia.com/cuda/ptx-compiler-api/index.html) (available since CUDA 11.1) +* NVIDIA's fat binary creation library [nvFatbin](https://docs.nvidia.com/cuda/nvfatbin/index.html) (available since CUDA 12.4) * The NVIDIA profiler in-program API, also known as [NVTX](https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx) (the NVIDIA Toolkit Extensions library). It is intended for those who would otherwise use these APIs directly, to make working with them be more intuitive and consistent, making use of modern C++ language capabilities, programming idioms and best practices. In a nutshell - making CUDA API work more fun :-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index a02e785a..38b214d0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -216,7 +216,7 @@ if( NOT MSVC ) endif() set(tgt "cpp_${std_version}") add_executable(${tgt} other/new_cpp_standard/main.cpp) - target_link_libraries(${tgt} cuda-api-wrappers::rtc cuda-api-wrappers::nvtx) + target_link_libraries(${tgt} cuda-api-wrappers::rtc cuda-api-wrappers::nvtx cuda-api-wrappers::fatbin) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") target_link_libraries(${tgt} stdc++fs) endif() diff --git a/examples/other/new_cpp_standard/main.cpp b/examples/other/new_cpp_standard/main.cpp index b3cae564..e6729979 100644 --- a/examples/other/new_cpp_standard/main.cpp +++ b/examples/other/new_cpp_standard/main.cpp @@ -13,6 +13,7 @@ #include #endif #include +#include #include #include @@ -42,6 +43,9 @@ int main() auto nvrtc_version = cuda::version_numbers::nvrtc(); (void) 
nvrtc_version; + auto fatbin_version = cuda::version_numbers::fatbin(); + (void) fatbin_version; + #ifndef _MSC_VER auto nvtx_color_yellow = cuda::profiling::color_t::from_hex(0x0FFFF00); (void) nvtx_color_yellow; diff --git a/src/cuda/api/types.hpp b/src/cuda/api/types.hpp index 63f0bd2e..6ec1ddb3 100644 --- a/src/cuda/api/types.hpp +++ b/src/cuda/api/types.hpp @@ -19,6 +19,11 @@ #ifndef CUDA_API_WRAPPERS_COMMON_TYPES_HPP_ #define CUDA_API_WRAPPERS_COMMON_TYPES_HPP_ +#ifdef _MSC_VER +// See @url https://stackoverflow.com/q/4913922/1593077 +#define NOMINMAX +#endif + #if (__cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L)) #error "The CUDA API headers can only be compiled with C++11 or a later version of the C++ language standard" #endif diff --git a/src/cuda/api/versions.hpp b/src/cuda/api/versions.hpp index d0cd4c44..763e1f28 100644 --- a/src/cuda/api/versions.hpp +++ b/src/cuda/api/versions.hpp @@ -12,8 +12,14 @@ #include "error.hpp" +#if CUDA_VERSION >= 12040 +#include +#endif + #include #include +#include + namespace cuda { @@ -161,6 +167,24 @@ inline version_t runtime() { return version_t::from_single_number(version); } +#if CUDA_VERSION >= 12040 + +inline version_t fatbin() { + unsigned int major { 0 }, minor { 0 }; + + auto status = nvFatbinVersion(&major, &minor); + throw_if_error_lazy(status, "Failed obtaining the nvfatbin library version"); +#ifndef NDEBUG + if ((major == 0) or (major > ::std::numeric_limits::max()) + or (minor > ::std::numeric_limits::max())) { // a minor version of 0 is valid (X.0 releases) + throw ::std::logic_error("Invalid version encountered: (" + + ::std::to_string(major) + ", " + ::std::to_string(minor) + ')' ); + } +#endif + return version_t{ static_cast(major), static_cast(minor) }; +} +#endif // CUDA_VERSION >= 12040 + } // namespace version_numbers } // namespace cuda diff --git a/src/cuda/fatbin.hpp b/src/cuda/fatbin.hpp new file mode 100644 index 00000000..002ba527 --- /dev/null +++ b/src/cuda/fatbin.hpp @@ -0,0 +1,20 @@ +/** + * 
@file + * + * @brief A single file which includes, in turn, the CUDA + * fatbin creator library API wrappers and related headers. + */ +#pragma once +#ifndef CUDA_FATBIN_WRAPPERS_HPP_ +#define CUDA_FATBIN_WRAPPERS_HPP_ + +#if CUDA_VERSION >= 12040 + +#include "fatbin/types.hpp" +#include "fatbin/error.hpp" +#include "fatbin/builder_options.hpp" +#include "fatbin/builder.hpp" + +#endif // CUDA_VERSION >= 12040 + +#endif // CUDA_FATBIN_WRAPPERS_HPP_ diff --git a/src/cuda/fatbin/builder.hpp b/src/cuda/fatbin/builder.hpp new file mode 100644 index 00000000..cadb9f50 --- /dev/null +++ b/src/cuda/fatbin/builder.hpp @@ -0,0 +1,262 @@ +/** + * @file + * + * @brief Contains the @ref fatbin_builder_t class and related code. + */ +#pragma once +#ifndef CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ +#define CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ + +#if CUDA_VERSION >= 12040 + +#include "../api/detail/region.hpp" +#include "builder_options.hpp" +#include "types.hpp" + +#include + +namespace cuda { + +///@cond +class fatbin_builder_t; +///@endcond + +namespace fatbin_builder { + +inline fatbin_builder_t wrap(handle_t handle, bool take_ownership = false) noexcept; + +inline fatbin_builder_t create(const options_t & options); + +namespace detail_ { + +inline ::std::string identify(handle_t handle) +{ + return "Fatbin builder with handle " + cuda::detail_::ptr_as_hex(handle); +} + +inline ::std::string identify(const fatbin_builder_t&); + +} // namespace detail_ + +} // namespace fatbin_builder + + +class fatbin_builder_t { +public: // type definitions + using size_type = ::size_t; + + struct deleter_type { + void operator()(void * data) { operator delete(data); } + }; + +public: // getters + + fatbin_builder::handle_t handle() const + { return handle_; } + + /// True if this wrapper is responsible for telling CUDA to destroy + /// the fatbin handle upon the wrapper's own destruction + bool is_owning() const noexcept + { return owning; } + +protected: // unsafe actions + + void 
build_without_size_check_in(memory::region_t target_region) const + { + auto status = nvFatbinGet(handle_, target_region.data()); + throw_if_error_lazy(status, "Failed completing the generation of a fatbin at " + + cuda::detail_::ptr_as_hex(target_region.data())); + } + +public: + size_type size() const + { + size_type result; + auto status = nvFatbinSize(handle_, &result); + throw_if_error_lazy(status, "Failed determining prospective fatbin size for " + fatbin_builder::detail_::identify(*this)); + return result; + } + + void build_in(memory::region_t target_region) const + { + auto required_size = size(); + if (target_region.size() < required_size) { + throw ::std::invalid_argument("Provided region for fatbin creation is of size " + + ::std::to_string(target_region.size()) + " bytes, while the fatbin requires " + ::std::to_string(required_size)); + } + return build_without_size_check_in(target_region); + } + + memory::unique_region build() const + { + auto size_ = size(); + auto ptr = operator new(size_); + memory::region_t target_region{ptr, size_}; + build_in(target_region); + return memory::unique_region(target_region); + } + + void add_ptx_source( + const char* identifier, + span nul_terminated_ptx_source, + device::compute_capability_t target_compute_capability) const // no support for options, for now + { +#ifndef NDEBUG + if (nul_terminated_ptx_source.empty()) { + throw ::std::invalid_argument("Empty PTX source code passed for addition into fatbin"); + } + if (nul_terminated_ptx_source[nul_terminated_ptx_source.size() - 1] != '\0') { + throw ::std::invalid_argument("PTX source code passed for addition into fatbin was not nul-character-terminated"); + } +#endif + auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number()); + auto empty_cmdline = ""; + auto status = nvFatbinAddPTX(handle_, + nul_terminated_ptx_source.data(), + nul_terminated_ptx_source.size(), + compute_capability_str.c_str(), + identifier, + empty_cmdline); 
+ throw_if_error_lazy(status, "Failed adding PTX source fragment " + + ::std::string(identifier) + " at " + detail_::ptr_as_hex(nul_terminated_ptx_source.data()) + + " to a fat binary for target compute capability " + compute_capability_str); + } + + void add_lto_ir( + const char* identifier, + memory::region_t lto_ir, + device::compute_capability_t target_compute_capability) const + { + auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number()); + auto empty_cmdline = ""; + auto status = nvFatbinAddLTOIR( + handle_, lto_ir.data(), lto_ir.size(), compute_capability_str.c_str(), identifier, empty_cmdline); + throw_if_error_lazy(status, "Failed adding LTO IR fragment " + + ::std::string(identifier) + " at " + detail_::ptr_as_hex(lto_ir.data()) + + " to a fat binary for target compute capability " + compute_capability_str); + } + + void add_cubin( + const char* identifier, + memory::region_t cubin, + device::compute_capability_t target_compute_capability) const + { + auto compute_capability_str = ::std::to_string(target_compute_capability.as_combined_number()); + auto status = nvFatbinAddCubin( + handle_, cubin.data(), cubin.size(), compute_capability_str.c_str(), identifier); + throw_if_error_lazy(status, "Failed adding cubin fragment " + + ::std::string(identifier) + " at " + detail_::ptr_as_hex(cubin.data()) + + " to a fat binary for target compute capability " + compute_capability_str); + } + +#if CUDA_VERSION >= 12050 + /** + * Adds relocatable PTX entries from a host object to the fat binary being built + * + * @param ptx_code PTX "host object". TODO: Is this PTX code in text mode? Something else? + * + * @note The builder's options (specified on creation) are ignored for these operations. 
+ */ + void add_relocatable_ptx(memory::region_t ptx_code) const + { + auto status = nvFatbinAddReloc(handle_, ptx_code.data(), ptx_code.size()); + throw_if_error_lazy(status, "Failed adding relocatable PTX code at " + detail_::ptr_as_hex(ptx_code.data()) + + " to " + fatbin_builder::detail_::identify(*this) ); + } + + // TODO: WTF is an index? + void add_index(const char* identifier, memory::region_t index) const + { + auto status = nvFatbinAddIndex(handle_, index.data(), index.size(), identifier); + throw_if_error_lazy(status, "Failed adding index " + ::std::string(identifier) + " at " + + detail_::ptr_as_hex(index.data()) + " to a fat binary"); + } +#endif // CUDA_VERSION >= 12050 + +protected: // constructors + + fatbin_builder_t( + fatbin_builder::handle_t handle, + // no support for options, for now + bool take_ownership) noexcept + : handle_(handle), owning(take_ownership) + {} + +public: // friendship + + friend fatbin_builder_t fatbin_builder::wrap(fatbin_builder::handle_t, bool) noexcept; + +public: // constructors and destructor + + fatbin_builder_t(const fatbin_builder_t &) = delete; + + fatbin_builder_t(fatbin_builder_t &&other) noexcept: + fatbin_builder_t(other.handle_, other.owning) + { + other.owning = false; + }; + + ~fatbin_builder_t() noexcept(false) + { + if (owning) { + auto status = nvFatbinDestroy(&handle_); // this nullifies the handle :-O + throw_if_error_lazy(status, + ::std::string("Failed destroying fatbin builder ") + detail_::ptr_as_hex(handle_) + + " in " + fatbin_builder::detail_::identify(handle_)); + } + } + +public: // operators + + fatbin_builder_t &operator=(const fatbin_builder_t &) = delete; + + fatbin_builder_t &operator=(fatbin_builder_t &&other) noexcept + { + ::std::swap(handle_, other.handle_); + ::std::swap(owning, other.owning); // exchange ownership along with the handle + return *this; + } + +protected: // data members + fatbin_builder::handle_t handle_; + bool owning; + // this field is mutable only for enabling move construction; other + // than in 
that case it must not be altered +}; + +namespace fatbin_builder { + +/// Create a new fatbin builder with the given options; the returned wrapper owns the new handle (no fragments have been added yet) +inline fatbin_builder_t create(const options_t & options) +{ + handle_t new_handle; + auto marshalled_options = marshalling::marshal(options); + auto option_ptrs = marshalled_options.option_ptrs(); + auto status = nvFatbinCreate(&new_handle, option_ptrs.data(), option_ptrs.size()); + throw_if_error_lazy(status, "Failed creating a new fatbin builder"); + auto do_take_ownership = true; + return wrap(new_handle, do_take_ownership); +} + +inline fatbin_builder_t wrap(handle_t handle, bool take_ownership) noexcept +{ + return fatbin_builder_t{handle, take_ownership}; +} + +namespace detail_ { + +inline ::std::string identify(const fatbin_builder_t& builder) +{ + return identify(builder.handle()); +} + +} // namespace detail_ + +} // namespace fatbin_builder + + +} // namespace cuda + +#endif // CUDA_VERSION >= 12040 + +#endif // CUDA_API_WRAPPERS_FATBIN_BUILDER_HPP_ diff --git a/src/cuda/fatbin/builder_options.hpp b/src/cuda/fatbin/builder_options.hpp new file mode 100644 index 00000000..b2ccf2a9 --- /dev/null +++ b/src/cuda/fatbin/builder_options.hpp @@ -0,0 +1,121 @@ +/** + * @file + * + * @brief Contains the @ref fatbin_builder::options_t class and related definitions + */ +#pragma once +#ifndef CUDA_API_WRAPPERS_FATBIN_BUILDER_OPTIONS_HPP_ +#define CUDA_API_WRAPPERS_FATBIN_BUILDER_OPTIONS_HPP_ + +#include "../api/device_properties.hpp" +#include "../api/detail/option_marshalling.hpp" +#include "../api/types.hpp" + +#include +#include + +namespace cuda { + +///@cond +class module_t; +///@endcond + +namespace fatbin_builder { + + +/* + +Fatbin options (not including deprecated ones): + + -compress= Enable (true) / disable (false) compression (default: true). + + -compress-all Compress everything in the fatbin, even if it’s small. + + -cuda Specify CUDA (rather than OpenCL). + -opencl Specify OpenCL (rather than CUDA). 
+ -host= Specify host operating system. Valid options are “linux”, “windows” (“mac” is deprecated) + + -g Generate debug information. + +*/ + +struct options_t final { + + enum : bool { + width_32_bits = false, width_64_bits = true + }; + optional use_64_bit_entry_width{width_64_bits}; + + enum : bool { + dont_compress = false, do_compress = true + }; + optional compress{do_compress}; + + enum : bool { + only_compress_large_objects = false, compression_for_everything = true + }; + optional apply_compression_to_small_objects{only_compress_large_objects}; + + enum ecosystem_t { + cuda, opencl + }; + optional ecosystem; + + enum host_os_t { + windows, linux + }; + optional targeted_host_os; +}; + +namespace detail_ { + +struct marshalled_options_t { + ::std::size_t num_options; + ::std::string option_str; +}; + +} // namespace detail + +} // namespace fatbin_builder + +namespace marshalling { + +namespace detail_ { + +template +struct gadget { + static void process( + const fatbin_builder::options_t &opts, + MarshalTarget &marshalled, Delimiter delimiter, + bool need_delimiter_after_last_option) + { + using fatbin_builder::options_t; + opt_start_t opt_start { delimiter }; + if (opts.use_64_bit_entry_width) { + marshalled << opt_start << '-' << (opts.use_64_bit_entry_width.value() ? "64" : "32"); + } + if (opts.compress) { + marshalled << opt_start << "-compress=" << (opts.compress.value() ? "true" : "false"); + } + if (opts.apply_compression_to_small_objects.value_or(false)) { + marshalled << opt_start << "-compress-all"; + } + if (opts.ecosystem) { + marshalled << opt_start << '-' << ((opts.ecosystem.value() == options_t::opencl) ? "opencl" : "cuda"); + } + if (opts.targeted_host_os) { + marshalled << opt_start << "-host=" << ((opts.targeted_host_os.value() == options_t::windows) ? 
"windows" : "linux"); + } + if (need_delimiter_after_last_option) { + marshalled << opt_start; + } + } +}; + +} // namespace detail_ + +} // namespace marshalling + +} // namespace cuda + +#endif // CUDA_API_WRAPPERS_FATBIN_BUILDER_OPTIONS_HPP_ diff --git a/src/cuda/fatbin/error.hpp b/src/cuda/fatbin/error.hpp new file mode 100644 index 00000000..fbbd360e --- /dev/null +++ b/src/cuda/fatbin/error.hpp @@ -0,0 +1,190 @@ +/** + * @file + * + * @brief Facilities for exception-based handling of errors originating + * in NVIDIA's fatbin creating library (nvFatbin), including a basic exception + * class wrapping `::std::runtime_error`. + */ +#pragma once +#ifndef CUDA_API_WRAPPERS_FATBIN_BUILDER_ERROR_HPP_ +#define CUDA_API_WRAPPERS_FATBIN_BUILDER_ERROR_HPP_ + +#include "types.hpp" + +#include + +#include +#include +#include + +#if CUDA_VERSION >= 12040 + +namespace cuda { + +namespace fatbin_builder { + +namespace status { + +enum named_t : ::std::underlying_type::type { + success = NVFATBIN_SUCCESS, + other_internal_error = NVFATBIN_ERROR_INTERNAL, + elf_architecture_mismatch = NVFATBIN_ERROR_ELF_ARCH_MISMATCH, + elf_architecture_size = NVFATBIN_ERROR_ELF_SIZE_MISMATCH, + missing_ptx_version = NVFATBIN_ERROR_MISSING_PTX_VERSION, + unexpected_null_pointer = NVFATBIN_ERROR_NULL_POINTER, + data_compression_failed = NVFATBIN_ERROR_COMPRESSION_FAILED, + maximum_compressed_size_exceeded = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED, + unrecognized_option = NVFATBIN_ERROR_UNRECOGNIZED_OPTION, + invalid_architecture = NVFATBIN_ERROR_INVALID_ARCH, + invalid_ltoir_data = NVFATBIN_ERROR_INVALID_NVVM, + invalid_nnvm_data = NVFATBIN_ERROR_INVALID_NVVM, // Note alias to same value + empty_input = NVFATBIN_ERROR_EMPTY_INPUT, +#if CUDA_VERSION >= 12050 + missing_ptx_architecture = NVFATBIN_ERROR_MISSING_PTX_ARCH, + ptx_architecture_mismatch = NVFATBIN_ERROR_PTX_ARCH_MISMATCH, + missing_fatbin = NVFATBIN_ERROR_MISSING_FATBIN, + invalid_index = NVFATBIN_ERROR_INVALID_INDEX, + 
identifier_reused = NVFATBIN_ERROR_IDENTIFIER_REUSE, +#endif +#if CUDA_VERSION >= 12060 + internal_ptx_option_related_error = NVFATBIN_ERROR_INTERNAL_PTX_OPTION +#endif +}; + +///@cond +constexpr bool operator==(const status_t& lhs, const named_t& rhs) { return lhs == static_cast(rhs); } +constexpr bool operator!=(const status_t& lhs, const named_t& rhs) { return lhs != static_cast(rhs); } +constexpr bool operator==(const named_t& lhs, const status_t& rhs) { return static_cast(lhs) == rhs; } +constexpr bool operator!=(const named_t& lhs, const status_t& rhs) { return static_cast(lhs) != rhs; } +///@endcond + +} // namespace status + +} // namespace fatbin_builder + +/** + * @brief Determine whether the API call returning the specified status had succeeded + */ + ///@{ +constexpr bool is_success(fatbin_builder::status_t status) +{ + return (status == fatbin_builder::status::named_t::success); +} +///@} + +/** + * @brief Determine whether the API call returning the specified status had failed + */ +constexpr bool is_failure(fatbin_builder::status_t status) +{ + return not is_success(status); +} + +/** + * Obtain a brief textual explanation for a specified kind of CUDA Runtime API status + * or error code. + */ +///@{ +inline ::std::string describe(fatbin_builder::status_t status) +{ + return nvFatbinGetErrorString(status); +} + +namespace fatbin_builder { + +/** + * A (base?) class for exceptions raised by CUDA code; these errors are thrown by + * essentially all CUDA Runtime API wrappers upon failure. + * + * A CUDA runtime error can be constructed with either just a CUDA error code + * (=status code), or a code plus an additional message. + */ +class runtime_error : public ::std::runtime_error { +public: + // TODO: Constructor chaining; and perhaps allow for more construction mechanisms? 
+ runtime_error(status_t error_code) : + ::std::runtime_error(describe(error_code)), + code_(error_code) + { } + // I wonder if I should do this the other way around + runtime_error(status_t error_code, ::std::string what_arg) : + ::std::runtime_error(::std::move(what_arg) + ": " + describe(error_code)), + code_(error_code) + { } + runtime_error(status::named_t error_code) : + runtime_error(static_cast(error_code)) { } + runtime_error(status::named_t error_code, const ::std::string& what_arg) : + runtime_error(static_cast(error_code), what_arg) { } + +protected: + runtime_error(status_t error_code, ::std::runtime_error err) : + ::std::runtime_error(::std::move(err)), code_(error_code) + { } + +public: + static runtime_error with_message_override(status_t error_code, ::std::string complete_what_arg) + { + return runtime_error(error_code, ::std::runtime_error(complete_what_arg)); + } + + /** + * Obtain the CUDA status code which resulted in this error being thrown. + */ + status_t code() const { return code_; } + +private: + status_t code_; +}; + + +} // namespace fatbin_builder + +// TODO: The following could use ::std::optional arguments - which would +// prevent the need for dual versions of the functions - but we're +// not writing C++17 here + +/** + * Do nothing... 
unless the status indicates an error, in which case + * a @ref cuda::fatbin_builder::runtime_error exception is thrown + * + * @param status should be @ref cuda::fatbin_builder::status::success - otherwise an exception is thrown + * @param message An extra description message to add to the exception + */ +inline void throw_if_error(fatbin_builder::status_t status, const ::std::string& message) noexcept(false) +{ + if (is_failure(status)) { throw fatbin_builder::runtime_error(status, message); } +} + +/** + * Does nothing - unless the status indicates an error, in which case + * a @ref cuda::fatbin_builder::runtime_error exception is thrown + * + * @param status should be @ref cuda::fatbin_builder::status::success - otherwise an exception is thrown + */ +inline void throw_if_error(fatbin_builder::status_t status) noexcept(false) +{ + if (is_failure(status)) { throw fatbin_builder::runtime_error(status); } +} + +/** + * Throws a @ref ::cuda::fatbin_builder::runtime_error exception if the status is not success + * + * @note The rationale for this macro is that neither the exception, nor its constructor + * arguments, are evaluated on the "happy path"; and that cannot be achieved with a + * function - which generally/typically evaluates its arguments. To guarantee this + * lazy evaluation with a function, we would need exception-construction-argument-producing + * lambdas, which we would obviously rather avoid. + */ +#define throw_if_fatbin_builder_error_lazy(Kind, status__, ... 
) \ +do { \ + ::cuda::fatbin_builder::status_t tie_status__ = static_cast<::cuda::fatbin_builder::status_t>(status__); \ + if (::cuda::is_failure(tie_status__)) { \ + throw ::cuda::fatbin_builder::runtime_error(tie_status__, (__VA_ARGS__)); \ + } \ +} while(false) + +} // namespace cuda + +#endif // CUDA_VERSION >= 12040 + +#endif // CUDA_API_WRAPPERS_FATBIN_BUILDER_ERROR_HPP_ diff --git a/src/cuda/fatbin/types.hpp b/src/cuda/fatbin/types.hpp new file mode 100644 index 00000000..7bf3d4b5 --- /dev/null +++ b/src/cuda/fatbin/types.hpp @@ -0,0 +1,29 @@ +/** + * @file + * + * @brief Type definitions used in CUDA real-time compilation work wrappers. + */ +#pragma once +#ifndef CUDA_API_WRAPPERS_FATBIN_BUILDER_TYPES_HPP_ +#define CUDA_API_WRAPPERS_FATBIN_BUILDER_TYPES_HPP_ + +#if CUDA_VERSION >= 12040 + +#include "../api/types.hpp" + +#include + +namespace cuda { + +namespace fatbin_builder { + +using handle_t = nvFatbinHandle; +using status_t = nvFatbinResult; + +} // namespace fatbin_builder + +} // namespace cuda + +#endif // CUDA_VERSION >= 12040 + +#endif /* CUDA_API_WRAPPERS_FATBIN_BUILDER_TYPES_HPP_ */ diff --git a/src/cuda/fatbin/versions.hpp b/src/cuda/fatbin/versions.hpp new file mode 100644 index 00000000..c9b63945 --- /dev/null +++ b/src/cuda/fatbin/versions.hpp @@ -0,0 +1,45 @@ +/** + * @file + * + * @brief Wrappers for Runtime API functions involving versions - + * of the CUDA runtime and of the CUDA driver. Also defines a @ref cuda::version_t + * class for working with such versions (as they are not really single + * numbers) - which is what the wrappers return. 
+ */ +#pragma once +#ifndef CUDA_API_WRAPPERS_FATBIN_VERSIONS_HPP_ +#define CUDA_API_WRAPPERS_FATBIN_VERSIONS_HPP_ + +#include "error.hpp" + +#if CUDA_VERSION >= 12040 +#include +#endif + +#include + +namespace cuda { +namespace version_numbers { + +#if CUDA_VERSION >= 12040 + +inline version_t fatbin() { + unsigned int major { 0 }, minor { 0 }; + + auto status = nvFatbinVersion(&major, &minor); + throw_if_error_lazy(status, "Failed obtaining the nvfatbin library version"); +#ifndef NDEBUG + if ( (major == 0) or (major > ::std::numeric_limits::max()) + or (minor > ::std::numeric_limits::max())) { // a minor version of 0 is valid (X.0 releases) + throw ::std::logic_error("Invalid version encountered: (" + + ::std::to_string(major) + ", " + ::std::to_string(minor) + ')' ); + } +#endif + return version_t{ static_cast(major), static_cast(minor) }; +} +#endif // CUDA_VERSION >= 12040 + +} // namespace version_numbers +} // namespace cuda + +#endif // CUDA_API_WRAPPERS_FATBIN_VERSIONS_HPP_