From dab09dbde6d9a93cb0f9c4dd9e73510537550220 Mon Sep 17 00:00:00 2001 From: Eyal Rozenberg Date: Mon, 23 Sep 2024 00:27:46 +0300 Subject: [PATCH] Warning fixes: * Corrected a CUDA version include guard in `graph/instance.hpp` which was enabling an unused parameter in CUDA 11.0 - 11.3.x * Explicitly cast a `size_t` to `unsigned int` in `module.hpp`, to avoid narrowing warnings * In the asyncAPI example, now using iostreams printing rather than a `printf()` with an inexact format specifier (`size_t` vs `unsigned long`) * Now using float literals, rather than double literals, to set float variables or fill float buffers, in the bandwidthTest and jacobiCudaGraphs examples * streamOrderedAllocation example: Explicit cast from `size_t` to `int` to avoid a warning about narrowing --- examples/modified_cuda_samples/asyncAPI/asyncAPI.cu | 2 +- .../modified_cuda_samples/bandwidthtest/bandwidthtest.cpp | 4 ++-- examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp | 2 +- .../streamOrderedAllocation/streamOrderedAllocation.cu | 2 +- src/cuda/api/graph/instance.hpp | 2 +- src/cuda/api/module.hpp | 2 +- src/cuda/api/multi_wrapper_impls/graph.hpp | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/modified_cuda_samples/asyncAPI/asyncAPI.cu b/examples/modified_cuda_samples/asyncAPI/asyncAPI.cu index f578cfdc..386ddb02 100644 --- a/examples/modified_cuda_samples/asyncAPI/asyncAPI.cu +++ b/examples/modified_cuda_samples/asyncAPI/asyncAPI.cu @@ -34,7 +34,7 @@ bool correct_output(cuda::span data, const int x) for (size_t i = 0; i < data.size(); i++) if (data[i] != x) { - printf("Error! data[%lu] = %d, ref = %d\n", i, data[i], x); + std::cout << "Error! 
data[" << i << "] = " << data[i] << ", ref = " << x << '\n'; return false; } return true; diff --git a/examples/modified_cuda_samples/bandwidthtest/bandwidthtest.cpp b/examples/modified_cuda_samples/bandwidthtest/bandwidthtest.cpp index d21c91c5..19857887 100644 --- a/examples/modified_cuda_samples/bandwidthtest/bandwidthtest.cpp +++ b/examples/modified_cuda_samples/bandwidthtest/bandwidthtest.cpp @@ -105,8 +105,8 @@ int main() cuda::memory::copy(h_aPinned, h_aPageable, bytes); // Note: the following two instructions can be replaced with CUDA API wrappers // calls - cuda::memory::host::zero(), but that won't improve anything - std::fill_n(h_bPageable, nElements, 0.0); - std::fill_n(h_bPinned, nElements, 0.0); + std::fill_n(h_bPageable, nElements, 0.0f); + std::fill_n(h_bPinned, nElements, 0.0f); std::cout << "\nDevice: " << cuda::device::current::get().name() << "\n"; std::cout << "\nTransfer size (MB): " << (bytes / Mi) << "\n"; diff --git a/examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp b/examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp index eb11c8a2..ea556f42 100644 --- a/examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp +++ b/examples/modified_cuda_samples/jacobiCudaGraphs/main.cpp @@ -149,7 +149,7 @@ int main(int argc, char **argv) createLinearSystem(A, b); - float convergence_threshold = 1.0e-2; + float convergence_threshold = 1.0e-2f; int max_num_iterations = 4 * N_ROWS * N_ROWS; // create timer diff --git a/examples/modified_cuda_samples/streamOrderedAllocation/streamOrderedAllocation.cu b/examples/modified_cuda_samples/streamOrderedAllocation/streamOrderedAllocation.cu index 1a6be6e2..fd3ed314 100644 --- a/examples/modified_cuda_samples/streamOrderedAllocation/streamOrderedAllocation.cu +++ b/examples/modified_cuda_samples/streamOrderedAllocation/streamOrderedAllocation.cu @@ -115,7 +115,7 @@ int basicStreamOrderedAllocation( auto d_c = span(stream.enqueue.allocate(c.size() * sizeof(float))); stream.enqueue.copy(d_a, a); 
stream.enqueue.copy(d_b, b); - stream.enqueue.kernel_launch(vectorAddGPU, launch_config, d_a.data(), d_b.data(), d_c.data(), c.size()); + stream.enqueue.kernel_launch(vectorAddGPU, launch_config, d_a.data(), d_b.data(), d_c.data(), (int) c.size()); stream.enqueue.free(d_a); stream.enqueue.free(d_b); stream.enqueue.copy(c, d_c); diff --git a/src/cuda/api/graph/instance.hpp b/src/cuda/api/graph/instance.hpp index 027a7cf4..49612b08 100644 --- a/src/cuda/api/graph/instance.hpp +++ b/src/cuda/api/graph/instance.hpp @@ -446,7 +446,7 @@ void set_node_parameters( inline instance_t instantiate( const template_t& template_ -#if CUDA_VERSION >= 11000 +#if CUDA_VERSION >= 11040 , bool free_previous_allocations_before_relaunch = false #endif #if CUDA_VERSION >= 12000 diff --git a/src/cuda/api/module.hpp b/src/cuda/api/module.hpp index f3a1fbd4..f6a7d89d 100644 --- a/src/cuda/api/module.hpp +++ b/src/cuda/api/module.hpp @@ -67,7 +67,7 @@ inline void destroy(handle_t handle, context::handle_t context_handle, device::i inline unique_span get_kernel_handles(handle_t module_handle, size_t num_kernels) { auto result = make_unique_span(num_kernels); - auto status = cuModuleEnumerateFunctions(result.data(), num_kernels, module_handle); + auto status = cuModuleEnumerateFunctions(result.data(), (unsigned int) num_kernels, module_handle); throw_if_error_lazy(status, "Failed enumerating the kernels in " + module::detail_::identify(module_handle)); return result; } diff --git a/src/cuda/api/multi_wrapper_impls/graph.hpp b/src/cuda/api/multi_wrapper_impls/graph.hpp index 89d9c939..5f710b8f 100644 --- a/src/cuda/api/multi_wrapper_impls/graph.hpp +++ b/src/cuda/api/multi_wrapper_impls/graph.hpp @@ -112,7 +112,7 @@ inline instance_t template_t::instantiate( { return graph::instantiate( *this -#if CUDA_VERSION >= 11000 +#if CUDA_VERSION >= 11040 , free_previous_allocations_before_relaunch #endif #if CUDA_VERSION >= 11700