diff --git a/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu b/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu index 31fdca0c..25d0b52d 100644 --- a/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu +++ b/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu @@ -16,6 +16,7 @@ #include #include #include +#include __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) { @@ -37,7 +38,12 @@ int main() auto h_B = std::vector(numElements); auto h_C = std::vector(numElements); - auto generator = []() { return rand() / (float) RAND_MAX; }; + auto generator = []() { + static std::random_device random_device; + static std::mt19937 randomness_generator { random_device() }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; + return distribution(randomness_generator); + }; std::generate(h_A.begin(), h_A.end(), generator); std::generate(h_B.begin(), h_B.end(), generator); diff --git a/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp b/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp index 99b2c90d..f7dca61c 100644 --- a/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp +++ b/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp @@ -255,7 +255,7 @@ int main() auto generator = []() { static std::random_device random_device; static std::mt19937 randomness_generator { random_device() }; - static std::uniform_real_distribution<> distribution { 0.0, 1.0 }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; return distribution(randomness_generator); }; std::generate(h_A.begin(), h_A.end(), generator); diff --git a/examples/modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu b/examples/modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu index c5e01a87..a7b3a4c5 100644 --- a/examples/modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu +++ b/examples/modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu @@ -18,6 +18,7 @@ #include #include #include +#include __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) { @@ -39,7 +40,12 @@ int main() auto buffer_B = cuda::memory::managed::make_unique_span(numElements); auto buffer_C = cuda::memory::managed::make_unique_span(numElements); - auto generator = []() { return rand() / (float) RAND_MAX; }; + auto generator = []() { + static std::random_device random_device; + static std::mt19937 randomness_generator { random_device() }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; + return distribution(randomness_generator); + }; std::generate(buffer_A.begin(), buffer_A.end(), generator); std::generate(buffer_B.begin(), buffer_B.end(), generator); diff --git a/examples/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu b/examples/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu index 0068487f..f07637de 100644 --- a/examples/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu +++ b/examples/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu @@ -18,6 +18,7 @@ #include #include #include +#include __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) { @@ -45,7 +46,12 @@ int main() auto b = buffer_B.as_spans(); auto c = buffer_C.as_spans(); - auto generator = []() { return rand() / (float) RAND_MAX; }; + auto generator = []() { + static std::random_device random_device; + static std::mt19937 randomness_generator { random_device() }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; + return distribution(randomness_generator); + }; std::generate(a.host_side.begin(), b.host_side.end(), generator); // Launch the Vector Add CUDA Kernel diff --git a/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp b/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp index b80605ff..e5792754 100644 --- a/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp +++ b/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp @@ -60,7 +60,7 @@ int main(void) auto generator = []() { static std::random_device random_device; static std::mt19937 randomness_generator { random_device() }; - static std::uniform_real_distribution<> distribution { 0.0, 1.0 }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; return distribution(randomness_generator); }; std::generate(h_A.begin(), h_A.end(), generator); diff --git a/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp b/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp index 6960f8ed..02fd7df8 100644 --- a/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp +++ b/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp @@ -100,7 +100,7 @@ int main(void) auto generator = []() { static std::random_device random_device; static std::mt19937 randomness_generator { random_device() }; - static std::uniform_real_distribution<> distribution { 0.0, 1.0 }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; return distribution(randomness_generator); }; std::generate(h_A.begin(), h_A.end(), generator); diff --git a/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu b/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu index 6b2901b4..5668707d 100644 --- a/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu +++ b/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu @@ -17,6 +17,7 @@ #include #include #include +#include __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) { @@ -38,7 +39,13 @@ int main() auto h_B = std::vector(numElements); auto h_C = std::vector(numElements); - auto generator = []() { return rand() / (float) RAND_MAX; }; + auto generator = []() { + static std::random_device random_device; + static std::mt19937 randomness_generator { random_device() }; + static std::uniform_real_distribution distribution { 0.0, 1.0 }; + return distribution(randomness_generator); + }; + std::generate(h_A.begin(), h_A.end(), generator); std::generate(h_B.begin(), h_B.end(), generator); diff --git a/src/cuda/api/graph/template.hpp b/src/cuda/api/graph/template.hpp index 57b9b7b9..90c0090d 100644 --- a/src/cuda/api/graph/template.hpp +++ b/src/cuda/api/graph/template.hpp @@ -731,7 +731,7 @@ class template_t { public: // non-mutators instance_t instantiate( -#if CUDA_VERSION >= 11000 +#if CUDA_VERSION >= 11040 bool free_previous_allocations_before_relaunch = false #endif #if CUDA_VERSION >= 11700 diff --git a/src/cuda/api/launch_config_builder.hpp b/src/cuda/api/launch_config_builder.hpp index aad3c9b6..50b1710b 100644 --- a/src/cuda/api/launch_config_builder.hpp +++ b/src/cuda/api/launch_config_builder.hpp @@ -16,6 +16,9 @@ #include "device.hpp" #include "types.hpp" +#include +#include + namespace cuda { namespace detail_ { @@ -411,9 +414,10 @@ class launch_config_builder_t { /// with a specified size launch_config_builder_t& block_size(size_t size) { - if (size > static_cast(::std::numeric_limits::max())) { + static constexpr const auto max_representable_block_dim = ::std::numeric_limits::max(); + if (size > (size_t) max_representable_block_dim) { throw ::std::invalid_argument("Specified (1-dimensional) block size " + ::std::to_string(size) - + " exceeds " + ::std::to_string(::std::numeric_limits::max()) + + " exceeds " + ::std::to_string(max_representable_block_dim) + " , the maximum representable size of a block"); // and note this is a super-lenient check, since in practice, device properties // limit block sizes at much lower values; but NVIDIA doesn't "let us know that" via @@ -424,7 +428,7 @@ class launch_config_builder_t { auto max_threads_per_block = kernel_->maximum_threads_per_block(); if (size > max_threads_per_block) { throw ::std::invalid_argument("Specified (1-dimensional) block size " + ::std::to_string(size) - + " exceeds " + ::std::to_string(::std::numeric_limits::max()) + + " exceeds " + ::std::to_string(max_threads_per_block) + " , the maximum number of threads per block supported by " + kernel::detail_::identify(*kernel_)); }