Skip to content

Commit

Permalink
Fixes #683: Now using a single random value generator across all vectorAdd variants
Browse files Browse the repository at this point in the history
  • Loading branch information
eyalroz committed Sep 23, 2024
1 parent 310d477 commit 9c34f0b
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 11 deletions.
8 changes: 7 additions & 1 deletion examples/modified_cuda_samples/vectorAdd/vectorAdd.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <memory>
#include <algorithm>
#include <vector>
#include <random>

__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
Expand All @@ -37,7 +38,12 @@ int main()
auto h_B = std::vector<float>(numElements);
auto h_C = std::vector<float>(numElements);

auto generator = []() { return rand() / (float) RAND_MAX; };
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};
std::generate(h_A.begin(), h_A.end(), generator);
std::generate(h_B.begin(), h_B.end(), generator);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ int main()
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<> distribution { 0.0, 1.0 };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};
std::generate(h_A.begin(), h_A.end(), generator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <iostream>
#include <algorithm>
#include <cmath>
#include <random>

__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
Expand All @@ -39,7 +40,12 @@ int main()
auto buffer_B = cuda::memory::managed::make_unique_span<float>(numElements);
auto buffer_C = cuda::memory::managed::make_unique_span<float>(numElements);

auto generator = []() { return rand() / (float) RAND_MAX; };
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};
std::generate(buffer_A.begin(), buffer_A.end(), generator);
std::generate(buffer_B.begin(), buffer_B.end(), generator);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <iostream>
#include <memory>
#include <algorithm>
#include <random>

__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
Expand Down Expand Up @@ -45,7 +46,12 @@ int main()
auto b = buffer_B.as_spans<float>();
auto c = buffer_C.as_spans<float>();

auto generator = []() { return rand() / (float) RAND_MAX; };
// Random value source for the input vectors. The device, engine and
// distribution are function-local statics, so they are constructed once (the
// engine seeded from std::random_device) and every later call continues the
// same pseudo-random sequence.
auto generator = []() {
    static std::random_device random_device;
    static std::mt19937 randomness_generator { random_device() };
    static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
    return distribution(randomness_generator);
};
// Fill each input's host-side span with its own call: std::generate requires
// both iterators to delimit one and the same range, so pairing a's begin()
// with b's end() is undefined behaviour.
std::generate(a.host_side.begin(), a.host_side.end(), generator);
std::generate(b.host_side.begin(), b.host_side.end(), generator);

// Launch the Vector Add CUDA Kernel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ int main(void)
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<> distribution { 0.0, 1.0 };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};
std::generate(h_A.begin(), h_A.end(), generator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ int main(void)
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<> distribution { 0.0, 1.0 };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};
std::generate(h_A.begin(), h_A.end(), generator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <iostream>
#include <memory>
#include <algorithm>
#include <random>

__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
Expand All @@ -38,7 +39,13 @@ int main()
auto h_B = std::vector<float>(numElements);
auto h_C = std::vector<float>(numElements);

auto generator = []() { return rand() / (float) RAND_MAX; };
auto generator = []() {
static std::random_device random_device;
static std::mt19937 randomness_generator { random_device() };
static std::uniform_real_distribution<float> distribution { 0.0, 1.0 };
return distribution(randomness_generator);
};

std::generate(h_A.begin(), h_A.end(), generator);
std::generate(h_B.begin(), h_B.end(), generator);

Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/graph/template.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ class template_t {

public: // non-mutators
instance_t instantiate(
#if CUDA_VERSION >= 11000
#if CUDA_VERSION >= 11040
bool free_previous_allocations_before_relaunch = false
#endif
#if CUDA_VERSION >= 11700
Expand Down
10 changes: 7 additions & 3 deletions src/cuda/api/launch_config_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#include "device.hpp"
#include "types.hpp"

#include <limits>
#include <string>

namespace cuda {

namespace detail_ {
Expand Down Expand Up @@ -411,9 +414,10 @@ class launch_config_builder_t {
/// with a specified size
launch_config_builder_t& block_size(size_t size)
{
if (size > static_cast<size_t>(::std::numeric_limits<grid::block_dimension_t>::max())) {
static constexpr const auto max_representable_block_dim = ::std::numeric_limits<grid::block_dimension_t>::max();
if (size > (size_t) max_representable_block_dim) {
throw ::std::invalid_argument("Specified (1-dimensional) block size " + ::std::to_string(size)
+ " exceeds " + ::std::to_string(::std::numeric_limits<int>::max())
+ " exceeds " + ::std::to_string(max_representable_block_dim)
+ " , the maximum representable size of a block");
// and note this is a super-lenient check, since in practice, device properties
// limit block sizes at much lower values; but NVIDIA doesn't "let us know that" via
Expand All @@ -424,7 +428,7 @@ class launch_config_builder_t {
auto max_threads_per_block = kernel_->maximum_threads_per_block();
if (size > max_threads_per_block) {
throw ::std::invalid_argument("Specified (1-dimensional) block size " + ::std::to_string(size)
+ " exceeds " + ::std::to_string(::std::numeric_limits<int>::max())
+ " exceeds " + ::std::to_string(max_threads_per_block)
+ " , the maximum number of threads per block supported by "
+ kernel::detail_::identify(*kernel_));
}
Expand Down

0 comments on commit 9c34f0b

Please sign in to comment.