// Review notes for the patch below.
//
// What the hunks do:
//   * CMakeLists.txt: appends -Wno-deprecated-gpu-targets to CMAKE_CUDA_FLAGS.
//   * camera.cuh: removes the templated Intrinsics::get_ray_directions helper and adds
//     PixelCoordRange, a range over (px, py) pairs with a nested Iterator and Sentinel.
//   * camera.cu: defines Iterator::operator* (returns the current (px, py) pair),
//     Iterator::operator++ (increments px; once px >= px_end it resets px to px_start and
//     increments py), Sentinel::operator== (true once it.py >= py_end), and begin()/end().
//   * forward.cu: rewrites the two constexpr declarations as `constexpr auto NAME = dim3(10);`.
//   * test_camera.cu: the kernel now loops over a PixelCoordRange and calls
//     get_ray_direction per pixel instead of calling get_ray_directions.
//
// NOTE(review): test_camera.cu computes row_end / col_end as max(start + 2, height / width),
//   so each end is never smaller than the image extent and can exceed it. A 2-wide tile per
//   thread would need min -- confirm intent. These are unchanged context lines, not added here.
// NOTE(review): the kernel builds PixelCoordRange{row_start, row_end, col_start, col_end},
//   while the struct declares its fields as px_start, px_end, py_start, py_end and operator++
//   labels px as the column and py as the row. Row bounds therefore feed px and column bounds
//   feed py -- verify this axis order, and the ray_buffer[px][py] indexing, are what is wanted.
// NOTE(review): begin() starts at (px_start, py_start) without comparing px_start to px_end.
//   If px_start >= px_end while py_start < py_end, the loop still visits px_start once per
//   row. Presumably callers never pass an empty px range -- confirm or add a guard.
// NOTE(review): Sentinel declares only operator==(const Iterator&). The range-for in the test
//   evaluates `iterator != sentinel`, which relies on C++20 rewritten comparisons. The visible
//   CMake context sets CMAKE_CXX_STANDARD 20, but no CUDA standard setting is visible in this
//   hunk -- TODO confirm the .cu files are built as C++20.
// NOTE(review): the new test comment says each thread processes one row, but the start
//   indices come from threadIdx.x * 2 and threadIdx.y * 2, which reads like a 2x2 tile per
//   thread -- confirm and reword.
// NOTE(review): this copy looks extraction-damaged: newlines are collapsed and angle-bracket
//   contents seem to be missing (bare `#include`, `static_cast(py)`, `cuda::std::pair
//   operator*`, `template` with no parameter list). Restore from the original commit before
//   applying.
diff --git a/CMakeLists.txt b/CMakeLists.txt index dd31fad..07d6ab7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,8 @@ set(CMAKE_CXX_STANDARD 20) # Generate compile_commands.json for clang-tidy and other tools set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") + ################ # Core Library # ################ diff --git a/genmetaballs/src/cuda/core/camera.cu b/genmetaballs/src/cuda/core/camera.cu index 07c65c1..4aa31d9 100644 --- a/genmetaballs/src/cuda/core/camera.cu +++ b/genmetaballs/src/cuda/core/camera.cu @@ -1,5 +1,6 @@ #include #include +#include #include #include "camera.cuh" @@ -11,3 +12,28 @@ CUDA_CALLABLE Vec3D Intrinsics::get_ray_direction(uint32_t px, uint32_t py) cons auto y = (static_cast(py) - cy) / fy; return Vec3D{x, y, -1.0f}; } + +CUDA_CALLABLE cuda::std::pair PixelCoordRange::Iterator::operator*() const { + return cuda::std::make_pair(px, py); +} + +CUDA_CALLABLE PixelCoordRange::Iterator& PixelCoordRange::Iterator::operator++() { + ++px; // move to the next column + if (px >= px_end) { // move to the next row + px = px_start; + ++py; + } + return *this; +} + +CUDA_CALLABLE bool PixelCoordRange::Sentinel::operator==(const Iterator& it) const { + return it.py >= py_end; +} + +CUDA_CALLABLE PixelCoordRange::Iterator PixelCoordRange::begin() const { + return Iterator{px_start, px_end, py_start, px_start, py_start}; +} + +CUDA_CALLABLE PixelCoordRange::Sentinel PixelCoordRange::end() const { + return Sentinel{py_end}; +} diff --git a/genmetaballs/src/cuda/core/camera.cuh b/genmetaballs/src/cuda/core/camera.cuh index 53d5a31..428acf0 100644 --- a/genmetaballs/src/cuda/core/camera.cuh +++ b/genmetaballs/src/cuda/core/camera.cuh @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include "geometry.cuh" @@ -17,20 +18,41 @@ struct Intrinsics { // Returns the direction of the ray going through pixel (px, py) in camera frame. 
// For efficiency, this function does not check if the pixel is within bounds. CUDA_CALLABLE Vec3D get_ray_direction(uint32_t px, uint32_t py) const; +}; + +struct PixelCoordRange { + uint32_t px_start; + uint32_t px_end; + uint32_t py_start; + uint32_t py_end; + + // the Iterator class holds the current pixel coordinates + struct Iterator { + // pixel range + uint32_t px_start; + uint32_t px_end; + uint32_t py_start; + + // current pixel coordinates + uint32_t px; + uint32_t py; + + // Returns the (px, py) coordinates of the current pixel + CUDA_CALLABLE cuda::std::pair operator*() const; + + // pre-increment operator that advances to the next pixel + CUDA_CALLABLE Iterator& operator++(); + }; + + // the Sentinel class only needs to hold the stop value (i.e. final row) + struct Sentinel { + uint32_t py_end; + + // stopping criterion: true if current row (py) reaches py_end + CUDA_CALLABLE bool operator==(const Iterator& it) const; + }; - // Returns a 2D array of ray directions in camera frame in the specified pixel range - // and store them in the provided buffer. 
By default, the full image is used - template - CUDA_CALLABLE Array2D& get_ray_directions(Array2D& buffer, - uint32_t px_start = 0, - uint32_t px_end = UINT32_MAX, - uint32_t py_start = 0, - uint32_t py_end = UINT32_MAX) const { - for (auto i = max(0, px_start); i < min(height, px_end); ++i) { - for (auto j = max(0, py_start); j < min(width, py_end); ++j) { - buffer[i][j] = get_ray_direction(j, i); - } - } - return buffer; - } + // range methods + CUDA_CALLABLE Iterator begin() const; + CUDA_CALLABLE Sentinel end() const; }; diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu index 92b96d6..309729c 100644 --- a/genmetaballs/src/cuda/core/forward.cu +++ b/genmetaballs/src/cuda/core/forward.cu @@ -2,8 +2,8 @@ #include #include -constexpr NUM_BLOCKS dim3(10); // XXX madeup -constexpr THREADS_PER_BLOCK dim3(10); +constexpr auto NUM_BLOCKS = dim3(10); // XXX madeup +constexpr auto THREADS_PER_BLOCK = dim3(10); namespace FMB { diff --git a/tests/cpp_tests/test_camera.cu b/tests/cpp_tests/test_camera.cu index 1448c8a..73645e4 100644 --- a/tests/cpp_tests/test_camera.cu +++ b/tests/cpp_tests/test_camera.cu @@ -11,15 +11,19 @@ namespace test_camera_gpu { -// CUDA kernel to call get_ray_directions on device with multiple threads -// Each thread processes one row of the image +// CUDA kernel to call get_ray_direction on device with multiple threads +// Each thread processes one row of the image via PixelCoordRange __global__ void get_ray_directions_kernel(Intrinsics intrinsics, Array2D ray_buffer) { uint32_t row_start = threadIdx.x * 2; uint32_t row_end = max(row_start + 2, intrinsics.height); uint32_t col_start = threadIdx.y * 2; uint32_t col_end = max(col_start + 2, intrinsics.width); - intrinsics.get_ray_directions(ray_buffer, row_start, row_end, col_start, col_end); + auto pixel_coords = PixelCoordRange{row_start, row_end, col_start, col_end}; + + for (auto [px, py] : pixel_coords) { + ray_buffer[px][py] = 
intrinsics.get_ray_direction(px, py); + } } } // namespace test_camera_gpu