Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ set(CMAKE_CXX_STANDARD 20)
# Generate compile_commands.json for clang-tidy and other tools
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")

################
# Core Library #
################
Expand Down
26 changes: 26 additions & 0 deletions genmetaballs/src/cuda/core/camera.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <cstdint>
#include <cuda/std/ranges>
#include <cuda/std/utility>
#include <cuda_runtime.h>

#include "camera.cuh"
Expand All @@ -11,3 +12,28 @@ CUDA_CALLABLE Vec3D Intrinsics::get_ray_direction(uint32_t px, uint32_t py) cons
auto y = (static_cast<float>(py) - cy) / fy;
return Vec3D{x, y, -1.0f};
}

CUDA_CALLABLE cuda::std::pair<uint32_t, uint32_t> PixelCoordRange::Iterator::operator*() const {
return cuda::std::make_pair(px, py);
}

CUDA_CALLABLE PixelCoordRange::Iterator& PixelCoordRange::Iterator::operator++() {
++px; // move to the next column
if (px >= px_end) { // move to the next row
px = px_start;
++py;
}
return *this;
}

CUDA_CALLABLE bool PixelCoordRange::Sentinel::operator==(const Iterator& it) const {
return it.py >= py_end;
}

CUDA_CALLABLE PixelCoordRange::Iterator PixelCoordRange::begin() const {
return Iterator{px_start, px_end, py_start, px_start, py_start};
}

CUDA_CALLABLE PixelCoordRange::Sentinel PixelCoordRange::end() const {
return Sentinel{py_end};
}
52 changes: 37 additions & 15 deletions genmetaballs/src/cuda/core/camera.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <cstdint>
#include <cuda/std/utility>
#include <cuda_runtime.h>

#include "geometry.cuh"
Expand All @@ -17,20 +18,41 @@ struct Intrinsics {
// Returns the direction of the ray going through pixel (px, py) in camera frame.
// For efficiency, this function does not check if the pixel is within bounds.
CUDA_CALLABLE Vec3D get_ray_direction(uint32_t px, uint32_t py) const;
};

struct PixelCoordRange {
uint32_t px_start;
uint32_t px_end;
uint32_t py_start;
uint32_t py_end;

// the Iterator class holds the current pixel coordinates
struct Iterator {
// pixel range
uint32_t px_start;
uint32_t px_end;
uint32_t py_start;

// current pixel coordinates
uint32_t px;
uint32_t py;

// Returns the (px, py) coordinates of the current pixel
CUDA_CALLABLE cuda::std::pair<uint32_t, uint32_t> operator*() const;

// pre-increment operator that advances to the next pixel
CUDA_CALLABLE Iterator& operator++();
};

// the Sentinel class only needs to hold the stop value (i.e. final row)
struct Sentinel {
uint32_t py_end;

// stopping criterion: true if current row (py) reaches py_end
CUDA_CALLABLE bool operator==(const Iterator& it) const;
};

// Returns a 2D array of ray directions in camera frame in the specified pixel range
// and store them in the provided buffer. By default, the full image is used
template <MemoryLocation location>
CUDA_CALLABLE Array2D<Vec3D, location>& get_ray_directions(Array2D<Vec3D, location>& buffer,
uint32_t px_start = 0,
uint32_t px_end = UINT32_MAX,
uint32_t py_start = 0,
uint32_t py_end = UINT32_MAX) const {
for (auto i = max(0, px_start); i < min(height, px_end); ++i) {
for (auto j = max(0, py_start); j < min(width, py_end); ++j) {
buffer[i][j] = get_ray_direction(j, i);
}
}
return buffer;
}
// range methods
CUDA_CALLABLE Iterator begin() const;
CUDA_CALLABLE Sentinel end() const;
};
4 changes: 2 additions & 2 deletions genmetaballs/src/cuda/core/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#include <cuda_runtime.h>
#include <vector>

constexpr NUM_BLOCKS dim3(10); // XXX madeup
constexpr THREADS_PER_BLOCK dim3(10);
constexpr auto NUM_BLOCKS = dim3(10); // XXX madeup
constexpr auto THREADS_PER_BLOCK = dim3(10);

namespace FMB {

Expand Down
10 changes: 7 additions & 3 deletions tests/cpp_tests/test_camera.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,19 @@

namespace test_camera_gpu {

// CUDA kernel to call get_ray_directions on device with multiple threads
// Each thread processes one row of the image
// CUDA kernel to call get_ray_direction on device with multiple threads
// Each thread processes one row of the image via PixelCoordRange
__global__ void get_ray_directions_kernel(Intrinsics intrinsics,
Array2D<Vec3D, MemoryLocation::DEVICE> ray_buffer) {
uint32_t row_start = threadIdx.x * 2;
uint32_t row_end = max(row_start + 2, intrinsics.height);
uint32_t col_start = threadIdx.y * 2;
uint32_t col_end = max(col_start + 2, intrinsics.width);
intrinsics.get_ray_directions(ray_buffer, row_start, row_end, col_start, col_end);
auto pixel_coords = PixelCoordRange{row_start, row_end, col_start, col_end};

for (auto [px, py] : pixel_coords) {
ray_buffer[px][py] = intrinsics.get_ray_direction(px, py);
}
}

} // namespace test_camera_gpu
Expand Down