Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Checks: >
-readability-implicit-bool-conversion,
-readability-avoid-const-params-in-decls,
-readability-braces-around-statements,
-readability-convert-member-functions-to-static,
-readability-isolate-declaration,
-readability-math-missing-parentheses,
-cppcoreguidelines-avoid-magic-numbers,
Expand Down
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ add_library(genmetaballs_core
genmetaballs/src/cuda/core/camera.cuh
genmetaballs/src/cuda/core/utils.cu
genmetaballs/src/cuda/core/utils.cuh
genmetaballs/src/cuda/core/fmb.cuh
genmetaballs/src/cuda/core/fmb.cu
genmetaballs/src/cuda/core/geometry.cuh
genmetaballs/src/cuda/core/geometry.cu
genmetaballs/src/cuda/core/confidence.cuh
Expand Down
19 changes: 19 additions & 0 deletions genmetaballs/src/cuda/bindings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
#include <nanobind/nanobind.h>
#include <nanobind/ndarray.h>
#include <nanobind/operators.h>
#include <nanobind/stl/tuple.h>
#include <nanobind/stl/vector.h>

#include "core/blender.cuh"
#include "core/camera.cuh"
#include "core/confidence.cuh"
#include "core/fmb.cuh"
#include "core/geometry.cuh"
#include "core/image.cuh"
#include "core/utils.cuh"
Expand All @@ -22,6 +24,23 @@ void bind_image_view(nb::module_& m, const char* name);

NB_MODULE(_genmetaballs_bindings, m) {

/*
* FMB module bindings
*/

nb::module_ fmb = m.def_submodule("fmb", "Fuzzy meta ball data types");

nb::class_<FMB>(fmb, "FMB")
.def(nb::init<Pose, float, float, float>())
.def_prop_ro("pose", &FMB::get_pose)
.def_prop_ro("extent",
[](const FMB& self) {
auto extent = self.get_extent();
return std::tuple{extent.x, extent.y, extent.z};
})
.def("quadratic_form", &FMB::quadratic_form,
"Evaluate the associated quadratic form at the given vector", nb::arg("vec"));

/*
* Geometry module bindings
*/
Expand Down
33 changes: 33 additions & 0 deletions genmetaballs/src/cuda/core/fmb.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "fmb.cuh"
#include "geometry.cuh"
#include "utils.cuh"

CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const {
const auto shftd_vec = vec - pose_.get_tran();
const auto rot_shftd_vec = pose_.get_rot().apply(shftd_vec);
const auto scaled_rot_shftd_vec = Vec3D(
rot_shftd_vec.x / extent_.x, rot_shftd_vec.y / extent_.y, rot_shftd_vec.z / extent_.z);
return dot(rot_shftd_vec, scaled_rot_shftd_vec);
}

template <>
__host__ FMBScene<MemoryLocation::HOST>::FMBScene(size_t size)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice set of helpers! We should probably do this with all our structs tbh, would be much cleaner

: fmbs_{new FMB[size]}, log_weights_{new float[size]}, size_{size} {}

template <>
__host__ FMBScene<MemoryLocation::DEVICE>::FMBScene(size_t size) : size_{size} {
CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB)));
CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float)));
}

template <>
__host__ FMBScene<MemoryLocation::HOST>::~FMBScene() {
delete[] fmbs_;
delete[] log_weights_;
}

template <>
__host__ FMBScene<MemoryLocation::DEVICE>::~FMBScene() {
CUDA_CHECK(cudaFree(fmbs_));
CUDA_CHECK(cudaFree(log_weights_));
}
112 changes: 101 additions & 11 deletions genmetaballs/src/cuda/core/fmb.cuh
Original file line number Diff line number Diff line change
@@ -1,26 +1,116 @@
#pragma once

#include <cuda/std/span>
#include <cuda/std/tuple>
#include <stdexcept>

#include "geometry.cuh"
#include "utils.cuh"

class FMB {
private:
// In Gaussian terms:
// - mean: pose.tran
// - cov: pose.rot.mat().inv() * diag(extent) * pose.rot.mat()
Pose pose_;
float3 extent_;

public:
FMB() : pose_{}, extent_{1.0f, 1.0f, 1.0f} {};

FMB(const Pose& pose, float x_extent, float y_extent, float z_extent) noexcept(false)
Comment on lines +19 to +21
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👀 are we not going to construct FMB on the device?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As of the current pseudocode, we are initializing it in host, but we could totally make this a CUDA CALLABLE too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I think host only should be fine for now.

: pose_{pose} {
if (x_extent <= 0 || y_extent <= 0 || z_extent <= 0)
throw std::domain_error("a metaball cannot have negative extent");
extent_ = {x_extent, y_extent, z_extent};
}

CUDA_CALLABLE Pose get_pose() const {
return pose_;
}
CUDA_CALLABLE float3 get_extent() const {
return extent_;
}

struct FMB {
Pose pose; // mean + orientation
float3 extent;
CUDA_CALLABLE float quadratic_form(const Vec3D) const;
};

template <template <typename> class containter_template>
class FMBs {
template <MemoryLocation location>
class FMBScene {
private:
containter_template<FMB> fmbs_;
containter_template<float> log_weights_;
FMB* fmbs_;
float* log_weights_;
size_t size_;

public:
FMBs(uint32_t size) : fmbs_(size), log_weights_(size) {
// TODO: set all log_weights_ to 0
__host__ FMBScene(size_t size);

__host__ ~FMBScene();

CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator[](const uint32_t i) {
return cuda::std::tie(fmbs_[i], log_weights_[i]);
}
CUDA_CALLABLE const containter_template<FMB>& get_all_fmbs() const {
return fmbs_;

CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator[](const uint32_t i) const {
return cuda::std::tie(fmbs_[i], log_weights_[i]);
}

class Iterator {
private:
FMB* fmb_ptr_;
float* log_weight_ptr_;

public:
CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr)
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator*() {
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
}
CUDA_CALLABLE bool operator!=(const Iterator& other) const {
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
}
CUDA_CALLABLE Iterator& operator++() {
fmb_ptr_++, log_weight_ptr_++;
return *this;
}
};

class ConstIterator {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is ver thoughtful. I like that we're using const whenever possible :).

One pattern I've seen people using is to make the pointer type generic so you don't have to define the const/non-const version twice... though it might not worth the headache at this point haha.

private:
const FMB* fmb_ptr_;
const float* log_weight_ptr_;

public:
CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr)
: fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {}
CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator*() const {
return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_);
}
CUDA_CALLABLE bool operator!=(const ConstIterator& other) const {
return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_;
}
CUDA_CALLABLE ConstIterator& operator++() {
fmb_ptr_++, log_weight_ptr_++;
return *this;
}
};

CUDA_CALLABLE Iterator begin() {
return Iterator(fmbs_, log_weights_);
}
CUDA_CALLABLE Iterator end() {
return Iterator(fmbs_ + size_, log_weights_ + size_);
}
CUDA_CALLABLE ConstIterator begin() const {
return ConstIterator(fmbs_, log_weights_);
}
CUDA_CALLABLE ConstIterator end() const {
return ConstIterator(fmbs_ + size_, log_weights_ + size_);
}
CUDA_CALLABLE const FMB& get_fmb(uint32_t idx) const {
return fmbs_[idx];
}
CUDA_CALLABLE size_t size() const {
return size_;
}
};
4 changes: 2 additions & 2 deletions genmetaballs/src/cuda/core/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ __global__ render_kernel(const Getter fmb_getter, const Blender blender,
float w0 = 0.0f, tf = 0.0f, sumexpd = 0.0f;
for (const auto& fmb : fmb_getter->get_metaballs(ray)) {
const auto& [t, d] = Intersector::intersect(fmb, ray);
w = blender->blend(t, d);
sumexpd += exp(d);
w = blender->blend(t, d, fmb, ray);
sumexpd += exp(d); // numerically unstable. use logsumexp
tf += t;
w0 += w;
}
Expand Down
13 changes: 6 additions & 7 deletions genmetaballs/src/cuda/core/getter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,15 @@
#include "utils.cuh"

// This is the dummy version of getter, where all FMBs are relevant to any ray
template <template <typename> class containter_template>
template <MemoryLocation location>
struct AllGetter {
const FMBs<containter_template>& fmbs;
Pose extr; // Current assumption: rays are in camera frame
FMBScene<location>& scene;
Pose& extr; // Current assumption: rays are in camera frame

CUDA_CALLABLE AllGetter(const FMBs<containter_template>& fmbs, const Pose& extr)
: fmbs(fmbs), extr(extr) {}
CUDA_CALLABLE AllGetter(FMBScene<location>& scene, Pose& extr) : scene(scene), extr(extr) {}

// It does not bother using the ray, because it simply returns all FMBs
CUDA_CALLABLE const containter_template<FMB>& get_metaballs(const Ray& ray) const {
return fmbs.get_all_fmbs();
CUDA_CALLABLE FMBScene<location>& get_metaballs(const Ray& ray) const {
return scene;
}
};
3 changes: 0 additions & 3 deletions genmetaballs/src/genmetaballs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
from .gpu_add import gpu_add

__all__ = ["gpu_add"]
3 changes: 2 additions & 1 deletion genmetaballs/src/genmetaballs/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Literal

from genmetaballs._genmetaballs_bindings import geometry
from genmetaballs._genmetaballs_bindings import fmb, geometry
from genmetaballs._genmetaballs_bindings.blender import (
FourParameterBlender,
ThreeParameterBlender,
Expand Down Expand Up @@ -51,6 +51,7 @@ def make_image(height: int, width: int, device: DeviceType) -> CPUImage | GPUIma
"array2d_float",
"ZeroParameterConfidence",
"TwoParameterConfidence",
"fmb",
"geometry",
"Camera",
"Intrinsics",
Expand Down
5 changes: 0 additions & 5 deletions genmetaballs/src/genmetaballs/gpu_add.py

This file was deleted.

31 changes: 31 additions & 0 deletions tests/cpp_tests/test_fmb.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include <cstdint>
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include "core/fmb.cuh"
#include "core/utils.cuh"

__global__ void dummy_kernel(FMBScene<MemoryLocation::DEVICE>& scene, int* num_fmbs) {

int _num_fmbs = 0;

for (auto [fmb, w] : scene) {
_num_fmbs += 1;
}

*num_fmbs = _num_fmbs;
}

TEST(FMBTests, KernelRangeBasedForLoopSmokeTest) {

FMBScene<MemoryLocation::DEVICE> dummy_scene(10);
thrust::device_vector<int> device_res(1);

dummy_kernel<<<1, 1>>>(dummy_scene, thrust::raw_pointer_cast(device_res.data()));

thrust::host_vector<int> host_res = device_res;

EXPECT_EQ(host_res[0], 10);
}
Loading