-
Notifications
You must be signed in to change notification settings - Fork 0
MET-32: FMB #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MET-32: FMB #24
Changes from all commits
f1fdff1
f143eb9
827b5b1
e5876f6
b6de85f
17ff36c
8fb9bcd
39fbab7
19f9d12
0427c87
49ac9c0
63787b9
cacd2bc
bada4b9
946a9a2
287f387
27a4532
bd25da4
d039c25
8582c5f
8efc527
abcec38
267c723
0037377
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| #include "fmb.cuh" | ||
| #include "geometry.cuh" | ||
| #include "utils.cuh" | ||
|
|
||
| // Evaluates this metaball's quadratic form at `vec`: the point is shifted by the pose | ||
| // translation, rotated by the pose rotation, and then combined with a per-axis division | ||
| // by the extent (assuming `dot` is the usual dot product, this is sum(local_i^2 / extent_i)). | ||
| CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const { | ||
|     // Move the query point into the metaball's local frame: translate first, then rotate. | ||
|     const auto centered = vec - pose_.get_tran(); | ||
|     const auto local = pose_.get_rot().apply(centered); | ||
|     // Divide each local coordinate by the matching extent component. | ||
|     const Vec3D local_over_extent(local.x / extent_.x, local.y / extent_.y, local.z / extent_.z); | ||
|     return dot(local, local_over_extent); | ||
| } | ||
|
|
||
| // Host specialization: allocates `size` default-constructed FMBs and `size` log-weights | ||
| // on the host heap. The log-weights are value-initialized (`{}`), so every entry starts | ||
| // at 0.0f instead of holding an indeterminate value. | ||
| template <> | ||
| __host__ FMBScene<MemoryLocation::HOST>::FMBScene(size_t size) | ||
|     : fmbs_{new FMB[size]}, log_weights_{new float[size]{}}, size_{size} {} | ||
|
|
||
| // Device specialization: allocates room for `size` FMBs and `size` log-weights with | ||
| // cudaMalloc and zero-fills the log-weights. | ||
| // NOTE(review): the FMB storage is raw device memory; no FMB constructor runs on it, so | ||
| // the entries must be written before they are read. | ||
| template <> | ||
| __host__ FMBScene<MemoryLocation::DEVICE>::FMBScene(size_t size) | ||
|     : fmbs_{nullptr}, log_weights_{nullptr}, size_{size} { | ||
|     // Pointers start as nullptr so they never hold an indeterminate value if an | ||
|     // allocation below fails. | ||
|     CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB))); | ||
|     CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float))); | ||
|     // cudaMemset fills bytes; an all-zero bit pattern is exactly 0.0f for float. | ||
|     if (size > 0) { | ||
|         CUDA_CHECK(cudaMemset(log_weights_, 0, size * sizeof(float))); | ||
|     } | ||
| } | ||
|
|
||
| // Host specialization: releases the two heap arrays owned by the scene. | ||
| // NOTE(review): the class as shown declares no copy/move operations, so a copied scene | ||
| // would delete the same arrays again - confirm scenes are never copied. | ||
| template <> | ||
| __host__ FMBScene<MemoryLocation::HOST>::~FMBScene() { | ||
|     delete[] this->fmbs_; | ||
|     delete[] this->log_weights_; | ||
| } | ||
|
|
||
| // Device specialization: hands both device buffers back via cudaFree, each call checked | ||
| // by CUDA_CHECK. | ||
| // NOTE(review): the class as shown declares no copy/move operations, so a copied scene | ||
| // would free the same buffers again - confirm scenes are never copied. | ||
| template <> | ||
| __host__ FMBScene<MemoryLocation::DEVICE>::~FMBScene() { | ||
|     CUDA_CHECK(cudaFree(this->fmbs_)); | ||
|     CUDA_CHECK(cudaFree(this->log_weights_)); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,26 +1,116 @@ | ||
| #pragma once | ||
|
|
||
| #include <cuda/std/span> | ||
| #include <cuda/std/tuple> | ||
| #include <stdexcept> | ||
|
|
||
| #include "geometry.cuh" | ||
| #include "utils.cuh" | ||
|
|
||
| class FMB { | ||
| private: | ||
| // In Gaussian terms: | ||
| // - mean: pose.tran | ||
| // - cov: pose.rot.mat().inv() * diag(extent) * pose.rot.mat() | ||
| Pose pose_; | ||
| float3 extent_; | ||
|
|
||
| public: | ||
| FMB() : pose_{}, extent_{1.0f, 1.0f, 1.0f} {}; | ||
|
|
||
| // Builds a metaball from a pose and one extent per axis (host-only constructor). | ||
| // Throws std::domain_error unless all three extents are strictly positive. | ||
| FMB(const Pose& pose, float x_extent, float y_extent, float z_extent) noexcept(false) | ||
|
Comment on lines
+19
to
+21
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👀 are we not going to construct
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As of the current pseudocode, we are initializing it in host, but we could totally make this a CUDA CALLABLE too.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I think host only should be fine for now. |
||
|     : pose_{pose}, extent_{x_extent, y_extent, z_extent} { | ||
|     // Written as !(x > 0) rather than (x <= 0) so that NaN extents are rejected as well; | ||
|     // zero is rejected too, which is why the message says "strictly positive". | ||
|     if (!(x_extent > 0.0f) || !(y_extent > 0.0f) || !(z_extent > 0.0f)) | ||
|         throw std::domain_error("a metaball must have strictly positive extents"); | ||
| } | ||
|
|
||
| // Returns a copy of the stored pose. | ||
| CUDA_CALLABLE Pose get_pose() const { | ||
|     return this->pose_; | ||
| } | ||
| // Returns a copy of the stored per-axis extent. | ||
| CUDA_CALLABLE float3 get_extent() const { | ||
|     return this->extent_; | ||
| } | ||
|
|
||
| struct FMB { | ||
| Pose pose; // mean + orientation | ||
| float3 extent; | ||
| CUDA_CALLABLE float quadratic_form(const Vec3D) const; | ||
| }; | ||
|
|
||
| template <template <typename> class containter_template> | ||
| class FMBs { | ||
| template <MemoryLocation location> | ||
| class FMBScene { | ||
| private: | ||
| containter_template<FMB> fmbs_; | ||
| containter_template<float> log_weights_; | ||
| FMB* fmbs_; | ||
| float* log_weights_; | ||
| size_t size_; | ||
|
|
||
| public: | ||
| FMBs(uint32_t size) : fmbs_(size), log_weights_(size) { | ||
| // TODO: set all log_weights_ to 0 | ||
| __host__ FMBScene(size_t size); | ||
|
|
||
| __host__ ~FMBScene(); | ||
|
|
||
| // Returns mutable references to the i-th FMB and its log-weight as a tuple. | ||
| // No bounds check is performed on `i`. | ||
| CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator[](const uint32_t i) { | ||
|     FMB& fmb = fmbs_[i]; | ||
|     float& log_weight = log_weights_[i]; | ||
|     return cuda::std::tie(fmb, log_weight); | ||
| } | ||
| CUDA_CALLABLE const containter_template<FMB>& get_all_fmbs() const { | ||
| return fmbs_; | ||
|
|
||
| // Returns read-only references to the i-th FMB and its log-weight as a tuple. | ||
| // No bounds check is performed on `i`. | ||
| CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator[](const uint32_t i) const { | ||
|     const FMB& fmb = fmbs_[i]; | ||
|     const float& log_weight = log_weights_[i]; | ||
|     return cuda::std::tie(fmb, log_weight); | ||
| } | ||
|
|
||
| // Mutable cursor that walks an FMB pointer and a log-weight pointer in lockstep. | ||
| // Provides only what a range-based for loop needs: operator*, operator!= and pre-increment. | ||
| class Iterator { | ||
| private: | ||
|     FMB* fmb_ptr_; | ||
|     float* log_weight_ptr_; | ||
|
|
||
| public: | ||
|     // Starts the cursor at the given pair of element addresses. | ||
|     CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr) | ||
|         : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} | ||
|     // Yields references to the FMB and log-weight currently pointed at. | ||
|     CUDA_CALLABLE cuda::std::tuple<FMB&, float&> operator*() { | ||
|         FMB& fmb = *fmb_ptr_; | ||
|         float& log_weight = *log_weight_ptr_; | ||
|         return cuda::std::tie(fmb, log_weight); | ||
|     } | ||
|     // Two cursors differ as soon as either of their pointers differs. | ||
|     CUDA_CALLABLE bool operator!=(const Iterator& other) const { | ||
|         return !(fmb_ptr_ == other.fmb_ptr_ && log_weight_ptr_ == other.log_weight_ptr_); | ||
|     } | ||
|     // Advances both pointers by one element. | ||
|     CUDA_CALLABLE Iterator& operator++() { | ||
|         ++fmb_ptr_; | ||
|         ++log_weight_ptr_; | ||
|         return *this; | ||
|     } | ||
| }; | ||
|
|
||
| class ConstIterator { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is very thoughtful. I like that we're using […]. One pattern I've seen people use is to make the pointer type generic so you don't have to define the const/non-const versions twice... though it might not be worth the headache at this point haha. |
||
| private: | ||
| const FMB* fmb_ptr_; | ||
| const float* log_weight_ptr_; | ||
|
|
||
| public: | ||
| CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr) | ||
| : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} | ||
| CUDA_CALLABLE cuda::std::tuple<const FMB&, const float&> operator*() const { | ||
| return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); | ||
| } | ||
| CUDA_CALLABLE bool operator!=(const ConstIterator& other) const { | ||
| return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; | ||
| } | ||
| CUDA_CALLABLE ConstIterator& operator++() { | ||
| fmb_ptr_++, log_weight_ptr_++; | ||
| return *this; | ||
| } | ||
| }; | ||
|
|
||
| // Mutable iterator positioned at the first FMB / log-weight pair. | ||
| CUDA_CALLABLE Iterator begin() { | ||
|     return Iterator{fmbs_, log_weights_}; | ||
| } | ||
| // Mutable iterator positioned one past the last FMB / log-weight pair. | ||
| CUDA_CALLABLE Iterator end() { | ||
|     FMB* const fmb_end = fmbs_ + size_; | ||
|     float* const log_weight_end = log_weights_ + size_; | ||
|     return Iterator(fmb_end, log_weight_end); | ||
| } | ||
| // Read-only iterator positioned at the first FMB / log-weight pair. | ||
| CUDA_CALLABLE ConstIterator begin() const { | ||
|     return ConstIterator{fmbs_, log_weights_}; | ||
| } | ||
| // Read-only iterator positioned one past the last FMB / log-weight pair. | ||
| CUDA_CALLABLE ConstIterator end() const { | ||
|     const FMB* const fmb_end = fmbs_ + size_; | ||
|     const float* const log_weight_end = log_weights_ + size_; | ||
|     return ConstIterator(fmb_end, log_weight_end); | ||
| } | ||
| // Read-only access to the FMB at position `idx`; no bounds check is performed. | ||
| CUDA_CALLABLE const FMB& get_fmb(uint32_t idx) const { | ||
|     return *(fmbs_ + idx); | ||
| } | ||
| // Number of FMB / log-weight pairs the scene was constructed with. | ||
| CUDA_CALLABLE size_t size() const { | ||
|     return this->size_; | ||
| } | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +0,0 @@ | ||
| from .gpu_add import gpu_add | ||
|
|
||
| __all__ = ["gpu_add"] | ||
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| #include <cstdint> | ||
| #include <cuda_runtime.h> | ||
| #include <gtest/gtest.h> | ||
| #include <thrust/device_vector.h> | ||
| #include <thrust/host_vector.h> | ||
|
|
||
| #include "core/fmb.cuh" | ||
| #include "core/utils.cuh" | ||
|
|
||
| // Counts the entries visited by a range-based for loop over the scene and stores the | ||
| // count in `*num_fmbs`. | ||
| // Preconditions: `scene` must point to an FMBScene object that is itself stored in | ||
| // device-accessible memory, because the kernel dereferences it; the address of a | ||
| // host-side object is not valid here. `num_fmbs` must be device-writable. | ||
| // Launch layout: meant for <<<1, 1>>>; every launched thread writes the same count. | ||
| __global__ void dummy_kernel(FMBScene<MemoryLocation::DEVICE>* scene, int* num_fmbs) { | ||
|
|
||
|     int _num_fmbs = 0; | ||
|
|
||
|     for (auto [fmb, w] : *scene) { | ||
|         _num_fmbs += 1; | ||
|     } | ||
|
|
||
|     *num_fmbs = _num_fmbs; | ||
| } | ||
|
|
||
| // Smoke test: a range-based for loop over a 10-element device scene, run inside a | ||
| // kernel, must visit exactly 10 entries. | ||
| TEST(FMBTests, KernelRangeBasedForLoopSmokeTest) { | ||
|
|
||
|     using DeviceScene = FMBScene<MemoryLocation::DEVICE>; | ||
|     DeviceScene dummy_scene(10); | ||
|     thrust::device_vector<int> device_res(1); | ||
|
|
||
|     // `dummy_scene` lives on the host stack (only the buffers it points to are on the | ||
|     // device), so its bytes are copied into device memory for the kernel to read. | ||
|     // The host object keeps ownership of the buffers; the device copy is never destructed. | ||
|     DeviceScene* device_scene = nullptr; | ||
|     ASSERT_EQ(cudaMalloc(&device_scene, sizeof(DeviceScene)), cudaSuccess); | ||
|     ASSERT_EQ(cudaMemcpy(device_scene, &dummy_scene, sizeof(DeviceScene), cudaMemcpyHostToDevice), | ||
|               cudaSuccess); | ||
|
|
||
|     dummy_kernel<<<1, 1>>>(device_scene, thrust::raw_pointer_cast(device_res.data())); | ||
|     // Launch-configuration errors surface here; in-kernel faults surface at the sync. | ||
|     ASSERT_EQ(cudaGetLastError(), cudaSuccess); | ||
|     ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); | ||
|
|
||
|     thrust::host_vector<int> host_res = device_res; | ||
|
|
||
|     EXPECT_EQ(host_res[0], 10); | ||
|     EXPECT_EQ(cudaFree(device_scene), cudaSuccess); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice set of helpers! We should probably do this with all our structs tbh, would be much cleaner