Skip to content

Commit

Permalink
Merge pull request #23 from koide3/overlap
Browse files Browse the repository at this point in the history
batched overlap on GPU
  • Loading branch information
koide3 authored Aug 29, 2024
2 parents f301d11 + 091ba05 commit e1a5407
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 27 deletions.
89 changes: 62 additions & 27 deletions include/gtsam_points/types/gaussian_voxelmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ double overlap(const GaussianVoxelMap::ConstPtr& target, const PointCloud::Const

/**
* @brief Calculate the fraction of points fell in targets' voxels
* @param target Set of target voxelized frames
* @param source Source frame
* @param delta Set of T_target_source
* @return Overlap rate
* @param target Set of target voxelized frames
* @param source Source frame
* @param Ts_target_source Set of T_target_source
* @return Overlap rate
*/
double overlap(
const std::vector<GaussianVoxelMap::ConstPtr>& targets,
Expand All @@ -60,69 +60,104 @@ overlap(const std::vector<VoxelMapPtr>& targets, const PointCloud::ConstPtr& sou
/**
* @brief Calculate the fraction of points fell in target voxels on GPU
* @note Source points and target voxelmap must be pre-allocated on GPU.
* @param target Target voxelmap
* @param source Source frame
* @param delta_gpu T_target_source (on GPU memory)
* @return Overlap rate
* @param target Target voxelmap
* @param source Source frame
* @param T_target_source_gpu T_target_source (on GPU memory)
* @return Overlap rate
*/
double overlap_gpu(
const GaussianVoxelMap::ConstPtr& target,
const PointCloud::ConstPtr& source,
const Eigen::Isometry3f* delta_gpu,
const Eigen::Isometry3f* T_target_source_gpu,
CUstream_st* stream = 0);

/**
* @brief Calculate the fraction of points fell in target voxels on GPU
* @note Source points and target voxelmap must be pre-allocated on GPU.
* @param target Target voxelmap
* @param source Source frame
* @param delta T_target_source
* @return Overlap rate
* @param target Target voxelmap
* @param source Source frame
* @param T_target_source T_target_source
* @return Overlap rate
*/
double
overlap_gpu(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta, CUstream_st* stream = 0);
double overlap_gpu(
const GaussianVoxelMap::ConstPtr& target,
const PointCloud::ConstPtr& source,
const Eigen::Isometry3d& T_target_source,
CUstream_st* stream = 0);

/// @brief Calculate the fraction of points fell in target voxels on GPU
double overlap_gpu(const PointCloud::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta, CUstream_st* stream = 0);
double overlap_gpu(
const PointCloud::ConstPtr& target,
const PointCloud::ConstPtr& source,
const Eigen::Isometry3d& T_target_source,
CUstream_st* stream = 0);

/**
* @brief Calculate the fraction of points fell in targets' voxels on GPU
* @note Source points and targets' voxelmap must be pre-allocated on GPU.
* @param targets Set of target voxelized frames
* @param source Source frame
* @param deltas Set of T_target_source
* @return Overlap rate
* @param targets Set of target voxelized frames
* @param source Source frame
* @param Ts_target_source Set of T_target_source
* @return Overlap rate
*/
double overlap_gpu(
const std::vector<GaussianVoxelMap::ConstPtr>& targets,
const PointCloud::ConstPtr& source,
const std::vector<Eigen::Isometry3d>& deltas,
const std::vector<Eigen::Isometry3d>& Ts_target_source,
CUstream_st* stream = 0);

/**
* @breif Batch calculation of the fraction of points fell in targets' voxels on GPU
* @note Source points and targets' voxelmap must be pre-allocated on GPU.
* @param targets Set of target voxelized frames
* @param sources Set of source frames
* @param Ts_target_source Set of T_target_source
* @return Overlap rate
*/
std::vector<double> overlap_gpu(
const std::vector<GaussianVoxelMap::ConstPtr>& targets,
const std::vector<PointCloud::ConstPtr>& sources,
const std::vector<Eigen::Isometry3d>& Ts_target_source,
CUstream_st* stream = 0);

/// @brief Calculate the fraction of points fell in targets' voxels on GPU
template <typename VoxelMapPtr>
std::enable_if_t<!std::is_same_v<VoxelMapPtr, GaussianVoxelMap::ConstPtr>, double> overlap_gpu(
const std::vector<VoxelMapPtr>& targets,
const PointCloud::ConstPtr& source,
const std::vector<Eigen::Isometry3d>& deltas,
const std::vector<Eigen::Isometry3d>& Ts_target_source,
CUstream_st* stream = 0) {
const std::vector<GaussianVoxelMap::ConstPtr> targets_(targets.begin(), targets.end());
return overlap_gpu(targets_, source, deltas, stream);
return overlap_gpu(targets_, source, Ts_target_source, stream);
}

/// @brief Calculate the fraction of points fell in targets' voxels on GPU
template <typename VoxelMapPtr, typename PointCloudPtr>
std::
enable_if_t<!std::is_same_v<VoxelMapPtr, GaussianVoxelMap::ConstPtr> || !std::is_same_v<PointCloudPtr, PointCloud::ConstPtr>, std::vector<double>>
overlap_gpu(
const std::vector<VoxelMapPtr>& targets,
const std::vector<PointCloudPtr>& sources,
const std::vector<Eigen::Isometry3d>& Ts_target_source,
CUstream_st* stream = 0) {
const std::vector<GaussianVoxelMap::ConstPtr> targets_(targets.begin(), targets.end());
const std::vector<PointCloud::ConstPtr> sources_(sources.begin(), sources.end());
return overlap_gpu(targets_, sources_, Ts_target_source, stream);
}

// Automatically select CPU or GPU method
double overlap_auto(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta);
double overlap_auto(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& T_target_source);

double overlap_auto(
const std::vector<GaussianVoxelMap::ConstPtr>& targets,
const PointCloud::ConstPtr& source,
const std::vector<Eigen::Isometry3d>& deltas);
const std::vector<Eigen::Isometry3d>& Ts_target_source);

template <typename VoxelMapPtr>
std::enable_if_t<!std::is_same_v<VoxelMapPtr, GaussianVoxelMap::ConstPtr>, double>
overlap_auto(const std::vector<VoxelMapPtr>& targets, const PointCloud::ConstPtr& source, const std::vector<Eigen::Isometry3d>& deltas) {
overlap_auto(const std::vector<VoxelMapPtr>& targets, const PointCloud::ConstPtr& source, const std::vector<Eigen::Isometry3d>& Ts_target_source) {
const std::vector<GaussianVoxelMap::ConstPtr> targets_(targets.begin(), targets.end());
return overlap_auto(targets_, source, deltas);
return overlap_auto(targets_, source, Ts_target_source);
}

} // namespace gtsam_points
77 changes: 77 additions & 0 deletions src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,81 @@ double overlap_gpu(
return static_cast<double>(num_inliers_cpu) / source->size();
}

std::vector<double> overlap_gpu(
const std::vector<GaussianVoxelMap::ConstPtr>& targets_,
const std::vector<PointCloud::ConstPtr>& sources,
const std::vector<Eigen::Isometry3d>& Ts_target_source_,
CUstream_st* stream) {
if (targets_.size() != sources.size()) {
std::cerr << "error: The number of target voxelmaps and source point clouds must be the same!!" << std::endl;
abort();
}

size_t max_num_points = 0;

std::vector<GaussianVoxelMapGPU::ConstPtr> targets(targets_.size());
for (int i = 0; i < targets_.size(); i++) {
targets[i] = std::dynamic_pointer_cast<const GaussianVoxelMapGPU>(targets_[i]);
if (!targets[i]) {
std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl;
}

if (!sources[i]->has_points_gpu()) {
std::cerr << "error: GPU source points have not been allocated!!" << std::endl;
}

max_num_points = std::max(max_num_points, sources[i]->size());
}

std::vector<Eigen::Isometry3f> h_deltas(Ts_target_source_.size());
std::transform(Ts_target_source_.begin(), Ts_target_source_.end(), h_deltas.begin(), [](const Eigen::Isometry3d& delta) {
return delta.cast<float>();
});

Eigen::Isometry3f* deltas;
check_error << cudaMallocAsync(&deltas, sizeof(Eigen::Isometry3f) * Ts_target_source_.size(), stream);
check_error << cudaMemcpyAsync(deltas, h_deltas.data(), sizeof(Eigen::Isometry3f) * Ts_target_source_.size(), cudaMemcpyHostToDevice, stream);

bool* overlap;
check_error << cudaMallocAsync(&overlap, sizeof(bool) * max_num_points, stream);

int* num_inliers;
check_error << cudaMallocAsync(&num_inliers, sizeof(float) * sources.size(), stream);

char* temp_storage = nullptr;
size_t temp_storage_bytes = 0;

for (int i = 0; i < targets_.size(); i++) {
const auto& source = sources[i];
const auto& target = targets[i];
thrust::transform(
thrust::cuda::par_nosync.on(stream),
thrust::device_ptr<Eigen::Vector3f>(source->points_gpu),
thrust::device_ptr<Eigen::Vector3f>(source->points_gpu) + source->size(),
thrust::device_ptr<bool>(overlap),
overlap_count_kernel(*target, thrust::device_ptr<const Eigen::Isometry3f>(deltas + i)));

if (i == 0) {
cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, overlap, num_inliers + i, source->size(), thrust::plus<int>(), 0, stream);
check_error << cudaMallocAsync(&temp_storage, temp_storage_bytes, stream);
}
cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, overlap, num_inliers + i, source->size(), thrust::plus<int>(), 0, stream);
}

std::vector<int> h_num_inliers(sources.size());
check_error << cudaMemcpyAsync(h_num_inliers.data(), num_inliers, sizeof(int) * sources.size(), cudaMemcpyDeviceToHost, stream);

std::vector<double> overlaps(sources.size());
for (int i = 0; i < sources.size(); i++) {
overlaps[i] = static_cast<double>(h_num_inliers[i]) / sources[i]->size();
}

check_error << cudaFreeAsync(deltas, stream);
check_error << cudaFreeAsync(overlap, stream);
check_error << cudaFreeAsync(temp_storage, stream);
check_error << cudaFreeAsync(num_inliers, stream);

return overlaps;
}

} // namespace gtsam_points

0 comments on commit e1a5407

Please sign in to comment.