Skip to content

Commit c34e2f3

Browse files
authored
Port point-to-point-cloud distance algorithm to CUDA (#4230)
* WIP: Port point-to-point cloud algorithms to CUDA * Fix * Fix * Add heap bytes functions * Add comment * Add option to skip point with same index * WIP: Add generalized interface * WIP: Change API * WIP: Add progress callback * Add comments * Fix build * WIP: Add unit tests for MRCuda * Fix error * Fix tests * Increase verbosity * Refactor * Fix GCC build * Fix GCC build * Fix VS project * Refactor * Fix bindings build * Fix MSVC build * Fix build * Fix build * Fix MSVC build * Fix VS project
1 parent c26ae4c commit c34e2f3

20 files changed

+635
-8
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,9 @@ ENDIF()
314314
IF(BUILD_TESTING)
315315
enable_testing()
316316
add_subdirectory(${PROJECT_SOURCE_DIR}/MRTest ./MRTest)
317+
IF(MESHLIB_BUILD_MRCUDA)
318+
add_subdirectory(${PROJECT_SOURCE_DIR}/MRTestCuda ./MRTestCuda)
319+
ENDIF()
317320
IF(MESHLIB_EXPERIMENTAL_BUILD_C_BINDING)
318321
add_subdirectory(${PROJECT_SOURCE_DIR}/MRTestC ./MRTestC)
319322
ENDIF()

scripts/check_vcxproj.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,18 @@ def find_missing_entries(vcxproj_path):
3636
VCXPROJ_NAMESPACES,
3737
)
3838
}
39+
cuda_compiles = {
40+
item.attrib['Include'].lower()
41+
for item in project.iterfind(
42+
'msbuild:ItemGroup/msbuild:CudaCompile',
43+
VCXPROJ_NAMESPACES,
44+
)
45+
}
3946

4047
result = {
4148
'ClInclude': [],
4249
'ClCompile': [],
50+
'CudaCompile': [],
4351
}
4452
for path in vcxproj_dir.iterdir():
4553
name, suffix = path.name.lower(), path.suffix.lower()
@@ -48,22 +56,23 @@ def find_missing_entries(vcxproj_path):
4856
if suffix in {".cpp"}:
4957
if name not in compiles:
5058
result['ClCompile'].append(path)
51-
elif suffix in {".h", ".hpp"}:
59+
elif suffix in {".h", ".hpp", ".cuh"}:
5260
if name not in includes:
5361
result['ClInclude'].append(path)
62+
elif suffix in {".cu"}:
63+
if name not in cuda_compiles:
64+
result['CudaCompile'].append(path)
5465

5566
return result
5667

5768

5869
def process_file(vcxproj_path):
5970
result = find_missing_entries(vcxproj_path)
6071
ok = True
61-
for path in result['ClInclude']:
62-
print(f"{vcxproj_path}: missing ClInclude item: {path.name}", file=sys.stderr)
63-
ok = False
64-
for path in result['ClCompile']:
65-
print(f"{vcxproj_path}: missing ClCompile entry: {path.name}", file=sys.stderr)
66-
ok = False
72+
for group_name, group in result.items():
73+
for path in group:
74+
print(f"{vcxproj_path}: missing {group_name} item: {path.name}", file=sys.stderr)
75+
ok = False
6776
return ok
6877

6978

source/MRCuda/MRCuda.vcxproj

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
<CudaCompile Include="MRCudaSolarRadiation.cu" />
9393
<CudaCompile Include="MRCudaTest.cu" />
9494
<CudaCompile Include="MRCudaContoursDistanceMap.cu" />
95+
<CudaCompile Include="MRCudaPointsProject.cu" />
9596
</ItemGroup>
9697
<ItemGroup>
9798
<ClInclude Include="exports.h" />
@@ -114,6 +115,11 @@
114115
<ClInclude Include="MRCudaTest.cuh" />
115116
<ClInclude Include="MRCudaTest.h" />
116117
<ClInclude Include="MRCudaContoursDistanceMap.h" />
118+
<ClInclude Include="MRCudaMath.h" />
119+
<ClInclude Include="MRCudaPointCloud.h" />
120+
<ClInclude Include="MRCudaPointCloud.cuh" />
121+
<ClInclude Include="MRCudaPointsProject.h" />
122+
<ClInclude Include="MRCudaPointsProject.cuh" />
117123
</ItemGroup>
118124
<ItemGroup>
119125
<ClCompile Include="MRCudaBasic.cpp" />
@@ -124,6 +130,9 @@
124130
<ClCompile Include="MRCudaPointsToMeshProjector.cpp" />
125131
<ClCompile Include="MRCudaSolarRadiation.cpp" />
126132
<ClCompile Include="MRCudaTest.cpp" />
133+
<ClCompile Include="MRCudaMath.cpp" />
134+
<ClCompile Include="MRCudaPointCloud.cpp" />
135+
<ClCompile Include="MRCudaPointsProject.cpp" />
127136
</ItemGroup>
128137
<ItemGroup>
129138
<ProjectReference Include="..\MRMesh\MRMesh.vcxproj">

source/MRCuda/MRCudaFloat.cuh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#pragma once
2+
23
#include "cuda_runtime.h"
34

5+
#include <cassert>
6+
#include <cmath>
7+
48
namespace MR
59
{
610

source/MRCuda/MRCudaMath.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include "MRCudaMath.h"
2+
#include "MRCudaMath.cuh"
3+
4+
#include "MRMesh/MRAffineXf3.h"
5+
6+
namespace MR::Cuda
7+
{
8+
9+
// Converts a CPU-side 3D vector into a CUDA float3 by copying its x, y and z components.
float3 fromVec( const Vector3f& v )
{
    float3 out{};
    out.x = v.x;
    out.y = v.y;
    out.z = v.z;
    return out;
}
17+
18+
// Converts a CPU-side affine transformation into the GPU-side Matrix4 struct.
// When xf compares equal to a default-constructed AffineXf3f, only the isIdentity flag is set
// and the remaining Matrix4 members keep their default values.
// Otherwise the x, y, z members of xf.A and the xf.b member are converted with fromVec
// and isIdentity is cleared.
Matrix4 fromXf( const MR::AffineXf3f& xf )
{
    const bool isDefaultXf = ( xf == AffineXf3f{} );
    if ( !isDefaultXf )
    {
        return Matrix4 {
            .x = fromVec( xf.A.x ),
            .y = fromVec( xf.A.y ),
            .z = fromVec( xf.A.z ),
            .b = fromVec( xf.b ),
            .isIdentity = false,
        };
    }

    return Matrix4 { .isIdentity = true };
}
31+
32+
} // namespace MR::Cuda

source/MRCuda/MRCudaMath.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
#ifndef MR_PARSING_FOR_PB11_BINDINGS
3+
4+
#include "exports.h"
5+
6+
#include "MRMesh/MRMeshFwd.h"
7+
8+
struct float3;
9+
10+
namespace MR::Cuda
11+
{
12+
13+
// structs from MRCudaMath.cuh
14+
struct Matrix4;
15+
16+
// copy from CPU to GPU structs
17+
/// converts a CPU-side 3D vector to the CUDA float3 type (component-wise copy)
MRCUDA_API float3 fromVec( const Vector3f& v );
18+
/// converts a CPU-side affine transformation to the GPU-side Matrix4 struct
MRCUDA_API Matrix4 fromXf( const MR::AffineXf3f& xf );
19+
20+
} // namespace MR::Cuda
21+
#endif

source/MRCuda/MRCudaPointCloud.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#include "MRCudaPointCloud.h"
2+
#include "MRCudaPointCloud.cuh"
3+
4+
#include "MRMesh/MRAABBTreePoints.h"
5+
#include "MRMesh/MRPointCloud.h"
6+
7+
namespace MR::Cuda
8+
{
9+
10+
// Uploads the point cloud data required by the GPU algorithms and returns its holder.
// Copied data: the nodes of the cloud's AABB tree and the tree's ordered points.
// If copyNormals is set, normals are copied as well: from *normals when that pointer is non-null,
// otherwise from pc.normals.
// Any failing copy makes CUDA_LOGE_RETURN_UNEXPECTED leave the function early.
Expected<std::unique_ptr<PointCloudDataHolder>> copyDataFrom( const PointCloud& pc, bool copyNormals,
    const std::vector<Vector3f>* normals )
{
    const auto& aabbTree = pc.getAABBTree();
    const auto& treeNodes = aabbTree.nodes();
    const auto& orderedPoints = aabbTree.orderedPoints();

    auto holder = std::make_unique<PointCloudDataHolder>();

    CUDA_LOGE_RETURN_UNEXPECTED( holder->nodes.fromVector( treeNodes.vec_ ) );
    CUDA_LOGE_RETURN_UNEXPECTED( holder->points.fromVector( orderedPoints ) );
    if ( copyNormals )
    {
        // explicitly passed normals take precedence over the ones stored in the cloud
        const auto& sourceNormals = normals ? *normals : pc.normals.vec_;
        CUDA_LOGE_RETURN_UNEXPECTED( holder->normals.fromVector( sourceNormals ) );
    }

    return holder;
}
26+
27+
size_t pointCloudHeapBytes( const PointCloud& pc, bool copyNormals, const std::vector<Vector3f>* normals )
28+
{
29+
const auto& tree = pc.getAABBTree();
30+
const auto& nodes = tree.nodes();
31+
const auto& points = tree.orderedPoints();
32+
33+
return
34+
nodes.size() * sizeof( Node3 )
35+
+ points.size() * sizeof( OrderedPoint )
36+
+ ( copyNormals ? ( normals ? normals->size() : pc.normals.size() ) * sizeof( float3 ) : 0 )
37+
;
38+
}
39+
40+
} // namespace MR::Cuda

source/MRCuda/MRCudaPointCloud.cuh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#pragma once
2+
3+
#include "MRCudaBasic.cuh"
4+
#include "MRCudaMath.cuh"
5+
6+
namespace MR::Cuda
7+
{
8+
9+
// GPU-side point record, similar to MR::Point: a coordinate paired with an integer id
struct OrderedPoint
{
    // point coordinates
    float3 coord;
    // integer identifier stored next to the coordinates
    int id;
};
15+
16+
// point cloud data required for algorithms;
// holds raw read-only pointers only (see PointCloudDataHolder::data() for how they are filled)
struct PointCloudData
{
    // tree nodes
    const Node3* __restrict__ nodes;
    // ordered points
    const OrderedPoint* __restrict__ points;
    // normals
    const float3* __restrict__ normals;
};
23+
24+
// GPU memory holder for point cloud data
struct PointCloudDataHolder
{
    DynamicArray<Node3> nodes;
    DynamicArray<OrderedPoint> points;
    DynamicArray<float3> normals;

    // returns a PointCloudData filled with the data pointers of the three arrays
    [[nodiscard]] PointCloudData data() const
    {
        const Node3* nodesPtr = nodes.data();
        const OrderedPoint* pointsPtr = points.data();
        const float3* normalsPtr = normals.data();
        return PointCloudData { nodesPtr, pointsPtr, normalsPtr };
    }
};
40+
41+
} // namespace MR::Cuda

source/MRCuda/MRCudaPointCloud.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#pragma once
2+
#ifndef MR_PARSING_FOR_PB11_BINDINGS
3+
4+
#include "exports.h"
5+
6+
#include "MRMesh/MRMeshFwd.h"
7+
#include "MRMesh/MRExpected.h"
8+
9+
namespace MR::Cuda
10+
{
11+
12+
struct PointCloudDataHolder;
13+
14+
/// copy point cloud-related data to the GPU memory
15+
MRCUDA_API Expected<std::unique_ptr<PointCloudDataHolder>> copyDataFrom( const PointCloud& pc, bool copyNormals = false,
16+
const std::vector<Vector3f>* normals = nullptr );
17+
18+
/// return the amount of GPU memory required for \ref MR::Cuda::PointCloudDataHolder
19+
MRCUDA_API size_t pointCloudHeapBytes( const PointCloud& pc, bool copyNormals = false,
20+
const std::vector<Vector3f>* normals = nullptr );
21+
22+
} // namespace MR::Cuda
23+
#endif
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include "MRCudaPointsProject.cuh"
2+
#include "MRCudaPointsProject.h"
3+
4+
#include "MRCudaBasic.cuh"
5+
#include "MRCudaBasic.h"
6+
#include "MRCudaMath.h"
7+
#include "MRCudaPointCloud.h"
8+
9+
#include "MRMesh/MRAffineXf3.h"
10+
#include "MRMesh/MRBitSet.h"
11+
#include "MRMesh/MRChunkIterator.h"
12+
#include "MRMesh/MRProgressCallback.h"
13+
14+
static_assert( sizeof( MR::Cuda::PointsProjectionResult ) == sizeof( MR::PointsProjectionResult ) );
15+
16+
namespace MR::Cuda
17+
{
18+
19+
// One-shot convenience wrapper around PointsProjector:
// sets pointCloud as the reference cloud, computes the projections of the given points with the given
// settings and returns them by value.
// An error from either step (setPointCloud or findProjections) is returned instead of the results.
Expected<std::vector<MR::PointsProjectionResult>> findProjectionOnPoints( const PointCloud& pointCloud,
    const std::vector<Vector3f>& points, const FindProjectionOnPointsSettings& settings )
{
    std::vector<MR::PointsProjectionResult> results;
    PointsProjector projector;
    return projector.setPointCloud( pointCloud )
        .and_then( [&] { return projector.findProjections( results, points, settings ); } )
        // results is captured by reference, so a plain `return results;` would copy the whole vector;
        // it is not used after this point, so move it into the returned Expected instead
        .transform( [&] { return std::move( results ); } );
}
28+
29+
// Copies the data of the given point cloud to the GPU (via copyDataFrom with default arguments)
// and stores the resulting holder in data_.
// On failure the error from copyDataFrom is returned and data_ is left untouched.
Expected<void> PointsProjector::setPointCloud( const PointCloud& pointCloud )
{
    auto res = copyDataFrom( pointCloud );
    if ( !res )
        return unexpected( std::move( res.error() ) );

    data_ = std::move( *res );
    return {};
}
41+
42+
// Computes projections of the given points onto the reference point cloud set by setPointCloud.
// results is resized to points.size() and filled chunk by chunk; the chunk size comes from
// maxBufferSize() for the current safe CUDA memory limit.
// If settings.valid is set, its bits are uploaded as 64-bit blocks and passed to the kernel.
// If settings.xf is set, it is converted with fromXf; otherwise a default-constructed Matrix4 is used.
// Progress: 0.60 is reported before the first chunk and the range [0.60, 1.00] is shared between chunks
// (0.33 of a chunk after the kernel launch, 1.00 after its results are copied back).
// Returns an error if no point cloud is set, if a CUDA call fails, or if the callback cancels the operation.
Expected<void> PointsProjector::findProjections( std::vector<MR::PointsProjectionResult>& results,
    const std::vector<Vector3f>& points, const FindProjectionOnPointsSettings& settings ) const
{
    if ( !data_ )
        return unexpected( "No reference point cloud is set" );

    // per point, the GPU needs room for one input coordinate and one result
    constexpr auto cBytesPerPoint = sizeof( float3 ) + sizeof( PointsProjectionResult );
    const auto pointCount = points.size();
    const auto chunkSize = maxBufferSize( getCudaSafeMemoryLimit(), pointCount, cBytesPerPoint );

    DynamicArray<float3> devPoints;
    CUDA_LOGE_RETURN_UNEXPECTED( devPoints.resize( chunkSize ) );

    DynamicArray<PointsProjectionResult> devResults;
    CUDA_LOGE_RETURN_UNEXPECTED( devResults.resize( chunkSize ) );

    results.resize( pointCount );

    DynamicArray<uint64_t> devValid;
    if ( settings.valid )
    {
        assert( points.size() <= settings.valid->size() );
        // flatten the bit set into its 64-bit blocks before uploading
        std::vector<uint64_t> validBlocks;
        boost::to_block_range( *settings.valid, std::back_inserter( validBlocks ) );
        CUDA_LOGE_RETURN_UNEXPECTED( devValid.fromVector( validBlocks ) );
    }
    // null tells the kernel that no validity mask was supplied
    const auto devValidPtr = settings.valid ? devValid.data() : nullptr;

    const auto devXf = settings.xf ? fromXf( *settings.xf ) : Matrix4{};

    if ( !reportProgress( settings.cb, 0.60f ) )
        return unexpectedOperationCanceled();

    const auto chunksProgress = subprogress( settings.cb, 0.60f, 1.00f );
    const auto chunkTotal = chunkCount( pointCount, chunkSize );
    size_t chunkIndex = 0;

    for ( const auto [offset, size] : splitByChunks( pointCount, chunkSize ) )
    {
        const auto chunkProgress = subprogress( chunksProgress, chunkIndex, chunkTotal );
        ++chunkIndex;

        CUDA_LOGE_RETURN_UNEXPECTED( devPoints.copyFrom( points.data() + offset, size ) );

        findProjectionOnPointsKernel(
            devResults.data(), data_->data(), devPoints.data(), devValidPtr, devXf,
            settings.upDistLimitSq, settings.loDistLimitSq, settings.skipSameIndex,
            size, offset );
        // a kernel launch returns no status itself, so query the last error explicitly
        CUDA_LOGE_RETURN_UNEXPECTED( cudaGetLastError() );
        if ( !reportProgress( chunkProgress, 0.33f ) )
            return unexpectedOperationCanceled();

        CUDA_LOGE_RETURN_UNEXPECTED( devResults.copyTo( results.data() + offset, size ) );
        if ( !reportProgress( chunkProgress, 1.00f ) )
            return unexpectedOperationCanceled();
    }

    return {};
}
95+
96+
size_t findProjectionOnPointsHeapBytes( const PointCloud& pointCloud, size_t pointsCount )
97+
{
98+
constexpr size_t cMinCudaBufferSize = 1 << 24; // 16 MiB
99+
return
100+
pointCloudHeapBytes( pointCloud )
101+
+ std::min( ( sizeof( float3 ) + sizeof( PointsProjectionResult ) ) * pointsCount, cMinCudaBufferSize );
102+
}
103+
104+
} // namespace MR::Cuda

0 commit comments

Comments
 (0)