Skip to content

Commit

Permalink
Merge branch 'feature/numba-cuda-contact-point' into test/auto-unit-test
Browse files Browse the repository at this point in the history
  • Loading branch information
qbp758 committed Oct 2, 2023
2 parents eb3d2b0 + 7037884 commit f2c0778
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 124 deletions.
295 changes: 185 additions & 110 deletions python/rainbow/cuda/collision_detection/compute_contacts.py

Large diffs are not rendered by default.

51 changes: 38 additions & 13 deletions python/rainbow/simulators/prox_soft_bodies/collision_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,15 @@ def _compute_contacts(engine, stats, bodyA, bodyB, results, debug_on):


def _uniform_padding(listss, padding_value):
""" Pad the listss to the same length, means that the length of each sub list is the same.
Args:
listss (List[List]): A nested list.
padding_value (DataType): A padding value, which is used to pad the list.
Returns:
List[List]: The padded listss, in which every sub list has the same length.
"""
valid_lists = [l for l in listss if l is not None]

if len(valid_lists) == 0:
Expand All @@ -258,6 +267,15 @@ def _uniform_padding(listss, padding_value):


def _assemble_body_data_to_gpu(data_lists, bodyA, bodyB, triA, triB):
""" Assemble body data to a data list for GPU computing.
Args:
data_lists (List): The data list for GPU computing.
bodyA (SoftBody): A SofyBody instance.
bodyB (SoftBody): A SofyBody instance.
triA (int): The index of triangle in bodyA.
triB (int): The index of triangle in bodyB.
"""
data_lists['bodyA_idxs'].append(bodyA.idx)
data_lists['bodyB_idxs'].append(bodyB.idx)
data_lists['overlap_results'].append((triA, triB))
Expand All @@ -277,14 +295,21 @@ def _assemble_body_data_to_gpu(data_lists, bodyA, bodyB, triA, triB):
data_lists['B_grid_Js'].append(bodyB.grid.J)
data_lists['B_grid_Ks'].append(bodyB.grid.K)


def _contact_point_gpu(overlaps, engine, stats, debug_on):
""" The GPU version of contact point computing, it flattens the data and send to GPU, then call the kernel function.
contact_optimization_timer = None
model_space_update_timer = None
Args:
overlaps (dict): A dictionary of triangles from one body that overlaps another body.
engine (Engine): The current engine instance we are working with.
stats (dict): A dictionary where to add more profiling and timing measurements.
debug_on (bool): Boolean flag for toggling debug (aka profiling) info on and off.
Returns:
dict: A dictionary with profiling and timing measurements.
"""
contact_point_generation_timer = None
if debug_on:
# model_space_update_timer = Timer("model_space_update")
# contact_optimization_timer = Timer("contact_optimization")
contact_point_generation_timer = Timer("contact_point_generation")

data_lists = {
Expand Down Expand Up @@ -348,14 +373,17 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
'B_grid_Ks': np.int32
}

# copy data to GPU
d_data = {}
for key, data in data_lists.items():
array_data = np.array(data, dtype=type_map.get(key))
d_data[f'd_{key}'] = cuda.to_device(array_data)

# setting up GPU computing (grid and block)
threads_per_block = engine.params.gpu_grid_size
blocks_per_grid = (data_length + threads_per_block - 1) // threads_per_block

# setting up result from GPU (data type and size)
result_dtype = np.dtype([
('idx_tetB', np.int32),
('idx_tetA', np.int32),
Expand All @@ -367,6 +395,7 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
])
result_gpu = cuda.device_array(data_length, dtype=result_dtype)

# call GPU kernel function
CUDA_COMPUTE_CONTACTS.contact_points_computing_kernel[blocks_per_grid, threads_per_block](
d_data['d_bodyA_idxs'], d_data['d_bodyB_idxs'], d_data['d_overlap_results'],
d_data['d_B_values'], d_data['d_A_owners'], d_data['d_B_owners'],
Expand All @@ -378,8 +407,9 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
engine.params.contact_optimization_tolerance,
engine.params.envelope, 0.5, result_gpu)

cuda.synchronize() ## wait for GPU data
result_to_cpu = result_gpu.copy_to_host() ## copy GPU data to CPU
# wait for GPU data and copy to CPU
cuda.synchronize()
result_to_cpu = result_gpu.copy_to_host()

## generate contact points
for res in result_to_cpu:
Expand All @@ -394,12 +424,6 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
engine.contact_points.append(cp)

if debug_on:
# if "model_space_update" not in stats:
# stats["model_space_update"] = 0
# stats["model_space_update"] += model_space_update_timer.total
# if "contact_optimization" not in stats:
# stats["contact_optimization"] = 0
# stats["contact_optimization"] += contact_optimization_timer.total
if "contact_point_generation" not in stats:
stats["contact_point_generation"] = 0
stats["contact_point_generation"] += contact_point_generation_timer.total
Expand Down Expand Up @@ -430,8 +454,9 @@ def _contact_determination(overlaps, engine, stats, debug_on):
if debug_on:
contact_determination_timer.end()
stats["contact_determination"] = contact_determination_timer.elapsed

return stats

# Compute contact points on the CPU if the GPU is not available or the flag is False
for key, results in overlaps.items():
# TODO 2022-12-31 Kenny: The code currently computes a lot of redundant contacts due
# to BVH traversal may return a triangle as part of several pairs. We only need
Expand Down
2 changes: 1 addition & 1 deletion python/rainbow/simulators/prox_soft_bodies/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def __init__(self):
0.1 # Any geometry within this distance generates a contact point.
)
self.resolution = 64 # The number of grid cells along each axis in the signed distance fields.
self.use_gpu = True # Boolean flag that indicates if we should use the GPU or not.
self.use_gpu = False # Boolean flag that indicates if we should use the GPU or not.
self.gpu_grid_size = 256 # The number of threads per block to use on the GPU.


Expand Down
1 change: 1 addition & 0 deletions python/unit_tests/test_cuda_geometry_barycentric.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import rainbow.cuda.unit_tests.test_geometry_barycentric_kernel as TEST_BCK


@unittest.skipIf(not cuda.is_available(), "CUDA not available")
class TestGrid3Cuda(unittest.TestCase):

def test_compute_barycentric_tetrahedron(self):
Expand Down
1 change: 1 addition & 0 deletions python/unit_tests/test_cuda_geometry_grid3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import rainbow.cuda.unit_tests.test_geometry_grid3_kernel as TEST_GRID3K


@unittest.skipIf(not cuda.is_available(), "CUDA not available")
def simpelfunc(coord):
_, _, z = coord[0], coord[1], coord[2]
return z
Expand Down
1 change: 1 addition & 0 deletions python/unit_tests/test_cuda_math_linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import rainbow.util.test_tools as TEST


@unittest.skipIf(not cuda.is_available(), "CUDA not available")
class TestLinAlgCuda(unittest.TestCase):

def test_cramer_solver(self):
Expand Down
1 change: 1 addition & 0 deletions python/unit_tests/test_cuda_math_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import rainbow.cuda.unit_tests.test_math_matrix_kernel as TEST_MK


@unittest.skipIf(not cuda.is_available(), "CUDA not available")
class TestMatrixCuda(unittest.TestCase):

def test_mat33_T(self):
Expand Down
1 change: 1 addition & 0 deletions python/unit_tests/test_cuda_math_vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import rainbow.util.test_tools as TEST


@unittest.skipIf(not cuda.is_available(), "CUDA not available")
class TestVec3Cuda(unittest.TestCase):

def test_vec3_add(self):
Expand Down

0 comments on commit f2c0778

Please sign in to comment.