Merge branch 'feature/numba-cuda-contact-point' into test/auto-unit-test

Jax922 · Oct 2, 2023 · f2c0778 · f2c0778
2 parents eb3d2b0 + 7037884
commit f2c0778
Show file tree

Hide file tree

Showing 8 changed files with 229 additions and 124 deletions.
diff --git a/python/rainbow/cuda/collision_detection/compute_contacts.py b/python/rainbow/cuda/collision_detection/compute_contacts.py
diff --git a/python/rainbow/simulators/prox_soft_bodies/collision_detection.py b/python/rainbow/simulators/prox_soft_bodies/collision_detection.py
@@ -243,6 +243,15 @@ def _compute_contacts(engine, stats, bodyA, bodyB, results, debug_on):
 
 
 def _uniform_padding(listss, padding_value):
+    """ Pad the sub lists of listss so that every sub list has the same length.
+
+    Args:
+        listss (List[List]): A nested list.
+        padding_value (DataType): A padding value, which is used to pad the list.
+
+    Returns:
+        List[List]: The padded listss, in which every sub list has the same length.
+    """
     valid_lists = [l for l in listss if l is not None]
 
     if len(valid_lists) == 0:
@@ -258,6 +267,15 @@ def _uniform_padding(listss, padding_value):
 
 
 def _assemble_body_data_to_gpu(data_lists, bodyA, bodyB, triA, triB):
+    """ Assemble body data to a data list for GPU computing.
+
+    Args:
+        data_lists (List): The data list for GPU computing.
+        bodyA (SoftBody): A SoftBody instance.
+        bodyB (SoftBody): A SoftBody instance.
+        triA (int): The index of triangle in bodyA.
+        triB (int): The index of triangle in bodyB.
+    """
     data_lists['bodyA_idxs'].append(bodyA.idx)
     data_lists['bodyB_idxs'].append(bodyB.idx)
     data_lists['overlap_results'].append((triA, triB))
@@ -277,14 +295,21 @@ def _assemble_body_data_to_gpu(data_lists, bodyA, bodyB, triA, triB):
     data_lists['B_grid_Js'].append(bodyB.grid.J)
     data_lists['B_grid_Ks'].append(bodyB.grid.K)
 
+
 def _contact_point_gpu(overlaps, engine, stats, debug_on):
+    """ The GPU version of contact point computing: it flattens the data, sends it to the GPU, and then calls the kernel function.
 
-    contact_optimization_timer = None
-    model_space_update_timer = None
+    Args:
+        overlaps (dict): A dictionary of triangles from one body that overlaps another body.
+        engine (Engine):  The current engine instance we are working with.
+        stats (dict): A dictionary where to add more profiling and timing measurements.
+        debug_on (bool): Boolean flag for toggling debug (aka profiling) info on and off.
+
+    Returns:
+        dict:  A dictionary with profiling and timing measurements.
+    """
     contact_point_generation_timer = None
     if debug_on:
-        # model_space_update_timer = Timer("model_space_update")
-        # contact_optimization_timer = Timer("contact_optimization")
         contact_point_generation_timer = Timer("contact_point_generation")
 
     data_lists = {
@@ -348,14 +373,17 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
         'B_grid_Ks': np.int32
     }
 
+    # copy data to GPU
     d_data = {}
     for key, data in data_lists.items():
         array_data = np.array(data, dtype=type_map.get(key))
         d_data[f'd_{key}'] = cuda.to_device(array_data)
 
+    # setting up GPU computing (grid and block)
     threads_per_block = engine.params.gpu_grid_size
     blocks_per_grid = (data_length + threads_per_block - 1) // threads_per_block
 
+    # setting up result from GPU (data type and size)
     result_dtype = np.dtype([
         ('idx_tetB', np.int32),
         ('idx_tetA', np.int32),
@@ -367,6 +395,7 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
     ])
     result_gpu = cuda.device_array(data_length, dtype=result_dtype)
 
+    # call GPU kernel function
     CUDA_COMPUTE_CONTACTS.contact_points_computing_kernel[blocks_per_grid, threads_per_block](
         d_data['d_bodyA_idxs'], d_data['d_bodyB_idxs'], d_data['d_overlap_results'],
         d_data['d_B_values'], d_data['d_A_owners'], d_data['d_B_owners'],
@@ -378,8 +407,9 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
         engine.params.contact_optimization_tolerance,
         engine.params.envelope, 0.5, result_gpu)
 
-    cuda.synchronize() ## wait for GPU data 
-    result_to_cpu = result_gpu.copy_to_host() ## copy GPU data to CPU
+    # wait for GPU data and copy to CPU
+    cuda.synchronize() 
+    result_to_cpu = result_gpu.copy_to_host() 
 
     ## generate contact points
     for res in result_to_cpu:
@@ -394,12 +424,6 @@ def _contact_point_gpu(overlaps, engine, stats, debug_on):
         engine.contact_points.append(cp)
 
     if debug_on:
-        # if "model_space_update" not in stats:
-        #     stats["model_space_update"] = 0
-        # stats["model_space_update"] += model_space_update_timer.total
-        # if "contact_optimization" not in stats:
-        #     stats["contact_optimization"] = 0
-        # stats["contact_optimization"] += contact_optimization_timer.total
         if "contact_point_generation" not in stats:
             stats["contact_point_generation"] = 0
         stats["contact_point_generation"] += contact_point_generation_timer.total
@@ -430,8 +454,9 @@ def _contact_determination(overlaps, engine, stats, debug_on):
         if debug_on:
             contact_determination_timer.end()
             stats["contact_determination"] = contact_determination_timer.elapsed
-
         return stats
+
+    # Compute contact points on the CPU if the GPU is not available or the flag is False
     for key, results in overlaps.items():
         # TODO 2022-12-31 Kenny: The code currently computes a lot of redundant contacts due
         #  to BVH traversal may return a triangle as part of several pairs. We only need

diff --git a/python/rainbow/simulators/prox_soft_bodies/types.py b/python/rainbow/simulators/prox_soft_bodies/types.py
@@ -252,7 +252,7 @@ def __init__(self):
             0.1  # Any geometry within this distance generates a contact point.
         )
         self.resolution = 64  # The number of grid cells along each axis in the signed distance fields.
-        self.use_gpu = True # Boolean flag that indicates if we should use the GPU or not.
+        self.use_gpu = False # Boolean flag that indicates if we should use the GPU or not.
         self.gpu_grid_size = 256 # The number of threads per block to use on the GPU.
 
 

diff --git a/python/unit_tests/test_cuda_geometry_barycentric.py b/python/unit_tests/test_cuda_geometry_barycentric.py
@@ -11,6 +11,7 @@
 import rainbow.cuda.unit_tests.test_geometry_barycentric_kernel as TEST_BCK
 
 
+@unittest.skipIf(not cuda.is_available(), "CUDA not available")
 class TestGrid3Cuda(unittest.TestCase):
 
     def test_compute_barycentric_tetrahedron(self):

diff --git a/python/unit_tests/test_cuda_geometry_grid3.py b/python/unit_tests/test_cuda_geometry_grid3.py
@@ -11,6 +11,7 @@
 import rainbow.cuda.unit_tests.test_geometry_grid3_kernel as TEST_GRID3K
 
 
+@unittest.skipIf(not cuda.is_available(), "CUDA not available")
 def simpelfunc(coord):
     _, _, z = coord[0], coord[1], coord[2]
     return z

diff --git a/python/unit_tests/test_cuda_math_linalg.py b/python/unit_tests/test_cuda_math_linalg.py
@@ -10,6 +10,7 @@
 import rainbow.util.test_tools as TEST
 
 
+@unittest.skipIf(not cuda.is_available(), "CUDA not available")
 class TestLinAlgCuda(unittest.TestCase):
 
     def test_cramer_solver(self):

diff --git a/python/unit_tests/test_cuda_math_matrix.py b/python/unit_tests/test_cuda_math_matrix.py
@@ -10,6 +10,7 @@
 import rainbow.cuda.unit_tests.test_math_matrix_kernel as TEST_MK
 
 
+@unittest.skipIf(not cuda.is_available(), "CUDA not available")
 class TestMatrixCuda(unittest.TestCase):
 
     def test_mat33_T(self):

diff --git a/python/unit_tests/test_cuda_math_vec.py b/python/unit_tests/test_cuda_math_vec.py
@@ -10,6 +10,7 @@
 import rainbow.util.test_tools as TEST
 
 
+@unittest.skipIf(not cuda.is_available(), "CUDA not available")
 class TestVec3Cuda(unittest.TestCase):
 
     def test_vec3_add(self):