|
23 | 23 | launch, |
24 | 24 | ) |
25 | 25 | from cuda.core.experimental._memory import _SynchronousMemoryResource |
| 26 | +from cuda.core.experimental._utils.cuda_utils import CUDAError |
26 | 27 |
|
27 | 28 |
|
28 | 29 | def test_launch_config_init(init_cuda): |
@@ -59,6 +60,68 @@ def test_launch_config_shmem_size(): |
59 | 60 | assert config.shmem_size == 0 |
60 | 61 |
|
61 | 62 |
|
def test_launch_config_cluster_grid_conversion(init_cuda):
    """Test that LaunchConfig preserves original grid values and conversion happens in native config.

    ``LaunchConfig`` is expected to report ``grid``, ``cluster`` and ``block``
    as the values passed in, padded to 3-tuples with trailing ones; passing
    ``cluster`` must not rescale the stored ``grid``.

    The cluster-free case runs first and outside the skip guard, so it is
    still verified when the cluster cases have to be skipped. A ``CUDAError``
    raised while exercising a cluster case skips the remainder of the test.
    """
    # No cluster (should not convert grid). Needs no cluster support, so it
    # must not be hidden behind the cluster skip guard below.
    config = LaunchConfig(grid=4, block=32)
    assert config.grid == (4, 1, 1), f"Expected (4, 1, 1), got {config.grid}"
    assert config.cluster is None

    try:
        # 1D - Issue #867 example
        config = LaunchConfig(grid=4, cluster=2, block=32)
        assert config.grid == (4, 1, 1), f"Expected (4, 1, 1), got {config.grid}"
        assert config.cluster == (2, 1, 1), f"Expected (2, 1, 1), got {config.cluster}"
        assert config.block == (32, 1, 1), f"Expected (32, 1, 1), got {config.block}"

        # 2D grid and cluster
        config = LaunchConfig(grid=(2, 3), cluster=(2, 2), block=32)
        assert config.grid == (2, 3, 1), f"Expected (2, 3, 1), got {config.grid}"
        assert config.cluster == (2, 2, 1), f"Expected (2, 2, 1), got {config.cluster}"

        # 3D full specification
        config = LaunchConfig(grid=(2, 2, 2), cluster=(3, 3, 3), block=(8, 8, 8))
        assert config.grid == (2, 2, 2), f"Expected (2, 2, 2), got {config.grid}"
        assert config.cluster == (3, 3, 3), f"Expected (3, 3, 3), got {config.cluster}"

        # Identity case
        config = LaunchConfig(grid=1, cluster=1, block=32)
        assert config.grid == (1, 1, 1), f"Expected (1, 1, 1), got {config.grid}"

    except CUDAError:
        # Only the cluster cases are skipped; the cluster-free check above has
        # already run by the time this is reached.
        pytest.skip("Driver or GPU not new enough for thread block clusters")
| 93 | + |
| 94 | + |
def test_launch_config_native_conversion(init_cuda):
    """Test that _to_native_launch_config correctly converts grid from cluster units to block units.

    With a cluster, each native ``gridDim*`` is expected to be the
    user-facing grid extent multiplied by the cluster extent on the same axis
    (e.g. grid=4, cluster=2 -> gridDimX == 8). Without a cluster the native
    grid must equal the user-facing grid.

    The cluster-free case runs first and outside the skip guard, so it is
    still verified when the cluster cases have to be skipped. A ``CUDAError``
    raised while exercising a cluster case skips the remainder of the test.
    """
    # Private helper under test; imported locally so the module-level imports
    # stay limited to the public API.
    from cuda.core.experimental._launch_config import _to_native_launch_config

    # No cluster (should not convert grid). Needs no cluster support, so it
    # must not be hidden behind the cluster skip guard below.
    config = LaunchConfig(grid=4, block=32)
    native_config = _to_native_launch_config(config)
    assert native_config.gridDimX == 4, f"Expected gridDimX=4, got {native_config.gridDimX}"
    assert native_config.gridDimY == 1, f"Expected gridDimY=1, got {native_config.gridDimY}"
    assert native_config.gridDimZ == 1, f"Expected gridDimZ=1, got {native_config.gridDimZ}"

    try:
        # 1D - Issue #867 example: 4 clusters of 2 blocks -> 8 blocks
        config = LaunchConfig(grid=4, cluster=2, block=32)
        native_config = _to_native_launch_config(config)
        assert native_config.gridDimX == 8, f"Expected gridDimX=8, got {native_config.gridDimX}"
        assert native_config.gridDimY == 1, f"Expected gridDimY=1, got {native_config.gridDimY}"
        assert native_config.gridDimZ == 1, f"Expected gridDimZ=1, got {native_config.gridDimZ}"

        # 2D grid and cluster: (2, 3) clusters of (2, 2) blocks -> (4, 6) blocks
        config = LaunchConfig(grid=(2, 3), cluster=(2, 2), block=32)
        native_config = _to_native_launch_config(config)
        assert native_config.gridDimX == 4, f"Expected gridDimX=4, got {native_config.gridDimX}"
        assert native_config.gridDimY == 6, f"Expected gridDimY=6, got {native_config.gridDimY}"
        assert native_config.gridDimZ == 1, f"Expected gridDimZ=1, got {native_config.gridDimZ}"

    except CUDAError:
        # Only the cluster cases are skipped; the cluster-free check above has
        # already run by the time this is reached.
        pytest.skip("Driver or GPU not new enough for thread block clusters")
| 123 | + |
| 124 | + |
62 | 125 | def test_launch_invalid_values(init_cuda): |
63 | 126 | code = 'extern "C" __global__ void my_kernel() {}' |
64 | 127 | program = Program(code, "c++") |
|
0 commit comments