|  | 
| 7 | 7 | 
 | 
| 8 | 8 | import numpy as np | 
| 9 | 9 | import pytest | 
|  | 10 | +from conftest import skipif_need_cuda_headers | 
| 10 | 11 | 
 | 
| 11 | 12 | from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch | 
| 12 | 13 | from cuda.core.experimental._memory import _DefaultPinnedMemorySource | 
| @@ -152,3 +153,48 @@ def test_launch_scalar_argument(python_type, cpp_type, init_value): | 
| 152 | 153 | 
 | 
| 153 | 154 |     # Check result | 
| 154 | 155 |     assert arr[0] == init_value, f"Expected {init_value}, got {arr[0]}" | 
|  | 156 | + | 
|  | 157 | + | 
@skipif_need_cuda_headers  # the kernel includes <cooperative_groups.h>
def test_cooperative_launch():
    """Exercise ``launch`` with ``LaunchConfig(cooperative_launch=True)``.

    A kernel that only performs a grid-wide ``sync()`` is compiled to a cubin.
    The test then expects ``launch`` to raise ``ValueError`` for an oversized
    grid, and a 1x1 cooperative launch to run and synchronize without raising.
    """
    device = Device()
    device.set_current()
    stream = device.create_stream(options={"nonblocking": True})

    # Minimal kernel: obtain the grid group and synchronize across it.
    kernel_source = r"""
    #include <cooperative_groups.h>

    extern "C" __global__ void test_grid_sync() {
        namespace cg = cooperative_groups;
        auto grid = cg::this_grid();
        grid.sync();
    }
    """

    # Compile for the current device; the CUDA include directory is needed
    # so that the cooperative groups header can be resolved.
    cc_digits = "".join(f"{part}" for part in device.compute_capability)
    cuda_include_dir = str(pathlib.Path(os.environ["CUDA_PATH"]) / "include")
    program_options = ProgramOptions(
        std="c++17",
        arch=f"sm_{cc_digits}",
        include_path=cuda_include_dir,
    )
    program = Program(kernel_source, code_type="c++", options=program_options)
    compiled = program.compile("cubin")
    kernel = compiled.get_kernel("test_grid_sync")

    # NOTE: launching this kernel *without* cooperative_launch=True is
    # deliberately not exercised here. It was tried (expecting
    # CUDA_ERROR_LAUNCH_FAILED on stream sync) and left disabled because the
    # resulting error seemed to be sticky.

    # A grid larger than the device can handle must be rejected up front.
    threads_per_block = 128
    oversized_grid = device.properties.max_grid_dim_x // threads_per_block + 1
    rejected_config = LaunchConfig(
        grid=oversized_grid,
        block=threads_per_block,
        cooperative_launch=True,
    )
    with pytest.raises(ValueError):
        launch(stream, rejected_config, kernel)

    # A single-thread cooperative launch is expected to succeed.
    accepted_config = LaunchConfig(grid=1, block=1, cooperative_launch=True)
    launch(stream, accepted_config, kernel)
    stream.sync()
0 commit comments