Skip to content

Commit a0b0e83

Browse files
committed
Do not use bit_cast to work around initialization issues with barrier
None of the tests is constexpr, so we can just use `reinterpret_cast`. Fixes #6255
1 parent 271da5d commit a0b0e83

File tree

4 files changed

+5
-5
lines changed

4 files changed

+5
-5
lines changed

libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk.pass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ __device__ void test()
4343
alignas(16) __shared__ int smem_buffer[buf_len];
4444
#if _CCCL_CUDA_COMPILER(CLANG)
4545
__shared__ char barrier_data[sizeof(barrier)];
46-
barrier& bar = cuda::std::bit_cast<barrier>(barrier_data);
46+
barrier& bar = reinterpret_cast<barrier&>(barrier_data);
4747
#else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
4848
__shared__ barrier bar;
4949
#endif // !_CCCL_CUDA_COMPILER(CLANG)

libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_ptx_compiles.pass.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ __global__ void test_bulk_tensor(CUtensorMap* map)
3030
__shared__ int smem;
3131
#if _CCCL_CUDA_COMPILER(CLANG)
3232
__shared__ char barrier_data[sizeof(barrier)];
33-
barrier& bar = cuda::std::bit_cast<barrier>(barrier_data);
33+
barrier& bar = reinterpret_cast<barrier&>(barrier_data);
3434
#else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
3535
__shared__ barrier bar;
3636
#endif // !_CCCL_CUDA_COMPILER(CLANG)
@@ -57,7 +57,7 @@ __global__ void test_bulk(void* gmem)
5757
{
5858
__shared__ int smem;
5959
__shared__ char barrier_data[sizeof(barrier)];
60-
barrier& bar = *reinterpret_cast<barrier*>(&barrier_data);
60+
barrier& bar = reinterpret_cast<barrier&>(barrier_data);
6161
if (threadIdx.x == 0)
6262
{
6363
init(&bar, blockDim.x);

libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor.pass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ __device__ void test(int base_i, int base_j)
7070
alignas(128) __shared__ int smem_buffer[buf_len];
7171
#if _CCCL_CUDA_COMPILER(CLANG)
7272
__shared__ char barrier_data[sizeof(barrier)];
73-
barrier& bar = cuda::std::bit_cast<barrier>(barrier_data);
73+
barrier& bar = reinterpret_cast<barrier&>(barrier_data);
7474
#else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
7575
__shared__ barrier bar;
7676
#endif // !_CCCL_CUDA_COMPILER(CLANG)

libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_generic.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ test(cuda::std::array<uint32_t, num_dims> smem_coord,
187187
alignas(128) __shared__ int smem_buffer[smem_len];
188188
#if _CCCL_CUDA_COMPILER(CLANG)
189189
__shared__ char barrier_data[sizeof(barrier)];
190-
barrier& bar = cuda::std::bit_cast<barrier>(barrier_data);
190+
barrier& bar = reinterpret_cast<barrier&>(barrier_data);
191191
#else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
192192
__shared__ barrier bar;
193193
#endif // !_CCCL_CUDA_COMPILER(CLANG)

0 commit comments

Comments
 (0)