Skip to content

Commit 8cf37ba

Browse files
authored
[SYCLomatic] Add a test case to migrate the thread_group structure and 3 relevant APIs. (#452)
Signed-off-by: Chen, Sheng S <[email protected]>
1 parent 61105e0 commit 8cf37ba

File tree

3 files changed

+91
-2
lines changed

3 files changed

+91
-2
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// ====------ cooperative_groups_thread_group.cu --------- *- CUDA -* ----===////
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//
8+
// ===----------------------------------------------------------------------===//
9+
10+
#include <cooperative_groups.h>
11+
#include <cstdio>
12+
#include <stdlib.h>
13+
namespace cg = cooperative_groups;
14+
15+
__device__ int testThreadGroup(cg::thread_group g, int *input, int val) {
16+
17+
int thread_index = g.thread_rank();
18+
for (int i = g.size() / 2; i > 0; i /= 2) {
19+
input[thread_index] = val;
20+
g.sync();
21+
22+
if (thread_index < i) {
23+
val += input[thread_index];
24+
}
25+
g.sync();
26+
}
27+
if (thread_index == 0) {
28+
return val;
29+
} else {
30+
return -1;
31+
}
32+
}
33+
34+
__global__ void kernelFunc(unsigned int *ret) {
35+
*ret = 0;
36+
auto block = cg::this_thread_block();
37+
int value = 2;
38+
__shared__ int workspace[1024];
39+
block.thread_index();
40+
auto threadBlockGroup = cg::this_thread_block();
41+
int ret1, ret2, ret3;
42+
ret1 = testThreadGroup(threadBlockGroup, workspace, value);
43+
if (threadBlockGroup.thread_rank() == 0) {
44+
printf("value1 is %d\n", ret1);
45+
}
46+
47+
cg::thread_block_tile<16> tilePartition16 =
48+
cg::tiled_partition<16>(threadBlockGroup);
49+
ret2 = testThreadGroup(tilePartition16, workspace, value);
50+
if (threadBlockGroup.thread_rank() == 0) {
51+
printf("value2 is %d\n", ret2);
52+
}
53+
54+
cg::thread_block_tile<32> tilePartition32 =
55+
cg::tiled_partition<32>(threadBlockGroup);
56+
ret3 = testThreadGroup(tilePartition32, workspace, value);
57+
if (threadBlockGroup.thread_rank() == 0) {
58+
printf("value3 is %d\n", ret3);
59+
}
60+
if (threadBlockGroup.thread_rank() == 0) {
61+
if (ret1 == 512 && ret2 == 32 && ret3 == 64) {
62+
*ret = 1;
63+
} else {
64+
*ret = -1;
65+
}
66+
}
67+
}
68+
69+
int main() {
70+
bool checker4 = false;
71+
unsigned int *ret_result;
72+
unsigned int host[1];
73+
cudaMalloc(&ret_result, sizeof(unsigned int));
74+
kernelFunc<<<1, 256>>>(ret_result);
75+
cudaMemcpy(host, ret_result, sizeof(unsigned int), cudaMemcpyDeviceToHost);
76+
cudaFree(ret_result);
77+
printf("host valu is %d \n ", host[0]);
78+
if (host[0] == 1) {
79+
printf(" thread_group migration is run success \n");
80+
checker4 = true;
81+
} else {
82+
printf("thread_group migration is run failed\n ");
83+
}
84+
85+
if (checker4)
86+
return 0;
87+
return -1;
88+
}

features/features.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@
268268
<test testName="cublas_v1_runable" configFile="config/TEMPLATE_cuBlas_11.xml" splitGroup="double"/>
269269
<test testName="complex" configFile="config/TEMPLATE_complex.xml" splitGroup="double"/>
270270
<test testName="cooperative_groups" configFile="config/TEMPLATE_cooperative_groups.xml" />
271+
<test testName="cooperative_groups_thread_group" configFile="config/TEMPLATE_cooperative_groups.xml" />
271272
<test testName="ccl-test" configFile="config/TEMPLATE_ccl_api.xml" />
272273
<test testName="ccl-test2" configFile="config/TEMPLATE_ccl_api.xml" />
273274
<test testName="cooperative_groups_reduce" configFile="config/TEMPLATE_cooperative_groups_reduce.xml" />

features/test_feature.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
'thrust_tabulate', 'thrust_for_each_n', 'device_info', 'defaultStream', 'cudnn-rnn', 'feature_profiling',
5151
'thrust_raw_reference_cast', 'thrust_partition_copy', 'thrust_stable_partition_copy',
5252
'thrust_stable_partition', 'thrust_remove', 'cub_device_segmented_sort_pairs', 'thrust_find_if_not',
53-
'thrust_find_if', 'thrust_mismatch', 'thrust_replace_copy', 'thrust_reverse', 'cooperative_groups_reduce',
53+
'thrust_find_if', 'thrust_mismatch', 'thrust_replace_copy', 'thrust_reverse', 'cooperative_groups_reduce', 'cooperative_groups_thread_group',
5454
'remove_unnecessary_wait', 'thrust_equal_range', 'thrust_transform_inclusive_scan', 'thrust_uninitialized_copy_n', 'thrust_uninitialized_copy',
5555
'thrust_random_type', 'thrust_scatter_if', 'thrust_all_of', 'thrust_none_of', 'thrust_is_partitioned',
5656
'thrust_is_sorted_until', 'thrust_set_intersection', 'thrust_set_union_by_key', 'thrust_set_union',
@@ -76,7 +76,7 @@ def migrate_test():
7676
src.append(os.path.abspath(os.path.join(dirpath, filename)))
7777

7878
nd_range_bar_exper = ['grid_sync']
79-
logical_group_exper = ['cooperative_groups']
79+
logical_group_exper = ['cooperative_groups', 'cooperative_groups_thread_group']
8080
experimental_bfloat16_tests = ['math-experimental-bf16', 'math-experimental-bf162']
8181

8282
math_extension_tests = ['math-ext-double', 'math-ext-float', 'math-ext-half', 'math-ext-half-after11', 'math-ext-half2', 'math-ext-half2-after11', 'math-ext-simd']

0 commit comments

Comments
 (0)