From 788ce62be11e8a79410e127bbc2a081ebf2e1db0 Mon Sep 17 00:00:00 2001 From: Umang Yadav <29876643+umangyadav@users.noreply.github.com> Date: Fri, 28 Oct 2022 11:34:17 -0400 Subject: [PATCH] Use minimum block size of 64 threads (#1427) Local thread counts in multiples of 32 were introduced in #1348, but local thread counts that are not a multiple of 64 are causing correctness issues. --- src/targets/gpu/compile_hip_code_object.cpp | 5 ++--- test/verify/test_reduce_op_large.cpp | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/targets/gpu/compile_hip_code_object.cpp b/src/targets/gpu/compile_hip_code_object.cpp index 415ce9ca426..5edde29c2aa 100644 --- a/src/targets/gpu/compile_hip_code_object.cpp +++ b/src/targets/gpu/compile_hip_code_object.cpp @@ -144,9 +144,8 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) std::size_t compute_block_size(std::size_t n, std::size_t max_block_size) { - const std::size_t min_block_size = 64; - const std::size_t base_block_size = 32; - auto block_size = (((n - 1) / base_block_size + 1)) * base_block_size; + const std::size_t min_block_size = 64; + auto block_size = (((n - 1) / min_block_size + 1)) * min_block_size; return std::min(std::max(min_block_size, block_size), max_block_size); } diff --git a/test/verify/test_reduce_op_large.cpp b/test/verify/test_reduce_op_large.cpp index c6fc0331918..a7f600dab40 100644 --- a/test/verify/test_reduce_op_large.cpp +++ b/test/verify/test_reduce_op_large.cpp @@ -51,7 +51,7 @@ template struct test_reduce_op_large; template struct test_reduce_op_large; -struct test_reduce_mean : verify_program +struct test_reduce_mean_1 : verify_program { migraphx::program create_program() const { @@ -63,3 +63,16 @@ struct test_reduce_mean : verify_program return p; }; }; + +struct test_reduce_mean_2 : verify_program +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape s{migraphx::shape::float_type, {336, 
400}}; + auto x = mm->add_parameter("x", s); + mm->add_instruction(migraphx::op::reduce_mean{{1}}, x); + return p; + }; +};