Skip to content

Commit c224859

Browse files
jgu222 pszymich
authored and committed
Fixed block2d load
For 32b_1r8x2c under SIMD16, the data read is 64 bytes in total, but this function returns int2, which takes 128 bytes. For this special function, do not change the read to 1r16x1c. The result is valid only in the lower 8 lanes. (cherry picked from commit 8e131ad)
1 parent 35f0c60 commit c224859

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/LSCFuncs/LSCFuncsResolution.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -951,10 +951,9 @@ Instruction* LSCFuncsResolution::CreateSubGroup2DBlockOperation(llvm::CallInst&
951951
IGC_ASSERT_MESSAGE(funcName.consume_front("v2"), "Unrecognized v element in __builtin_IB_subgroup_block_read/write.");
952952
}
953953

954-
// Special handling of the following when GRF size = 64 bytes
954+
// (1) Special handling of the following when GRF size = 64 bytes
955955
// intel_sub_group_2d_block_read_8b_1r32x2c (u8_m1k32v2)
956956
// intel_sub_group_2d_block_read_16b_1r16x2c (u16_m1k16v2)
957-
// intel_sub_group_2d_block_read_32b_1r8x2c (u32_m1k8v2)
958957
// They are defined to return 64 bytes, but the HW block read
959958
// returns 128 bytes (two GRFs, as a block size must be multiple
960959
// of GRF, unused part is zero-padded. Note that those APIs have
@@ -964,11 +963,17 @@ Instruction* LSCFuncsResolution::CreateSubGroup2DBlockOperation(llvm::CallInst&
964963
//
965964
// For those cases, instead of 2 blocks, using equivalent single-block
966965
// read to avoid those mov instructions by just doubling their width:
967-
// u8_m1k32v2 --> u8_m1k64v1
968-
// u16_m1k16v2 --> u16_m1k32v1
969-
// u32_m1k8v2 --> u32_m1k16v1
966+
// 8b_m1k32v2 --> 8b_m1k64v1
967+
// 16b_m1k16v2 --> 16b_m1k32v1
968+
//
969+
// (2) The following is an exception:
970+
// int2 = intel_sub_group_2d_block_read_32b_1r8x2c (u32_m1k8v2)
971+
// it is indeed defined to return 128 bytes. As this 2d read has
972+
// 64 bytes, only the lower 8 lanes have data and the upper 8 lanes get zero.
973+
// No change to this read!
970974
if (m_pCtx->platform.getGRFSize() == 64 && isRead && !isPrefetch &&
971-
numBlocksV == 2 && tileHeight == 1 && (elemSize * tileWidth) == 256)
975+
numBlocksV == 2 && tileHeight == 1 && (elemSize * tileWidth) == 256 &&
976+
elemSize != 32 /* exception shown above in (2) */)
972977
{
973978
numBlocksV = 1;
974979
tileWidth *= 2;

IGC/ocloc_tests/Builtins/cl_intel_subgroup_2d_block_io/PVC/block_reads.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ SPDX-License-Identifier: MIT
270270
// RUN: -DINPUT_TYPE=uint -DOUTPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_read_32b_1r8x2c -DDST_ARRAY_EL_TYPE=uint2 -DDST_ARRAY_EL_NUM=1" \
271271
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-1R-8X2C
272272

273-
// CHECK-VISAASM-32B-1R-8X2C: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.16x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
273+
// CHECK-VISAASM-32B-1R-8X2C: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.2x8x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
274274

275275
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
276276
// RUN: -DINPUT_TYPE=uint -DOUTPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_read_32b_2r8x2c -DDST_ARRAY_EL_TYPE=uint2 -DDST_ARRAY_EL_NUM=1" \

IGC/ocloc_tests/SPIRV-extenstions/SPV_INTEL_2d_block_io/PVC/block_loads.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ entry:
9696
call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 4, i32 8, i32 16, i32 1, i8 addrspace(1)* %base_address, i32 512, i32 46, i32 512, <2 x i32> %0, i8* %dst_pointer)
9797
; CHECK: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.8x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
9898
call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 4, i32 8, i32 32, i32 1, i8 addrspace(1)* %base_address, i32 512, i32 46, i32 512, <2 x i32> %0, i8* %dst_pointer)
99-
; CHECK: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.16x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
99+
; CHECK: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.2x8x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
100100
call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 4, i32 8, i32 1, i32 2, i8 addrspace(1)* %base_address, i32 512, i32 46, i32 512, <2 x i32> %0, i8* %dst_pointer)
101101
; CHECK: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d32.2x8x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
102102
call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 4, i32 8, i32 2, i32 2, i8 addrspace(1)* %base_address, i32 512, i32 46, i32 512, <2 x i32> %0, i8* %dst_pointer)

0 commit comments

Comments
 (0)