Skip to content

Commit 53a472d

Browse files
pkwasnie-inteligcbot
authored and committed
optimize i64 built-in variables to i32
OpenCL represents built-in variables like `get_global_id` with the generic type `size_t`, which translates to i64. This change adds a new optimization that narrows a built-in's computation to i32 when a use of the built-in carries an assumption hinting that its value fits in the i32 range.
1 parent 61ac4bc commit 53a472d

File tree

9 files changed

+538
-0
lines changed

9 files changed

+538
-0
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ SPDX-License-Identifier: MIT
125125
#include "Compiler/Optimizer/OpenCLPasses/MinimumValidAddressChecking/MinimumValidAddressChecking.hpp"
126126
#include "Compiler/Optimizer/OpenCLPasses/Spv2dBlockIOResolution/Spv2dBlockIOResolution.hpp"
127127
#include "Compiler/Optimizer/OpenCLPasses/SpvSubgroupMMAResolution/SpvSubgroupMMAResolution.hpp"
128+
#include "Compiler/Optimizer/OpenCLPasses/ProcessBICodeAssumption/ProcessBICodeAssumption.hpp"
128129

129130
#include "common/debug/Debug.hpp"
130131
#include "common/igc_regkeys.hpp"
@@ -361,6 +362,7 @@ static void CommonOCLBasedPasses(OpenCLProgramContext *pContext) {
361362

362363
mpm.add(new SpvSubgroupMMAResolution());
363364

365+
mpm.add(createProcessBICodeAssumptionPass());
364366
mpm.add(new PreBIImportAnalysis());
365367
mpm.add(createTimeStatsCounterPass(pContext, TIME_Unify_BuiltinImport, STATS_COUNTER_START));
366368
mpm.add(createBuiltInImportPass());

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,4 @@ void initializeInjectPrintfPass(llvm::PassRegistry &);
299299
void initializeWaveShuffleIndexSinkingPass(llvm::PassRegistry &);
300300
void initializeInstructionHoistingOptimizationPass(llvm::PassRegistry &);
301301
void initializeWaveBallotCSEPass(llvm::PassRegistry &);
302+
void initializeProcessBICodeAssumptionPass(llvm::PassRegistry &);

IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ add_subdirectory(OpenCLPrintf)
4444
add_subdirectory(PoisonFP64KernelsPass)
4545
add_subdirectory(PrepareInlineSamplerForBindless)
4646
add_subdirectory(PrivateMemory)
47+
add_subdirectory(ProcessBICodeAssumption)
4748
add_subdirectory(ProgramScopeConstants)
4849
add_subdirectory(RayTracing)
4950
add_subdirectory(ReplaceUnsupportedIntrinsics)
@@ -105,6 +106,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
105106
${IGC_BUILD__SRC__OpenCLPasses_PoisonFP64KernelsPass}
106107
${IGC_BUILD__SRC__OpenCLPasses_PrepareInlineSamplerForBindless}
107108
${IGC_BUILD__SRC__OpenCLPasses_PrivateMemory}
109+
${IGC_BUILD__SRC__OpenCLPasses_ProcessBICodeAssumption}
108110
${IGC_BUILD__SRC__OpenCLPasses_ProgramScopeConstants}
109111
${IGC_BUILD__SRC__OpenCLPasses_Raytracing}
110112
${IGC_BUILD__SRC__OpenCLPasses_ReplaceUnsupportedIntrinsics}
@@ -171,6 +173,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
171173
${IGC_BUILD__HDR__OpenCLPasses_OpenCLPrintf}
172174
${IGC_BUILD__HDR__OpenCLPasses_PrepareInlineSamplerForBindless}
173175
${IGC_BUILD__HDR__OpenCLPasses_PrivateMemory}
176+
${IGC_BUILD__HDR__OpenCLPasses_ProcessBICodeAssumption}
174177
${IGC_BUILD__HDR__OpenCLPasses_ProgramScopeConstants}
175178
${IGC_BUILD__HDR__OpenCLPasses_Raytracing}
176179
${IGC_BUILD__HDR__OpenCLPasses_ReplaceUnsupportedIntrinsics}
@@ -239,6 +242,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
239242
Compiler__OpenCLPasses_PoisonFP64KernelsPass
240243
Compiler__OpenCLPasses_PrepareInlineSamplerForBindless
241244
Compiler__OpenCLPasses_PrivateMemory
245+
Compiler__OpenCLPasses_ProcessBICodeAssumption
242246
Compiler__OpenCLPasses_ProgramScopeConstants
243247
Compiler__OpenCLPasses_Raytracing
244248
Compiler__OpenCLPasses_ReplaceUnsupportedIntrinsics
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#=========================== begin_copyright_notice ============================
2+
#
3+
# Copyright (C) 2025 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: MIT
6+
#
7+
#============================ end_copyright_notice =============================
8+
9+
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
10+
11+
12+
set(IGC_BUILD__SRC__ProcessBICodeAssumption
13+
"${CMAKE_CURRENT_SOURCE_DIR}/ProcessBICodeAssumption.cpp"
14+
)
15+
set(IGC_BUILD__SRC__OpenCLPasses_ProcessBICodeAssumption ${IGC_BUILD__SRC__ProcessBICodeAssumption} PARENT_SCOPE)
16+
17+
set(IGC_BUILD__HDR__ProcessBICodeAssumption
18+
"${CMAKE_CURRENT_SOURCE_DIR}/ProcessBICodeAssumption.hpp"
19+
)
20+
set(IGC_BUILD__HDR__OpenCLPasses_ProcessBICodeAssumption ${IGC_BUILD__HDR__ProcessBICodeAssumption} PARENT_SCOPE)
21+
22+
23+
igc_sg_register(
24+
Compiler__OpenCLPasses_ProcessBICodeAssumption
25+
"ProcessBICodeAssumption"
26+
FILES
27+
${IGC_BUILD__SRC__ProcessBICodeAssumption}
28+
${IGC_BUILD__HDR__ProcessBICodeAssumption}
29+
)
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include "Compiler/Optimizer/OpenCLPasses/ProcessBICodeAssumption/ProcessBICodeAssumption.hpp"
10+
#include "Compiler/IGCPassSupport.h"
11+
12+
#include "common/LLVMWarningsPush.hpp"
13+
#include <llvm/IR/Function.h>
14+
#include <llvm/IR/InstVisitor.h>
15+
#include <llvm/IR/PatternMatch.h>
16+
#include "common/LLVMWarningsPop.hpp"
17+
18+
#include "Compiler/CodeGenPublic.h"
19+
20+
using namespace llvm;
21+
using namespace llvm::PatternMatch;
22+
using namespace IGC;
23+
24+
// OpenCL standard defines built-in variables like get_global_id as size_t, which translates to i64, but in reality
25+
// many workloads use values that fit in i32. This pass checks if there are assumptions on built-in variables and adds
26+
// trunc/zext instructions to reflect that the upper 32 bits are not used. This helps instcombine to optimize code after
27+
// BIImport.
28+
class ProcessBICodeAssumption : public llvm::FunctionPass, public llvm::InstVisitor<ProcessBICodeAssumption> {
public:
  static char ID;
  ProcessBICodeAssumption();

  llvm::StringRef getPassName() const override { return "ProcessBICodeAssumption"; }

  // Visits the function to collect candidate builtin calls, then rewrites their uses.
  // Returns true when at least one call was rewritten.
  bool runOnFunction(Function &F) override;

  // InstVisitor callback for call instructions; looks for llvm.assume calls guarding a builtin.
  void visitCallInst(CallInst &I);

private:
  // True when (Pred, CI) bounds the compared value to the 32-bit range.
  bool matchCmp(ICmpInst::Predicate Pred, ConstantInt *CI);
  // True when I is a call to one of the supported builtin functions.
  bool matchBuiltin(Instruction *I);
  // Looks through an extractelement/insertelement chain (and an optional select) for a builtin call.
  void matchVectorPattern(Instruction *I);

  // Builtin calls found during the current runOnFunction invocation.
  SmallPtrSet<Instruction *, 8> ToTruncate;
};
45+
46+
// Constructs the pass and makes sure it is registered with the global pass registry.
ProcessBICodeAssumption::ProcessBICodeAssumption() : FunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeProcessBICodeAssumptionPass(Registry);
}
49+
50+
// Collects every builtin call that has a matching 32-bit-range assumption and routes its uses through a
// trunc-to-i32 / zext-to-i64 pair placed right after the call.
// Returns true when the function was modified.
bool ProcessBICodeAssumption::runOnFunction(Function &F) {
  // ToTruncate is a member, so forget whatever a previously processed function left in it.
  ToTruncate.clear();
  visit(F);

  const bool Changed = !ToTruncate.empty();

  for (Instruction *Call : ToTruncate) {
    // Emit the new instructions immediately after the builtin call.
    IRBuilder<> Builder(Call->getNextNode());
    auto Low32 = Builder.CreateTrunc(Call, Builder.getInt32Ty());
    auto Widened = Builder.CreateZExt(Low32, Builder.getInt64Ty());

    // Redirect every use of the call to the zext, except the trunc itself, which must keep reading the
    // original value. The iterator is advanced before the use is rewritten because set() unlinks the use
    // from the call's use list.
    auto UseIt = Call->use_begin();
    const auto UseEnd = Call->use_end();
    while (UseIt != UseEnd) {
      Use &Current = *UseIt;
      ++UseIt;
      if (Current.getUser() != Low32)
        Current.set(Widened);
    }
  }

  return Changed;
}
76+
77+
// Inspects one call instruction. When it is an llvm.assume whose condition compares an instruction against a
// constant that fits the 32-bit range, the compared instruction is recorded in ToTruncate if it is a supported
// builtin call; otherwise the vector insert/extract pattern is tried.
void ProcessBICodeAssumption::visitCallInst(CallInst &CI) {
  Instruction *Bounded = nullptr;
  ConstantInt *Limit = nullptr;
  ICmpInst::Predicate Pred;

  // Shape being matched:
  //   %9 = icmp ult i64 %8, 2147483648
  //   call void @llvm.assume(i1 %9)
  const bool IsAssumeOfCmp =
      match(&CI, m_Intrinsic<Intrinsic::assume>(m_ICmp(Pred, m_Instruction(Bounded), m_ConstantInt(Limit))));
  if (!IsAssumeOfCmp || !matchCmp(Pred, Limit))
    return;

  if (matchBuiltin(Bounded))
    ToTruncate.insert(Bounded);
  else
    matchVectorPattern(Bounded);
}
99+
100+
// Look for pattern with insertelement/extractelement:
101+
// %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
102+
// %2 = insertelement <3 x i64> undef, i64 %1, i32 0
103+
// %3 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 1)
104+
// %4 = insertelement <3 x i64> %2, i64 %3, i32 1
105+
// %5 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 2)
106+
// %6 = insertelement <3 x i64> %4, i64 %5, i32 2
107+
// %7 = extractelement <3 x i64> %6, i32 0
108+
// Handles the case where the assumed value is not the builtin call itself but an element extracted from a vector
// that was assembled from builtin calls with insertelement. When the element at the extracted index was inserted
// from a supported builtin call, that call is recorded in ToTruncate.
void ProcessBICodeAssumption::matchVectorPattern(Instruction *I) {
  Instruction *Extract = I;

  // The extractelement may be wrapped in a select with a constant-true condition:
  //   %7 = extractelement <3 x i64> %6, i32 0
  //   %8 = select i1 true, i64 %7, i64 0
  Instruction *Selected = nullptr;
  if (match(I, m_Select(m_One(), m_Instruction(Selected), m_Value())))
    Extract = Selected;

  Value *Vec = nullptr;
  ConstantInt *ExtractIdx = nullptr;
  if (!match(Extract, m_ExtractElt(m_Value(Vec), m_ConstantInt(ExtractIdx))))
    return;

  // Walk the insertelement chain from the last insertion backwards until the insertion that wrote the
  // extracted lane is found, or the chain stops matching.
  for (;;) {
    Value *PrevVec = nullptr;
    Instruction *Scalar = nullptr;
    ConstantInt *InsertIdx = nullptr;
    if (!match(Vec, m_InsertElt(m_Value(PrevVec), m_Instruction(Scalar), m_ConstantInt(InsertIdx))))
      return;

    if (InsertIdx->getZExtValue() == ExtractIdx->getZExtValue()) {
      if (matchBuiltin(Scalar))
        ToTruncate.insert(Scalar);
      return;
    }

    Vec = PrevVec;
  }
}
133+
134+
// Decides whether the comparison "value <Pred> CI" restricts the value to the 32-bit range.
//
// Pred - predicate of the icmp feeding llvm.assume; only the "less than" / "less or equal" forms are accepted.
// CI   - constant right-hand side of the comparison.
//
// Accepted bounds:
//   ule <= 0xFFFFFFFF      ult <= 0x100000000
//   sle <= 0x7FFFFFFF      slt <= 0x80000000
//
// The constant is read as an unsigned, zero-extended number, so a negative bound of a 64-bit signed comparison
// shows up as a very large value and is rejected.
bool ProcessBICodeAssumption::matchCmp(ICmpInst::Predicate Pred, ConstantInt *CI) {
  const uint64_t MaxUnsigned32 = llvm::APInt::getMaxValue(32).getZExtValue();
  const uint64_t MaxSigned32 = llvm::APInt::getSignedMaxValue(32).getZExtValue();

  // getLimitedValue() saturates at UINT64_MAX instead of asserting when the constant is wider than 64 bits
  // (this function is reached for every assume(icmp ...), before the operand is known to be an i64 builtin).
  // A saturated value is larger than every bound below, so such constants are simply rejected.
  const uint64_t Bound = CI->getLimitedValue();

  switch (Pred) {
  case ICmpInst::ICMP_ULE:
    return Bound <= MaxUnsigned32;
  case ICmpInst::ICMP_ULT:
    return Bound <= MaxUnsigned32 + 1;
  case ICmpInst::ICMP_SLE:
    return Bound <= MaxSigned32;
  case ICmpInst::ICMP_SLT:
    return Bound <= MaxSigned32 + 1;
  default:
    return false;
  }
}
148+
149+
// Returns true when I is a direct call to one of the builtin functions handled by this pass
// (_Z33__spirv_BuiltInGlobalInvocationIdi or _Z29__spirv_BuiltInGlobalLinearIdv).
bool ProcessBICodeAssumption::matchBuiltin(Instruction *I) {
  auto *CI = dyn_cast<CallInst>(I);
  if (!CI)
    return false;

  // getCalledFunction() returns null for an indirect call; such a call cannot be one of the builtins
  // and must not be dereferenced.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee)
    return false;

  const StringRef Name = Callee->getName();
  return Name == "_Z33__spirv_BuiltInGlobalInvocationIdi" || Name == "_Z29__spirv_BuiltInGlobalLinearIdv";
}
156+
157+
// Register pass to igc-opt
158+
// Command-line name of the pass; the lit test invokes it as --igc-process-bi-code-assumption.
#define PASS_FLAG "igc-process-bi-code-assumption"
159+
// Human-readable description handed to the registration macros below.
#define PASS_DESCRIPTION "Processes code assumptions assigned to builtin variables"
160+
#define PASS_CFG_ONLY false
161+
#define PASS_ANALYSIS false
162+
// No dependencies are declared between BEGIN and END.
IGC_INITIALIZE_PASS_BEGIN(ProcessBICodeAssumption, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
163+
IGC_INITIALIZE_PASS_END(ProcessBICodeAssumption, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
164+
165+
// Pass identifier handed to the FunctionPass base class by the constructor.
char ProcessBICodeAssumption::ID = 0;
166+
167+
// Factory used by the pass pipeline: returns a newly allocated ProcessBICodeAssumption instance.
FunctionPass *IGC::createProcessBICodeAssumptionPass() {
  FunctionPass *Pass = new ProcessBICodeAssumption();
  return Pass;
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#pragma once
10+
11+
#include "common/LLVMWarningsPush.hpp"
12+
#include <llvm/Pass.h>
13+
#include "common/LLVMWarningsPop.hpp"
14+
15+
namespace IGC {
16+
// Returns a newly allocated instance of the ProcessBICodeAssumption function pass, which narrows
// OpenCL builtin values to 32 bits when an llvm.assume states that they fit.
llvm::FunctionPass *createProcessBICodeAssumptionPass();
17+
} // namespace IGC
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --igc-process-bi-code-assumption -S < %s 2>&1 | FileCheck %s
10+
11+
; Test different llvm.assume conditions.
12+
13+
; CHECK-LABEL: @ule_4294967295
14+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
15+
; CHECK-NEXT: %2 = trunc i64 %1 to i32
16+
; CHECK-NEXT: %3 = zext i32 %2 to i64
17+
; CHECK-NEXT: %4 = icmp ule i64 %3, 4294967295
18+
; CHECK-NEXT: call void @llvm.assume(i1 %4)
19+
; CHECK-NEXT: ret void
20+
define void @ule_4294967295() {
21+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
22+
%2 = icmp ule i64 %1, 4294967295
23+
call void @llvm.assume(i1 %2)
24+
ret void
25+
}
26+
27+
; CHECK-LABEL: @ule_4294967296
28+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
29+
; CHECK-NEXT: %2 = icmp ule i64 %1, 4294967296
30+
; CHECK-NEXT: call void @llvm.assume(i1 %2)
31+
; CHECK-NEXT: ret void
32+
define void @ule_4294967296() {
33+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
34+
%2 = icmp ule i64 %1, 4294967296
35+
call void @llvm.assume(i1 %2)
36+
ret void
37+
}
38+
39+
; CHECK-LABEL: @ult_4294967296
40+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
41+
; CHECK-NEXT: %2 = trunc i64 %1 to i32
42+
; CHECK-NEXT: %3 = zext i32 %2 to i64
43+
; CHECK-NEXT: %4 = icmp ult i64 %3, 4294967296
44+
; CHECK-NEXT: call void @llvm.assume(i1 %4)
45+
; CHECK-NEXT: ret void
46+
define void @ult_4294967296() {
47+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
48+
%2 = icmp ult i64 %1, 4294967296
49+
call void @llvm.assume(i1 %2)
50+
ret void
51+
}
52+
53+
; CHECK-LABEL: @ult_4294967297
54+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
55+
; CHECK-NEXT: %2 = icmp ult i64 %1, 4294967297
56+
; CHECK-NEXT: call void @llvm.assume(i1 %2)
57+
; CHECK-NEXT: ret void
58+
define void @ult_4294967297() {
59+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
60+
%2 = icmp ult i64 %1, 4294967297
61+
call void @llvm.assume(i1 %2)
62+
ret void
63+
}
64+
65+
; CHECK-LABEL: @sle_2147483647
66+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
67+
; CHECK-NEXT: %2 = trunc i64 %1 to i32
68+
; CHECK-NEXT: %3 = zext i32 %2 to i64
69+
; CHECK-NEXT: %4 = icmp sle i64 %3, 2147483647
70+
; CHECK-NEXT: call void @llvm.assume(i1 %4)
71+
; CHECK-NEXT: ret void
72+
define void @sle_2147483647() {
73+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
74+
%2 = icmp sle i64 %1, 2147483647
75+
call void @llvm.assume(i1 %2)
76+
ret void
77+
}
78+
79+
; CHECK-LABEL: @sle_2147483648
80+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
81+
; CHECK-NEXT: %2 = icmp sle i64 %1, 2147483648
82+
; CHECK-NEXT: call void @llvm.assume(i1 %2)
83+
; CHECK-NEXT: ret void
84+
define void @sle_2147483648() {
85+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
86+
%2 = icmp sle i64 %1, 2147483648
87+
call void @llvm.assume(i1 %2)
88+
ret void
89+
}
90+
91+
; CHECK-LABEL: @slt_2147483648
92+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
93+
; CHECK-NEXT: %2 = trunc i64 %1 to i32
94+
; CHECK-NEXT: %3 = zext i32 %2 to i64
95+
; CHECK-NEXT: %4 = icmp slt i64 %3, 2147483648
96+
; CHECK-NEXT: call void @llvm.assume(i1 %4)
97+
; CHECK-NEXT: ret void
98+
define void @slt_2147483648() {
99+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
100+
%2 = icmp slt i64 %1, 2147483648
101+
call void @llvm.assume(i1 %2)
102+
ret void
103+
}
104+
105+
; CHECK-LABEL: @slt_2147483649
106+
; CHECK-NEXT: %1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
107+
; CHECK-NEXT: %2 = icmp slt i64 %1, 2147483649
108+
; CHECK-NEXT: call void @llvm.assume(i1 %2)
109+
; CHECK-NEXT: ret void
110+
define void @slt_2147483649() {
111+
%1 = call spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32 0)
112+
%2 = icmp slt i64 %1, 2147483649
113+
call void @llvm.assume(i1 %2)
114+
ret void
115+
}
116+
117+
declare void @llvm.assume(i1)
118+
declare spir_func i64 @_Z33__spirv_BuiltInGlobalInvocationIdi(i32)

0 commit comments

Comments
 (0)