Skip to content

Commit 108fe4b

Browse files
YuriPlyakhinigcbot
authored andcommitted
Limit PromoteToPredicatedMemoryAccess
Limite PromoteToPredicatedMemoryAccess to: - simple load/store - load/stores using only legal types Early exit if found other instructions which read from memory or calls which are convergent, since it is not safe to remove branch in this case.
1 parent 6c202ab commit 108fe4b

File tree

8 files changed

+185
-26
lines changed

8 files changed

+185
-26
lines changed

IGC/Compiler/Legalizer/TypeLegalizer.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2021 Intel Corporation
3+
Copyright (C) 2017 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -110,6 +110,21 @@ namespace IGC {
110110

111111
bool runOnFunction(Function& F) override;
112112

113+
static bool isLegalInteger(unsigned width)
114+
{
115+
switch (width)
116+
{
117+
case 8:
118+
case 16:
119+
case 32:
120+
case 64:
121+
return true;
122+
default:
123+
break;
124+
}
125+
return false;
126+
}
127+
113128
private:
114129
void getAnalysisUsage(AnalysisUsage& AU) const override;
115130

@@ -215,21 +230,6 @@ namespace IGC {
215230
return Width;
216231
}
217232

218-
bool isLegalInteger(unsigned width) const
219-
{
220-
switch (width)
221-
{
222-
case 8:
223-
case 16:
224-
case 32:
225-
case 64:
226-
return true;
227-
default:
228-
break;
229-
}
230-
return false;
231-
}
232-
233233
/// getLargestLegalIntTypeSize() - Return the size of the largest legal
234234
/// integer type with size not bigger than Width bits.
235235
unsigned getLargestLegalIntTypeSize(unsigned Width) const {

IGC/Compiler/Optimizer/PromoteToPredicatedMemoryAccess.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ SPDX-License-Identifier: MIT
1818
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
1919
#include "Compiler/CodeGenPublic.h"
2020
#include "Compiler/IGCPassSupport.h"
21+
#include "Compiler/Legalizer/TypeLegalizer.h"
2122
#include "Probe/Assertion.h"
2223

2324
#define DEBUG_TYPE "PromoteToPredicatedMemoryAccess"
2425

2526
using namespace llvm;
2627
using namespace IGC;
28+
using namespace IGC::Legalizer;
2729

2830
static cl::opt<int> PredicatedMemOpIfConvertMaxInstrs(
2931
"igc-predmem-ifconv-max-instrs", cl::init(5), cl::Hidden,
@@ -108,6 +110,16 @@ void PromoteToPredicatedMemoryAccess::fixPhiNode(PHINode &Phi, BasicBlock &Prede
108110
Phi.eraseFromParent();
109111
}
110112

113+
namespace {
114+
bool IsTypeLegal(Type *Ty) {
115+
if (!Ty->isIntOrIntVectorTy())
116+
return true;
117+
118+
unsigned Width = Ty->getScalarSizeInBits();
119+
return TypeLegalizer::isLegalInteger(Width) || Width == 1;
120+
}
121+
} // namespace
122+
111123
bool PromoteToPredicatedMemoryAccess::trySingleBlockIfConv(Value &Cond, BasicBlock &BranchBB,
112124
BasicBlock &ConvBB, BasicBlock &SuccBB,
113125
bool Inverse) {
@@ -141,26 +153,34 @@ bool PromoteToPredicatedMemoryAccess::trySingleBlockIfConv(Value &Cond, BasicBlo
141153
auto *Inst = dyn_cast<Instruction>(Phi.getIncomingValue(Idx));
142154
if (!Inst)
143155
return false;
144-
if (!isa<LoadInst>(Inst))
156+
if (auto *LI = dyn_cast<LoadInst>(Inst); !LI || !LI->isSimple())
157+
return false;
158+
159+
if (!IsTypeLegal(Inst->getType()))
145160
return false;
146161

147162
Insts[Inst] = Phi.getIncomingValueForBlock(&BranchBB);
148163
}
149164

150-
// Collect all the void
165+
// Collect the rest of the instructions
151166
for (auto &I : ConvBB) {
152167
// Check if this load is handled in the previous loop
153168
if (isa<LoadInst>(&I) && Insts.count(&I))
154169
continue;
155170

156171
// Store
157-
if(isa<StoreInst>(&I)) {
172+
if(auto *SI = dyn_cast<StoreInst>(&I)) {
173+
if (!SI->isSimple() || !IsTypeLegal(SI->getValueOperand()->getType()))
174+
return false;
158175
Insts[&I] = nullptr;
159176
continue;
160177
}
161178

162-
if (I.mayHaveSideEffects())
179+
if (I.mayHaveSideEffects() || I.mayReadFromMemory())
163180
return false;
181+
182+
if (CallInst *CI = dyn_cast<CallInst>(&I); CI && CI->isConvergent())
183+
return false;
164184
}
165185

166186
LLVM_DEBUG(dbgs() << "Found if-convertible block:\n"

IGC/Compiler/Optimizer/PromoteToPredicatedMemoryAccess.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ namespace IGC
2727
// %17 = call <4 x float> @llvm.genx.GenISA.PredicatedLoad.v4f32.p1v4f32.v4f32(<4 x float> addrspace(1)* %bitc0, i64 16, i1 %pred, <4 x float> %mergeValue)
2828
// if found in specific pattern and then performs if-conversion.
2929
//
30-
// The pass looks for conditional branches that can be if-converted. The only "hammock" form
30+
// Constraints:
31+
// 1. The pass looks for conditional branches that can be if-converted. The only "hammock" form
3132
// of the control flow is supported, i.e. the true block has a single
3233
// predecessor and the false block has two predecessors. The true block must
3334
// have a single successor that is the false block.
@@ -37,9 +38,11 @@ namespace IGC
3738
// }
3839
// false block
3940
//
40-
// All the instructions in the true block must be safe to execute in the false
41+
// 2. All the instructions in the true block must be safe to execute in the false
4142
// block. The pass makes the instructions in the true block to be executed
4243
// conditionally and replaces branch to unconditional one.
44+
// 3. Load/Store instructions should be simple.
45+
// 4. Load/Store instructions should use legal data types.
4346
//
4447
// The pass expects that the simplifycfg pass will be run after it to clean up
4548
// the CFG.

IGC/Compiler/tests/PromoteToPredicatedMemoryAccess/load.ll

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,105 @@ exit:
9595
%res = phi <64 x i32> [ %load, %then ], [ %data, %if ], [ zeroinitializer, %entry ]
9696
ret <64 x i32> %res
9797
}
98+
99+
; Test exit if load is not simple
100+
; CHECK-LABEL: @test5(
101+
define <64 x i32> @test5(<64 x i32> addrspace(1)* %src, i1 %pred, <64 x i32> %data) {
102+
entry:
103+
; CHECK: br i1 %pred, label %st, label %exit
104+
br i1 %pred, label %st, label %exit
105+
106+
st:
107+
; CHECK: %load = load volatile <64 x i32>, <64 x i32> addrspace(1)* %src, align 4
108+
%load = load volatile <64 x i32>, <64 x i32> addrspace(1)* %src, align 4
109+
br label %exit
110+
111+
exit:
112+
; CHECK: %res = phi <64 x i32> [ %load, %st ], [ %data, %entry ]
113+
; CHECK: ret <64 x i32> %res
114+
%res = phi <64 x i32> [ %load, %st ], [ %data, %entry ]
115+
ret <64 x i32> %res
116+
}
117+
118+
; Test exit if integer size is not legal
119+
; CHECK-LABEL: @test6(
120+
define i42 @test6(i42 addrspace(1)* %src, i1 %pred, i42 %data) {
121+
entry:
122+
; CHECK: br i1 %pred, label %st, label %exit
123+
br i1 %pred, label %st, label %exit
124+
125+
st:
126+
; CHECK: %load = load i42, i42 addrspace(1)* %src, align 4
127+
%load = load i42, i42 addrspace(1)* %src, align 4
128+
br label %exit
129+
130+
exit:
131+
; CHECK: %res = phi i42 [ %load, %st ], [ %data, %entry ]
132+
; CHECK: ret i42 %res
133+
%res = phi i42 [ %load, %st ], [ %data, %entry ]
134+
ret i42 %res
135+
}
136+
137+
; Call instruction with side effects
138+
; CHECK-LABEL: @test7(
139+
define i32 @test7(i32 addrspace(1)* %src, i1 %pred, i32 %data) {
140+
entry:
141+
; CHECK: br i1 %pred, label %st, label %exit
142+
br i1 %pred, label %st, label %exit
143+
144+
st:
145+
; CHECK: %load = load i32, i32 addrspace(1)* %src, align 4
146+
%load = load i32, i32 addrspace(1)* %src, align 4
147+
call void @side_effect_function()
148+
br label %exit
149+
150+
exit:
151+
; CHECK: %res = phi i32 [ %load, %st ], [ %data, %entry ]
152+
; CHECK: ret i32 %res
153+
%res = phi i32 [ %load, %st ], [ %data, %entry ]
154+
ret i32 %res
155+
}
156+
157+
declare void @side_effect_function()
158+
159+
; Convergent call
160+
; CHECK-LABEL: @test8(
161+
define i32 @test8(i32 addrspace(1)* %src, i1 %pred, i32 %data) {
162+
entry:
163+
; CHECK: br i1 %pred, label %st, label %exit
164+
br i1 %pred, label %st, label %exit
165+
166+
st:
167+
; CHECK: %load = load i32, i32 addrspace(1)* %src, align 4
168+
%load = load i32, i32 addrspace(1)* %src, align 4
169+
call void @convergent_function()
170+
br label %exit
171+
172+
exit:
173+
; CHECK: %res = phi i32 [ %load, %st ], [ %data, %entry ]
174+
; CHECK: ret i32 %res
175+
%res = phi i32 [ %load, %st ], [ %data, %entry ]
176+
ret i32 %res
177+
}
178+
179+
declare void @convergent_function() nounwind readnone willreturn convergent
180+
181+
; Instruction may read from memory
182+
; CHECK-LABEL: @test9(
183+
define i32 @test9(i32 addrspace(1)* %src, i32 addrspace(1)* %src1, i1 %pred, i32 %data) {
184+
entry:
185+
; CHECK: br i1 %pred, label %st, label %exit
186+
br i1 %pred, label %st, label %exit
187+
188+
st:
189+
; CHECK: %load = load i32, i32 addrspace(1)* %src, align 4
190+
%load = load i32, i32 addrspace(1)* %src, align 4
191+
%load1 = load i32, i32 addrspace(1)* %src1, align 4
192+
br label %exit
193+
194+
exit:
195+
; CHECK: %res = phi i32 [ %load, %st ], [ %data, %entry ]
196+
; CHECK: ret i32 %res
197+
%res = phi i32 [ %load, %st ], [ %data, %entry ]
198+
ret i32 %res
199+
}

IGC/Compiler/tests/PromoteToPredicatedMemoryAccess/store.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,35 @@ st:
4444
exit:
4545
ret void
4646
}
47+
48+
; early exit, not simple
49+
; CHECK-LABEL: @test3(
50+
define void @test3(<64 x i32> addrspace(1)* %dst, i1 %pred, <64 x i32> %data) {
51+
entry:
52+
; CHECK: br i1 %pred, label %st, label %exit
53+
br i1 %pred, label %st, label %exit
54+
55+
st:
56+
; CHECK: store volatile <64 x i32> %data, <64 x i32> addrspace(1)* %dst, align 4
57+
store volatile <64 x i32> %data, <64 x i32> addrspace(1)* %dst, align 4
58+
br label %exit
59+
60+
exit:
61+
ret void
62+
}
63+
64+
; illegal int type, exit
65+
; CHECK-LABEL: @test4(
66+
define void @test4(<64 x i33> addrspace(1)* %dst, i1 %pred, <64 x i33> %data) {
67+
entry:
68+
; CHECK: br i1 %pred, label %st, label %exit
69+
br i1 %pred, label %st, label %exit
70+
71+
st:
72+
; CHECK: store <64 x i33> %data, <64 x i33> addrspace(1)* %dst, align 4
73+
store <64 x i33> %data, <64 x i33> addrspace(1)* %dst, align 4
74+
br label %exit
75+
76+
exit:
77+
ret void
78+
}

IGC/ocloc_tests/optimizations/PromoteToPredicatedMemoryAccess/LoadNonUniformBuffer.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ __kernel void test(__global const float* buffer1,
2424
__global const float* buffers[2] = {buffer1, buffer2};
2525
int gid = get_global_id(0);
2626
int bufferIndex = gid % 2;
27+
__global const float* buffer = buffers[bufferIndex];
2728

2829
float val = 0;
2930
if (gid < predicate)
30-
val = buffers[bufferIndex][gid];
31+
val = buffer[gid];
3132
outputBuffer[gid] = val;
3233
}

IGC/ocloc_tests/optimizations/PromoteToPredicatedMemoryAccess/StoreSubDW.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ SPDX-License-Identifier: MIT
1616

1717
__kernel void basic(__global const char* in, __global char* out, const int predicate) {
1818
int gid = get_global_id(0);
19+
char val = in[gid];
1920
if (gid < predicate)
20-
out[gid] = in[gid];
21+
out[gid] = val;
2122
}

IGC/ocloc_tests/optimizations/PromoteToPredicatedMemoryAccess/StoreUniform.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ SPDX-License-Identifier: MIT
1414

1515
// CHECK-ASM: (W&f{{[0-9\.]+}}) store.ugm.d32x1t.a64 (1|M0) [r{{[0-9:]+}}] r{{[0-9:]+}}
1616

17-
__kernel void uniform_store(__global const float* in, __global float* out, const int predicate) {
17+
__kernel void uniform_store(__global float* out, const int predicate) {
1818
if (predicate)
19-
out[0] = in[0];
19+
out[0] = 3;
2020
}

0 commit comments

Comments
 (0)