Skip to content

Commit 1c10821

Browse files
authored
[LoopVectorize] Fix divide-by-zero bug (#80836) (#81721)
When attempting to use the estimated trip count to refine the costs of the runtime memory checks, we should also check for sane trip counts to prevent divide-by-zero faults on some platforms. Fixes #80836.
1 parent ea2d938 commit 1c10821

File tree

2 files changed

+44
-4
lines changed

2 files changed

+44
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2010,16 +2010,18 @@ class GeneratedRTChecks {
20102010
BestTripCount = *EstimatedTC;
20112011
}
20122012

2013+
BestTripCount = std::max(BestTripCount, 1U);
20132014
InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
20142015

20152016
// Let's ensure the cost is always at least 1.
20162017
NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
20172018
(InstructionCost::CostType)1);
20182019

2019-
LLVM_DEBUG(dbgs()
2020-
<< "We expect runtime memory checks to be hoisted "
2021-
<< "out of the outer loop. Cost reduced from "
2022-
<< MemCheckCost << " to " << NewMemCheckCost << '\n');
2020+
if (BestTripCount > 1)
2021+
LLVM_DEBUG(dbgs()
2022+
<< "We expect runtime memory checks to be hoisted "
2023+
<< "out of the outer loop. Cost reduced from "
2024+
<< MemCheckCost << " to " << NewMemCheckCost << '\n');
20232025

20242026
MemCheckCost = NewMemCheckCost;
20252027
}

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,43 @@ outer.exit:
177177
}
178178

179179

180+
define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
181+
; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1'
182+
; CHECK: Calculating cost of runtime checks:
183+
; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced
184+
; CHECK: Total cost of runtime checks: 6
185+
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
186+
entry:
187+
br label %outer.loop
188+
189+
outer.loop:
190+
%outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
191+
%mul.us = mul nsw i64 %outer.iv, %n
192+
br label %inner.loop
193+
194+
inner.loop:
195+
%inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
196+
%add.us = add nuw nsw i64 %inner.iv, %mul.us
197+
%arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
198+
%0 = load i8, ptr %arrayidx.us, align 1
199+
%arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
200+
%1 = load i8, ptr %arrayidx7.us, align 1
201+
%add9.us = add i8 %1, %0
202+
store i8 %add9.us, ptr %arrayidx7.us, align 1
203+
%inner.iv.next = add nuw nsw i64 %inner.iv, 1
204+
%exitcond.not = icmp eq i64 %inner.iv.next, %n
205+
br i1 %exitcond.not, label %inner.exit, label %inner.loop
206+
207+
inner.exit:
208+
%outer.iv.next = add nuw nsw i64 %outer.iv, 1
209+
%exitcond26.not = icmp eq i64 %outer.iv.next, %m
210+
br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !1
211+
212+
outer.exit:
213+
ret void
214+
}
215+
216+
180217
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
181218
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
182219
; CHECK: Calculating cost of runtime checks:
@@ -215,3 +252,4 @@ outer.exit:
215252

216253

217254
!0 = !{!"branch_weights", i32 10, i32 20}
255+
!1 = !{!"branch_weights", i32 1, i32 -1}

0 commit comments

Comments
 (0)