Skip to content

Commit 3b23183

Browse files
committed
[ValueTracking] If overlap in unsigned and signed range is contiguous, return it
When both nuw and nsw are set, we only have to choose between the unsigned and the signed range if their overlap consists of two separate pieces, i.e. the range wraps around. For example, unsigned [254, 255] vs. signed [-128, 125]: [254, 255] corresponds to signed [-2, -1], which lies inside [-128, 125], so the overlap is contiguous and can be returned directly. However, a case that would not work is one where we have to pick between unsigned [0, 129] and signed [-127, 127], because 129 is -127 when interpreted as a signed i8, so the overlap splits into two disjoint pieces ([0, 127] and {129}). Update ValueTracking.cpp. Revert "[ValueTracking] If overlap in unsigned and signed range is contiguous, return it". This reverts commit 22e997c489aad3173db78f6fee17212bd16be96d. ok
1 parent 34c85ed commit 3b23183

File tree

4 files changed

+148
-69
lines changed

4 files changed

+148
-69
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 140 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -9581,63 +9581,150 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
95819581
const APInt *C;
95829582
switch (BO.getOpcode()) {
95839583
case Instruction::Sub:
9584-
if (match(BO.getOperand(0), m_APInt(C))) {
9585-
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9586-
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9587-
9588-
// If the caller expects a signed compare, then try to use a signed range.
9589-
// Otherwise if both no-wraps are set, use the unsigned range because it
9590-
// is never larger than the signed range. Example:
9591-
// "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
9592-
// "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
9593-
if (PreferSignedRange && HasNSW && HasNUW)
9594-
HasNUW = false;
9595-
9596-
if (HasNUW) {
9597-
// 'sub nuw c, x' produces [0, C].
9598-
Upper = *C + 1;
9599-
} else if (HasNSW) {
9600-
if (C->isNegative()) {
9601-
// 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
9602-
Lower = APInt::getSignedMinValue(Width);
9603-
Upper = *C - APInt::getSignedMaxValue(Width);
9604-
} else {
9605-
// Note that sub 0, INT_MIN is not NSW. It technically is a signed wrap
9606-
// 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
9607-
Lower = *C - APInt::getSignedMaxValue(Width);
9608-
Upper = APInt::getSignedMinValue(Width);
9609-
}
9584+
if (match(BO.getOperand(0), m_APInt(C))) {
9585+
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9586+
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9587+
9588+
// Build the two candidate ranges as [lo..hi]:
9589+
// unsignedRange: NUW ⇒ [0 .. C]
9590+
// signedRange: NSW ⇒ either [SINT_MIN .. -C - SINT_MIN] or [C - SINT_MAX .. SINT_MAX]
9591+
auto makeUnsignedRange = [&]() {
9592+
return std::pair<APInt,APInt>(APInt::getZero(Width), *C);
9593+
};
9594+
auto makeSignedRange = [&]() {
9595+
if (C->isNegative()) {
9596+
// sub nsw -C, x
9597+
APInt lo = APInt::getSignedMinValue(Width);
9598+
APInt hi = *C - APInt::getSignedMinValue(Width);
9599+
return std::pair<APInt,APInt>(lo, hi);
9600+
} else {
9601+
// sub nsw C, x
9602+
APInt lo = *C - APInt::getSignedMaxValue(Width);
9603+
APInt hi = APInt::getSignedMaxValue(Width);
9604+
return std::pair<APInt,APInt>(lo, hi);
9605+
}
9606+
};
9607+
9608+
// Split a (possibly wrapping) [lo..hi] into up to two non‑wrapping pieces:
9609+
auto splitPieces = [&](std::pair<APInt,APInt> rng,
9610+
SmallVectorImpl<std::pair<APInt,APInt>>& pieces) {
9611+
APInt lo = rng.first, hi = rng.second;
9612+
if (lo.ugt(hi)) {
9613+
// wraps around 2^n
9614+
pieces.emplace_back(lo, APInt::getMaxValue(Width)); // [lo..2^n-1]
9615+
pieces.emplace_back(APInt::getZero(Width), hi); // [0..hi]
9616+
} else {
9617+
pieces.emplace_back(lo, hi);
9618+
}
9619+
};
9620+
9621+
SmallVector<std::pair<APInt,APInt>,2> piecesU, piecesS;
9622+
if (HasNUW) splitPieces(makeUnsignedRange(), piecesU);
9623+
if (HasNSW) splitPieces(makeSignedRange(), piecesS);
9624+
9625+
// Intersect piecewise:
9626+
SmallVector<std::pair<APInt,APInt>,2> inters;
9627+
for (auto &u : piecesU) {
9628+
for (auto &s : piecesS) {
9629+
APInt loI = u.first.ugt(s.first) ? u.first : s.first;
9630+
APInt hiI = u.second.ult(s.second) ? u.second : s.second;
9631+
if (loI.ule(hiI))
9632+
inters.emplace_back(loI, hiI);
96109633
}
96119634
}
9612-
break;
9613-
case Instruction::Add:
9614-
if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9615-
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9616-
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9617-
9618-
// If the caller expects a signed compare, then try to use a signed
9619-
// range. Otherwise if both no-wraps are set, use the unsigned range
9620-
// because it is never larger than the signed range. Example: "add nuw
9621-
// nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
9622-
if (PreferSignedRange && HasNSW && HasNUW)
9623-
HasNUW = false;
9624-
9625-
if (HasNUW) {
9626-
// 'add nuw x, C' produces [C, UINT_MAX].
9627-
Lower = *C;
9628-
} else if (HasNSW) {
9629-
if (C->isNegative()) {
9630-
// 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9631-
Lower = APInt::getSignedMinValue(Width);
9632-
Upper = APInt::getSignedMaxValue(Width) + *C + 1;
9633-
} else {
9634-
// 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9635-
Lower = APInt::getSignedMinValue(Width) + *C;
9636-
Upper = APInt::getSignedMaxValue(Width) + 1;
9637-
}
9635+
9636+
if (inters.size() == 1) {
9637+
// Exactly one contiguous overlap → use it
9638+
Lower = inters[0].first;
9639+
Upper = inters[0].second;
9640+
} else if (HasNUW && !PreferSignedRange) {
9641+
// Fallback to plain NUW result [0..C]
9642+
Lower = APInt::getZero(Width);
9643+
Upper = *C;
9644+
} else if (HasNSW) {
9645+
// Fallback to plain NSW result
9646+
auto S = makeSignedRange();
9647+
Lower = S.first;
9648+
Upper = S.second;
9649+
}
9650+
}
9651+
break;
9652+
case Instruction::Add:
9653+
if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9654+
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9655+
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9656+
9657+
// If the caller prefers signed ranges when both wraps are forbidden:
9658+
if (PreferSignedRange && HasNSW && HasNUW)
9659+
HasNUW = false;
9660+
9661+
// Build the two candidate ranges as [lo..hi] in the unsigned 0..2^n-1 world:
9662+
// NUW: 'add nuw x, C' ⇒ [ C .. UINT_MAX ]
9663+
auto makeUnsignedRange = [&]() {
9664+
APInt lo = *C;
9665+
APInt hi = APInt::getMaxValue(Width);
9666+
return std::pair<APInt,APInt>(lo, hi);
9667+
};
9668+
9669+
// NSW: 'add nsw x, C'
9670+
// if C<0: [ SINT_MIN .. SINT_MAX + C ]
9671+
// else: [ SINT_MIN + C .. SINT_MAX ]
9672+
auto makeSignedRange = [&]() {
9673+
if (C->isNegative()) {
9674+
APInt lo = APInt::getSignedMinValue(Width);
9675+
APInt hi = APInt::getSignedMaxValue(Width) + *C;
9676+
return std::pair<APInt,APInt>(lo, hi);
9677+
} else {
9678+
APInt lo = APInt::getSignedMinValue(Width) + *C;
9679+
APInt hi = APInt::getSignedMaxValue(Width);
9680+
return std::pair<APInt,APInt>(lo, hi);
9681+
}
9682+
};
9683+
9684+
// Split [lo..hi] into up to two non‑wrapping intervals:
9685+
auto splitPieces = [&](std::pair<APInt,APInt> rng,
9686+
SmallVectorImpl<std::pair<APInt,APInt>> &dst) {
9687+
APInt lo = rng.first, hi = rng.second;
9688+
if (lo.ugt(hi)) {
9689+
// wraps around 2^n
9690+
dst.emplace_back(lo, APInt::getMaxValue(Width));
9691+
dst.emplace_back(APInt::getZero(Width), hi);
9692+
} else {
9693+
dst.emplace_back(lo, hi);
9694+
}
9695+
};
9696+
9697+
SmallVector<std::pair<APInt,APInt>,2> piecesU, piecesS;
9698+
if (HasNUW) splitPieces(makeUnsignedRange(), piecesU);
9699+
if (HasNSW) splitPieces(makeSignedRange(), piecesS);
9700+
9701+
// Intersect piecewise
9702+
SmallVector<std::pair<APInt,APInt>,2> inters;
9703+
for (auto &u : piecesU) {
9704+
for (auto &s : piecesS) {
9705+
APInt loI = u.first.ugt(s.first) ? u.first : s.first;
9706+
APInt hiI = u.second.ult(s.second) ? u.second : s.second;
9707+
if (loI.ule(hiI))
9708+
inters.emplace_back(loI, hiI);
96389709
}
96399710
}
9640-
break;
9711+
9712+
if (inters.size() == 1) {
9713+
// Exactly one contiguous overlap ⇒ use it
9714+
Lower = inters[0].first;
9715+
Upper = inters[0].second + 1; // make Upper exclusive if you’re following [Lo..Hi)
9716+
} else if (HasNUW && !PreferSignedRange) {
9717+
// Fallback to plain NUW [C..UINT_MAX]
9718+
Lower = *C;
9719+
Upper = APInt::getMaxValue(Width) + 1;
9720+
} else if (HasNSW) {
9721+
// Fallback to plain NSW
9722+
auto S = makeSignedRange();
9723+
Lower = S.first;
9724+
Upper = S.second + 1;
9725+
}
9726+
}
9727+
break;
96419728

96429729
case Instruction::And:
96439730
if (match(BO.getOperand(1), m_APInt(C)))

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,8 +1412,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
14121412
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
14131413
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
14141414
; CHECK: middle.block:
1415-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1416-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1415+
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
14171416
; CHECK: scalar.ph:
14181417
;
14191418
entry:
@@ -1502,8 +1501,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
15021501
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
15031502
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
15041503
; CHECK: middle.block:
1505-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1506-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
1504+
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
15071505
; CHECK: scalar.ph:
15081506
;
15091507
entry:

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
6767
; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
6868
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6969
; SCALAR_TAIL_FOLDING: middle.block:
70-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
71-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
70+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
7271
; SCALAR_TAIL_FOLDING: scalar.ph:
7372
;
7473
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1
@@ -205,8 +204,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
205204
; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
206205
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
207206
; SCALAR_TAIL_FOLDING: middle.block:
208-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
209-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
207+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
210208
; SCALAR_TAIL_FOLDING: scalar.ph:
211209
;
212210
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2
@@ -335,8 +333,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
335333
; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
336334
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
337335
; SCALAR_TAIL_FOLDING: middle.block:
338-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
339-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
336+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
340337
; SCALAR_TAIL_FOLDING: scalar.ph:
341338
;
342339
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3
@@ -490,8 +487,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
490487
; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
491488
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
492489
; SCALAR_TAIL_FOLDING: middle.block:
493-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
494-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
490+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
495491
; SCALAR_TAIL_FOLDING: scalar.ph:
496492
;
497493
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4

llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
5252
; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5353
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5454
; SCALAR_EPILOGUE: middle.block:
55-
; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
56-
; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
55+
; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5756
; SCALAR_EPILOGUE: scalar.ph:
5857
;
5958
; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor2
@@ -237,8 +236,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
237236
; SCALAR_EPILOGUE-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
238237
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
239238
; SCALAR_EPILOGUE: middle.block:
240-
; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
241-
; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
239+
; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
242240
; SCALAR_EPILOGUE: scalar.ph:
243241
;
244242
; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor4

0 commit comments

Comments
 (0)