llvm · AZero13 · Jun 15, 2025
@@ -9585,56 +9585,147 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
       bool HasNSW = IIQ.hasNoSignedWrap(&BO);
       bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
 
-      // If the caller expects a signed compare, then try to use a signed range.
-      // Otherwise if both no-wraps are set, use the unsigned range because it
-      // is never larger than the signed range. Example:
-      // "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
-      // "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
-      if (PreferSignedRange && HasNSW && HasNUW)
-        HasNUW = false;
-
-      if (HasNUW) {
-        // 'sub nuw c, x' produces [0, C].
-        Upper = *C + 1;
-      } else if (HasNSW) {
+      // Build the two candidate ranges as inclusive [lo..hi] pairs:
+      //   unsignedRange:  NUW ⇒ [0 .. C]
+      //   signedRange:    NSW ⇒ [SINT_MIN .. C - SINT_MIN] when C is negative,
+      //                          otherwise [C - SINT_MAX .. SINT_MAX]
+      auto makeUnsignedRange = [&]() {
+        return std::pair<APInt, APInt>(APInt::getZero(Width), *C);
+      };
+      auto makeSignedRange = [&]() {
         if (C->isNegative()) {
-          // 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
-          Lower = APInt::getSignedMinValue(Width);
-          Upper = *C - APInt::getSignedMaxValue(Width);
+          // sub nsw -C, x
+          APInt lo = APInt::getSignedMinValue(Width);
+          APInt hi = *C - APInt::getSignedMinValue(Width);
+          return std::pair<APInt, APInt>(lo, hi);
+        } else {
+          // sub nsw C, x
+          APInt lo = *C - APInt::getSignedMaxValue(Width);
+          APInt hi = APInt::getSignedMaxValue(Width);
+          return std::pair<APInt, APInt>(lo, hi);
+        }
+      };
+
+      // Split a (possibly wrapping) [lo..hi] into up to two non‑wrapping
+      // pieces:
+      auto splitPieces = [&](std::pair<APInt, APInt> rng,
+                             SmallVectorImpl<std::pair<APInt, APInt>> &pieces) {
+        APInt lo = rng.first, hi = rng.second;
+        if (lo.ugt(hi)) {
+          // wraps around 2^n
+          pieces.emplace_back(lo, APInt::getMaxValue(Width)); // [lo..2^n-1]
+          pieces.emplace_back(APInt::getZero(Width), hi);     // [0..hi]
         } else {
-          // Note that sub 0, INT_MIN is not NSW. It techically is a signed wrap
-          // 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
-          Lower = *C - APInt::getSignedMaxValue(Width);
-          Upper = APInt::getSignedMinValue(Width);
+          pieces.emplace_back(lo, hi);
+        }
+      };
+
+      SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
+      if (HasNUW)
+        splitPieces(makeUnsignedRange(), piecesU);
+      if (HasNSW)
+        splitPieces(makeSignedRange(), piecesS);
+
+      // Intersect piecewise:
+      SmallVector<std::pair<APInt, APInt>, 2> inters;
+      for (auto &u : piecesU) {
+        for (auto &s : piecesS) {
+          APInt loI = u.first.ugt(s.first) ? u.first : s.first;
+          APInt hiI = u.second.ult(s.second) ? u.second : s.second;
+          if (loI.ule(hiI))
+            inters.emplace_back(loI, hiI);
         }
       }
+
+      if (inters.size() == 1) {
+        // Exactly one contiguous overlap; Upper is exclusive, hence the + 1.
+        Lower = inters[0].first;
+        Upper = inters[0].second + 1;
+      } else if (HasNUW && !(PreferSignedRange && HasNSW)) {
+        // Fallback to plain NUW result [0..C], i.e. Upper = C + 1.
+        Lower = APInt::getZero(Width);
+        Upper = *C + 1;
+      } else if (HasNSW) {
+        // Fallback to plain NSW result, converted to an exclusive Upper.
+        auto S = makeSignedRange();
+        Lower = S.first;
+        Upper = S.second + 1;
+      }
     }
     break;
   case Instruction::Add:
     if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
       bool HasNSW = IIQ.hasNoSignedWrap(&BO);
       bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
 
-      // If the caller expects a signed compare, then try to use a signed
-      // range. Otherwise if both no-wraps are set, use the unsigned range
-      // because it is never larger than the signed range. Example: "add nuw
-      // nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
-      if (PreferSignedRange && HasNSW && HasNUW)
-        HasNUW = false;
+      // Build the two candidate ranges as [lo..hi] in the unsigned 0..2^n-1
+      // world:
+      //  NUW: 'add nuw x, C' ⇒ [ C .. UINT_MAX ]
+      auto makeUnsignedRange = [&]() {
+        APInt lo = *C;
+        APInt hi = APInt::getMaxValue(Width);
+        return std::pair<APInt, APInt>(lo, hi);
+      };
 
-      if (HasNUW) {
-        // 'add nuw x, C' produces [C, UINT_MAX].
-        Lower = *C;
-      } else if (HasNSW) {
+      //  NSW: 'add nsw x, C'
+      //    if C<0:  [ SINT_MIN .. SINT_MAX + C ]
+      //    else:    [ SINT_MIN + C .. SINT_MAX ]
+      auto makeSignedRange = [&]() {
         if (C->isNegative()) {
-          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
-          Lower = APInt::getSignedMinValue(Width);
-          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+          APInt lo = APInt::getSignedMinValue(Width);
+          APInt hi = APInt::getSignedMaxValue(Width) + *C;
+          return std::pair<APInt, APInt>(lo, hi);
         } else {
-          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
-          Lower = APInt::getSignedMinValue(Width) + *C;
-          Upper = APInt::getSignedMaxValue(Width) + 1;
+          APInt lo = APInt::getSignedMinValue(Width) + *C;
+          APInt hi = APInt::getSignedMaxValue(Width);
+          return std::pair<APInt, APInt>(lo, hi);
         }
+      };
+
+      // Split [lo..hi] into up to two non‑wrapping intervals:
+      auto splitPieces = [&](std::pair<APInt, APInt> rng,
+                             SmallVectorImpl<std::pair<APInt, APInt>> &dst) {
+        APInt lo = rng.first, hi = rng.second;
+        if (lo.ugt(hi)) {
+          // wraps around 2^n
+          dst.emplace_back(lo, APInt::getMaxValue(Width));
+          dst.emplace_back(APInt::getZero(Width), hi);
+        } else {
+          dst.emplace_back(lo, hi);
+        }
+      };
+
+      SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
+      if (HasNUW)
+        splitPieces(makeUnsignedRange(), piecesU);
+      if (HasNSW)
+        splitPieces(makeSignedRange(), piecesS);
+
+      // Intersect piecewise
+      SmallVector<std::pair<APInt, APInt>, 2> inters;
+      for (auto &u : piecesU) {
+        for (auto &s : piecesS) {
+          APInt loI = u.first.ugt(s.first) ? u.first : s.first;
+          APInt hiI = u.second.ult(s.second) ? u.second : s.second;
+          if (loI.ule(hiI))
+            inters.emplace_back(loI, hiI);
+        }
+      }
+
+      if (inters.size() == 1) {
+        // Exactly one contiguous overlap ⇒ use it. The pieces are inclusive
+        // [lo..hi], so add 1 to obtain the exclusive Upper bound.
+        Lower = inters[0].first;
+        Upper = inters[0].second + 1;
+      } else if (HasNUW && !(PreferSignedRange && HasNSW)) {
+        // Fallback to plain NUW [C..UINT_MAX]; UINT_MAX + 1 wraps to 0.
+        Lower = *C;
+        Upper = APInt::getMaxValue(Width) + 1;
+      } else if (HasNSW) {
+        // Fallback to plain NSW, converted to an exclusive Upper.
+        auto S = makeSignedRange();
+        Lower = S.first;
+        Upper = S.second + 1;
       }
     }
     break;

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1412,8 +1412,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ;
 entry:
@@ -1502,8 +1501,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ;
 entry:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -67,8 +67,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SCALAR_TAIL_FOLDING:       middle.block:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1
@@ -205,8 +204,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SCALAR_TAIL_FOLDING:       middle.block:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2
@@ -335,8 +333,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SCALAR_TAIL_FOLDING:       middle.block:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3
@@ -490,8 +487,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SCALAR_TAIL_FOLDING:       middle.block:
-; SCALAR_TAIL_FOLDING-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_TAIL_FOLDING-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_TAIL_FOLDING:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -52,8 +52,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
 ; SCALAR_EPILOGUE-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_EPILOGUE-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SCALAR_EPILOGUE:       middle.block:
-; SCALAR_EPILOGUE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_EPILOGUE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_EPILOGUE-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_EPILOGUE:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor2
@@ -237,8 +236,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
 ; SCALAR_EPILOGUE-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALAR_EPILOGUE-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SCALAR_EPILOGUE:       middle.block:
-; SCALAR_EPILOGUE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
-; SCALAR_EPILOGUE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SCALAR_EPILOGUE-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SCALAR_EPILOGUE:       scalar.ph:
 ;
 ; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor4