Skip to content

Commit 7485f34

Browse files
authored
[X86] X86ISelDAGToDAG - don't let ADD/SUB(X,1) -> SUB/ADD(X,-1) constant fold (#169217)
Extension to #168726 - ensure we peek through bitcasts to look for constants (as constant folding will). The DAG should have constant folded this, but we're still fighting the lack of proper topological sorting. Fixes #169205.
1 parent 06fc87b commit 7485f34

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10041004
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
10051005
N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
10061006
APInt SplatVal;
1007-
if (!ISD::isBuildVectorOfConstantSDNodes(N->getOperand(0).getNode()) &&
1007+
if (!ISD::isBuildVectorOfConstantSDNodes(
1008+
peekThroughBitcasts(N->getOperand(0)).getNode()) &&
10081009
X86::isConstantSplat(N->getOperand(1), SplatVal) &&
10091010
SplatVal.isOne()) {
10101011
SDLoc DL(N);

llvm/test/CodeGen/X86/pr169205.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
4+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
5+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
6+
7+
define <4 x i16> @PR169205() {
8+
; SSE-LABEL: PR169205:
9+
; SSE: # %bb.0:
10+
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1,u,u,u,u]
11+
; SSE-NEXT: retq
12+
;
13+
; AVX-LABEL: PR169205:
14+
; AVX: # %bb.0:
15+
; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
16+
; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17+
; AVX-NEXT: retq
18+
%avg = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer)
19+
%shuffle24 = shufflevector <16 x i8> %avg, <16 x i8> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 9, i32 9>
20+
%conv25 = zext <4 x i8> %shuffle24 to <4 x i16>
21+
%not.neg = add <4 x i16> %conv25, splat (i16 1)
22+
ret <4 x i16> %not.neg
23+
}

0 commit comments

Comments
 (0)