Skip to content

Commit ad9e591

Browse files
authored
[SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode. (#144565)
We already have shl/mul vscale related folds in getNode. This is an alternative to the DAGCombine proposed in #144507.
Parent commit: 7b9d10d · This commit: ad9e591

14 files changed: 682 lines added (+682), 810 lines removed (−810).

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7377,6 +7377,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
73777377
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
73787378
VT.getScalarType() == MVT::i1)
73797379
return getNode(ISD::XOR, DL, VT, N1, N2);
7380+
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
7381+
if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
7382+
N2.getOpcode() == ISD::VSCALE) {
7383+
const APInt &C1 = N1->getConstantOperandAPInt(0);
7384+
const APInt &C2 = N2->getConstantOperandAPInt(0);
7385+
return getVScale(DL, VT, C1 + C2);
7386+
}
73807387
break;
73817388
case ISD::MUL:
73827389
assert(VT.isInteger() && "This operator does not apply to FP types!");

llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -488,8 +488,6 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
488488
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
489489
; CHECK-NEXT: vslidedown.vx v13, v10, a0
490490
; CHECK-NEXT: vslidedown.vx v12, v9, a0
491-
; CHECK-NEXT: add a1, a0, a0
492-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
493491
; CHECK-NEXT: vslideup.vx v12, v10, a0
494492
; CHECK-NEXT: vmv2r.v v8, v12
495493
; CHECK-NEXT: ret
@@ -543,8 +541,6 @@ define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat
543541
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
544542
; CHECK-NEXT: vslidedown.vx v13, v10, a0
545543
; CHECK-NEXT: vslidedown.vx v12, v9, a0
546-
; CHECK-NEXT: add a1, a0, a0
547-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
548544
; CHECK-NEXT: vslideup.vx v12, v10, a0
549545
; CHECK-NEXT: vmv2r.v v8, v12
550546
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale
8181
; CHECK-NEXT: srli a1, a0, 3
8282
; CHECK-NEXT: srli a0, a0, 2
8383
; CHECK-NEXT: add a0, a0, a1
84-
; CHECK-NEXT: add a1, a0, a1
85-
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
84+
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
8685
; CHECK-NEXT: vslideup.vx v8, v9, a0
8786
; CHECK-NEXT: ret
8887
%v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
@@ -246,8 +245,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
246245
; CHECK: # %bb.0:
247246
; CHECK-NEXT: csrr a0, vlenb
248247
; CHECK-NEXT: srli a0, a0, 3
249-
; CHECK-NEXT: add a1, a0, a0
250-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
248+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
251249
; CHECK-NEXT: vslideup.vx v8, v16, a0
252250
; CHECK-NEXT: ret
253251
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -282,8 +280,8 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vsc
282280
; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
283281
; CHECK: # %bb.0:
284282
; CHECK-NEXT: csrr a0, vlenb
283+
; CHECK-NEXT: srli a1, a0, 2
285284
; CHECK-NEXT: srli a0, a0, 3
286-
; CHECK-NEXT: add a1, a0, a0
287285
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
288286
; CHECK-NEXT: vslideup.vx v8, v10, a0
289287
; CHECK-NEXT: ret
@@ -310,11 +308,11 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vsc
310308
; CHECK: # %bb.0:
311309
; CHECK-NEXT: csrr a0, vlenb
312310
; CHECK-NEXT: srli a1, a0, 3
313-
; CHECK-NEXT: srli a0, a0, 2
314-
; CHECK-NEXT: add a0, a0, a1
315-
; CHECK-NEXT: add a1, a0, a1
316-
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
317-
; CHECK-NEXT: vslideup.vx v8, v10, a0
311+
; CHECK-NEXT: srli a2, a0, 2
312+
; CHECK-NEXT: add a1, a2, a1
313+
; CHECK-NEXT: srli a0, a0, 1
314+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
315+
; CHECK-NEXT: vslideup.vx v8, v10, a1
318316
; CHECK-NEXT: ret
319317
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
320318
ret <vscale x 16 x i8> %v
@@ -363,8 +361,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
363361
; CHECK: # %bb.0:
364362
; CHECK-NEXT: csrr a0, vlenb
365363
; CHECK-NEXT: srli a0, a0, 2
366-
; CHECK-NEXT: add a1, a0, a0
367-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
364+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
368365
; CHECK-NEXT: vslideup.vx v8, v16, a0
369366
; CHECK-NEXT: ret
370367
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -376,8 +373,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
376373
; CHECK: # %bb.0:
377374
; CHECK-NEXT: csrr a0, vlenb
378375
; CHECK-NEXT: srli a0, a0, 2
379-
; CHECK-NEXT: add a1, a0, a0
380-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
376+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
381377
; CHECK-NEXT: vslideup.vx v14, v16, a0
382378
; CHECK-NEXT: ret
383379
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
@@ -422,8 +418,8 @@ define <vscale x 32 x i1> @insert_nxv32i1_nxv8i1_8(<vscale x 32 x i1> %v, <vscal
422418
; CHECK-LABEL: insert_nxv32i1_nxv8i1_8:
423419
; CHECK: # %bb.0:
424420
; CHECK-NEXT: csrr a0, vlenb
421+
; CHECK-NEXT: srli a1, a0, 2
425422
; CHECK-NEXT: srli a0, a0, 3
426-
; CHECK-NEXT: add a1, a0, a0
427423
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
428424
; CHECK-NEXT: vslideup.vx v0, v8, a0
429425
; CHECK-NEXT: ret
@@ -570,8 +566,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_2(<vscale x 32 x bfloat
570566
; CHECK: # %bb.0:
571567
; CHECK-NEXT: csrr a0, vlenb
572568
; CHECK-NEXT: srli a0, a0, 2
573-
; CHECK-NEXT: add a1, a0, a0
574-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
569+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
575570
; CHECK-NEXT: vslideup.vx v8, v16, a0
576571
; CHECK-NEXT: ret
577572
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 2)
@@ -583,8 +578,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_26(<vscale x 32 x bfloa
583578
; CHECK: # %bb.0:
584579
; CHECK-NEXT: csrr a0, vlenb
585580
; CHECK-NEXT: srli a0, a0, 2
586-
; CHECK-NEXT: add a1, a0, a0
587-
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
581+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
588582
; CHECK-NEXT: vslideup.vx v14, v16, a0
589583
; CHECK-NEXT: ret
590584
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 26)

Commit comments: 0