Skip to content

Commit c32a44e

Browse files
committed
[AArch64] Reduce the costs of and/or/xor reductions
Since the costs were added, the codegen for i8/i16 and/or/xor reductions has improved. This updates the cost model so that the costs match the number of instructions now generated.
1 parent 43a9ec2 commit c32a44e

File tree

5 files changed

+56
-58
lines changed

5 files changed

+56
-58
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5213,34 +5213,34 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
52135213
// XOR: llvm/test/CodeGen/AArch64/reduce-xor.ll
52145214
// AND: llvm/test/CodeGen/AArch64/reduce-and.ll
52155215
static const CostTblEntry CostTblNoPairwise[]{
5216-
{ISD::ADD, MVT::v8i8, 2},
5217-
{ISD::ADD, MVT::v16i8, 2},
5218-
{ISD::ADD, MVT::v4i16, 2},
5219-
{ISD::ADD, MVT::v8i16, 2},
5220-
{ISD::ADD, MVT::v2i32, 2},
5221-
{ISD::ADD, MVT::v4i32, 2},
5222-
{ISD::ADD, MVT::v2i64, 2},
5223-
{ISD::OR, MVT::v8i8, 15},
5224-
{ISD::OR, MVT::v16i8, 17},
5225-
{ISD::OR, MVT::v4i16, 7},
5226-
{ISD::OR, MVT::v8i16, 9},
5227-
{ISD::OR, MVT::v2i32, 3},
5228-
{ISD::OR, MVT::v4i32, 5},
5229-
{ISD::OR, MVT::v2i64, 3},
5230-
{ISD::XOR, MVT::v8i8, 15},
5231-
{ISD::XOR, MVT::v16i8, 17},
5232-
{ISD::XOR, MVT::v4i16, 7},
5233-
{ISD::XOR, MVT::v8i16, 9},
5234-
{ISD::XOR, MVT::v2i32, 3},
5235-
{ISD::XOR, MVT::v4i32, 5},
5236-
{ISD::XOR, MVT::v2i64, 3},
5237-
{ISD::AND, MVT::v8i8, 15},
5238-
{ISD::AND, MVT::v16i8, 17},
5239-
{ISD::AND, MVT::v4i16, 7},
5240-
{ISD::AND, MVT::v8i16, 9},
5241-
{ISD::AND, MVT::v2i32, 3},
5242-
{ISD::AND, MVT::v4i32, 5},
5243-
{ISD::AND, MVT::v2i64, 3},
5216+
{ISD::ADD, MVT::v8i8, 2},
5217+
{ISD::ADD, MVT::v16i8, 2},
5218+
{ISD::ADD, MVT::v4i16, 2},
5219+
{ISD::ADD, MVT::v8i16, 2},
5220+
{ISD::ADD, MVT::v2i32, 2},
5221+
{ISD::ADD, MVT::v4i32, 2},
5222+
{ISD::ADD, MVT::v2i64, 2},
5223+
{ISD::OR, MVT::v8i8, 5}, // fmov + or_lsr + or_lsr + lsr + or
5224+
{ISD::OR, MVT::v16i8, 7}, // ext + orr + ^
5225+
{ISD::OR, MVT::v4i16, 4}, // fmov + or_lsr + lsr + or
5226+
{ISD::OR, MVT::v8i16, 6}, // ext + orr + ^
5227+
{ISD::OR, MVT::v2i32, 3}, // fmov + lsr + or
5228+
{ISD::OR, MVT::v4i32, 5}, // ext + orr + ^
5229+
{ISD::OR, MVT::v2i64, 3}, // ext + orr + fmov
5230+
{ISD::XOR, MVT::v8i8, 5}, // Same as above
5231+
{ISD::XOR, MVT::v16i8, 7},
5232+
{ISD::XOR, MVT::v4i16, 4},
5233+
{ISD::XOR, MVT::v8i16, 6},
5234+
{ISD::XOR, MVT::v2i32, 3},
5235+
{ISD::XOR, MVT::v4i32, 5},
5236+
{ISD::XOR, MVT::v2i64, 3},
5237+
{ISD::AND, MVT::v8i8, 5}, // Same as above
5238+
{ISD::AND, MVT::v16i8, 7},
5239+
{ISD::AND, MVT::v4i16, 4},
5240+
{ISD::AND, MVT::v8i16, 6},
5241+
{ISD::AND, MVT::v2i32, 3},
5242+
{ISD::AND, MVT::v4i32, 5},
5243+
{ISD::AND, MVT::v2i64, 3},
52445244
};
52455245
switch (ISD) {
52465246
default:

llvm/test/Analysis/CostModel/AArch64/reduce-and.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ define void @reduce() {
1515
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
1616
; CHECK-NEXT: Cost Model: Found costs of 0 for: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
1717
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
18-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
19-
; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
20-
; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
21-
; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
22-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
23-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
24-
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
25-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
18+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
19+
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
20+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
21+
; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
22+
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
23+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
24+
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
25+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
2626
; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
2727
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
2828
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)

llvm/test/Analysis/CostModel/AArch64/reduce-or.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ define void @reduce() {
1515
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
1616
; CHECK-NEXT: Cost Model: Found costs of 0 for: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
1717
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
18-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
19-
; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
20-
; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
21-
; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
22-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
23-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
24-
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
25-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
18+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
19+
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
20+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
21+
; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
22+
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
23+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
24+
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
25+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
2626
; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
2727
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
2828
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)

llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ define void @reduce() {
1515
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
1616
; CHECK-NEXT: Cost Model: Found costs of 0 for: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
1717
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
18-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
19-
; CHECK-NEXT: Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
20-
; CHECK-NEXT: Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
21-
; CHECK-NEXT: Cost Model: Found costs of 18 for: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
22-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
23-
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
24-
; CHECK-NEXT: Cost Model: Found costs of 9 for: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
25-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
18+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
19+
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
20+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
21+
; CHECK-NEXT: Cost Model: Found costs of 8 for: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
22+
; CHECK-NEXT: Cost Model: Found costs of 10 for: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
23+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
24+
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
25+
; CHECK-NEXT: Cost Model: Found costs of 7 for: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
2626
; CHECK-NEXT: Cost Model: Found costs of 3 for: %V2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
2727
; CHECK-NEXT: Cost Model: Found costs of 5 for: %V4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
2828
; CHECK-NEXT: Cost Model: Found costs of 6 for: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)

llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,11 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
6262
; CHECK-NEXT: store <8 x i16> [[PREDPHI]], ptr [[DCT]], align 2, !alias.scope [[META0]], !noalias [[META3]]
6363
; CHECK-NEXT: store <8 x i16> [[PREDPHI34]], ptr [[TMP0]], align 2, !alias.scope [[META0]], !noalias [[META3]]
6464
; CHECK-NEXT: [[BIN_RDX35:%.*]] = or <8 x i16> [[PREDPHI34]], [[PREDPHI]]
65-
; CHECK-NEXT: [[BIN_RDX:%.*]] = sext <8 x i16> [[BIN_RDX35]] to <8 x i32>
66-
; CHECK-NEXT: [[TMP29:%.*]] = tail call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[BIN_RDX]])
65+
; CHECK-NEXT: [[TMP29:%.*]] = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[BIN_RDX35]])
6766
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
6867
; CHECK: for.cond.cleanup:
69-
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[TMP29]], [[VECTOR_BODY]] ], [ [[OR_15:%.*]], [[IF_END_15:%.*]] ]
70-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR_LCSSA]], 0
68+
; CHECK-NEXT: [[OR_LCSSA_IN:%.*]] = phi i16 [ [[TMP29]], [[VECTOR_BODY]] ], [ [[OR_1551:%.*]], [[IF_END_15:%.*]] ]
69+
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[OR_LCSSA_IN]], 0
7170
; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL]] to i32
7271
; CHECK-NEXT: ret i32 [[LNOT_EXT]]
7372
; CHECK: for.body:
@@ -514,8 +513,7 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
514513
; CHECK: if.end.15:
515514
; CHECK-NEXT: [[STOREMERGE_15:%.*]] = phi i16 [ [[CONV28_15]], [[IF_ELSE_15]] ], [ [[CONV12_15]], [[IF_THEN_15]] ]
516515
; CHECK-NEXT: store i16 [[STOREMERGE_15]], ptr [[ARRAYIDX_15]], align 2
517-
; CHECK-NEXT: [[OR_1551:%.*]] = or i16 [[OR_1450]], [[STOREMERGE_15]]
518-
; CHECK-NEXT: [[OR_15]] = sext i16 [[OR_1551]] to i32
516+
; CHECK-NEXT: [[OR_1551]] = or i16 [[OR_1450]], [[STOREMERGE_15]]
519517
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
520518
;
521519
entry:

0 commit comments

Comments
 (0)