Commit 92342e0

[VPlan] Implement compressed widening of memory instructions
1 parent f39e2a0 commit 92342e0
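
Compressed widening lowers a masked consecutive load/store to llvm.masked.expandload / llvm.masked.compressstore instead of an ordinary masked load/store. A minimal sketch of the kind of source loop such accesses typically come from (illustrative only, not taken from this commit; the function and variable names are invented):

// Illustrative only (not from this commit). The store index advances only on
// active lanes, so the kept elements are written contiguously from &dst[k] --
// exactly the access pattern a compress store expresses directly.
int keepPositive(int *dst, const int *src, int n) {
  int k = 0;
  for (int i = 0; i < n; ++i)
    if (src[i] > 0)
      dst[k++] = src[i]; // candidate for a widened llvm.masked.compressstore
  return k;
}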

File tree

  llvm/include/llvm/Analysis/TargetTransformInfo.h
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/lib/Transforms/Vectorize/VPlan.h
  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

5 files changed, +61 -30 lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
@@ -1442,6 +1442,7 @@ class TargetTransformInfo {
     Normal,        ///< The cast is used with a normal load/store.
     Masked,        ///< The cast is used with a masked load/store.
     GatherScatter, ///< The cast is used with a gather/scatter.
+    Compressed,    ///< The cast is used with an expand load/compress store.
     Interleave,    ///< The cast is used with an interleaved load/store.
     Reversed,      ///< The cast is used with a reversed load/store.
   };
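
With the new hint, a target's cast-cost hook can tell that a cast feeds an expand load or compress store. A hedged sketch of how a backend might consume it (the class name MyTargetTTIImpl and the returned cost are invented; getCastInstrCost and CastContextHint are the existing TTI interfaces):

// Hypothetical backend use of the new hint (not part of this commit).
InstructionCost
MyTargetTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                  TTI::CastContextHint CCH,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) const {
  // Assume a cast feeding an expand load / compress store cannot be folded
  // into the memory operation, so charge it separately.
  if (CCH == TTI::CastContextHint::Compressed)
    return 2;
  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}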

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 7 deletions
@@ -1027,6 +1027,7 @@ class LoopVectorizationCostModel {
     CM_Widen_Reverse, // For consecutive accesses with stride -1.
     CM_Interleave,
     CM_GatherScatter,
+    CM_Compressed,
     CM_Scalarize,
     CM_VectorCall,
     CM_IntrinsicCall
@@ -3109,9 +3110,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
     if (IsUniformMemOpUse(I))
       return true;

-    return (WideningDecision == CM_Widen ||
-            WideningDecision == CM_Widen_Reverse ||
-            WideningDecision == CM_Interleave);
+    return (
+        WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
+        WideningDecision == CM_Interleave || WideningDecision == CM_Compressed);
   };

   // Returns true if Ptr is the pointer operand of a memory access instruction
@@ -5192,11 +5193,16 @@ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
     Instruction *I, ElementCount VF, InstWidening Decision) {
   Type *ValTy = getLoadStoreType(I);
   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
+  const Align Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);

+  if (Decision == CM_Compressed)
+    return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
+                                             /*VariableMask*/ true, Alignment,
+                                             CostKind, I);
+
   assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
          "Expected widen decision.");
-  const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
   if (Legal->isMaskRequired(I)) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -6300,6 +6306,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     switch (getWideningDecision(I, VF)) {
     case LoopVectorizationCostModel::CM_GatherScatter:
       return TTI::CastContextHint::GatherScatter;
+    case LoopVectorizationCostModel::CM_Compressed:
+      return TTI::CastContextHint::Compressed;
     case LoopVectorizationCostModel::CM_Interleave:
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
@@ -7515,8 +7523,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   LoopVectorizationCostModel::InstWidening Decision =
       CM.getWideningDecision(I, Range.Start);
   bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
+  bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
   bool Consecutive =
-      Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+      Reverse || Compressed || Decision == LoopVectorizationCostModel::CM_Widen;

   VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
   if (Consecutive) {
@@ -7546,11 +7555,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
     return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
-                                 VPIRMetadata(*Load, LVer), I->getDebugLoc());
+                                 Compressed, VPIRMetadata(*Load, LVer),
+                                 I->getDebugLoc());

   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, VPIRMetadata(*Store, LVer),
+                                Reverse, Compressed, VPIRMetadata(*Store, LVer),
                                 I->getDebugLoc());
 }
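
The new CM_Compressed branch prices the whole access with TTI::getExpandCompressMemoryOpCost rather than the masked load/store path; hoisting Alignment above the early return keeps it available to both. A condensed sketch of the resulting costing flow (the free-standing helper and its name are invented for illustration and assume the usual LLVM includes; the three TTI hooks are real):

// Condensed illustration of the decision flow above, not the actual
// LoopVectorize code. CM_Compressed accesses are priced as one
// expand-load/compress-store; other consecutive accesses keep the existing
// masked/unmasked costing.
static InstructionCost costConsecutiveAccess(const TargetTransformInfo &TTI,
                                             Instruction *I, Type *VectorTy,
                                             Align Alignment, unsigned AS,
                                             bool NeedsMask, bool Compressed,
                                             TTI::TargetCostKind CostKind) {
  if (Compressed)
    return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
                                             /*VariableMask=*/true, Alignment,
                                             CostKind, I);
  if (NeedsMask)
    return TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                     CostKind);
  return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, CostKind);
}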

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 20 additions & 12 deletions
@@ -3193,6 +3193,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
   /// Whether the consecutive accessed addresses are in reverse order.
   bool Reverse;

+  /// Whether the consecutive accessed addresses are compressed with mask value.
+  bool Compressed;
+
   /// Whether the memory access is masked.
   bool IsMasked = false;

@@ -3206,12 +3209,13 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
   VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                       std::initializer_list<VPValue *> Operands,
-                      bool Consecutive, bool Reverse,
+                      bool Consecutive, bool Reverse, bool Compressed,
                       const VPIRMetadata &Metadata, DebugLoc DL)
       : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
         Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
-        Reverse(Reverse) {
+        Reverse(Reverse), Compressed(Compressed) {
     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+    assert((Consecutive || !Compressed) && "Compressed implies consecutive");
     assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
            !Reverse &&
                "Reversed acccess without VPVectorEndPointerRecipe address?");
@@ -3241,6 +3245,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
   /// order.
   bool isReverse() const { return Reverse; }

+  /// Return whether the consecutive loaded/stored addresses are compressed.
+  bool isCompressed() const { return Compressed; }
+
   /// Return the address accessed by this recipe.
   VPValue *getAddr() const { return getOperand(0); }

@@ -3274,18 +3281,18 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
 struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
                                                    public VPValue {
   VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                    bool Consecutive, bool Reverse,
+                    bool Consecutive, bool Reverse, bool Compressed,
                     const VPIRMetadata &Metadata, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
-                            Reverse, Metadata, DL),
+                            Reverse, Compressed, Metadata, DL),
         VPValue(this, &Load) {
     setMask(Mask);
   }

   VPWidenLoadRecipe *clone() override {
     return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
-                                 getMask(), Consecutive, Reverse, *this,
-                                 getDebugLoc());
+                                 getMask(), Consecutive, Reverse, Compressed,
+                                 *this, getDebugLoc());
   }

   VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
@@ -3316,8 +3323,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
                        VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
-                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
-                            L.getDebugLoc()),
+                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
+                            L.isCompressed(), L, L.getDebugLoc()),
         VPValue(this, &getIngredient()) {
     setMask(Mask);
   }
@@ -3355,16 +3362,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
 struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
                      VPValue *Mask, bool Consecutive, bool Reverse,
-                     const VPIRMetadata &Metadata, DebugLoc DL)
+                     bool Compressed, const VPIRMetadata &Metadata, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
-                            Consecutive, Reverse, Metadata, DL) {
+                            Consecutive, Reverse, Compressed, Metadata, DL) {
     setMask(Mask);
   }

   VPWidenStoreRecipe *clone() override {
     return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                   getStoredValue(), getMask(), Consecutive,
-                                  Reverse, *this, getDebugLoc());
+                                  Reverse, Compressed, *this, getDebugLoc());
   }

   VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -3399,7 +3406,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
                        VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                             {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
-                            S.isReverse(), S, S.getDebugLoc()) {
+                            S.isReverse(), S.isCompressed(), S,
+                            S.getDebugLoc()) {
     setMask(Mask);
   }
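
Every recipe constructor now threads the extra flag through, and the new assert mirrors the Reverse one: a compressed access must also be consecutive. A small usage sketch against the new VPWidenLoadRecipe signature (the surrounding values are assumed, not taken from the commit):

// Assumed context: Load, Ptr and Mask were produced earlier by the recipe
// builder. Compressed implies Consecutive, per the new assert.
auto *R = new VPWidenLoadRecipe(*Load, Ptr, Mask, /*Consecutive=*/true,
                                /*Reverse=*/false, /*Compressed=*/true,
                                VPIRMetadata(*Load), Load->getDebugLoc());
assert(R->isConsecutive() && R->isCompressed());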

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 17 additions & 6 deletions
@@ -3565,8 +3565,12 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,

   InstructionCost Cost = 0;
   if (IsMasked) {
-    Cost +=
-        Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
+    Cost += Compressed
+                ? Ctx.TTI.getExpandCompressMemoryOpCost(Opcode, Ty,
+                                                        /*VariableMask*/ true,
+                                                        Alignment, Ctx.CostKind)
+                : Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS,
+                                                Ctx.CostKind);
   } else {
     TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
         isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3603,9 +3607,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                        "wide.masked.gather");
   } else if (Mask) {
-    NewLI =
-        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
-                                 PoisonValue::get(DataTy), "wide.masked.load");
+    NewLI = Compressed
+                ? Builder.CreateMaskedExpandLoad(DataTy, Addr, Alignment, Mask,
+                                                 PoisonValue::get(DataTy),
+                                                 "wide.masked.expand.load")
+                : Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+                                           PoisonValue::get(DataTy),
+                                           "wide.masked.load");
   } else {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
   }
@@ -3732,7 +3740,10 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   if (CreateScatter)
     NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
   else if (Mask)
-    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    NewSI = Compressed
+                ? Builder.CreateMaskedCompressStore(StoredVal, Addr, Alignment,
+                                                    Mask)
+                : Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
   else
     NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
   applyMetadata(*NewSI);
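
At codegen time, a masked recipe with the Compressed flag set emits the expand/compress intrinsics instead of plain masked memory ops. A standalone sketch of what the new branches produce (the helper names are invented; the IRBuilder calls are the ones used above, and they create calls to llvm.masked.expandload and llvm.masked.compressstore respectively):

// Illustrative helpers, not part of the commit; they isolate the two new
// IRBuilder calls used in VPWidenLoadRecipe/VPWidenStoreRecipe::execute.
static Value *emitCompressedLoad(IRBuilderBase &Builder, Type *DataTy,
                                 Value *Addr, Align Alignment, Value *Mask) {
  // Emits a call to llvm.masked.expandload.
  return Builder.CreateMaskedExpandLoad(DataTy, Addr, Alignment, Mask,
                                        PoisonValue::get(DataTy),
                                        "wide.masked.expand.load");
}

static Value *emitCompressedStore(IRBuilderBase &Builder, Value *StoredVal,
                                  Value *Addr, Align Alignment, Value *Mask) {
  // Emits a call to llvm.masked.compressstore.
  return Builder.CreateMaskedCompressStore(StoredVal, Addr, Alignment, Mask);
}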

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 6 additions & 5 deletions
@@ -91,13 +91,14 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
     if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
       NewRecipe = new VPWidenLoadRecipe(
           *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
-          false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
-          Ingredient.getDebugLoc());
+          false /*Consecutive*/, false /*Reverse*/, false /*Compressed*/,
+          VPIRMetadata(*Load), Ingredient.getDebugLoc());
     } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
       NewRecipe = new VPWidenStoreRecipe(
           *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
           nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
-          VPIRMetadata(*Store), Ingredient.getDebugLoc());
+          false /*Compressed*/, VPIRMetadata(*Store),
+          Ingredient.getDebugLoc());
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
       NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
     } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
@@ -4207,7 +4208,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
   auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
   auto *L = new VPWidenLoadRecipe(
       *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
-      /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
+      /*Reverse=*/false, /*Compressed=*/false, {}, LoadGroup->getDebugLoc());
   L->insertBefore(LoadGroup);
   NarrowedOps.insert(L);
   return L;
@@ -4344,7 +4345,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
       cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
   auto *S = new VPWidenStoreRecipe(
       *SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
-      /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
+      /*Reverse=*/false, /*Compressed=*/false, {}, StoreGroup->getDebugLoc());
   S->insertBefore(StoreGroup);
   StoreGroup->eraseFromParent();
 }
