@@ -1405,6 +1405,11 @@ class LoopVectorizationCostModel {
14051405 return InLoopReductions.contains (Phi);
14061406 }
14071407
1408+ /// Returns the set of in-loop reduction PHIs.
1409+ const SmallPtrSetImpl<PHINode *> &getInLoopReductions () const {
1410+ return InLoopReductions;
1411+ }
1412+
14081413 /// Returns true if the predicated reduction select should be used to set the
14091414 /// incoming value for the reduction phi.
14101415 bool usePredicatedReductionSelect () const {
@@ -7683,60 +7688,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
76837688 Consecutive, Reverse, *VPI, VPI->getDebugLoc ());
76847689}
76857690
7686- /// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
7687- /// also insert a recipe to expand the step for the induction recipe.
7688- static VPWidenIntOrFpInductionRecipe *
7689- createWidenInductionRecipes (VPInstruction *PhiR,
7690- const InductionDescriptor &IndDesc, VPlan &Plan,
7691- ScalarEvolution &SE, Loop &OrigLoop) {
7692- assert (SE.isLoopInvariant (IndDesc.getStep (), &OrigLoop) &&
7693- " step must be loop invariant" );
7694-
7695- VPValue *Start = PhiR->getOperand (0 );
7696- assert ((Plan.getLiveIn (IndDesc.getStartValue ()) == Start ||
7697- (SE.isSCEVable (IndDesc.getStartValue ()->getType ()) &&
7698- SE.getSCEV (IndDesc.getStartValue ()) ==
7699- vputils::getSCEVExprForVPValue (Start, SE))) &&
7700- " Start VPValue must match IndDesc's start value" );
7701-
7702- // It is always safe to copy over the NoWrap and FastMath flags. In
7703- // particular, when folding tail by masking, the masked-off lanes are never
7704- // used, so it is safe.
7705- VPIRFlags Flags = vputils::getFlagsFromIndDesc (IndDesc);
7706- VPValue *Step =
7707- vputils::getOrCreateVPValueForSCEVExpr (Plan, IndDesc.getStep ());
7708-
7709- // Update wide induction increments to use the same step as the corresponding
7710- // wide induction. This enables detecting induction increments directly in
7711- // VPlan and removes redundant splats.
7712- using namespace llvm ::VPlanPatternMatch;
7713- if (match (PhiR->getOperand (1 ), m_Add (m_Specific (PhiR), m_VPValue ())))
7714- PhiR->getOperand (1 )->getDefiningRecipe ()->setOperand (1 , Step);
7715-
7716- PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr ());
7717- return new VPWidenIntOrFpInductionRecipe (Phi, Start, Step, &Plan.getVF (),
7718- IndDesc, Flags, PhiR->getDebugLoc ());
7719- }
7720-
7721- VPHeaderPHIRecipe *
7722- VPRecipeBuilder::tryToOptimizeInductionPHI (VPInstruction *VPI) {
7723- auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr ());
7724-
7725- // Check if this is an integer or fp induction. If so, build the recipe that
7726- // produces its scalar and vector values.
7727- if (auto *II = Legal->getIntOrFpInductionDescriptor (Phi))
7728- return createWidenInductionRecipes (VPI, *II, Plan, *PSE.getSE (), *OrigLoop);
7729-
7730- // Check if this is pointer induction. If so, build the recipe for it.
7731- if (auto *II = Legal->getPointerInductionDescriptor (Phi)) {
7732- VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr (Plan, II->getStep ());
7733- return new VPWidenPointerInductionRecipe (Phi, VPI->getOperand (0 ), Step,
7734- &Plan.getVFxUF (), *II,
7735- VPI->getDebugLoc ());
7736- }
7737- return nullptr ;
7738- }
7739-
77407691VPWidenIntOrFpInductionRecipe *
77417692VPRecipeBuilder::tryToOptimizeInductionTruncate (VPInstruction *VPI,
77427693 VFRange &Range) {
@@ -8218,56 +8169,15 @@ bool VPRecipeBuilder::getScaledReductions(
82188169 return false ;
82198170}
82208171
8221- VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe (VPSingleDefRecipe *R,
8222- VFRange &Range) {
8223- // First, check for specific widening recipes that deal with inductions, Phi
8224- // nodes, calls and memory operations.
8225- VPRecipeBase *Recipe;
8226- if (auto *PhiR = dyn_cast<VPPhi>(R)) {
8227- VPBasicBlock *Parent = PhiR->getParent ();
8228- [[maybe_unused]] VPRegionBlock *LoopRegionOf =
8229- Parent->getEnclosingLoopRegion ();
8230- assert (LoopRegionOf && LoopRegionOf->getEntry () == Parent &&
8231- " Non-header phis should have been handled during predication" );
8232- auto *Phi = cast<PHINode>(R->getUnderlyingInstr ());
8233- assert (R->getNumOperands () == 2 && " Must have 2 operands for header phis" );
8234- if ((Recipe = tryToOptimizeInductionPHI (PhiR)))
8235- return Recipe;
8236-
8237- assert ((Legal->isReductionVariable (Phi) ||
8238- Legal->isFixedOrderRecurrence (Phi)) &&
8239- " can only widen reductions and fixed-order recurrences here" );
8240- VPValue *StartV = R->getOperand (0 );
8241- VPValue *BackedgeValue = R->getOperand (1 );
8242- if (Legal->isReductionVariable (Phi)) {
8243- const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor (Phi);
8244- assert (RdxDesc.getRecurrenceStartValue () ==
8245- Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8246-
8247- // If the PHI is used by a partial reduction, set the scale factor.
8248- bool UseInLoopReduction = CM.isInLoopReduction (Phi);
8249- bool UseOrderedReductions = CM.useOrderedReductions (RdxDesc);
8250- // Will be updated later to >1 if reduction is partial.
8251- unsigned ScaleFactor = 1 ;
8252-
8253- return new VPReductionPHIRecipe (
8254- Phi, RdxDesc.getRecurrenceKind (), *StartV, *BackedgeValue,
8255- getReductionStyle (UseInLoopReduction, UseOrderedReductions,
8256- ScaleFactor),
8257- RdxDesc.hasUsesOutsideReductionChain ());
8258- }
8259-
8260- // TODO: Currently fixed-order recurrences are modeled as chains of
8261- // first-order recurrences. If there are no users of the intermediate
8262- // recurrences in the chain, the fixed order recurrence should be modeled
8263- // directly, enabling more efficient codegen.
8264- return new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV, *BackedgeValue);
8265- }
8266-
8267- assert (!R->isPhi () && " only VPPhi nodes expected at this point" );
8172+ VPRecipeBase *
8173+ VPRecipeBuilder::tryToCreateWidenNonPhiRecipe (VPSingleDefRecipe *R,
8174+ VFRange &Range) {
8175+ assert (!R->isPhi () && " phis must be handled earlier" );
8176+ // First, check for specific widening recipes that deal with optimizing
8177+ // truncates, calls and memory operations.
82688178
8179+ VPRecipeBase *Recipe;
82698180 auto *VPI = cast<VPInstruction>(R);
8270- Instruction *Instr = R->getUnderlyingInstr ();
82718181 if (VPI->getOpcode () == Instruction::Trunc &&
82728182 (Recipe = tryToOptimizeInductionTruncate (VPI, Range)))
82738183 return Recipe;
@@ -8280,6 +8190,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82808190 if (VPI->getOpcode () == Instruction::Call)
82818191 return tryToWidenCall (VPI, Range);
82828192
8193+ Instruction *Instr = R->getUnderlyingInstr ();
82838194 if (VPI->getOpcode () == Instruction::Store)
82848195 if (auto HistInfo = Legal->getHistogramInfo (cast<StoreInst>(Instr)))
82858196 return tryToWidenHistogram (*HistInfo, VPI);
@@ -8377,6 +8288,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
83778288 OrigLoop, *LI, Legal->getWidestInductionType (),
83788289 getDebugLocFromInstOrOperands (Legal->getPrimaryInduction ()), PSE, &LVer);
83798290
8291+ // Create recipes for header phis.
8292+ VPlanTransforms::createHeaderPhiRecipes (
8293+ *VPlan0, *PSE.getSE (), *OrigLoop, Legal->getInductionVars (),
8294+ Legal->getReductionVars (), Legal->getFixedOrderRecurrences (),
8295+ CM.getInLoopReductions (), Hints.allowReordering ());
8296+
83808297 auto MaxVFTimes2 = MaxVF * 2 ;
83818298 for (ElementCount VF = MinVF; ElementCount::isKnownLT (VF, MaxVFTimes2);) {
83828299 VFRange SubRange = {VF, MaxVFTimes2};
@@ -8482,8 +8399,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84828399 // Construct wide recipes and apply predication for original scalar
84838400 // VPInstructions in the loop.
84848401 // ---------------------------------------------------------------------------
8485- VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE ,
8486- Builder, BlockMaskCache);
8402+ VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, Builder ,
8403+ BlockMaskCache);
84878404 // TODO: Handle partial reductions with EVL tail folding.
84888405 if (!CM.foldTailWithEVL ())
84898406 RecipeBuilder.collectScaledReductions (Range);
@@ -8499,26 +8416,22 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84998416 // Mapping from VPValues in the initial plan to their widened VPValues. Needed
85008417 // temporarily to update created block masks.
85018418 DenseMap<VPValue *, VPValue *> Old2New;
8419+
85028420 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
85038421 // Convert input VPInstructions to widened recipes.
85048422 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
8505- auto *SingleDef = cast<VPSingleDefRecipe>(&R);
8506- auto *UnderlyingValue = SingleDef->getUnderlyingValue ();
8507- // Skip recipes that do not need transforming, including canonical IV,
8508- // wide canonical IV and VPInstructions without underlying values. The
8509- // latter are added above for masking.
8510- // FIXME: Migrate code relying on the underlying instruction from VPlan0
8511- // to construct recipes below to not use the underlying instruction.
8512- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
8513- &R) ||
8514- (isa<VPInstruction>(&R) && !UnderlyingValue))
8423+ auto *VPI = dyn_cast<VPInstruction>(&R);
8424+ // Skip recipes that do not need transforming, including non-VPInstructions
8425+ // (such as canonical-IV or already-created header-phi recipes) and
8426+ // VPInstructions without underlying values. The latter are added above for masking.
8427+ if (!VPI || !VPI->getUnderlyingValue ())
85158428 continue ;
8516- assert (isa<VPInstruction>(&R) && UnderlyingValue && " unsupported recipe" );
85178429
85188430 // TODO: Gradually replace uses of underlying instruction by analyses on
8519- // VPlan.
8520- Instruction *Instr = cast<Instruction>(UnderlyingValue);
8521- Builder.setInsertPoint (SingleDef);
8431+ // VPlan. Migrate code relying on the underlying instruction from VPlan0
8432+ // to construct recipes below to not use the underlying instruction.
8433+ Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue ());
8434+ Builder.setInsertPoint (VPI);
85228435
85238436 // The stores with invariant address inside the loop will be deleted, and
85248437 // in the exit block, a uniform store recipe will be created for the final
@@ -8528,7 +8441,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85288441 Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
85298442 // Only create recipe for the final invariant store of the reduction.
85308443 if (Legal->isInvariantStoreOfReduction (SI)) {
8531- auto *VPI = cast<VPInstruction>(SingleDef);
85328444 auto *Recipe = new VPReplicateRecipe (
85338445 SI, R.operands (), true /* IsUniform */ , nullptr /* Mask*/ , *VPI,
85348446 *VPI, VPI->getDebugLoc ());
@@ -8539,10 +8451,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85398451 }
85408452
85418453 VPRecipeBase *Recipe =
8542- RecipeBuilder.tryToCreateWidenRecipe (SingleDef , Range);
8454+ RecipeBuilder.tryToCreateWidenNonPhiRecipe (VPI , Range);
85438455 if (!Recipe)
8544- Recipe = RecipeBuilder. handleReplication (cast<VPInstruction>(SingleDef),
8545- Range);
8456+ Recipe =
8457+ RecipeBuilder. handleReplication (cast<VPInstruction>(VPI), Range);
85468458
85478459 RecipeBuilder.setRecipe (Instr, Recipe);
85488460 if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {
@@ -8553,8 +8465,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85538465 Builder.insert (Recipe);
85548466 }
85558467 if (Recipe->getNumDefinedValues () == 1 ) {
8556- SingleDef ->replaceAllUsesWith (Recipe->getVPSingleValue ());
8557- Old2New[SingleDef ] = Recipe->getVPSingleValue ();
8468+ VPI ->replaceAllUsesWith (Recipe->getVPSingleValue ());
8469+ Old2New[VPI ] = Recipe->getVPSingleValue ();
85588470 } else {
85598471 assert (Recipe->getNumDefinedValues () == 0 &&
85608472 " Unexpected multidef recipe" );
0 commit comments