diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 327 |
1 files changed, 286 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cbf111b00e3d..83bfdfd09d19 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "VPlanTransforms.h" +#include "VPlanDominatorTree.h" +#include "VPRecipeBuilder.h" #include "VPlanCFG.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -22,11 +24,10 @@ using namespace llvm; void VPlanTransforms::VPInstructionsToVPRecipes( - Loop *OrigLoop, VPlanPtr &Plan, + VPlanPtr &Plan, function_ref<const InductionDescriptor *(PHINode *)> GetIntOrFpInductionDescriptor, - SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE, - const TargetLibraryInfo &TLI) { + ScalarEvolution &SE, const TargetLibraryInfo &TLI) { ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( Plan->getEntry()); @@ -39,22 +40,15 @@ void VPlanTransforms::VPInstructionsToVPRecipes( VPValue *VPV = Ingredient.getVPSingleValue(); Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); - if (DeadInstructions.count(Inst)) { - VPValue DummyValue; - VPV->replaceAllUsesWith(&DummyValue); - Ingredient.eraseFromParent(); - continue; - } VPRecipeBase *NewRecipe = nullptr; if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { - VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); + VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue()); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); - NewRecipe = - new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true); + NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -66,28 +60,25 @@ void VPlanTransforms::VPInstructionsToVPRecipes( // Create VPWidenMemoryInstructionRecipe for loads and stores. if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( - *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), - nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); + *Load, Ingredient.getOperand(0), nullptr /*Mask*/, + false /*Consecutive*/, false /*Reverse*/); } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( - *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), - Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, - false /*Consecutive*/, false /*Reverse*/); + *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), + nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - NewRecipe = new VPWidenGEPRecipe( - GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); + NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { NewRecipe = - new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()), + new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()), getVectorIntrinsicIDForCall(CI, &TLI)); } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { - bool InvariantCond = - SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); - NewRecipe = new VPWidenSelectRecipe( - *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); + NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); + } else if (auto *CI = dyn_cast<CastInst>(Inst)) { + NewRecipe = new VPWidenCastRecipe( + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI); } else { - NewRecipe = - new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); + NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); } } @@ -98,15 +89,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes( assert(NewRecipe->getNumDefinedValues() == 0 && "Only recpies with zero or one defined values expected"); Ingredient.eraseFromParent(); - Plan->removeVPValueFor(Inst); - for (auto *Def : NewRecipe->definedValues()) { - Plan->addVPValue(Inst, Def); - } } } } -bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { +static bool sinkScalarOperands(VPlan &Plan) { auto Iter = vp_depth_first_deep(Plan.getEntry()); bool Changed = false; // First, collect the operands of all recipes in replicate blocks as seeds for @@ -167,8 +154,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { continue; Instruction *I = cast<Instruction>( cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue()); - auto *Clone = - new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); + auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true); // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); @@ -224,7 +210,10 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { return nullptr; } -bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { +// Merge replicate regions in their successor region, if a replicate region +// is connected to a successor replicate region with the same predicate by a +// single, empty VPBasicBlock. +static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { SetVector<VPRegionBlock *> DeletedRegions; // Collect replicate regions followed by an empty block, followed by another @@ -312,6 +301,81 @@ bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { return !DeletedRegions.empty(); } +static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, + VPlan &Plan) { + Instruction *Instr = PredRecipe->getUnderlyingInstr(); + // Build the triangular if-then region. + std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); + assert(Instr->getParent() && "Predicated instruction not in any basic block"); + auto *BlockInMask = PredRecipe->getMask(); + auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); + auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); + + // Replace predicated replicate recipe with a replicate recipe without a + // mask but in the replicate region. + auto *RecipeWithoutMask = new VPReplicateRecipe( + PredRecipe->getUnderlyingInstr(), + make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())), + PredRecipe->isUniform()); + auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); + + VPPredInstPHIRecipe *PHIRecipe = nullptr; + if (PredRecipe->getNumUsers() != 0) { + PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask); + PredRecipe->replaceAllUsesWith(PHIRecipe); + PHIRecipe->setOperand(0, RecipeWithoutMask); + } + PredRecipe->eraseFromParent(); + auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); + VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); + + // Note: first set Entry as region entry and then connect successors starting + // from it in order, to propagate the "parent" of each VPBasicBlock. + VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry); + VPBlockUtils::connectBlocks(Pred, Exiting); + + return Region; +} + +static void addReplicateRegions(VPlan &Plan) { + SmallVector<VPReplicateRecipe *> WorkList; + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( + vp_depth_first_deep(Plan.getEntry()))) { + for (VPRecipeBase &R : *VPBB) + if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) { + if (RepR->isPredicated()) + WorkList.push_back(RepR); + } + } + + unsigned BBNum = 0; + for (VPReplicateRecipe *RepR : WorkList) { + VPBasicBlock *CurrentBlock = RepR->getParent(); + VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); + + BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); + SplitBlock->setName( + OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); + // Record predicated instructions for above packing optimizations. + VPBlockBase *Region = createReplicateRegion(RepR, Plan); + Region->setParent(CurrentBlock->getParent()); + VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); + VPBlockUtils::connectBlocks(CurrentBlock, Region); + VPBlockUtils::connectBlocks(Region, SplitBlock); + } +} + +void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) { + // Convert masked VPReplicateRecipes to if-then region blocks. + addReplicateRegions(Plan); + + bool ShouldSimplify = true; + while (ShouldSimplify) { + ShouldSimplify = sinkScalarOperands(Plan); + ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan); + ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan); + } +} bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { SmallVector<VPBasicBlock *> WorkList; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( @@ -395,7 +459,10 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { // everything WidenNewIV's users need. That is, WidenOriginalIV will // generate a vector phi or all users of WidenNewIV demand the first lane // only. - if (WidenOriginalIV->needsVectorIV() || + if (any_of(WidenOriginalIV->users(), + [WidenOriginalIV](VPUser *U) { + return !U->usesScalars(WidenOriginalIV); + }) || vputils::onlyFirstLaneUsed(WidenNewIV)) { WidenNewIV->replaceAllUsesWith(WidenOriginalIV); WidenNewIV->eraseFromParent(); @@ -440,10 +507,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { if (Instruction *TruncI = WideIV->getTruncInst()) ResultTy = TruncI->getType(); const InductionDescriptor &ID = WideIV->getInductionDescriptor(); - VPValue *Step = - vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); + VPValue *Step = WideIV->getStepValue(); VPValue *BaseIV = CanonicalIV; - if (!CanonicalIV->isCanonical(ID, ResultTy)) { + if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step, + ResultTy)) { BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV, Step, ResultTy); HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); @@ -522,9 +589,9 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, return; LLVMContext &Ctx = SE.getContext(); - auto *BOC = - new VPInstruction(VPInstruction::BranchOnCond, - {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))}); + auto *BOC = new VPInstruction( + VPInstruction::BranchOnCond, + {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))}); Term->eraseFromParent(); ExitingVPBB->appendRecipe(BOC); Plan.setVF(BestVF); @@ -533,3 +600,181 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, // 1. Replace inductions with constants. // 2. Replace vector loop region with VPBasicBlock. } + +#ifndef NDEBUG +static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) { + auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); + if (Region && Region->isReplicator()) { + assert(Region->getNumSuccessors() == 1 && + Region->getNumPredecessors() == 1 && "Expected SESE region!"); + assert(R->getParent()->size() == 1 && + "A recipe in an original replicator region must be the only " + "recipe in its block"); + return Region; + } + return nullptr; +} +#endif + +static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B, + VPDominatorTree &VPDT) { + if (A == B) + return false; + + auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { + for (auto &R : *A->getParent()) { + if (&R == A) + return true; + if (&R == B) + return false; + } + llvm_unreachable("recipe not found"); + }; + const VPBlockBase *ParentA = A->getParent(); + const VPBlockBase *ParentB = B->getParent(); + if (ParentA == ParentB) + return LocalComesBefore(A, B); + + assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && + "No replicate regions expected at this point"); + assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && + "No replicate regions expected at this point"); + return VPDT.properlyDominates(ParentA, ParentB); +} + +/// Sink users of \p FOR after the recipe defining the previous value \p +/// Previous of the recurrence. \returns true if all users of \p FOR could be +/// re-arranged as needed or false if it is not possible. +static bool +sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, + VPRecipeBase *Previous, + VPDominatorTree &VPDT) { + // Collect recipes that need sinking. + SmallVector<VPRecipeBase *> WorkList; + SmallPtrSet<VPRecipeBase *, 8> Seen; + Seen.insert(Previous); + auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) { + // The previous value must not depend on the users of the recurrence phi. In + // that case, FOR is not a fixed order recurrence. + if (SinkCandidate == Previous) + return false; + + if (isa<VPHeaderPHIRecipe>(SinkCandidate) || + !Seen.insert(SinkCandidate).second || + properlyDominates(Previous, SinkCandidate, VPDT)) + return true; + + if (SinkCandidate->mayHaveSideEffects()) + return false; + + WorkList.push_back(SinkCandidate); + return true; + }; + + // Recursively sink users of FOR after Previous. + WorkList.push_back(FOR); + for (unsigned I = 0; I != WorkList.size(); ++I) { + VPRecipeBase *Current = WorkList[I]; + assert(Current->getNumDefinedValues() == 1 && + "only recipes with a single defined value expected"); + + for (VPUser *User : Current->getVPSingleValue()->users()) { + if (auto *R = dyn_cast<VPRecipeBase>(User)) + if (!TryToPushSinkCandidate(R)) + return false; + } + } + + // Keep recipes to sink ordered by dominance so earlier instructions are + // processed first. + sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) { + return properlyDominates(A, B, VPDT); + }); + + for (VPRecipeBase *SinkCandidate : WorkList) { + if (SinkCandidate == FOR) + continue; + + SinkCandidate->moveAfter(Previous); + Previous = SinkCandidate; + } + return true; +} + +bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, + VPBuilder &Builder) { + VPDominatorTree VPDT; + VPDT.recalculate(Plan); + + SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis; + for (VPRecipeBase &R : + Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis()) + if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) + RecurrencePhis.push_back(FOR); + + for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) { + SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis; + VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe(); + // Fixed-order recurrences do not contain cycles, so this loop is guaranteed + // to terminate. + while (auto *PrevPhi = + dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) { + assert(PrevPhi->getParent() == FOR->getParent()); + assert(SeenPhis.insert(PrevPhi).second); + Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); + } + + if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) + return false; + + // Introduce a recipe to combine the incoming and previous values of a + // fixed-order recurrence. + VPBasicBlock *InsertBlock = Previous->getParent(); + if (isa<VPHeaderPHIRecipe>(Previous)) + Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); + else + Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator())); + + auto *RecurSplice = cast<VPInstruction>( + Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, + {FOR, FOR->getBackedgeValue()})); + + FOR->replaceAllUsesWith(RecurSplice); + // Set the first operand of RecurSplice to FOR again, after replacing + // all users. + RecurSplice->setOperand(0, FOR); + } + return true; +} + +void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) { + for (VPRecipeBase &R : + Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); + if (!PhiR) + continue; + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + RecurKind RK = RdxDesc.getRecurrenceKind(); + if (RK != RecurKind::Add && RK != RecurKind::Mul) + continue; + + SmallSetVector<VPValue *, 8> Worklist; + Worklist.insert(PhiR); + + for (unsigned I = 0; I != Worklist.size(); ++I) { + VPValue *Cur = Worklist[I]; + if (auto *RecWithFlags = + dyn_cast<VPRecipeWithIRFlags>(Cur->getDefiningRecipe())) { + RecWithFlags->dropPoisonGeneratingFlags(); + } + + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast<VPRecipeBase>(U); + if (!UserRecipe) + continue; + for (VPValue *V : UserRecipe->definedValues()) + Worklist.insert(V); + } + } + } +} |