aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp327
1 files changed, 286 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cbf111b00e3d..83bfdfd09d19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "VPlanTransforms.h"
+#include "VPlanDominatorTree.h"
+#include "VPRecipeBuilder.h"
#include "VPlanCFG.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -22,11 +24,10 @@
using namespace llvm;
void VPlanTransforms::VPInstructionsToVPRecipes(
- Loop *OrigLoop, VPlanPtr &Plan,
+ VPlanPtr &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
- SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE,
- const TargetLibraryInfo &TLI) {
+ ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan->getEntry());
@@ -39,22 +40,15 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPValue *VPV = Ingredient.getVPSingleValue();
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
- if (DeadInstructions.count(Inst)) {
- VPValue DummyValue;
- VPV->replaceAllUsesWith(&DummyValue);
- Ingredient.eraseFromParent();
- continue;
- }
VPRecipeBase *NewRecipe = nullptr;
if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
- VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
+ VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue());
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
- NewRecipe =
- new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true);
+ NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -66,28 +60,25 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
+ *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
+ false /*Consecutive*/, false /*Reverse*/);
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/);
+ *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
+ nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
- NewRecipe = new VPWidenGEPRecipe(
- GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
+ NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
NewRecipe =
- new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()),
+ new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()),
getVectorIntrinsicIDForCall(CI, &TLI));
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
- bool InvariantCond =
- SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
- NewRecipe = new VPWidenSelectRecipe(
- *SI, Plan->mapToVPValues(SI->operands()), InvariantCond);
+ NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
+ } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
+ NewRecipe = new VPWidenCastRecipe(
+ CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
} else {
- NewRecipe =
- new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
+ NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
}
}
@@ -98,15 +89,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
assert(NewRecipe->getNumDefinedValues() == 0 &&
"Only recpies with zero or one defined values expected");
Ingredient.eraseFromParent();
- Plan->removeVPValueFor(Inst);
- for (auto *Def : NewRecipe->definedValues()) {
- Plan->addVPValue(Inst, Def);
- }
}
}
}
-bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
+static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
@@ -167,8 +154,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
continue;
Instruction *I = cast<Instruction>(
cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
- auto *Clone =
- new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
+ auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
@@ -224,7 +210,10 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
return nullptr;
}
-bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
+// Merge replicate regions in their successor region, if a replicate region
+// is connected to a successor replicate region with the same predicate by a
+// single, empty VPBasicBlock.
+static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
SetVector<VPRegionBlock *> DeletedRegions;
// Collect replicate regions followed by an empty block, followed by another
@@ -312,6 +301,81 @@ bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
return !DeletedRegions.empty();
}
+static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
+ VPlan &Plan) {
+ Instruction *Instr = PredRecipe->getUnderlyingInstr();
+ // Build the triangular if-then region.
+ std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
+ assert(Instr->getParent() && "Predicated instruction not in any basic block");
+ auto *BlockInMask = PredRecipe->getMask();
+ auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
+ auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
+
+ // Replace predicated replicate recipe with a replicate recipe without a
+ // mask but in the replicate region.
+ auto *RecipeWithoutMask = new VPReplicateRecipe(
+ PredRecipe->getUnderlyingInstr(),
+ make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
+ PredRecipe->isUniform());
+ auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
+
+ VPPredInstPHIRecipe *PHIRecipe = nullptr;
+ if (PredRecipe->getNumUsers() != 0) {
+ PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
+ PredRecipe->replaceAllUsesWith(PHIRecipe);
+ PHIRecipe->setOperand(0, RecipeWithoutMask);
+ }
+ PredRecipe->eraseFromParent();
+ auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
+ VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
+
+ // Note: first set Entry as region entry and then connect successors starting
+ // from it in order, to propagate the "parent" of each VPBasicBlock.
+ VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
+ VPBlockUtils::connectBlocks(Pred, Exiting);
+
+ return Region;
+}
+
+static void addReplicateRegions(VPlan &Plan) {
+ SmallVector<VPReplicateRecipe *> WorkList;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+ if (RepR->isPredicated())
+ WorkList.push_back(RepR);
+ }
+ }
+
+ unsigned BBNum = 0;
+ for (VPReplicateRecipe *RepR : WorkList) {
+ VPBasicBlock *CurrentBlock = RepR->getParent();
+ VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
+
+ BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
+ SplitBlock->setName(
+ OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
+ // Record predicated instructions for above packing optimizations.
+ VPBlockBase *Region = createReplicateRegion(RepR, Plan);
+ Region->setParent(CurrentBlock->getParent());
+ VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
+ VPBlockUtils::connectBlocks(CurrentBlock, Region);
+ VPBlockUtils::connectBlocks(Region, SplitBlock);
+ }
+}
+
+void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
+ // Convert masked VPReplicateRecipes to if-then region blocks.
+ addReplicateRegions(Plan);
+
+ bool ShouldSimplify = true;
+ while (ShouldSimplify) {
+ ShouldSimplify = sinkScalarOperands(Plan);
+ ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
+ ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan);
+ }
+}
bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
SmallVector<VPBasicBlock *> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -395,7 +459,10 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
// everything WidenNewIV's users need. That is, WidenOriginalIV will
// generate a vector phi or all users of WidenNewIV demand the first lane
// only.
- if (WidenOriginalIV->needsVectorIV() ||
+ if (any_of(WidenOriginalIV->users(),
+ [WidenOriginalIV](VPUser *U) {
+ return !U->usesScalars(WidenOriginalIV);
+ }) ||
vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
@@ -440,10 +507,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
if (Instruction *TruncI = WideIV->getTruncInst())
ResultTy = TruncI->getType();
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
+ VPValue *Step = WideIV->getStepValue();
VPValue *BaseIV = CanonicalIV;
- if (!CanonicalIV->isCanonical(ID, ResultTy)) {
+ if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step,
+ ResultTy)) {
BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
Step, ResultTy);
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
@@ -522,9 +589,9 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
return;
LLVMContext &Ctx = SE.getContext();
- auto *BOC =
- new VPInstruction(VPInstruction::BranchOnCond,
- {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
+ auto *BOC = new VPInstruction(
+ VPInstruction::BranchOnCond,
+ {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))});
Term->eraseFromParent();
ExitingVPBB->appendRecipe(BOC);
Plan.setVF(BestVF);
@@ -533,3 +600,181 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
// 1. Replace inductions with constants.
// 2. Replace vector loop region with VPBasicBlock.
}
+
+#ifndef NDEBUG
+static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
+ auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
+ if (Region && Region->isReplicator()) {
+ assert(Region->getNumSuccessors() == 1 &&
+ Region->getNumPredecessors() == 1 && "Expected SESE region!");
+ assert(R->getParent()->size() == 1 &&
+ "A recipe in an original replicator region must be the only "
+ "recipe in its block");
+ return Region;
+ }
+ return nullptr;
+}
+#endif
+
+static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
+ VPDominatorTree &VPDT) {
+ if (A == B)
+ return false;
+
+ auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
+ for (auto &R : *A->getParent()) {
+ if (&R == A)
+ return true;
+ if (&R == B)
+ return false;
+ }
+ llvm_unreachable("recipe not found");
+ };
+ const VPBlockBase *ParentA = A->getParent();
+ const VPBlockBase *ParentB = B->getParent();
+ if (ParentA == ParentB)
+ return LocalComesBefore(A, B);
+
+ assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
+ "No replicate regions expected at this point");
+ assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
+ "No replicate regions expected at this point");
+ return VPDT.properlyDominates(ParentA, ParentB);
+}
+
+/// Sink users of \p FOR after the recipe defining the previous value \p
+/// Previous of the recurrence. \returns true if all users of \p FOR could be
+/// re-arranged as needed or false if it is not possible.
+static bool
+sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
+ VPRecipeBase *Previous,
+ VPDominatorTree &VPDT) {
+ // Collect recipes that need sinking.
+ SmallVector<VPRecipeBase *> WorkList;
+ SmallPtrSet<VPRecipeBase *, 8> Seen;
+ Seen.insert(Previous);
+ auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
+ // The previous value must not depend on the users of the recurrence phi. In
+ // that case, FOR is not a fixed order recurrence.
+ if (SinkCandidate == Previous)
+ return false;
+
+ if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
+ !Seen.insert(SinkCandidate).second ||
+ properlyDominates(Previous, SinkCandidate, VPDT))
+ return true;
+
+ if (SinkCandidate->mayHaveSideEffects())
+ return false;
+
+ WorkList.push_back(SinkCandidate);
+ return true;
+ };
+
+ // Recursively sink users of FOR after Previous.
+ WorkList.push_back(FOR);
+ for (unsigned I = 0; I != WorkList.size(); ++I) {
+ VPRecipeBase *Current = WorkList[I];
+ assert(Current->getNumDefinedValues() == 1 &&
+ "only recipes with a single defined value expected");
+
+ for (VPUser *User : Current->getVPSingleValue()->users()) {
+ if (auto *R = dyn_cast<VPRecipeBase>(User))
+ if (!TryToPushSinkCandidate(R))
+ return false;
+ }
+ }
+
+ // Keep recipes to sink ordered by dominance so earlier instructions are
+ // processed first.
+ sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
+ return properlyDominates(A, B, VPDT);
+ });
+
+ for (VPRecipeBase *SinkCandidate : WorkList) {
+ if (SinkCandidate == FOR)
+ continue;
+
+ SinkCandidate->moveAfter(Previous);
+ Previous = SinkCandidate;
+ }
+ return true;
+}
+
+bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+ VPBuilder &Builder) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+
+ SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
+ for (VPRecipeBase &R :
+ Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
+ RecurrencePhis.push_back(FOR);
+
+ for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
+ SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
+ VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
+ // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
+ // to terminate.
+ while (auto *PrevPhi =
+ dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
+ assert(PrevPhi->getParent() == FOR->getParent());
+ assert(SeenPhis.insert(PrevPhi).second);
+ Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
+ }
+
+ if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
+ return false;
+
+ // Introduce a recipe to combine the incoming and previous values of a
+ // fixed-order recurrence.
+ VPBasicBlock *InsertBlock = Previous->getParent();
+ if (isa<VPHeaderPHIRecipe>(Previous))
+ Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
+ else
+ Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator()));
+
+ auto *RecurSplice = cast<VPInstruction>(
+ Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
+ {FOR, FOR->getBackedgeValue()}));
+
+ FOR->replaceAllUsesWith(RecurSplice);
+ // Set the first operand of RecurSplice to FOR again, after replacing
+ // all users.
+ RecurSplice->setOperand(0, FOR);
+ }
+ return true;
+}
+
+void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
+ for (VPRecipeBase &R :
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR)
+ continue;
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ if (RK != RecurKind::Add && RK != RecurKind::Mul)
+ continue;
+
+ SmallSetVector<VPValue *, 8> Worklist;
+ Worklist.insert(PhiR);
+
+ for (unsigned I = 0; I != Worklist.size(); ++I) {
+ VPValue *Cur = Worklist[I];
+ if (auto *RecWithFlags =
+ dyn_cast<VPRecipeWithIRFlags>(Cur->getDefiningRecipe())) {
+ RecWithFlags->dropPoisonGeneratingFlags();
+ }
+
+ for (VPUser *U : Cur->users()) {
+ auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+ if (!UserRecipe)
+ continue;
+ for (VPValue *V : UserRecipe->definedValues())
+ Worklist.insert(V);
+ }
+ }
+ }
+}