aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp243
1 files changed, 173 insertions, 70 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cca484e13bf1..cbf111b00e3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
#include "VPlanTransforms.h"
+#include "VPlanCFG.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Intrinsics.h"
using namespace llvm;
@@ -22,10 +25,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
Loop *OrigLoop, VPlanPtr &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
- SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) {
+ SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE,
+ const TargetLibraryInfo &TLI) {
- ReversePostOrderTraversal<VPBlockRecursiveTraversalWrapper<VPBlockBase *>>
- RPOT(Plan->getEntry());
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
VPRecipeBase *Term = VPBB->getTerminator();
auto EndIter = Term ? Term->getIterator() : VPBB->end();
@@ -74,7 +78,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
NewRecipe =
- new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()));
+ new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()),
+ getVectorIntrinsicIDForCall(CI, &TLI));
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
bool InvariantCond =
SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
@@ -102,40 +107,46 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
}
bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
- auto Iter = depth_first(
- VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()));
+ auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
- // First, collect the operands of all predicated replicate recipes as seeds
- // for sinking.
- SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList;
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
+ // First, collect the operands of all recipes in replicate blocks as seeds for
+ // sinking.
+ SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList;
+ for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
+ VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
+ if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
+ continue;
+ VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
+ if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
+ continue;
for (auto &Recipe : *VPBB) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
- if (!RepR || !RepR->isPredicated())
- continue;
- for (VPValue *Op : RepR->operands())
- WorkList.insert(std::make_pair(RepR->getParent(), Op));
+ for (VPValue *Op : Recipe.operands())
+ if (auto *Def = Op->getDefiningRecipe())
+ WorkList.insert(std::make_pair(VPBB, Def));
}
}
- // Try to sink each replicate recipe in the worklist.
- while (!WorkList.empty()) {
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
+ // Try to sink each replicate or scalar IV steps recipe in the worklist.
+ for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
- VPValue *C;
- std::tie(SinkTo, C) = WorkList.pop_back_val();
- auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def);
- if (!SinkCandidate || SinkCandidate->isUniform() ||
- SinkCandidate->getParent() == SinkTo ||
+ VPRecipeBase *SinkCandidate;
+ std::tie(SinkTo, SinkCandidate) = WorkList[I];
+ if (SinkCandidate->getParent() == SinkTo ||
SinkCandidate->mayHaveSideEffects() ||
SinkCandidate->mayReadOrWriteMemory())
continue;
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
+ if (!ScalarVFOnly && RepR->isUniform())
+ continue;
+ } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
+ continue;
bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
// or all users outside of SinkTo must be uniform-after-vectorization (
// i.e., only first lane is used) . In the latter case, we need to duplicate
- // SinkCandidate. At the moment, we identify such UAV's by looking for the
- // address operands of widened memory recipes.
+ // SinkCandidate.
auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
SinkCandidate](VPUser *U) {
auto *UI = dyn_cast<VPRecipeBase>(U);
@@ -143,31 +154,31 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
return false;
if (UI->getParent() == SinkTo)
return true;
- auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI);
- if (WidenI && WidenI->getAddr() == SinkCandidate) {
- NeedsDuplicating = true;
- return true;
- }
- return false;
+ NeedsDuplicating =
+ UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue());
+ // We only know how to duplicate VPRecipeRecipes for now.
+ return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
};
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser))
continue;
if (NeedsDuplicating) {
- Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue());
+ if (ScalarVFOnly)
+ continue;
+ Instruction *I = cast<Instruction>(
+ cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
auto *Clone =
new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
- SmallVector<VPUser *, 4> Users(SinkCandidate->users());
- for (auto *U : Users) {
+ for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) {
auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
continue;
for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
- if (UI->getOperand(Idx) != SinkCandidate)
+ if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue())
continue;
UI->setOperand(Idx, Clone);
}
@@ -175,7 +186,8 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
}
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
for (VPValue *Op : SinkCandidate->operands())
- WorkList.insert(std::make_pair(SinkTo, Op));
+ if (auto *Def = Op->getDefiningRecipe())
+ WorkList.insert(std::make_pair(SinkTo, Def));
Changed = true;
}
return Changed;
@@ -212,21 +224,16 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
return nullptr;
}
-bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
+bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
SetVector<VPRegionBlock *> DeletedRegions;
- bool Changed = false;
-
- // Collect region blocks to process up-front, to avoid iterator invalidation
- // issues while merging regions.
- SmallVector<VPRegionBlock *, 8> CandidateRegions(
- VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
- VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()))));
- // Check if Base is a predicated triangle, followed by an empty block,
- // followed by another predicate triangle. If that's the case, move the
- // recipes from the first to the second triangle.
- for (VPRegionBlock *Region1 : CandidateRegions) {
- if (DeletedRegions.contains(Region1))
+ // Collect replicate regions followed by an empty block, followed by another
+ // replicate region with matching masks to process front. This is to avoid
+ // iterator invalidation issues while merging regions.
+ SmallVector<VPRegionBlock *, 8> WorkList;
+ for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ if (!Region1->isReplicator())
continue;
auto *MiddleBasicBlock =
dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
@@ -235,20 +242,30 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
auto *Region2 =
dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
- if (!Region2)
+ if (!Region2 || !Region2->isReplicator())
continue;
VPValue *Mask1 = getPredicatedMask(Region1);
VPValue *Mask2 = getPredicatedMask(Region2);
if (!Mask1 || Mask1 != Mask2)
continue;
+
+ assert(Mask1 && Mask2 && "both region must have conditions");
+ WorkList.push_back(Region1);
+ }
+
+ // Move recipes from Region1 to its successor region, if both are triangles.
+ for (VPRegionBlock *Region1 : WorkList) {
+ if (DeletedRegions.contains(Region1))
+ continue;
+ auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
+ auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
+
VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
if (!Then1 || !Then2)
continue;
- assert(Mask1 && Mask2 && "both region must have conditions");
-
// Note: No fusion-preventing memory dependencies are expected in either
// region. Such dependencies should be rejected during earlier dependence
// checks, which guarantee accesses can be re-ordered for vectorization.
@@ -267,8 +284,7 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
- SmallVector<VPUser *> Users(Phi1ToMoveV->users());
- for (VPUser *U : Users) {
+ for (VPUser *U : to_vector(Phi1ToMoveV->users())) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
continue;
@@ -293,7 +309,34 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
for (VPRegionBlock *ToDelete : DeletedRegions)
delete ToDelete;
- return Changed;
+ return !DeletedRegions.empty();
+}
+
+bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
+ SmallVector<VPBasicBlock *> WorkList;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ auto *PredVPBB =
+ dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
+ if (PredVPBB && PredVPBB->getNumSuccessors() == 1)
+ WorkList.push_back(VPBB);
+ }
+
+ for (VPBasicBlock *VPBB : WorkList) {
+ VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ R.moveBefore(*PredVPBB, PredVPBB->end());
+ VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
+ auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
+ if (ParentRegion && ParentRegion->getExiting() == VPBB)
+ ParentRegion->setExiting(PredVPBB);
+ for (auto *Succ : to_vector(VPBB->successors())) {
+ VPBlockUtils::disconnectBlocks(VPBB, Succ);
+ VPBlockUtils::connectBlocks(PredVPBB, Succ);
+ }
+ delete VPBB;
+ }
+ return !WorkList.empty();
}
void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
@@ -362,8 +405,8 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
}
void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
- ReversePostOrderTraversal<VPBlockRecursiveTraversalWrapper<VPBlockBase *>>
- RPOT(Plan.getEntry());
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
// The recipes in the block are processed in reverse order, to catch chains
@@ -383,30 +426,40 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
- auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
- if (!IV)
+ auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+ if (!WideIV)
continue;
- if (HasOnlyVectorVFs &&
- none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); }))
+ if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
+ return U->usesScalars(WideIV);
+ }))
continue;
- const InductionDescriptor &ID = IV->getInductionDescriptor();
+ auto IP = HeaderVPBB->getFirstNonPhi();
+ VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ Type *ResultTy = WideIV->getPHINode()->getType();
+ if (Instruction *TruncI = WideIV->getTruncInst())
+ ResultTy = TruncI->getType();
+ const InductionDescriptor &ID = WideIV->getInductionDescriptor();
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
- Instruction *TruncI = IV->getTruncInst();
- VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
- IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(),
- IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr);
- HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi());
+ VPValue *BaseIV = CanonicalIV;
+ if (!CanonicalIV->isCanonical(ID, ResultTy)) {
+ BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
+ Step, ResultTy);
+ HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ }
+
+ VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
+ HeaderVPBB->insert(Steps, IP);
// Update scalar users of IV to use Step instead. Use SetVector to ensure
// the list of users doesn't contain duplicates.
- SetVector<VPUser *> Users(IV->user_begin(), IV->user_end());
+ SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end());
for (VPUser *U : Users) {
- if (HasOnlyVectorVFs && !U->usesScalars(IV))
+ if (HasOnlyVectorVFs && !U->usesScalars(WideIV))
continue;
for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
- if (U->getOperand(I) != IV)
+ if (U->getOperand(I) != WideIV)
continue;
U->setOperand(I, Steps);
}
@@ -430,3 +483,53 @@ void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) {
ExpR->eraseFromParent();
}
}
+
+static bool canSimplifyBranchOnCond(VPInstruction *Term) {
+ VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
+ if (!Not || Not->getOpcode() != VPInstruction::Not)
+ return false;
+
+ VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
+ return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
+}
+
+void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
+ unsigned BestUF,
+ PredicatedScalarEvolution &PSE) {
+ assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+ assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+ VPBasicBlock *ExitingVPBB =
+ Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
+ // Try to simplify the branch condition if TC <= VF * UF when preparing to
+ // execute the plan for the main vector loop. We only do this if the
+ // terminator is:
+ // 1. BranchOnCount, or
+ // 2. BranchOnCond where the input is Not(ActiveLaneMask).
+ if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount &&
+ (Term->getOpcode() != VPInstruction::BranchOnCond ||
+ !canSimplifyBranchOnCond(Term))))
+ return;
+
+ Type *IdxTy =
+ Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
+ const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
+ ScalarEvolution &SE = *PSE.getSE();
+ const SCEV *C =
+ SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF);
+ if (TripCount->isZero() ||
+ !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
+ return;
+
+ LLVMContext &Ctx = SE.getContext();
+ auto *BOC =
+ new VPInstruction(VPInstruction::BranchOnCond,
+ {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
+ Term->eraseFromParent();
+ ExitingVPBB->appendRecipe(BOC);
+ Plan.setVF(BestVF);
+ Plan.setUF(BestUF);
+ // TODO: Further simplifications are possible
+ // 1. Replace inductions with constants.
+ // 2. Replace vector loop region with VPBasicBlock.
+}