aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/Vectorize/VPlan.cpp
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp1161
1 files changed, 294 insertions, 867 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 342d4a074e10..4d709097c306 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -23,11 +23,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -35,13 +34,13 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
-#include <iterator>
#include <string>
#include <vector>
@@ -60,7 +59,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
}
#endif
-Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
+Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
const ElementCount &VF) const {
switch (LaneKind) {
case VPLane::Kind::ScalableLast:
@@ -158,25 +157,25 @@ void VPBlockBase::setPlan(VPlan *ParentPlan) {
}
/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
-const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
+const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
const VPBlockBase *Block = this;
while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
- Block = Region->getExit();
+ Block = Region->getExiting();
return cast<VPBasicBlock>(Block);
}
-VPBasicBlock *VPBlockBase::getExitBasicBlock() {
+VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
VPBlockBase *Block = this;
while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
- Block = Region->getExit();
+ Block = Region->getExiting();
return cast<VPBasicBlock>(Block);
}
VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
if (!Successors.empty() || !Parent)
return this;
- assert(Parent->getExit() == this &&
- "Block w/o successors not the exit of its parent.");
+ assert(Parent->getExiting() == this &&
+ "Block w/o successors not the exiting block of its parent.");
return Parent->getEnclosingBlockWithSuccessors();
}
@@ -188,28 +187,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-VPValue *VPBlockBase::getCondBit() {
- return CondBitUser.getSingleOperandOrNull();
-}
-
-const VPValue *VPBlockBase::getCondBit() const {
- return CondBitUser.getSingleOperandOrNull();
-}
-
-void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }
-
-VPValue *VPBlockBase::getPredicate() {
- return PredicateUser.getSingleOperandOrNull();
-}
-
-const VPValue *VPBlockBase::getPredicate() const {
- return PredicateUser.getSingleOperandOrNull();
-}
-
-void VPBlockBase::setPredicate(VPValue *CV) {
- PredicateUser.resetSingleOpUser(CV);
-}
-
void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
@@ -245,6 +222,52 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
// set(Def, Extract, Instance);
return Extract;
}
+BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
+ VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
+ return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+}
+
+void VPTransformState::addNewMetadata(Instruction *To,
+ const Instruction *Orig) {
+ // If the loop was versioned with memchecks, add the corresponding no-alias
+ // metadata.
+ if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
+ LVer->annotateInstWithNoAlias(To, Orig);
+}
+
+void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
+ propagateMetadata(To, From);
+ addNewMetadata(To, From);
+}
+
+void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
+ for (Value *V : To) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ addMetadata(I, From);
+ }
+}
+
+void VPTransformState::setDebugLocFromInst(const Value *V) {
+ if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) {
+ const DILocation *DIL = Inst->getDebugLoc();
+
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
+ // FIXME: For scalable vectors, assume vscale=1.
+ auto NewDIL =
+ DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
+ if (NewDIL)
+ Builder.SetCurrentDebugLocation(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ } else
+ Builder.SetCurrentDebugLocation(DIL);
+ } else
+ Builder.SetCurrentDebugLocation(DebugLoc());
+}
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
@@ -252,43 +275,36 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
// Pred stands for Predessor. Prev stands for Previous - last visited/created.
BasicBlock *PrevBB = CFG.PrevBB;
BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
- PrevBB->getParent(), CFG.LastBB);
+ PrevBB->getParent(), CFG.ExitBB);
LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
// Hook up the new basic block to its predecessors.
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
- VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
- auto &PredVPSuccessors = PredVPBB->getSuccessors();
+ VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
+ auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
- // In outer loop vectorization scenario, the predecessor BBlock may not yet
- // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
- // vectorization. We do not encounter this case in inner loop vectorization
- // as we start out by building a loop skeleton with the vector loop header
- // and latch blocks. As a result, we never enter this function for the
- // header block in the non VPlan-native path.
- if (!PredBB) {
- assert(EnableVPlanNativePath &&
- "Unexpected null predecessor in non VPlan-native path");
- CFG.VPBBsToFix.push_back(PredVPBB);
- continue;
- }
-
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+
+ auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
+ DebugLoc DL = PredBBTerminator->getDebugLoc();
PredBBTerminator->eraseFromParent();
- BranchInst::Create(NewBB, PredBB);
+ auto *Br = BranchInst::Create(NewBB, PredBB);
+ Br->setDebugLoc(DL);
+ } else if (TermBr && !TermBr->isConditional()) {
+ TermBr->setSuccessor(0, NewBB);
} else {
- assert(PredVPSuccessors.size() == 2 &&
- "Predecessor ending with branch must have two successors.");
+ // Set each forward successor here when it is created, excluding
+ // backedges. A backward successor is set when the branch is created.
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!PredBBTerminator->getSuccessor(idx) &&
+ assert(!TermBr->getSuccessor(idx) &&
"Trying to reset an existing successor block.");
- PredBBTerminator->setSuccessor(idx, NewBB);
+ TermBr->setSuccessor(idx, NewBB);
}
}
return NewBB;
@@ -300,27 +316,51 @@ void VPBasicBlock::execute(VPTransformState *State) {
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
- // 1. Create an IR basic block, or reuse the last one if possible.
- // The last IR basic block is reused, as an optimization, in three cases:
- // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
- // B. when the current VPBB has a single (hierarchical) predecessor which
- // is PrevVPBB and the latter has a single (hierarchical) successor; and
- // C. when the current VPBB is an entry of a region replica - where PrevVPBB
- // is the exit of this region from a previous instance, or the predecessor
- // of this region.
- if (PrevVPBB && /* A */
- !((SingleHPred = getSingleHierarchicalPredecessor()) &&
- SingleHPred->getExitBasicBlock() == PrevVPBB &&
- PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
- !(Replica && getPredecessors().empty())) { /* C */
+ auto IsLoopRegion = [](VPBlockBase *BB) {
+ auto *R = dyn_cast<VPRegionBlock>(BB);
+ return R && !R->isReplicator();
+ };
+
+ // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
+ if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
+ // ExitBB can be re-used for the exit block of the Plan.
+ NewBB = State->CFG.ExitBB;
+ State->CFG.PrevBB = NewBB;
+
+ // Update the branch instruction in the predecessor to branch to ExitBB.
+ VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
+ VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
+ assert(PredVPB->getSingleSuccessor() == this &&
+ "predecessor must have the current block as only successor");
+ BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
+ // The Exit block of a loop is always set to be successor 0 of the Exiting
+ // block.
+ cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
+ } else if (PrevVPBB && /* A */
+ !((SingleHPred = getSingleHierarchicalPredecessor()) &&
+ SingleHPred->getExitingBasicBlock() == PrevVPBB &&
+ PrevVPBB->getSingleHierarchicalSuccessor() &&
+ (SingleHPred->getParent() == getEnclosingLoopRegion() &&
+ !IsLoopRegion(SingleHPred))) && /* B */
+ !(Replica && getPredecessors().empty())) { /* C */
+ // The last IR basic block is reused, as an optimization, in three cases:
+ // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
+ // B. when the current VPBB has a single (hierarchical) predecessor which
+ // is PrevVPBB and the latter has a single (hierarchical) successor which
+ // both are in the same non-replicator region; and
+ // C. when the current VPBB is an entry of a region replica - where PrevVPBB
+ // is the exiting VPBB of this region from a previous instance, or the
+ // predecessor of this region.
+
NewBB = createEmptyBasicBlock(State->CFG);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
+ // Register NewBB in its loop. In innermost loops its the same for all
+ // BB's.
+ if (State->CurrentVectorLoop)
+ State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
- // Register NewBB in its loop. In innermost loops its the same for all BB's.
- Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
- L->addBasicBlockToLoop(NewBB, *State->LI);
State->CFG.PrevBB = NewBB;
}
@@ -334,29 +374,6 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
- VPValue *CBV;
- if (EnableVPlanNativePath && (CBV = getCondBit())) {
- assert(CBV->getUnderlyingValue() &&
- "Unexpected null underlying value for condition bit");
-
- // Condition bit value in a VPBasicBlock is used as the branch selector. In
- // the VPlan-native path case, since all branches are uniform we generate a
- // branch instruction using the condition value from vector lane 0 and dummy
- // successors. The successors are fixed later when the successor blocks are
- // visited.
- Value *NewCond = State->get(CBV, {0, 0});
-
- // Replace the temporary unreachable terminator with the new conditional
- // branch.
- auto *CurrentTerminator = NewBB->getTerminator();
- assert(isa<UnreachableInst>(CurrentTerminator) &&
- "Expected to replace unreachable terminator with conditional "
- "branch.");
- auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
- CondBr->setSuccessor(0, nullptr);
- ReplaceInstWithInst(CurrentTerminator, CondBr);
- }
-
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
@@ -395,6 +412,61 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
return SplitBlock;
}
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+ VPRegionBlock *P = getParent();
+ if (P && P->isReplicator()) {
+ P = P->getParent();
+ assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+ "unexpected nested replicate regions");
+ }
+ return P;
+}
+
+static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
+ if (VPBB->empty()) {
+ assert(
+ VPBB->getNumSuccessors() < 2 &&
+ "block with multiple successors doesn't have a recipe as terminator");
+ return false;
+ }
+
+ const VPRecipeBase *R = &VPBB->back();
+ auto *VPI = dyn_cast<VPInstruction>(R);
+ bool IsCondBranch =
+ isa<VPBranchOnMaskRecipe>(R) ||
+ (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
+ VPI->getOpcode() == VPInstruction::BranchOnCount));
+ (void)IsCondBranch;
+
+ if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
+ assert(IsCondBranch && "block with multiple successors not terminated by "
+ "conditional branch recipe");
+
+ return true;
+ }
+
+ assert(
+ !IsCondBranch &&
+ "block with 0 or 1 successors terminated by conditional branch recipe");
+ return false;
+}
+
+VPRecipeBase *VPBasicBlock::getTerminator() {
+ if (hasConditionalTerminator(this))
+ return &back();
+ return nullptr;
+}
+
+const VPRecipeBase *VPBasicBlock::getTerminator() const {
+ if (hasConditionalTerminator(this))
+ return &back();
+ return nullptr;
+}
+
+bool VPBasicBlock::isExiting() const {
+ return getParent()->getExitingBasicBlock() == this;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
@@ -411,13 +483,6 @@ void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << getName() << ":\n";
- if (const VPValue *Pred = getPredicate()) {
- O << Indent << "BlockPredicate:";
- Pred->printAsOperand(O, SlotTracker);
- if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
- O << " (" << PredInst->getParent()->getName() << ")";
- O << '\n';
- }
auto RecipeIndent = Indent + " ";
for (const VPRecipeBase &Recipe : *this) {
@@ -426,14 +491,6 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
}
printSuccessors(O, Indent);
-
- if (const VPValue *CBV = getCondBit()) {
- O << Indent << "CondBit: ";
- CBV->printAsOperand(O, SlotTracker);
- if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
- O << " (" << CBI->getParent()->getName() << ")";
- O << '\n';
- }
}
#endif
@@ -448,25 +505,26 @@ void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
if (!isReplicator()) {
+ // Create and register the new vector loop.
+ Loop *PrevLoop = State->CurrentVectorLoop;
+ State->CurrentVectorLoop = State->LI->AllocateLoop();
+ BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
- if (EnableVPlanNativePath) {
- // The inner loop vectorization path does not represent loop preheader
- // and exit blocks as part of the VPlan. In the VPlan-native path, skip
- // vectorizing loop preheader block. In future, we may replace this
- // check with the check for loop preheader.
- if (Block->getNumPredecessors() == 0)
- continue;
-
- // Skip vectorizing loop exit block. In future, we may replace this
- // check with the check for loop exit.
- if (Block->getNumSuccessors() == 0)
- continue;
- }
-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
+
+ State->CurrentVectorLoop = PrevLoop;
return;
}
@@ -508,341 +566,32 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
-bool VPRecipeBase::mayWriteToMemory() const {
- switch (getVPDefID()) {
- case VPWidenMemoryInstructionSC: {
- return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
- case VPReplicateSC:
- case VPWidenCallSC:
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
- ->mayWriteToMemory();
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayWriteToMemory()) &&
- "underlying instruction may write to memory");
- return false;
- }
- default:
- return true;
- }
-}
-
-bool VPRecipeBase::mayReadFromMemory() const {
- switch (getVPDefID()) {
- case VPWidenMemoryInstructionSC: {
- return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
- case VPReplicateSC:
- case VPWidenCallSC:
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
- ->mayReadFromMemory();
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayReadFromMemory()) &&
- "underlying instruction may read from memory");
- return false;
- }
- default:
- return true;
- }
-}
-
-bool VPRecipeBase::mayHaveSideEffects() const {
- switch (getVPDefID()) {
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayHaveSideEffects()) &&
- "underlying instruction has side-effects");
- return false;
- }
- case VPReplicateSC: {
- auto *R = cast<VPReplicateRecipe>(this);
- return R->getUnderlyingInstr()->mayHaveSideEffects();
- }
- default:
- return true;
- }
-}
-
-void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
- assert(!Parent && "Recipe already in some VPBasicBlock");
- assert(InsertPos->getParent() &&
- "Insertion position not in any VPBasicBlock");
- Parent = InsertPos->getParent();
- Parent->getRecipeList().insert(InsertPos->getIterator(), this);
-}
-
-void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
- assert(!Parent && "Recipe already in some VPBasicBlock");
- assert(InsertPos->getParent() &&
- "Insertion position not in any VPBasicBlock");
- Parent = InsertPos->getParent();
- Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
-}
-
-void VPRecipeBase::removeFromParent() {
- assert(getParent() && "Recipe not in any VPBasicBlock");
- getParent()->getRecipeList().remove(getIterator());
- Parent = nullptr;
-}
-
-iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
- assert(getParent() && "Recipe not in any VPBasicBlock");
- return getParent()->getRecipeList().erase(getIterator());
-}
-
-void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
- removeFromParent();
- insertAfter(InsertPos);
-}
-
-void VPRecipeBase::moveBefore(VPBasicBlock &BB,
- iplist<VPRecipeBase>::iterator I) {
- assert(I == BB.end() || I->getParent() == &BB);
- removeFromParent();
- Parent = &BB;
- BB.getRecipeList().insert(I, this);
-}
-
-void VPInstruction::generateInstruction(VPTransformState &State,
- unsigned Part) {
- IRBuilder<> &Builder = State.Builder;
- Builder.SetCurrentDebugLocation(DL);
-
- if (Instruction::isBinaryOp(getOpcode())) {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
- Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
- State.set(this, V, Part);
- return;
- }
-
- switch (getOpcode()) {
- case VPInstruction::Not: {
- Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A);
- State.set(this, V, Part);
- break;
- }
- case VPInstruction::ICmpULE: {
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC);
- State.set(this, V, Part);
- break;
- }
- case Instruction::Select: {
- Value *Cond = State.get(getOperand(0), Part);
- Value *Op1 = State.get(getOperand(1), Part);
- Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2);
- State.set(this, V, Part);
- break;
- }
- case VPInstruction::ActiveLaneMask: {
- // Get first lane of vector induction variable.
- Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
- // Get the original loop tripcount.
- Value *ScalarTC = State.get(getOperand(1), Part);
-
- auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
- auto *PredTy = VectorType::get(Int1Ty, State.VF);
- Instruction *Call = Builder.CreateIntrinsic(
- Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
- State.set(this, Call, Part);
- break;
- }
- case VPInstruction::FirstOrderRecurrenceSplice: {
- // Generate code to combine the previous and current values in vector v3.
- //
- // vector.ph:
- // v_init = vector(..., ..., ..., a[-1])
- // br vector.body
- //
- // vector.body
- // i = phi [0, vector.ph], [i+4, vector.body]
- // v1 = phi [v_init, vector.ph], [v2, vector.body]
- // v2 = a[i, i+1, i+2, i+3];
- // v3 = vector(v1(3), v2(0, 1, 2))
-
- // For the first part, use the recurrence phi (v1), otherwise v2.
- auto *V1 = State.get(getOperand(0), 0);
- Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
- if (!PartMinus1->getType()->isVectorTy()) {
- State.set(this, PartMinus1, Part);
- } else {
- Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
- }
- break;
- }
-
- case VPInstruction::CanonicalIVIncrement:
- case VPInstruction::CanonicalIVIncrementNUW: {
- Value *Next = nullptr;
- if (Part == 0) {
- bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
- auto *Phi = State.get(getOperand(0), 0);
- // The loop step is equal to the vectorization factor (num of SIMD
- // elements) times the unroll factor (num of SIMD instructions).
- Value *Step =
- createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
- } else {
- Next = State.get(this, 0);
- }
-
- State.set(this, Next, Part);
- break;
- }
- case VPInstruction::BranchOnCount: {
- if (Part != 0)
- break;
- // First create the compare.
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
- Value *Cond = Builder.CreateICmpEQ(IV, TC);
-
- // Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath &&
- "empty entry block only expected in VPlanNativePath");
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
+void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
+ Value *CanonicalIVStartValue,
+ VPTransformState &State,
+ bool IsEpilogueVectorization) {
+
+ VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+ auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
+ // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when
+ // preparing to execute the plan for the main vector loop.
+ if (!IsEpilogueVectorization && Term &&
+ Term->getOpcode() == VPInstruction::BranchOnCount &&
+ isa<ConstantInt>(TripCountV)) {
+ ConstantInt *C = cast<ConstantInt>(TripCountV);
+ uint64_t TCVal = C->getZExtValue();
+ if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
+ auto *BOC =
+ new VPInstruction(VPInstruction::BranchOnCond,
+ {getOrAddExternalDef(State.Builder.getTrue())});
+ Term->eraseFromParent();
+ ExitingVPBB->appendRecipe(BOC);
+ // TODO: Further simplifications are possible
+ // 1. Replace inductions with constants.
+ // 2. Replace vector loop region with VPBasicBlock.
}
- // TODO: Once the exit block is modeled in VPlan, use it instead of going
- // through State.CFG.LastBB.
- BasicBlock *Exit =
- cast<BranchInst>(State.CFG.LastBB->getTerminator())->getSuccessor(0);
-
- Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]);
- Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- break;
- }
- default:
- llvm_unreachable("Unsupported opcode for instruction");
- }
-}
-
-void VPInstruction::execute(VPTransformState &State) {
- assert(!State.Instance && "VPInstruction executing an Instance");
- IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- State.Builder.setFastMathFlags(FMF);
- for (unsigned Part = 0; Part < State.UF; ++Part)
- generateInstruction(State, Part);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPInstruction::dump() const {
- VPSlotTracker SlotTracker(getParent()->getPlan());
- print(dbgs(), "", SlotTracker);
-}
-
-void VPInstruction::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
-
- if (hasResult()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
-
- switch (getOpcode()) {
- case VPInstruction::Not:
- O << "not";
- break;
- case VPInstruction::ICmpULE:
- O << "icmp ule";
- break;
- case VPInstruction::SLPLoad:
- O << "combined load";
- break;
- case VPInstruction::SLPStore:
- O << "combined store";
- break;
- case VPInstruction::ActiveLaneMask:
- O << "active lane mask";
- break;
- case VPInstruction::FirstOrderRecurrenceSplice:
- O << "first-order splice";
- break;
- case VPInstruction::CanonicalIVIncrement:
- O << "VF * UF + ";
- break;
- case VPInstruction::CanonicalIVIncrementNUW:
- O << "VF * UF +(nuw) ";
- break;
- case VPInstruction::BranchOnCount:
- O << "branch-on-count ";
- break;
- default:
- O << Instruction::getOpcodeName(getOpcode());
- }
-
- O << FMF;
-
- for (const VPValue *Operand : operands()) {
- O << " ";
- Operand->printAsOperand(O, SlotTracker);
}
- if (DL) {
- O << ", !dbg ";
- DL.print(O);
- }
-}
-#endif
-
-void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
- // Make sure the VPInstruction is a floating-point operation.
- assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
- Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
- Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
- Opcode == Instruction::FCmp) &&
- "this op can't take fast-math flags");
- FMF = FMFNew;
-}
-
-void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
- Value *CanonicalIVStartValue,
- VPTransformState &State) {
// Check if the trip count is needed, and if so build it.
if (TripCount && TripCount->getNumUsers()) {
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
@@ -868,111 +617,78 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
if (CanonicalIVStartValue) {
- VPValue *VPV = new VPValue(CanonicalIVStartValue);
- addExternalDef(VPV);
+ VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
+ if (isa<VPScalarIVStepsRecipe>(U))
+ return true;
auto *VPI = cast<VPInstruction>(U);
return VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrement ||
VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrementNUW;
}) &&
- "the canonical IV should only be used by its increments when "
+ "the canonical IV should only be used by its increments or "
+ "ScalarIVSteps when "
"resetting the start value");
IV->setOperand(0, VPV);
}
}
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
+/// Generate the code inside the preheader and body of the vectorized loop.
+/// Assumes a single pre-header basic-block was created for this. Introduce
+/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
- // 0. Set the reverse mapping from VPValues to Values for code generation.
+ // Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
- BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
- State->CFG.VectorPreHeader = VectorPreHeaderBB;
- BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
- assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
-
- // 1. Make room to generate basic-blocks inside loop body if needed.
- BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
- VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
- Loop *L = State->LI->getLoopFor(VectorHeaderBB);
- L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
- // Remove the edge between Header and Latch to allow other connections.
- // Temporarily terminate with unreachable until CFG is rewired.
- // Note: this asserts the generated code's assumption that
- // getFirstInsertionPt() can be dereferenced into an Instruction.
- VectorHeaderBB->getTerminator()->eraseFromParent();
- State->Builder.SetInsertPoint(VectorHeaderBB);
- UnreachableInst *Terminator = State->Builder.CreateUnreachable();
- State->Builder.SetInsertPoint(Terminator);
-
- // 2. Generate code in loop body.
+ // Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
- State->CFG.PrevBB = VectorHeaderBB;
- State->CFG.LastBB = VectorLatchBB;
+ State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
+ BasicBlock *VectorPreHeader = State->CFG.PrevBB;
+ State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
+ // Generate code in the loop pre-header and body.
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
- // Setup branch terminator successors for VPBBs in VPBBsToFix based on
- // VPBB's successors.
- for (auto VPBB : State->CFG.VPBBsToFix) {
- assert(EnableVPlanNativePath &&
- "Unexpected VPBBsToFix in non VPlan-native path");
- BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
- assert(BB && "Unexpected null basic block for VPBB");
-
- unsigned Idx = 0;
- auto *BBTerminator = BB->getTerminator();
-
- for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
- VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
- BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
- ++Idx;
- }
- }
-
- // 3. Merge the temporary latch created with the last basic-block filled.
- BasicBlock *LastBB = State->CFG.PrevBB;
- assert(isa<BranchInst>(LastBB->getTerminator()) &&
- "Expected VPlan CFG to terminate with branch");
-
- // Move both the branch and check from LastBB to VectorLatchBB.
- auto *LastBranch = cast<BranchInst>(LastBB->getTerminator());
- LastBranch->moveBefore(VectorLatchBB->getTerminator());
- VectorLatchBB->getTerminator()->eraseFromParent();
- // Move condition so it is guaranteed to be next to branch. This is only done
- // to avoid excessive test updates.
- // TODO: Remove special handling once the increments for all inductions are
- // modeled explicitly in VPlan.
- cast<Instruction>(LastBranch->getCondition())->moveBefore(LastBranch);
- // Connect LastBB to VectorLatchBB to facilitate their merge.
- BranchInst::Create(VectorLatchBB, LastBB);
-
- // Merge LastBB with Latch.
- bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
- (void)Merged;
- assert(Merged && "Could not merge last basic block with latch.");
- VectorLatchBB = LastBB;
+ VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = Entry->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath);
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
- }
+ VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedege values themselves.
- // TODO: Model their backedge values explicitly.
- if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
+
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ // TODO: Split off the case that all users of a pointer phi are scalar
+ // from the VPWidenPointerInductionRecipe.
+ if (WidenPhi->onlyScalarsGenerated(State->VF))
+ continue;
+
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
continue;
+ }
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
// For canonical IV, first-order recurrences and in-order reduction phis,
@@ -993,9 +709,12 @@ void VPlan::execute(VPTransformState *State) {
}
// We do not attempt to preserve DT for outer loop vectorization currently.
- if (!EnableVPlanNativePath)
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
- L->getExitBlock());
+ if (!EnableVPlanNativePath) {
+ BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
+ State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
+ updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
+ State->CFG.ExitBB);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1021,6 +740,17 @@ void VPlan::print(raw_ostream &O) const {
O << '\n';
Block->print(O, "", SlotTracker);
}
+
+ if (!LiveOuts.empty())
+ O << "\n";
+ for (auto &KV : LiveOuts) {
+ O << "Live-out ";
+ KV.second->getPhi()->printAsOperand(O);
+ O << " = ";
+ KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
+ O << "\n";
+ }
+
O << "}\n";
}
@@ -1034,11 +764,14 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
+void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
+ assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
+ LiveOuts.insert({PN, new VPLiveOut(PN, V)});
+}
+
+void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
BasicBlock *LoopLatchBB,
BasicBlock *LoopExitBB) {
- BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
- assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
// The vector body may be more than a single basic-block by this point.
// Update the dominator tree information inside the vector body by propagating
// it from header to latch, expecting only triangular control-flow, if any.
@@ -1075,6 +808,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
Twine(getOrCreateBID(Block));
@@ -1122,8 +856,8 @@ void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
bool Hidden, const Twine &Label) {
// Due to "dot" we print an edge between two regions as an edge between the
- // exit basic block and the entry basic of the respective regions.
- const VPBlockBase *Tail = From->getExitBasicBlock();
+ // exiting basic block and the entry basic of the respective regions.
+ const VPBlockBase *Tail = From->getExitingBasicBlock();
const VPBlockBase *Head = To->getEntryBasicBlock();
OS << Indent << getUID(Tail) << " -> " << getUID(Head);
OS << " [ label=\"" << Label << '\"';
@@ -1213,328 +947,6 @@ void VPlanIngredient::print(raw_ostream &O) const {
V->printAsOperand(O, false);
}
-void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-CALL ";
-
- auto *CI = cast<CallInst>(getUnderlyingInstr());
- if (CI->getType()->isVoidTy())
- O << "void ";
- else {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
-
- O << "call @" << CI->getCalledFunction()->getName() << "(";
- printOperands(O, SlotTracker);
- O << ")";
-}
-
-void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-SELECT ";
- printAsOperand(O, SlotTracker);
- O << " = select ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << ", ";
- getOperand(1)->printAsOperand(O, SlotTracker);
- O << ", ";
- getOperand(2)->printAsOperand(O, SlotTracker);
- O << (InvariantCond ? " (condition is loop invariant)" : "");
-}
-
-void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN ";
- printAsOperand(O, SlotTracker);
- O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-INDUCTION";
- if (getTruncInst()) {
- O << "\\l\"";
- O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
- O << " +\n" << Indent << "\" ";
- getVPValue(0)->printAsOperand(O, SlotTracker);
- } else
- O << " " << VPlanIngredient(IV);
-}
-#endif
-
-bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
- auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
- return StartC && StartC->isZero() && StepC && StepC->isOne();
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-GEP ";
- O << (IsPtrLoopInvariant ? "Inv" : "Var");
- size_t IndicesNumber = IsIndexLoopInvariant.size();
- for (size_t I = 0; I < IndicesNumber; ++I)
- O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
-
- O << " ";
- printAsOperand(O, SlotTracker);
- O << " = getelementptr ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-PHI ";
-
- auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
- // Unless all incoming values are modeled in VPlan print the original PHI
- // directly.
- // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
- // values as VPValues.
- if (getNumOperands() != OriginalPhi->getNumOperands()) {
- O << VPlanIngredient(OriginalPhi);
- return;
- }
-
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
-
-void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "BLEND ";
- Phi->printAsOperand(O, false);
- O << " =";
- if (getNumIncomingValues() == 1) {
- // Not a User of any mask: not really blending, this is a
- // single-predecessor phi.
- O << " ";
- getIncomingValue(0)->printAsOperand(O, SlotTracker);
- } else {
- for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
- O << " ";
- getIncomingValue(I)->printAsOperand(O, SlotTracker);
- O << "/";
- getMask(I)->printAsOperand(O, SlotTracker);
- }
- }
-}
-
-void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "REDUCE ";
- printAsOperand(O, SlotTracker);
- O << " = ";
- getChainOp()->printAsOperand(O, SlotTracker);
- O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- O << getUnderlyingInstr()->getFastMathFlags();
- O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
- getVecOp()->printAsOperand(O, SlotTracker);
- if (getCondOp()) {
- O << ", ";
- getCondOp()->printAsOperand(O, SlotTracker);
- }
- O << ")";
-}
-
-void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
-
- if (!getUnderlyingInstr()->getType()->isVoidTy()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
- O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
- printOperands(O, SlotTracker);
-
- if (AlsoPack)
- O << " (S->V)";
-}
-
-void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "PHI-PREDICATED-INSTRUCTION ";
- printAsOperand(O, SlotTracker);
- O << " = ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN ";
-
- if (!isStore()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
- O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
-
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
- Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *EntryPart = PHINode::Create(
- Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
- EntryPart->setDebugLoc(DL);
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, EntryPart, Part);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
- printAsOperand(O, SlotTracker);
- O << " = CANONICAL-INDUCTION";
-}
-#endif
-
-void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
- Value *CanonicalIV = State.get(getOperand(0), 0);
- Type *STy = CanonicalIV->getType();
- IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
- ElementCount VF = State.VF;
- Value *VStart = VF.isScalar()
- ? CanonicalIV
- : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
- Value *VStep = createStepForVF(Builder, STy, VF, Part);
- if (VF.isVector()) {
- VStep = Builder.CreateVectorSplat(VF, VStep);
- VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
- }
- Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
- State.set(this, CanonicalVectorIV, Part);
- }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
- printAsOperand(O, SlotTracker);
- O << " = WIDEN-CANONICAL-INDUCTION ";
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- // Create a vector from the initial value.
- auto *VectorInit = getStartValue()->getLiveInIRValue();
-
- Type *VecTy = State.VF.isScalar()
- ? VectorInit->getType()
- : VectorType::get(VectorInit->getType(), State.VF);
-
- if (State.VF.isVector()) {
- auto *IdxTy = Builder.getInt32Ty();
- auto *One = ConstantInt::get(IdxTy, 1);
- IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
- auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- VectorInit = Builder.CreateInsertElement(
- PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
- }
-
- // Create a phi node for the new recurrence.
- PHINode *EntryPart = PHINode::Create(
- VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
- State.set(this, EntryPart, 0);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPReductionPHIRecipe::execute(VPTransformState &State) {
- PHINode *PN = cast<PHINode>(getUnderlyingValue());
- auto &Builder = State.Builder;
-
- // In order to support recurrences we need to be able to vectorize Phi nodes.
- // Phi nodes have cycles, so we need to vectorize them in two stages. This is
- // stage #1: We create a new vector PHI node with no incoming edges. We'll use
- // this value when we vectorize all of the instructions that use the PHI.
- bool ScalarPHI = State.VF.isScalar() || IsInLoop;
- Type *VecTy =
- ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
-
- BasicBlock *HeaderBB = State.CFG.PrevBB;
- assert(State.LI->getLoopFor(HeaderBB)->getHeader() == HeaderBB &&
- "recipe must be in the vector loop header");
- unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart =
- PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
- State.set(this, EntryPart, Part);
- }
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- VPValue *StartVPV = getStartValue();
- Value *StartV = StartVPV->getLiveInIRValue();
-
- Value *Iden = nullptr;
- RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
- RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
- // MinMax reduction have the start value as their identify.
- if (ScalarPHI) {
- Iden = StartV;
- } else {
- IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- StartV = Iden =
- Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
- }
- } else {
- Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
- RdxDesc.getFastMathFlags());
-
- if (!ScalarPHI) {
- Iden = Builder.CreateVectorSplat(State.VF, Iden);
- IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- Constant *Zero = Builder.getInt32(0);
- StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
- }
- }
-
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart = State.get(this, Part);
- // Make sure to add the reduction start value only to the
- // first unroll part.
- Value *StartVal = (Part == 0) ? StartV : Iden;
- cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
- }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-REDUCTION-PHI ";
-
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
#endif
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
@@ -1594,7 +1006,10 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
continue;
assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
auto *VPInst = cast<VPInstruction>(&VPI);
- auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+
+ auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
+ if (!Inst)
+ continue;
auto *IG = IAI.getInterleaveGroup(Inst);
if (!IG)
continue;
@@ -1622,7 +1037,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
InterleavedAccessInfo &IAI) {
Old2NewTy Old2New;
- visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
+ visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}
void VPSlotTracker::assignSlot(const VPValue *V) {
@@ -1632,8 +1047,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
void VPSlotTracker::assignSlots(const VPlan &Plan) {
- for (const VPValue *V : Plan.VPExternalDefs)
- assignSlot(V);
+ for (const auto &P : Plan.VPExternalDefs)
+ assignSlot(P.second);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
@@ -1651,7 +1066,19 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
}
bool vputils::onlyFirstLaneUsed(VPValue *Def) {
- return all_of(Def->users(), [Def](VPUser *U) {
- return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
- });
+ return all_of(Def->users(),
+ [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
+}
+
+VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
+ ScalarEvolution &SE) {
+ if (auto *E = dyn_cast<SCEVConstant>(Expr))
+ return Plan.getOrAddExternalDef(E->getValue());
+ if (auto *E = dyn_cast<SCEVUnknown>(Expr))
+ return Plan.getOrAddExternalDef(E->getValue());
+
+ VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
+ VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
+ Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
+ return Step;
}