summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r--lib/Transforms/Vectorize/VPlan.cpp171
1 files changed, 165 insertions, 6 deletions
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index 0780e70809d0..05a5400beb4e 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -44,6 +44,7 @@
#include <vector>
using namespace llvm;
+extern cl::opt<bool> EnableVPlanNativePath;
#define DEBUG_TYPE "vplan"
@@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
auto &PredVPSuccessors = PredVPBB->getSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+
+ // In outer loop vectorization scenario, the predecessor BBlock may not yet
+ // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
+ // vectorization. We do not encounter this case in inner loop vectorization
+ // as we start out by building a loop skeleton with the vector loop header
+ // and latch blocks. As a result, we never enter this function for the
+ // header block in the non VPlan-native path.
+ if (!PredBB) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected null predecessor in non VPlan-native path");
+ CFG.VPBBsToFix.push_back(PredVPBB);
+ continue;
+ }
+
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
@@ -185,6 +200,31 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
+ VPValue *CBV;
+ if (EnableVPlanNativePath && (CBV = getCondBit())) {
+ Value *IRCBV = CBV->getUnderlyingValue();
+ assert(IRCBV && "Unexpected null underlying value for condition bit");
+
+ // Condition bit value in a VPBasicBlock is used as the branch selector. In
+ // the VPlan-native path case, since all branches are uniform we generate a
+ // branch instruction using the condition value from vector lane 0 and dummy
+ // successors. The successors are fixed later when the successor blocks are
+ // visited.
+ Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
+ NewCond = State->Builder.CreateExtractElement(NewCond,
+ State->Builder.getInt32(0));
+
+ // Replace the temporary unreachable terminator with the new conditional
+ // branch.
+ auto *CurrentTerminator = NewBB->getTerminator();
+ assert(isa<UnreachableInst>(CurrentTerminator) &&
+ "Expected to replace unreachable terminator with conditional "
+ "branch.");
+ auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
+ CondBr->setSuccessor(0, nullptr);
+ ReplaceInstWithInst(CurrentTerminator, CondBr);
+ }
+
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
@@ -194,6 +234,20 @@ void VPRegionBlock::execute(VPTransformState *State) {
if (!isReplicator()) {
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
+ if (EnableVPlanNativePath) {
+ // The inner loop vectorization path does not represent loop preheader
+ // and exit blocks as part of the VPlan. In the VPlan-native path, skip
+ // vectorizing loop preheader block. In future, we may replace this
+ // check with the check for loop preheader.
+ if (Block->getNumPredecessors() == 0)
+ continue;
+
+ // Skip vectorizing loop exit block. In future, we may replace this
+ // check with the check for loop exit.
+ if (Block->getNumSuccessors() == 0)
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
@@ -249,6 +303,13 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -274,6 +335,15 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::Not:
O << "not";
break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -288,6 +358,15 @@ void VPInstruction::print(raw_ostream &O) const {
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // -1. Check if the backedge taken count is needed, and if so build it.
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ Value *TC = State->TripCount;
+ IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ "trip.count.minus.1");
+ Value2VPValue[TCMO] = BackedgeTakenCount;
+ }
+
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
@@ -319,11 +398,32 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
+ // Setup branch terminator successors for VPBBs in VPBBsToFix based on
+ // VPBB's successors.
+ for (auto VPBB : State->CFG.VPBBsToFix) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected VPBBsToFix in non VPlan-native path");
+ BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
+ assert(BB && "Unexpected null basic block for VPBB");
+
+ unsigned Idx = 0;
+ auto *BBTerminator = BB->getTerminator();
+
+ for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
+ VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
+ BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
+ ++Idx;
+ }
+ }
+
// 3. Merge the temporary latch created with the last basic-block filled.
BasicBlock *LastBB = State->CFG.PrevBB;
// Connect LastBB to VectorLatchBB to facilitate their merge.
- assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
- "Expected VPlan CFG to terminate with unreachable");
+ assert((EnableVPlanNativePath ||
+ isa<UnreachableInst>(LastBB->getTerminator())) &&
+ "Expected InnerLoop VPlan CFG to terminate with unreachable");
+ assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+ "Expected VPlan CFG to terminate with branch in NativePath");
LastBB->getTerminator()->eraseFromParent();
BranchInst::Create(VectorLatchBB, LastBB);
@@ -333,7 +433,9 @@ void VPlan::execute(VPTransformState *State) {
assert(Merged && "Could not merge last basic block with latch.");
VectorLatchBB = LastBB;
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
+ // We do not attempt to preserve DT for outer loop vectorization currently.
+ if (!EnableVPlanNativePath)
+ updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
}
void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
@@ -366,7 +468,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
"One successor of a basic block does not lead to the other.");
assert(InterimSucc->getSinglePredecessor() &&
"Interim successor has more than one predecessor.");
- assert(pred_size(PostDomSucc) == 2 &&
+ assert(PostDomSucc->hasNPredecessors(2) &&
"PostDom successor has more than two predecessors.");
DT->addNewBlock(InterimSucc, BB);
DT->addNewBlock(PostDomSucc, BB);
@@ -392,8 +494,11 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (!Plan.Value2VPValue.empty()) {
+ if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
+ if (Plan.BackedgeTakenCount)
+ OS << "\\n"
+ << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");
@@ -466,8 +571,10 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) {
CBI->printAsOperand(OS);
OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\"";
- } else
+ } else {
CBV->printAsOperand(OS);
+ OS << "\"";
+ }
}
bumpIndent(-2);
@@ -579,3 +686,55 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+
+void VPValue::replaceAllUsesWith(VPValue *New) {
+ for (VPUser *User : users())
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
+ if (User->getOperand(I) == this)
+ User->setOperand(I, New);
+}
+
+void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
+ Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+ for (VPBlockBase *Base : RPOT) {
+ visitBlock(Base, Old2New, IAI);
+ }
+}
+
+void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
+ for (VPRecipeBase &VPI : *VPBB) {
+ assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
+ auto *VPInst = cast<VPInstruction>(&VPI);
+ auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+ auto *IG = IAI.getInterleaveGroup(Inst);
+ if (!IG)
+ continue;
+
+ auto NewIGIter = Old2New.find(IG);
+ if (NewIGIter == Old2New.end())
+ Old2New[IG] = new InterleaveGroup<VPInstruction>(
+ IG->getFactor(), IG->isReverse(), IG->getAlignment());
+
+ if (Inst == IG->getInsertPos())
+ Old2New[IG]->setInsertPos(VPInst);
+
+ InterleaveGroupMap[VPInst] = Old2New[IG];
+ InterleaveGroupMap[VPInst]->insertMember(
+ VPInst, IG->getIndex(Inst),
+ IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor());
+ }
+ } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
+ visitRegion(Region, Old2New, IAI);
+ else
+ llvm_unreachable("Unsupported kind of VPBlock.");
+}
+
+VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
+ InterleavedAccessInfo &IAI) {
+ Old2NewTy Old2New;
+ visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
+}