diff options
Diffstat (limited to 'lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/VPlan.cpp | 171 |
1 files changed, 165 insertions, 6 deletions
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 0780e70809d0..05a5400beb4e 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -44,6 +44,7 @@ #include <vector> using namespace llvm; +extern cl::opt<bool> EnableVPlanNativePath; #define DEBUG_TYPE "vplan" @@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); auto &PredVPSuccessors = PredVPBB->getSuccessors(); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; + + // In outer loop vectorization scenario, the predecessor BBlock may not yet + // be visited(backedge). Mark the VPBasicBlock for fixup at the end of + // vectorization. We do not encounter this case in inner loop vectorization + // as we start out by building a loop skeleton with the vector loop header + // and latch blocks. As a result, we never enter this function for the + // header block in the non VPlan-native path. + if (!PredBB) { + assert(EnableVPlanNativePath && + "Unexpected null predecessor in non VPlan-native path"); + CFG.VPBBsToFix.push_back(PredVPBB); + continue; + } + assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); @@ -185,6 +200,31 @@ void VPBasicBlock::execute(VPTransformState *State) { for (VPRecipeBase &Recipe : Recipes) Recipe.execute(*State); + VPValue *CBV; + if (EnableVPlanNativePath && (CBV = getCondBit())) { + Value *IRCBV = CBV->getUnderlyingValue(); + assert(IRCBV && "Unexpected null underlying value for condition bit"); + + // Condition bit value in a VPBasicBlock is used as the branch selector. In + // the VPlan-native path case, since all branches are uniform we generate a + // branch instruction using the condition value from vector lane 0 and dummy + // successors. The successors are fixed later when the successor blocks are + // visited. + Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0); + NewCond = State->Builder.CreateExtractElement(NewCond, + State->Builder.getInt32(0)); + + // Replace the temporary unreachable terminator with the new conditional + // branch. + auto *CurrentTerminator = NewBB->getTerminator(); + assert(isa<UnreachableInst>(CurrentTerminator) && + "Expected to replace unreachable terminator with conditional " + "branch."); + auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); + CondBr->setSuccessor(0, nullptr); + ReplaceInstWithInst(CurrentTerminator, CondBr); + } + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); } @@ -194,6 +234,20 @@ void VPRegionBlock::execute(VPTransformState *State) { if (!isReplicator()) { // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { + if (EnableVPlanNativePath) { + // The inner loop vectorization path does not represent loop preheader + // and exit blocks as part of the VPlan. In the VPlan-native path, skip + // vectorizing loop preheader block. In future, we may replace this + // check with the check for loop preheader. + if (Block->getNumPredecessors() == 0) + continue; + + // Skip vectorizing loop exit block. In future, we may replace this + // check with the check for loop exit. + if (Block->getNumSuccessors() == 0) + continue; + } + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); Block->execute(State); } @@ -249,6 +303,13 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.set(this, V, Part); break; } + case VPInstruction::ICmpULE: { + Value *IV = State.get(getOperand(0), Part); + Value *TC = State.get(getOperand(1), Part); + Value *V = Builder.CreateICmpULE(IV, TC); + State.set(this, V, Part); + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -274,6 +335,15 @@ void VPInstruction::print(raw_ostream &O) const { case VPInstruction::Not: O << "not"; break; + case VPInstruction::ICmpULE: + O << "icmp ule"; + break; + case VPInstruction::SLPLoad: + O << "combined load"; + break; + case VPInstruction::SLPStore: + O << "combined store"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -288,6 +358,15 @@ void VPInstruction::print(raw_ostream &O) const { /// LoopVectorBody basic-block was created for this. Introduce additional /// basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { + // -1. Check if the backedge taken count is needed, and if so build it. + if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { + Value *TC = State->TripCount; + IRBuilder<> Builder(State->CFG.PrevBB->getTerminator()); + auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1), + "trip.count.minus.1"); + Value2VPValue[TCMO] = BackedgeTakenCount; + } + // 0. Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) State->VPValue2Value[Entry.second] = Entry.first; @@ -319,11 +398,32 @@ void VPlan::execute(VPTransformState *State) { for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); + // Setup branch terminator successors for VPBBs in VPBBsToFix based on + // VPBB's successors. + for (auto VPBB : State->CFG.VPBBsToFix) { + assert(EnableVPlanNativePath && + "Unexpected VPBBsToFix in non VPlan-native path"); + BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; + assert(BB && "Unexpected null basic block for VPBB"); + + unsigned Idx = 0; + auto *BBTerminator = BB->getTerminator(); + + for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { + VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); + BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); + ++Idx; + } + } + // 3. Merge the temporary latch created with the last basic-block filled. BasicBlock *LastBB = State->CFG.PrevBB; // Connect LastBB to VectorLatchBB to facilitate their merge. - assert(isa<UnreachableInst>(LastBB->getTerminator()) && - "Expected VPlan CFG to terminate with unreachable"); + assert((EnableVPlanNativePath || + isa<UnreachableInst>(LastBB->getTerminator())) && + "Expected InnerLoop VPlan CFG to terminate with unreachable"); + assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) && + "Expected VPlan CFG to terminate with branch in NativePath"); LastBB->getTerminator()->eraseFromParent(); BranchInst::Create(VectorLatchBB, LastBB); @@ -333,7 +433,9 @@ void VPlan::execute(VPTransformState *State) { assert(Merged && "Could not merge last basic block with latch."); VectorLatchBB = LastBB; - updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); + // We do not attempt to preserve DT for outer loop vectorization currently. + if (!EnableVPlanNativePath) + updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); } void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, @@ -366,7 +468,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, "One successor of a basic block does not lead to the other."); assert(InterimSucc->getSinglePredecessor() && "Interim successor has more than one predecessor."); - assert(pred_size(PostDomSucc) == 2 && + assert(PostDomSucc->hasNPredecessors(2) && "PostDom successor has more than two predecessors."); DT->addNewBlock(InterimSucc, BB); DT->addNewBlock(PostDomSucc, BB); @@ -392,8 +494,11 @@ void VPlanPrinter::dump() { OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; if (!Plan.getName().empty()) OS << "\\n" << DOT::EscapeString(Plan.getName()); - if (!Plan.Value2VPValue.empty()) { + if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) { OS << ", where:"; + if (Plan.BackedgeTakenCount) + OS << "\\n" + << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount"; for (auto Entry : Plan.Value2VPValue) { OS << "\\n" << *Entry.second; OS << DOT::EscapeString(" := "); @@ -466,8 +571,10 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) { CBI->printAsOperand(OS); OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\""; - } else + } else { CBV->printAsOperand(OS); + OS << "\""; + } } bumpIndent(-2); @@ -579,3 +686,55 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, } template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); + +void VPValue::replaceAllUsesWith(VPValue *New) { + for (VPUser *User : users()) + for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) + if (User->getOperand(I) == this) + User->setOperand(I, New); +} + +void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, + Old2NewTy &Old2New, + InterleavedAccessInfo &IAI) { + ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry()); + for (VPBlockBase *Base : RPOT) { + visitBlock(Base, Old2New, IAI); + } +} + +void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, + InterleavedAccessInfo &IAI) { + if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { + for (VPRecipeBase &VPI : *VPBB) { + assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions"); + auto *VPInst = cast<VPInstruction>(&VPI); + auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue()); + auto *IG = IAI.getInterleaveGroup(Inst); + if (!IG) + continue; + + auto NewIGIter = Old2New.find(IG); + if (NewIGIter == Old2New.end()) + Old2New[IG] = new InterleaveGroup<VPInstruction>( + IG->getFactor(), IG->isReverse(), IG->getAlignment()); + + if (Inst == IG->getInsertPos()) + Old2New[IG]->setInsertPos(VPInst); + + InterleaveGroupMap[VPInst] = Old2New[IG]; + InterleaveGroupMap[VPInst]->insertMember( + VPInst, IG->getIndex(Inst), + IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + } + } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + visitRegion(Region, Old2New, IAI); + else + llvm_unreachable("Unsupported kind of VPBlock."); +} + +VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, + InterleavedAccessInfo &IAI) { + Old2NewTy Old2New; + visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI); +} |