diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 766 |
1 files changed, 766 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp new file mode 100644 index 000000000000..4b80d1fb20aa --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -0,0 +1,766 @@ +//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the LLVM vectorization plan. It represents a candidate for +/// vectorization, allowing to plan and optimize how to vectorize a given loop +/// before generating LLVM-IR. +/// The vectorizer uses vectorization plans to estimate the costs of potential +/// candidates and if profitable to execute the desired plan, generating vector +/// LLVM-IR code. +/// +//===----------------------------------------------------------------------===// + +#include "VPlan.h" +#include "VPlanDominatorTree.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <cassert> +#include <iterator> +#include <string> +#include <vector> + +using namespace llvm; +extern cl::opt<bool> EnableVPlanNativePath; + +#define DEBUG_TYPE "vplan" + +raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { + if (const VPInstruction *Instr = dyn_cast<VPInstruction>(&V)) + Instr->print(OS); + else + V.printAsOperand(OS); + return OS; +} + +/// \return the VPBasicBlock that is the entry of Block, possibly indirectly. +const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { + const VPBlockBase *Block = this; + while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + Block = Region->getEntry(); + return cast<VPBasicBlock>(Block); +} + +VPBasicBlock *VPBlockBase::getEntryBasicBlock() { + VPBlockBase *Block = this; + while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + Block = Region->getEntry(); + return cast<VPBasicBlock>(Block); +} + +/// \return the VPBasicBlock that is the exit of Block, possibly indirectly. +const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { + const VPBlockBase *Block = this; + while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + Block = Region->getExit(); + return cast<VPBasicBlock>(Block); +} + +VPBasicBlock *VPBlockBase::getExitBasicBlock() { + VPBlockBase *Block = this; + while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + Block = Region->getExit(); + return cast<VPBasicBlock>(Block); +} + +VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { + if (!Successors.empty() || !Parent) + return this; + assert(Parent->getExit() == this && + "Block w/o successors not the exit of its parent."); + return Parent->getEnclosingBlockWithSuccessors(); +} + +VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { + if (!Predecessors.empty() || !Parent) + return this; + assert(Parent->getEntry() == this && + "Block w/o predecessors not the entry of its parent."); + return Parent->getEnclosingBlockWithPredecessors(); +} + +void VPBlockBase::deleteCFG(VPBlockBase *Entry) { + SmallVector<VPBlockBase *, 8> Blocks; + for (VPBlockBase *Block : depth_first(Entry)) + Blocks.push_back(Block); + + for (VPBlockBase *Block : Blocks) + delete Block; +} + +BasicBlock * +VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { + // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. + // Pred stands for Predessor. Prev stands for Previous - last visited/created. + BasicBlock *PrevBB = CFG.PrevBB; + BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), + PrevBB->getParent(), CFG.LastBB); + LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); + + // Hook up the new basic block to its predecessors. + for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { + VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); + auto &PredVPSuccessors = PredVPBB->getSuccessors(); + BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; + + // In outer loop vectorization scenario, the predecessor BBlock may not yet + // be visited(backedge). Mark the VPBasicBlock for fixup at the end of + // vectorization. We do not encounter this case in inner loop vectorization + // as we start out by building a loop skeleton with the vector loop header + // and latch blocks. As a result, we never enter this function for the + // header block in the non VPlan-native path. + if (!PredBB) { + assert(EnableVPlanNativePath && + "Unexpected null predecessor in non VPlan-native path"); + CFG.VPBBsToFix.push_back(PredVPBB); + continue; + } + + assert(PredBB && "Predecessor basic-block not found building successor."); + auto *PredBBTerminator = PredBB->getTerminator(); + LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); + if (isa<UnreachableInst>(PredBBTerminator)) { + assert(PredVPSuccessors.size() == 1 && + "Predecessor ending w/o branch must have single successor."); + PredBBTerminator->eraseFromParent(); + BranchInst::Create(NewBB, PredBB); + } else { + assert(PredVPSuccessors.size() == 2 && + "Predecessor ending with branch must have two successors."); + unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; + assert(!PredBBTerminator->getSuccessor(idx) && + "Trying to reset an existing successor block."); + PredBBTerminator->setSuccessor(idx, NewBB); + } + } + return NewBB; +} + +void VPBasicBlock::execute(VPTransformState *State) { + bool Replica = State->Instance && + !(State->Instance->Part == 0 && State->Instance->Lane == 0); + VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; + VPBlockBase *SingleHPred = nullptr; + BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. + + // 1. Create an IR basic block, or reuse the last one if possible. + // The last IR basic block is reused, as an optimization, in three cases: + // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; + // B. when the current VPBB has a single (hierarchical) predecessor which + // is PrevVPBB and the latter has a single (hierarchical) successor; and + // C. when the current VPBB is an entry of a region replica - where PrevVPBB + // is the exit of this region from a previous instance, or the predecessor + // of this region. + if (PrevVPBB && /* A */ + !((SingleHPred = getSingleHierarchicalPredecessor()) && + SingleHPred->getExitBasicBlock() == PrevVPBB && + PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ + !(Replica && getPredecessors().empty())) { /* C */ + NewBB = createEmptyBasicBlock(State->CFG); + State->Builder.SetInsertPoint(NewBB); + // Temporarily terminate with unreachable until CFG is rewired. + UnreachableInst *Terminator = State->Builder.CreateUnreachable(); + State->Builder.SetInsertPoint(Terminator); + // Register NewBB in its loop. In innermost loops its the same for all BB's. + Loop *L = State->LI->getLoopFor(State->CFG.LastBB); + L->addBasicBlockToLoop(NewBB, *State->LI); + State->CFG.PrevBB = NewBB; + } + + // 2. Fill the IR basic block with IR instructions. + LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() + << " in BB:" << NewBB->getName() << '\n'); + + State->CFG.VPBB2IRBB[this] = NewBB; + State->CFG.PrevVPBB = this; + + for (VPRecipeBase &Recipe : Recipes) + Recipe.execute(*State); + + VPValue *CBV; + if (EnableVPlanNativePath && (CBV = getCondBit())) { + Value *IRCBV = CBV->getUnderlyingValue(); + assert(IRCBV && "Unexpected null underlying value for condition bit"); + + // Condition bit value in a VPBasicBlock is used as the branch selector. In + // the VPlan-native path case, since all branches are uniform we generate a + // branch instruction using the condition value from vector lane 0 and dummy + // successors. The successors are fixed later when the successor blocks are + // visited. + Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0); + NewCond = State->Builder.CreateExtractElement(NewCond, + State->Builder.getInt32(0)); + + // Replace the temporary unreachable terminator with the new conditional + // branch. + auto *CurrentTerminator = NewBB->getTerminator(); + assert(isa<UnreachableInst>(CurrentTerminator) && + "Expected to replace unreachable terminator with conditional " + "branch."); + auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); + CondBr->setSuccessor(0, nullptr); + ReplaceInstWithInst(CurrentTerminator, CondBr); + } + + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); +} + +void VPRegionBlock::execute(VPTransformState *State) { + ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); + + if (!isReplicator()) { + // Visit the VPBlocks connected to "this", starting from it. + for (VPBlockBase *Block : RPOT) { + if (EnableVPlanNativePath) { + // The inner loop vectorization path does not represent loop preheader + // and exit blocks as part of the VPlan. In the VPlan-native path, skip + // vectorizing loop preheader block. In future, we may replace this + // check with the check for loop preheader. + if (Block->getNumPredecessors() == 0) + continue; + + // Skip vectorizing loop exit block. In future, we may replace this + // check with the check for loop exit. + if (Block->getNumSuccessors() == 0) + continue; + } + + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); + Block->execute(State); + } + return; + } + + assert(!State->Instance && "Replicating a Region with non-null instance."); + + // Enter replicating mode. + State->Instance = {0, 0}; + + for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { + State->Instance->Part = Part; + for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { + State->Instance->Lane = Lane; + // Visit the VPBlocks connected to \p this, starting from it. + for (VPBlockBase *Block : RPOT) { + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); + Block->execute(State); + } + } + } + + // Exit replicating mode. + State->Instance.reset(); +} + +void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { + Parent = InsertPos->getParent(); + Parent->getRecipeList().insert(InsertPos->getIterator(), this); +} + +iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { + return getParent()->getRecipeList().erase(getIterator()); +} + +void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { + InsertPos->getParent()->getRecipeList().splice( + std::next(InsertPos->getIterator()), getParent()->getRecipeList(), + getIterator()); +} + +void VPInstruction::generateInstruction(VPTransformState &State, + unsigned Part) { + IRBuilder<> &Builder = State.Builder; + + if (Instruction::isBinaryOp(getOpcode())) { + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); + State.set(this, V, Part); + return; + } + + switch (getOpcode()) { + case VPInstruction::Not: { + Value *A = State.get(getOperand(0), Part); + Value *V = Builder.CreateNot(A); + State.set(this, V, Part); + break; + } + case VPInstruction::ICmpULE: { + Value *IV = State.get(getOperand(0), Part); + Value *TC = State.get(getOperand(1), Part); + Value *V = Builder.CreateICmpULE(IV, TC); + State.set(this, V, Part); + break; + } + case Instruction::Select: { + Value *Cond = State.get(getOperand(0), Part); + Value *Op1 = State.get(getOperand(1), Part); + Value *Op2 = State.get(getOperand(2), Part); + Value *V = Builder.CreateSelect(Cond, Op1, Op2); + State.set(this, V, Part); + break; + } + default: + llvm_unreachable("Unsupported opcode for instruction"); + } +} + +void VPInstruction::execute(VPTransformState &State) { + assert(!State.Instance && "VPInstruction executing an Instance"); + for (unsigned Part = 0; Part < State.UF; ++Part) + generateInstruction(State, Part); +} + +void VPInstruction::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"EMIT "; + print(O); + O << "\\l\""; +} + +void VPInstruction::print(raw_ostream &O) const { + printAsOperand(O); + O << " = "; + + switch (getOpcode()) { + case VPInstruction::Not: + O << "not"; + break; + case VPInstruction::ICmpULE: + O << "icmp ule"; + break; + case VPInstruction::SLPLoad: + O << "combined load"; + break; + case VPInstruction::SLPStore: + O << "combined store"; + break; + default: + O << Instruction::getOpcodeName(getOpcode()); + } + + for (const VPValue *Operand : operands()) { + O << " "; + Operand->printAsOperand(O); + } +} + +/// Generate the code inside the body of the vectorized loop. Assumes a single +/// LoopVectorBody basic-block was created for this. Introduce additional +/// basic-blocks as needed, and fill them all. +void VPlan::execute(VPTransformState *State) { + // -1. Check if the backedge taken count is needed, and if so build it. + if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { + Value *TC = State->TripCount; + IRBuilder<> Builder(State->CFG.PrevBB->getTerminator()); + auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1), + "trip.count.minus.1"); + Value2VPValue[TCMO] = BackedgeTakenCount; + } + + // 0. Set the reverse mapping from VPValues to Values for code generation. + for (auto &Entry : Value2VPValue) + State->VPValue2Value[Entry.second] = Entry.first; + + BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; + BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); + assert(VectorHeaderBB && "Loop preheader does not have a single successor."); + + // 1. Make room to generate basic-blocks inside loop body if needed. + BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock( + VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); + Loop *L = State->LI->getLoopFor(VectorHeaderBB); + L->addBasicBlockToLoop(VectorLatchBB, *State->LI); + // Remove the edge between Header and Latch to allow other connections. + // Temporarily terminate with unreachable until CFG is rewired. + // Note: this asserts the generated code's assumption that + // getFirstInsertionPt() can be dereferenced into an Instruction. + VectorHeaderBB->getTerminator()->eraseFromParent(); + State->Builder.SetInsertPoint(VectorHeaderBB); + UnreachableInst *Terminator = State->Builder.CreateUnreachable(); + State->Builder.SetInsertPoint(Terminator); + + // 2. Generate code in loop body. + State->CFG.PrevVPBB = nullptr; + State->CFG.PrevBB = VectorHeaderBB; + State->CFG.LastBB = VectorLatchBB; + + for (VPBlockBase *Block : depth_first(Entry)) + Block->execute(State); + + // Setup branch terminator successors for VPBBs in VPBBsToFix based on + // VPBB's successors. + for (auto VPBB : State->CFG.VPBBsToFix) { + assert(EnableVPlanNativePath && + "Unexpected VPBBsToFix in non VPlan-native path"); + BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; + assert(BB && "Unexpected null basic block for VPBB"); + + unsigned Idx = 0; + auto *BBTerminator = BB->getTerminator(); + + for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { + VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); + BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); + ++Idx; + } + } + + // 3. Merge the temporary latch created with the last basic-block filled. + BasicBlock *LastBB = State->CFG.PrevBB; + // Connect LastBB to VectorLatchBB to facilitate their merge. + assert((EnableVPlanNativePath || + isa<UnreachableInst>(LastBB->getTerminator())) && + "Expected InnerLoop VPlan CFG to terminate with unreachable"); + assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) && + "Expected VPlan CFG to terminate with branch in NativePath"); + LastBB->getTerminator()->eraseFromParent(); + BranchInst::Create(VectorLatchBB, LastBB); + + // Merge LastBB with Latch. + bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); + (void)Merged; + assert(Merged && "Could not merge last basic block with latch."); + VectorLatchBB = LastBB; + + // We do not attempt to preserve DT for outer loop vectorization currently. + if (!EnableVPlanNativePath) + updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); +} + +void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, + BasicBlock *LoopLatchBB) { + BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); + assert(LoopHeaderBB && "Loop preheader does not have a single successor."); + DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); + // The vector body may be more than a single basic-block by this point. + // Update the dominator tree information inside the vector body by propagating + // it from header to latch, expecting only triangular control-flow, if any. + BasicBlock *PostDomSucc = nullptr; + for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { + // Get the list of successors of this block. + std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); + assert(Succs.size() <= 2 && + "Basic block in vector loop has more than 2 successors."); + PostDomSucc = Succs[0]; + if (Succs.size() == 1) { + assert(PostDomSucc->getSinglePredecessor() && + "PostDom successor has more than one predecessor."); + DT->addNewBlock(PostDomSucc, BB); + continue; + } + BasicBlock *InterimSucc = Succs[1]; + if (PostDomSucc->getSingleSuccessor() == InterimSucc) { + PostDomSucc = Succs[1]; + InterimSucc = Succs[0]; + } + assert(InterimSucc->getSingleSuccessor() == PostDomSucc && + "One successor of a basic block does not lead to the other."); + assert(InterimSucc->getSinglePredecessor() && + "Interim successor has more than one predecessor."); + assert(PostDomSucc->hasNPredecessors(2) && + "PostDom successor has more than two predecessors."); + DT->addNewBlock(InterimSucc, BB); + DT->addNewBlock(PostDomSucc, BB); + } +} + +const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { + return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + + Twine(getOrCreateBID(Block)); +} + +const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { + const std::string &Name = Block->getName(); + if (!Name.empty()) + return Name; + return "VPB" + Twine(getOrCreateBID(Block)); +} + +void VPlanPrinter::dump() { + Depth = 1; + bumpIndent(0); + OS << "digraph VPlan {\n"; + OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; + if (!Plan.getName().empty()) + OS << "\\n" << DOT::EscapeString(Plan.getName()); + if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) { + OS << ", where:"; + if (Plan.BackedgeTakenCount) + OS << "\\n" + << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount"; + for (auto Entry : Plan.Value2VPValue) { + OS << "\\n" << *Entry.second; + OS << DOT::EscapeString(" := "); + Entry.first->printAsOperand(OS, false); + } + } + OS << "\"]\n"; + OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; + OS << "edge [fontname=Courier, fontsize=30]\n"; + OS << "compound=true\n"; + + for (VPBlockBase *Block : depth_first(Plan.getEntry())) + dumpBlock(Block); + + OS << "}\n"; +} + +void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { + if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) + dumpBasicBlock(BasicBlock); + else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + dumpRegion(Region); + else + llvm_unreachable("Unsupported kind of VPBlock."); +} + +void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, + bool Hidden, const Twine &Label) { + // Due to "dot" we print an edge between two regions as an edge between the + // exit basic block and the entry basic of the respective regions. + const VPBlockBase *Tail = From->getExitBasicBlock(); + const VPBlockBase *Head = To->getEntryBasicBlock(); + OS << Indent << getUID(Tail) << " -> " << getUID(Head); + OS << " [ label=\"" << Label << '\"'; + if (Tail != From) + OS << " ltail=" << getUID(From); + if (Head != To) + OS << " lhead=" << getUID(To); + if (Hidden) + OS << "; splines=none"; + OS << "]\n"; +} + +void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { + auto &Successors = Block->getSuccessors(); + if (Successors.size() == 1) + drawEdge(Block, Successors.front(), false, ""); + else if (Successors.size() == 2) { + drawEdge(Block, Successors.front(), false, "T"); + drawEdge(Block, Successors.back(), false, "F"); + } else { + unsigned SuccessorNumber = 0; + for (auto *Successor : Successors) + drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); + } +} + +void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { + OS << Indent << getUID(BasicBlock) << " [label =\n"; + bumpIndent(1); + OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; + bumpIndent(1); + + // Dump the block predicate. + const VPValue *Pred = BasicBlock->getPredicate(); + if (Pred) { + OS << " +\n" << Indent << " \"BlockPredicate: "; + if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) { + PredI->printAsOperand(OS); + OS << " (" << DOT::EscapeString(PredI->getParent()->getName()) + << ")\\l\""; + } else + Pred->printAsOperand(OS); + } + + for (const VPRecipeBase &Recipe : *BasicBlock) + Recipe.print(OS, Indent); + + // Dump the condition bit. + const VPValue *CBV = BasicBlock->getCondBit(); + if (CBV) { + OS << " +\n" << Indent << " \"CondBit: "; + if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) { + CBI->printAsOperand(OS); + OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\""; + } else { + CBV->printAsOperand(OS); + OS << "\""; + } + } + + bumpIndent(-2); + OS << "\n" << Indent << "]\n"; + dumpEdges(BasicBlock); +} + +void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { + OS << Indent << "subgraph " << getUID(Region) << " {\n"; + bumpIndent(1); + OS << Indent << "fontname=Courier\n" + << Indent << "label=\"" + << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") + << DOT::EscapeString(Region->getName()) << "\"\n"; + // Dump the blocks of the region. + assert(Region->getEntry() && "Region contains no inner blocks."); + for (const VPBlockBase *Block : depth_first(Region->getEntry())) + dumpBlock(Block); + bumpIndent(-1); + OS << Indent << "}\n"; + dumpEdges(Region); +} + +void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { + std::string IngredientString; + raw_string_ostream RSO(IngredientString); + if (auto *Inst = dyn_cast<Instruction>(V)) { + if (!Inst->getType()->isVoidTy()) { + Inst->printAsOperand(RSO, false); + RSO << " = "; + } + RSO << Inst->getOpcodeName() << " "; + unsigned E = Inst->getNumOperands(); + if (E > 0) { + Inst->getOperand(0)->printAsOperand(RSO, false); + for (unsigned I = 1; I < E; ++I) + Inst->getOperand(I)->printAsOperand(RSO << ", ", false); + } + } else // !Inst + V->printAsOperand(RSO, false); + RSO.flush(); + O << DOT::EscapeString(IngredientString); +} + +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN\\l\""; + for (auto &Instr : make_range(Begin, End)) + O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\""; +} + +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, + const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN-INDUCTION"; + if (Trunc) { + O << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc) << "\\l\""; + } else + O << " " << VPlanIngredient(IV) << "\\l\""; +} + +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; +} + +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"BLEND "; + Phi->printAsOperand(O, false); + O << " ="; + if (!User) { + // Not a User of any mask: not really blending, this is a + // single-predecessor phi. + O << " "; + Phi->getIncomingValue(0)->printAsOperand(O, false); + } else { + for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { + O << " "; + Phi->getIncomingValue(I)->printAsOperand(O, false); + O << "/"; + User->getOperand(I)->printAsOperand(O); + } + } + O << "\\l\""; +} + +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" + << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") + << VPlanIngredient(Ingredient); + if (AlsoPack) + O << " (S->V)"; + O << "\\l\""; +} + +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" + << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) + << "\\l\""; +} + +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, + const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); + if (User) { + O << ", "; + User->getOperand(0)->printAsOperand(O); + } + O << "\\l\""; +} + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); + +void VPValue::replaceAllUsesWith(VPValue *New) { + for (VPUser *User : users()) + for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) + if (User->getOperand(I) == this) + User->setOperand(I, New); +} + +void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, + Old2NewTy &Old2New, + InterleavedAccessInfo &IAI) { + ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry()); + for (VPBlockBase *Base : RPOT) { + visitBlock(Base, Old2New, IAI); + } +} + +void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, + InterleavedAccessInfo &IAI) { + if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { + for (VPRecipeBase &VPI : *VPBB) { + assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions"); + auto *VPInst = cast<VPInstruction>(&VPI); + auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue()); + auto *IG = IAI.getInterleaveGroup(Inst); + if (!IG) + continue; + + auto NewIGIter = Old2New.find(IG); + if (NewIGIter == Old2New.end()) + Old2New[IG] = new InterleaveGroup<VPInstruction>( + IG->getFactor(), IG->isReverse(), Align(IG->getAlignment())); + + if (Inst == IG->getInsertPos()) + Old2New[IG]->setInsertPos(VPInst); + + InterleaveGroupMap[VPInst] = Old2New[IG]; + InterleaveGroupMap[VPInst]->insertMember( + VPInst, IG->getIndex(Inst), + Align(IG->isReverse() ? (-1) * int(IG->getFactor()) + : IG->getFactor())); + } + } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) + visitRegion(Region, Old2New, IAI); + else + llvm_unreachable("Unsupported kind of VPBlock."); +} + +VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, + InterleavedAccessInfo &IAI) { + Old2NewTy Old2New; + visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI); +} |
