diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 | 
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Vectorize/VPlan.cpp | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Notes
Diffstat (limited to 'lib/Transforms/Vectorize/VPlan.cpp')
| -rw-r--r-- | lib/Transforms/Vectorize/VPlan.cpp | 557 | 
1 files changed, 557 insertions, 0 deletions
| diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp new file mode 100644 index 000000000000..4e54fc6db2a5 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -0,0 +1,557 @@ +//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the LLVM vectorization plan. It represents a candidate for +/// vectorization, allowing to plan and optimize how to vectorize a given loop +/// before generating LLVM-IR. +/// The vectorizer uses vectorization plans to estimate the costs of potential +/// candidates and if profitable to execute the desired plan, generating vector +/// LLVM-IR code. +/// +//===----------------------------------------------------------------------===// + +#include "VPlan.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <cassert> +#include <iterator> +#include <string> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "vplan" + +raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { +  if (const VPInstruction *Instr = dyn_cast<VPInstruction>(&V)) +    Instr->print(OS); +  else +    V.printAsOperand(OS); +  return OS; +} + +/// \return the VPBasicBlock that is the entry of Block, possibly indirectly. +const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { +  const VPBlockBase *Block = this; +  while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) +    Block = Region->getEntry(); +  return cast<VPBasicBlock>(Block); +} + +VPBasicBlock *VPBlockBase::getEntryBasicBlock() { +  VPBlockBase *Block = this; +  while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) +    Block = Region->getEntry(); +  return cast<VPBasicBlock>(Block); +} + +/// \return the VPBasicBlock that is the exit of Block, possibly indirectly. +const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { +  const VPBlockBase *Block = this; +  while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) +    Block = Region->getExit(); +  return cast<VPBasicBlock>(Block); +} + +VPBasicBlock *VPBlockBase::getExitBasicBlock() { +  VPBlockBase *Block = this; +  while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) +    Block = Region->getExit(); +  return cast<VPBasicBlock>(Block); +} + +VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { +  if (!Successors.empty() || !Parent) +    return this; +  assert(Parent->getExit() == this && +         "Block w/o successors not the exit of its parent."); +  return Parent->getEnclosingBlockWithSuccessors(); +} + +VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { +  if (!Predecessors.empty() || !Parent) +    return this; +  assert(Parent->getEntry() == this && +         "Block w/o predecessors not the entry of its parent."); +  return Parent->getEnclosingBlockWithPredecessors(); +} + +void VPBlockBase::deleteCFG(VPBlockBase *Entry) { +  SmallVector<VPBlockBase *, 8> Blocks; +  for (VPBlockBase *Block : depth_first(Entry)) +    Blocks.push_back(Block); + +  for (VPBlockBase *Block : Blocks) +    delete Block; +} + +BasicBlock * +VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { +  // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. +  // Pred stands for Predessor. Prev stands for Previous - last visited/created. +  BasicBlock *PrevBB = CFG.PrevBB; +  BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), +                                         PrevBB->getParent(), CFG.LastBB); +  DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); + +  // Hook up the new basic block to its predecessors. +  for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { +    VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); +    auto &PredVPSuccessors = PredVPBB->getSuccessors(); +    BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; +    assert(PredBB && "Predecessor basic-block not found building successor."); +    auto *PredBBTerminator = PredBB->getTerminator(); +    DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); +    if (isa<UnreachableInst>(PredBBTerminator)) { +      assert(PredVPSuccessors.size() == 1 && +             "Predecessor ending w/o branch must have single successor."); +      PredBBTerminator->eraseFromParent(); +      BranchInst::Create(NewBB, PredBB); +    } else { +      assert(PredVPSuccessors.size() == 2 && +             "Predecessor ending with branch must have two successors."); +      unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; +      assert(!PredBBTerminator->getSuccessor(idx) && +             "Trying to reset an existing successor block."); +      PredBBTerminator->setSuccessor(idx, NewBB); +    } +  } +  return NewBB; +} + +void VPBasicBlock::execute(VPTransformState *State) { +  bool Replica = State->Instance && +                 !(State->Instance->Part == 0 && State->Instance->Lane == 0); +  VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; +  VPBlockBase *SingleHPred = nullptr; +  BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. + +  // 1. Create an IR basic block, or reuse the last one if possible. +  // The last IR basic block is reused, as an optimization, in three cases: +  // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; +  // B. when the current VPBB has a single (hierarchical) predecessor which +  //    is PrevVPBB and the latter has a single (hierarchical) successor; and +  // C. when the current VPBB is an entry of a region replica - where PrevVPBB +  //    is the exit of this region from a previous instance, or the predecessor +  //    of this region. +  if (PrevVPBB && /* A */ +      !((SingleHPred = getSingleHierarchicalPredecessor()) && +        SingleHPred->getExitBasicBlock() == PrevVPBB && +        PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ +      !(Replica && getPredecessors().empty())) {       /* C */ +    NewBB = createEmptyBasicBlock(State->CFG); +    State->Builder.SetInsertPoint(NewBB); +    // Temporarily terminate with unreachable until CFG is rewired. +    UnreachableInst *Terminator = State->Builder.CreateUnreachable(); +    State->Builder.SetInsertPoint(Terminator); +    // Register NewBB in its loop. In innermost loops its the same for all BB's. +    Loop *L = State->LI->getLoopFor(State->CFG.LastBB); +    L->addBasicBlockToLoop(NewBB, *State->LI); +    State->CFG.PrevBB = NewBB; +  } + +  // 2. Fill the IR basic block with IR instructions. +  DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() +               << " in BB:" << NewBB->getName() << '\n'); + +  State->CFG.VPBB2IRBB[this] = NewBB; +  State->CFG.PrevVPBB = this; + +  for (VPRecipeBase &Recipe : Recipes) +    Recipe.execute(*State); + +  DEBUG(dbgs() << "LV: filled BB:" << *NewBB); +} + +void VPRegionBlock::execute(VPTransformState *State) { +  ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); + +  if (!isReplicator()) { +    // Visit the VPBlocks connected to "this", starting from it. +    for (VPBlockBase *Block : RPOT) { +      DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); +      Block->execute(State); +    } +    return; +  } + +  assert(!State->Instance && "Replicating a Region with non-null instance."); + +  // Enter replicating mode. +  State->Instance = {0, 0}; + +  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { +    State->Instance->Part = Part; +    for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { +      State->Instance->Lane = Lane; +      // Visit the VPBlocks connected to \p this, starting from it. +      for (VPBlockBase *Block : RPOT) { +        DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); +        Block->execute(State); +      } +    } +  } + +  // Exit replicating mode. +  State->Instance.reset(); +} + +void VPInstruction::generateInstruction(VPTransformState &State, +                                        unsigned Part) { +  IRBuilder<> &Builder = State.Builder; + +  if (Instruction::isBinaryOp(getOpcode())) { +    Value *A = State.get(getOperand(0), Part); +    Value *B = State.get(getOperand(1), Part); +    Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); +    State.set(this, V, Part); +    return; +  } + +  switch (getOpcode()) { +  case VPInstruction::Not: { +    Value *A = State.get(getOperand(0), Part); +    Value *V = Builder.CreateNot(A); +    State.set(this, V, Part); +    break; +  } +  default: +    llvm_unreachable("Unsupported opcode for instruction"); +  } +} + +void VPInstruction::execute(VPTransformState &State) { +  assert(!State.Instance && "VPInstruction executing an Instance"); +  for (unsigned Part = 0; Part < State.UF; ++Part) +    generateInstruction(State, Part); +} + +void VPInstruction::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" << Indent << "\"EMIT "; +  print(O); +  O << "\\l\""; +} + +void VPInstruction::print(raw_ostream &O) const { +  printAsOperand(O); +  O << " = "; + +  switch (getOpcode()) { +  case VPInstruction::Not: +    O << "not"; +    break; +  default: +    O << Instruction::getOpcodeName(getOpcode()); +  } + +  for (const VPValue *Operand : operands()) { +    O << " "; +    Operand->printAsOperand(O); +  } +} + +/// Generate the code inside the body of the vectorized loop. Assumes a single +/// LoopVectorBody basic-block was created for this. Introduce additional +/// basic-blocks as needed, and fill them all. +void VPlan::execute(VPTransformState *State) { +  // 0. Set the reverse mapping from VPValues to Values for code generation. +  for (auto &Entry : Value2VPValue) +    State->VPValue2Value[Entry.second] = Entry.first; + +  BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; +  BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); +  assert(VectorHeaderBB && "Loop preheader does not have a single successor."); +  BasicBlock *VectorLatchBB = VectorHeaderBB; + +  // 1. Make room to generate basic-blocks inside loop body if needed. +  VectorLatchBB = VectorHeaderBB->splitBasicBlock( +      VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); +  Loop *L = State->LI->getLoopFor(VectorHeaderBB); +  L->addBasicBlockToLoop(VectorLatchBB, *State->LI); +  // Remove the edge between Header and Latch to allow other connections. +  // Temporarily terminate with unreachable until CFG is rewired. +  // Note: this asserts the generated code's assumption that +  // getFirstInsertionPt() can be dereferenced into an Instruction. +  VectorHeaderBB->getTerminator()->eraseFromParent(); +  State->Builder.SetInsertPoint(VectorHeaderBB); +  UnreachableInst *Terminator = State->Builder.CreateUnreachable(); +  State->Builder.SetInsertPoint(Terminator); + +  // 2. Generate code in loop body. +  State->CFG.PrevVPBB = nullptr; +  State->CFG.PrevBB = VectorHeaderBB; +  State->CFG.LastBB = VectorLatchBB; + +  for (VPBlockBase *Block : depth_first(Entry)) +    Block->execute(State); + +  // 3. Merge the temporary latch created with the last basic-block filled. +  BasicBlock *LastBB = State->CFG.PrevBB; +  // Connect LastBB to VectorLatchBB to facilitate their merge. +  assert(isa<UnreachableInst>(LastBB->getTerminator()) && +         "Expected VPlan CFG to terminate with unreachable"); +  LastBB->getTerminator()->eraseFromParent(); +  BranchInst::Create(VectorLatchBB, LastBB); + +  // Merge LastBB with Latch. +  bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); +  (void)Merged; +  assert(Merged && "Could not merge last basic block with latch."); +  VectorLatchBB = LastBB; + +  updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); +} + +void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, +                                BasicBlock *LoopLatchBB) { +  BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); +  assert(LoopHeaderBB && "Loop preheader does not have a single successor."); +  DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); +  // The vector body may be more than a single basic-block by this point. +  // Update the dominator tree information inside the vector body by propagating +  // it from header to latch, expecting only triangular control-flow, if any. +  BasicBlock *PostDomSucc = nullptr; +  for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { +    // Get the list of successors of this block. +    std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); +    assert(Succs.size() <= 2 && +           "Basic block in vector loop has more than 2 successors."); +    PostDomSucc = Succs[0]; +    if (Succs.size() == 1) { +      assert(PostDomSucc->getSinglePredecessor() && +             "PostDom successor has more than one predecessor."); +      DT->addNewBlock(PostDomSucc, BB); +      continue; +    } +    BasicBlock *InterimSucc = Succs[1]; +    if (PostDomSucc->getSingleSuccessor() == InterimSucc) { +      PostDomSucc = Succs[1]; +      InterimSucc = Succs[0]; +    } +    assert(InterimSucc->getSingleSuccessor() == PostDomSucc && +           "One successor of a basic block does not lead to the other."); +    assert(InterimSucc->getSinglePredecessor() && +           "Interim successor has more than one predecessor."); +    assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 && +           "PostDom successor has more than two predecessors."); +    DT->addNewBlock(InterimSucc, BB); +    DT->addNewBlock(PostDomSucc, BB); +  } +} + +const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { +  return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + +         Twine(getOrCreateBID(Block)); +} + +const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { +  const std::string &Name = Block->getName(); +  if (!Name.empty()) +    return Name; +  return "VPB" + Twine(getOrCreateBID(Block)); +} + +void VPlanPrinter::dump() { +  Depth = 1; +  bumpIndent(0); +  OS << "digraph VPlan {\n"; +  OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; +  if (!Plan.getName().empty()) +    OS << "\\n" << DOT::EscapeString(Plan.getName()); +  if (!Plan.Value2VPValue.empty()) { +    OS << ", where:"; +    for (auto Entry : Plan.Value2VPValue) { +      OS << "\\n" << *Entry.second; +      OS << DOT::EscapeString(" := "); +      Entry.first->printAsOperand(OS, false); +    } +  } +  OS << "\"]\n"; +  OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; +  OS << "edge [fontname=Courier, fontsize=30]\n"; +  OS << "compound=true\n"; + +  for (VPBlockBase *Block : depth_first(Plan.getEntry())) +    dumpBlock(Block); + +  OS << "}\n"; +} + +void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { +  if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) +    dumpBasicBlock(BasicBlock); +  else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) +    dumpRegion(Region); +  else +    llvm_unreachable("Unsupported kind of VPBlock."); +} + +void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, +                            bool Hidden, const Twine &Label) { +  // Due to "dot" we print an edge between two regions as an edge between the +  // exit basic block and the entry basic of the respective regions. +  const VPBlockBase *Tail = From->getExitBasicBlock(); +  const VPBlockBase *Head = To->getEntryBasicBlock(); +  OS << Indent << getUID(Tail) << " -> " << getUID(Head); +  OS << " [ label=\"" << Label << '\"'; +  if (Tail != From) +    OS << " ltail=" << getUID(From); +  if (Head != To) +    OS << " lhead=" << getUID(To); +  if (Hidden) +    OS << "; splines=none"; +  OS << "]\n"; +} + +void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { +  auto &Successors = Block->getSuccessors(); +  if (Successors.size() == 1) +    drawEdge(Block, Successors.front(), false, ""); +  else if (Successors.size() == 2) { +    drawEdge(Block, Successors.front(), false, "T"); +    drawEdge(Block, Successors.back(), false, "F"); +  } else { +    unsigned SuccessorNumber = 0; +    for (auto *Successor : Successors) +      drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); +  } +} + +void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { +  OS << Indent << getUID(BasicBlock) << " [label =\n"; +  bumpIndent(1); +  OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; +  bumpIndent(1); +  for (const VPRecipeBase &Recipe : *BasicBlock) +    Recipe.print(OS, Indent); +  bumpIndent(-2); +  OS << "\n" << Indent << "]\n"; +  dumpEdges(BasicBlock); +} + +void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { +  OS << Indent << "subgraph " << getUID(Region) << " {\n"; +  bumpIndent(1); +  OS << Indent << "fontname=Courier\n" +     << Indent << "label=\"" +     << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") +     << DOT::EscapeString(Region->getName()) << "\"\n"; +  // Dump the blocks of the region. +  assert(Region->getEntry() && "Region contains no inner blocks."); +  for (const VPBlockBase *Block : depth_first(Region->getEntry())) +    dumpBlock(Block); +  bumpIndent(-1); +  OS << Indent << "}\n"; +  dumpEdges(Region); +} + +void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { +  std::string IngredientString; +  raw_string_ostream RSO(IngredientString); +  if (auto *Inst = dyn_cast<Instruction>(V)) { +    if (!Inst->getType()->isVoidTy()) { +      Inst->printAsOperand(RSO, false); +      RSO << " = "; +    } +    RSO << Inst->getOpcodeName() << " "; +    unsigned E = Inst->getNumOperands(); +    if (E > 0) { +      Inst->getOperand(0)->printAsOperand(RSO, false); +      for (unsigned I = 1; I < E; ++I) +        Inst->getOperand(I)->printAsOperand(RSO << ", ", false); +    } +  } else // !Inst +    V->printAsOperand(RSO, false); +  RSO.flush(); +  O << DOT::EscapeString(IngredientString); +} + +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" << Indent << "\"WIDEN\\l\""; +  for (auto &Instr : make_range(Begin, End)) +    O << " +\n" << Indent << "\"  " << VPlanIngredient(&Instr) << "\\l\""; +} + +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, +                                          const Twine &Indent) const { +  O << " +\n" << Indent << "\"WIDEN-INDUCTION"; +  if (Trunc) { +    O << "\\l\""; +    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\""; +    O << " +\n" << Indent << "\"  " << VPlanIngredient(Trunc) << "\\l\""; +  } else +    O << " " << VPlanIngredient(IV) << "\\l\""; +} + +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; +} + +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" << Indent << "\"BLEND "; +  Phi->printAsOperand(O, false); +  O << " ="; +  if (!User) { +    // Not a User of any mask: not really blending, this is a +    // single-predecessor phi. +    O << " "; +    Phi->getIncomingValue(0)->printAsOperand(O, false); +  } else { +    for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { +      O << " "; +      Phi->getIncomingValue(I)->printAsOperand(O, false); +      O << "/"; +      User->getOperand(I)->printAsOperand(O); +    } +  } +  O << "\\l\""; +} + +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" +    << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") +    << VPlanIngredient(Ingredient); +  if (AlsoPack) +    O << " (S->V)"; +  O << "\\l\""; +} + +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +  O << " +\n" +    << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) +    << "\\l\""; +} + +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, +                                           const Twine &Indent) const { +  O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); +  if (User) { +    O << ", "; +    User->getOperand(0)->printAsOperand(O); +  } +  O << "\\l\""; +} | 
