diff options
Diffstat (limited to 'lib/Transforms/Vectorize')
| -rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 28 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlan.cpp | 5 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlan.h | 121 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlanDominatorTree.h | 41 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 21 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlanHCFGBuilder.h | 23 | ||||
| -rw-r--r-- | lib/Transforms/Vectorize/VPlanLoopInfo.h | 45 | 
8 files changed, 257 insertions, 35 deletions
| diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected:    /// Returns true if we should generate a scalar version of \p IV.    bool needsScalarInduction(Instruction *IV) const; -  /// If there is a cast involved in the induction variable \p ID, which should  -  /// be ignored in the vectorized loop body, this function records the  -  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the  -  /// cast. We had already proved that the casted Phi is equal to the uncasted  -  /// Phi in the vectorized loop (under a runtime guard), and therefore  -  /// there is no need to vectorize the cast - the same value can be used in the  -  /// vector loop for both the Phi and the cast.  +  /// If there is a cast involved in the induction variable \p ID, which should +  /// be ignored in the vectorized loop body, this function records the +  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the +  /// cast. We had already proved that the casted Phi is equal to the uncasted +  /// Phi in the vectorized loop (under a runtime guard), and therefore +  /// there is no need to vectorize the cast - the same value can be used in the +  /// vector loop for both the Phi and the cast.    /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,    /// Otherwise, \p VectorLoopValue is a widened/vectorized value.    /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){    // high enough value to practically disable vectorization with such    // operations, except where previously deployed legality hack allowed    // using very low cost values. This is to avoid regressions coming simply -  // from moving "masked load/store" check from legality to cost model.  +  // from moving "masked load/store" check from legality to cost model.    // Masked Load/Gather emulation was previously never allowed.    // Limited number of Masked Store/Scatter emulation was allowed.    assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(          }))        DeadInstructions.insert(IndUpdate); -    // We record as "Dead" also the type-casting instructions we had identified  +    // We record as "Dead" also the type-casting instructions we had identified      // during induction analysis. We don't need any handling for them in the -    // vectorized loop because we have proven that, under a proper runtime  -    // test guarding the vectorized loop, the value of the phi, and the casted  +    // vectorized loop because we have proven that, under a proper runtime +    // test guarding the vectorized loop, the value of the phi, and the casted      // value of the phi, are the same. The last instruction in this casting chain -    // will get its scalar/vector/widened def from the scalar/vector/widened def  +    // will get its scalar/vector/widened def from the scalar/vector/widened def      // of the respective phi node. Any other casts in the induction def-use chain      // have no other uses outside the phi update chain, and will be ignored.      InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {    auto Plan = llvm::make_unique<VPlan>();    // Build hierarchical CFG -  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); -  HCFGBuilder.buildHierarchicalCFG(*Plan.get()); +  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); +  HCFGBuilder.buildHierarchicalCFG();    return Plan;  } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {  }  /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list  +/// InstructionsState, the Opcode that we suppose the whole list  /// could be vectorized even if its structure is diverse.  static InstructionsState getSameOpcode(ArrayRef<Value *> VL,                                         unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {            // TODO: Merge this shuffle with the ReorderShuffleMask.            if (!E->ReorderIndices.empty())              Builder.SetInsertPoint(VL0); +          else if (auto *I = dyn_cast<Instruction>(V)) +            Builder.SetInsertPoint(I->getParent(), +                                   std::next(I->getIterator())); +          else +            Builder.SetInsertPoint(&F->getEntryBlock(), +                                   F->getEntryBlock().getFirstInsertionPt());            V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),                                            E->ReuseShuffleIndices, "shuffle");          } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@  //===----------------------------------------------------------------------===//  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h"  #include "llvm/IR/InstrTypes.h"  #include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@  #include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h"  #include "llvm/Support/GraphWriter.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,    }    O << "\\l\"";  } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@  #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h"  #include "VPlanValue.h"  #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/GraphTraits.h"  #include "llvm/ADT/Optional.h"  #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock;  class DominatorTree;  class InnerLoopVectorizer;  class InterleaveGroup; -class LoopInfo;  class raw_ostream;  class Value;  class VPBasicBlock; @@ -516,6 +517,23 @@ public:    /// Delete all blocks reachable from a given VPBlockBase, inclusive.    static void deleteCFG(VPBlockBase *Entry); + +  void printAsOperand(raw_ostream &OS, bool PrintType) const { +    OS << getName(); +  } + +  void print(raw_ostream &OS) const { +    // TODO: Only printing VPBB name for now since we only have dot printing +    // support for VPInstructions/Recipes. +    printAsOperand(OS, false); +  } + +  /// Return true if it is legal to hoist instructions into this block. +  bool isLegalToHoistInto() { +    // There are currently no constraints that prevent an instruction to be +    // hoisted into a VPBlockBase. +    return true; +  }  };  /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public:      EntryBlock->setParent(this);    } +  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a +  // specific interface of llvm::Function, instead of using +  // GraphTraints::getEntryNode. We should add a new template parameter to +  // DominatorTreeBase representing the Graph type. +  VPBlockBase &front() const { return *Entry; } +    const VPBlockBase *getExit() const { return Exit; }    VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private:    /// VPlan.    Value2VPValueTy Value2VPValue; +  /// Holds the VPLoopInfo analysis for this VPlan. +  VPLoopInfo VPLInfo; +  public:    VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public:      return Value2VPValue[V];    } +  /// Return the VPLoopInfo analysis for this VPlan. +  VPLoopInfo &getVPLoopInfo() { return VPLInfo; } +  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } +  private:    /// Add to the given dominator tree the header block and every new basic block    /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {    return OS;  } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs  // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region.  template <> struct GraphTraits<VPBlockBase *> {    using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {    }  }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal.  template <> struct GraphTraits<Inverse<VPBlockBase *>> {    using NodeRef = VPBlockBase *;    using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; -  static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { -    return B; -  } +  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }    static inline ChildIteratorType child_begin(NodeRef N) {      return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {    }  }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. + +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> +    : public GraphTraits<const VPBlockBase *> { +  using GraphRef = const VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> +    : public GraphTraits<Inverse<VPBlockBase *>> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(Inverse<GraphRef> N) { +    return N.Graph->getExit(); +  } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getExit()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; +  //===----------------------------------------------------------------------===//  // VPlan Utilities  //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. +template <> +struct GraphTraits<VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<const VPDomTreeNode, +                                    VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {    return TopRegion;  } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { +  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); +  return PCFGBuilder.buildPlainCFG(); +} +  // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() {    // Build Top Region enclosing the plain CFG and set it as VPlan entry. -  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); -  VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); +  VPRegionBlock *TopRegion = buildPlainCFG();    Plan.setEntry(TopRegion);    LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);    Verifier.verifyHierarchicalCFG(TopRegion); + +  // Compute plain CFG dom tree for VPLInfo. +  VPDomTree.recalculate(*TopRegion); +  LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; +             VPDomTree.print(dbgs())); + +  // Compute VPLInfo and keep it in Plan. +  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); +  VPLInfo.analyze(VPDomTree); +  LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; +             VPLInfo.print(dbgs()));  } diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "VPlanVerifier.h"  namespace llvm {  class Loop; +class VPlanTestBase;  /// Main class to build the VPlan H-CFG for an incoming IR.  class VPlanHCFGBuilder { +  friend VPlanTestBase; +  private:    // The outermost loop of the input loop nest considered for vectorization.    Loop *TheLoop; @@ -41,14 +45,27 @@ private:    // Loop Info analysis.    LoopInfo *LI; +  // The VPlan that will contain the H-CFG we are building. +  VPlan &Plan; +    // VPlan verifier utility.    VPlanVerifier Verifier; +  // Dominator analysis for VPlan plain CFG to be used in the +  // construction of the H-CFG. This analysis is no longer valid once regions +  // are introduced. +  VPDominatorTree VPDomTree; + +  /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) +  /// enclosing the plain CFG. +  VPRegionBlock *buildPlainCFG(); +  public: -  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} +  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) +      : TheLoop(Lp), LI(LI), Plan(P) {} -  /// Build H-CFG for TheLoop and update \p Plan accordingly. -  void buildHierarchicalCFG(VPlan &Plan); +  /// Build H-CFG for TheLoop and update Plan accordingly. +  void buildHierarchicalCFG();  };  } // namespace llvm diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h new file mode 100644 index 000000000000..5c2485fc2145 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h @@ -0,0 +1,45 @@ +//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a +/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization +/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further +/// information can be found in VectorizationPlanner.rst. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H + +#include "llvm/Analysis/LoopInfoImpl.h" + +namespace llvm { +class VPBlockBase; + +/// Hold analysis information for every loop detected by VPLoopInfo. It is an +/// instantiation of LoopBase. +class VPLoop : public LoopBase<VPBlockBase, VPLoop> { +private: +  friend class LoopInfoBase<VPBlockBase, VPLoop>; +  explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {} +}; + +/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based +/// Hierarchical CFG. It is a specialization of LoopInfoBase class. +// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which +// is the same as the incoming IR CFG. If it's more efficient than running the +// whole loop detection algorithm, we may want to create a mechanism to +// translate LoopInfo into VPLoopInfo. However, that would require significant +// changes in LoopInfoBase class. +typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H | 
