diff options
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 28 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlan.cpp | 5 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlan.h | 121 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlanDominatorTree.h | 41 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 21 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlanHCFGBuilder.h | 23 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/VPlanLoopInfo.h | 45 |
8 files changed, 257 insertions, 35 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected: /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; - /// If there is a cast involved in the induction variable \p ID, which should - /// be ignored in the vectorized loop body, this function records the - /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the - /// cast. We had already proved that the casted Phi is equal to the uncasted - /// Phi in the vectorized loop (under a runtime guard), and therefore - /// there is no need to vectorize the cast - the same value can be used in the - /// vector loop for both the Phi and the cast. + /// If there is a cast involved in the induction variable \p ID, which should + /// be ignored in the vectorized loop body, this function records the + /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the + /// cast. We had already proved that the casted Phi is equal to the uncasted + /// Phi in the vectorized loop (under a runtime guard), and therefore + /// there is no need to vectorize the cast - the same value can be used in the + /// vector loop for both the Phi and the cast. /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, /// Otherwise, \p VectorLoopValue is a widened/vectorized value. /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){ // high enough value to practically disable vectorization with such // operations, except where previously deployed legality hack allowed // using very low cost values. This is to avoid regressions coming simply - // from moving "masked load/store" check from legality to cost model. + // from moving "masked load/store" check from legality to cost model. // Masked Load/Gather emulation was previously never allowed. // Limited number of Masked Store/Scatter emulation was allowed. assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( })) DeadInstructions.insert(IndUpdate); - // We record as "Dead" also the type-casting instructions we had identified + // We record as "Dead" also the type-casting instructions we had identified // during induction analysis. We don't need any handling for them in the - // vectorized loop because we have proven that, under a proper runtime - // test guarding the vectorized loop, the value of the phi, and the casted + // vectorized loop because we have proven that, under a proper runtime + // test guarding the vectorized loop, the value of the phi, and the casted // value of the phi, are the same. The last instruction in this casting chain - // will get its scalar/vector/widened def from the scalar/vector/widened def + // will get its scalar/vector/widened def from the scalar/vector/widened def // of the respective phi node. Any other casts in the induction def-use chain // have no other uses outside the phi update chain, and will be ignored. InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto Plan = llvm::make_unique<VPlan>(); // Build hierarchical CFG - VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); - HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); + HCFGBuilder.buildHierarchicalCFG(); return Plan; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) { } /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list +/// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. static InstructionsState getSameOpcode(ArrayRef<Value *> VL, unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // TODO: Merge this shuffle with the ReorderShuffleMask. if (!E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); + else if (auto *I = dyn_cast<Instruction>(V)) + Builder.SetInsertPoint(I->getParent(), + std::next(I->getIterator())); + else + Builder.SetInsertPoint(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, } O << "\\l\""; } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock; class DominatorTree; class InnerLoopVectorizer; class InterleaveGroup; -class LoopInfo; class raw_ostream; class Value; class VPBasicBlock; @@ -516,6 +517,23 @@ public: /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); + + void printAsOperand(raw_ostream &OS, bool PrintType) const { + OS << getName(); + } + + void print(raw_ostream &OS) const { + // TODO: Only printing VPBB name for now since we only have dot printing + // support for VPInstructions/Recipes. + printAsOperand(OS, false); + } + + /// Return true if it is legal to hoist instructions into this block. + bool isLegalToHoistInto() { + // There are currently no constraints that prevent an instruction to be + // hoisted into a VPBlockBase. + return true; + } }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public: EntryBlock->setParent(this); } + // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a + // specific interface of llvm::Function, instead of using + // GraphTraints::getEntryNode. We should add a new template parameter to + // DominatorTreeBase representing the Graph type. + VPBlockBase &front() const { return *Entry; } + const VPBlockBase *getExit() const { return Exit; } VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private: /// VPlan. Value2VPValueTy Value2VPValue; + /// Holds the VPLoopInfo analysis for this VPlan. + VPLoopInfo VPLInfo; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public: return Value2VPValue[V]; } + /// Return the VPLoopInfo analysis for this VPlan. + VPLoopInfo &getVPLoopInfo() { return VPLInfo; } + const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { return OS; } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region. template <> struct GraphTraits<VPBlockBase *> { using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> { } }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal. template <> struct GraphTraits<Inverse<VPBlockBase *>> { using NodeRef = VPBlockBase *; using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; - static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { - return B; - } + static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; } static inline ChildIteratorType child_begin(NodeRef N) { return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> { } }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. + +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> + : public GraphTraits<const VPBlockBase *> { + using GraphRef = const VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> + : public GraphTraits<Inverse<VPBlockBase *>> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(Inverse<GraphRef> N) { + return N.Graph->getExit(); + } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getExit()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + //===----------------------------------------------------------------------===// // VPlan Utilities //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. +template <> +struct GraphTraits<VPDomTreeNode *> + : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> + : public DomTreeGraphTraitsBase<const VPDomTreeNode, + VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { return TopRegion; } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { + PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); + return PCFGBuilder.buildPlainCFG(); +} + // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() { // Build Top Region enclosing the plain CFG and set it as VPlan entry. - PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); - VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); + VPRegionBlock *TopRegion = buildPlainCFG(); Plan.setEntry(TopRegion); LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); Verifier.verifyHierarchicalCFG(TopRegion); + + // Compute plain CFG dom tree for VPLInfo. + VPDomTree.recalculate(*TopRegion); + LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; + VPDomTree.print(dbgs())); + + // Compute VPLInfo and keep it in Plan. + VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); + VPLInfo.analyze(VPDomTree); + LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; + VPLInfo.print(dbgs())); } diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@ #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "VPlanVerifier.h" namespace llvm { class Loop; +class VPlanTestBase; /// Main class to build the VPlan H-CFG for an incoming IR. class VPlanHCFGBuilder { + friend VPlanTestBase; + private: // The outermost loop of the input loop nest considered for vectorization. Loop *TheLoop; @@ -41,14 +45,27 @@ private: // Loop Info analysis. LoopInfo *LI; + // The VPlan that will contain the H-CFG we are building. + VPlan &Plan; + // VPlan verifier utility. VPlanVerifier Verifier; + // Dominator analysis for VPlan plain CFG to be used in the + // construction of the H-CFG. This analysis is no longer valid once regions + // are introduced. + VPDominatorTree VPDomTree; + + /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) + /// enclosing the plain CFG. + VPRegionBlock *buildPlainCFG(); + public: - VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) + : TheLoop(Lp), LI(LI), Plan(P) {} - /// Build H-CFG for TheLoop and update \p Plan accordingly. - void buildHierarchicalCFG(VPlan &Plan); + /// Build H-CFG for TheLoop and update Plan accordingly. + void buildHierarchicalCFG(); }; } // namespace llvm diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h new file mode 100644 index 000000000000..5c2485fc2145 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h @@ -0,0 +1,45 @@ +//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a +/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization +/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further +/// information can be found in VectorizationPlanner.rst. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H + +#include "llvm/Analysis/LoopInfoImpl.h" + +namespace llvm { +class VPBlockBase; + +/// Hold analysis information for every loop detected by VPLoopInfo. It is an +/// instantiation of LoopBase. +class VPLoop : public LoopBase<VPBlockBase, VPLoop> { +private: + friend class LoopInfoBase<VPBlockBase, VPLoop>; + explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {} +}; + +/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based +/// Hierarchical CFG. It is a specialization of LoopInfoBase class. +// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which +// is the same as the incoming IR CFG. If it's more efficient than running the +// whole loop detection algorithm, we may want to create a mechanism to +// translate LoopInfo into VPLoopInfo. However, that would require significant +// changes in LoopInfoBase class. +typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H |