summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-08-02 17:32:43 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-08-02 17:32:43 +0000
commitb7eb8e35e481a74962664b63dfb09483b200209a (patch)
tree1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/Transforms/Vectorize
parenteb11fae6d08f479c0799db45860a98af528fa6e7 (diff)
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp28
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--lib/Transforms/Vectorize/VPlan.cpp5
-rw-r--r--lib/Transforms/Vectorize/VPlan.h121
-rw-r--r--lib/Transforms/Vectorize/VPlanDominatorTree.h41
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp21
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.h23
-rw-r--r--lib/Transforms/Vectorize/VPlanLoopInfo.h45
8 files changed, 257 insertions, 35 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c693f5d5ee0..859d0c92ca5a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -535,13 +535,13 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// If there is a cast involved in the induction variable \p ID, which should
- /// be ignored in the vectorized loop body, this function records the
- /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
- /// cast. We had already proved that the casted Phi is equal to the uncasted
- /// Phi in the vectorized loop (under a runtime guard), and therefore
- /// there is no need to vectorize the cast - the same value can be used in the
- /// vector loop for both the Phi and the cast.
+ /// If there is a cast involved in the induction variable \p ID, which should
+ /// be ignored in the vectorized loop body, this function records the
+ /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+ /// cast. We had already proved that the casted Phi is equal to the uncasted
+ /// Phi in the vectorized loop (under a runtime guard), and therefore
+ /// there is no need to vectorize the cast - the same value can be used in the
+ /// vector loop for both the Phi and the cast.
/// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
/// Otherwise, \p VectorLoopValue is a widened/vectorized value.
///
@@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
// high enough value to practically disable vectorization with such
// operations, except where previously deployed legality hack allowed
// using very low cost values. This is to avoid regressions coming simply
- // from moving "masked load/store" check from legality to cost model.
+ // from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
assert(isScalarWithPredication(I) &&
@@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
}))
DeadInstructions.insert(IndUpdate);
- // We record as "Dead" also the type-casting instructions we had identified
+ // We record as "Dead" also the type-casting instructions we had identified
// during induction analysis. We don't need any handling for them in the
- // vectorized loop because we have proven that, under a proper runtime
- // test guarding the vectorized loop, the value of the phi, and the casted
+ // vectorized loop because we have proven that, under a proper runtime
+ // test guarding the vectorized loop, the value of the phi, and the casted
// value of the phi, are the same. The last instruction in this casting chain
- // will get its scalar/vector/widened def from the scalar/vector/widened def
+ // will get its scalar/vector/widened def from the scalar/vector/widened def
// of the respective phi node. Any other casts in the induction def-use chain
// have no other uses outside the phi update chain, and will be ignored.
InductionDescriptor &IndDes = Induction.second;
@@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto Plan = llvm::make_unique<VPlan>();
// Build hierarchical CFG
- VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
- HCFGBuilder.buildHierarchicalCFG(*Plan.get());
+ VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
+ HCFGBuilder.buildHierarchicalCFG();
return Plan;
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ac8c4f046c6f..5c2efe885e22 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {
}
/// \returns analysis of the Instructions in \p VL described in
-/// InstructionsState, the Opcode that we suppose the whole list
+/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex = 0) {
@@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
if (!E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
+ else if (auto *I = dyn_cast<Instruction>(V))
+ Builder.SetInsertPoint(I->getParent(),
+ std::next(I->getIterator()));
+ else
+ Builder.SetInsertPoint(&F->getEntryBlock(),
+ F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index f7b07b722bb1..0780e70809d0 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,7 +26,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -34,6 +34,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
O << "\\l\"";
}
+
+template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 866951cb79a4..883e6f52369a 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -26,8 +26,10 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
+#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -51,7 +53,6 @@ class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class InterleaveGroup;
-class LoopInfo;
class raw_ostream;
class Value;
class VPBasicBlock;
@@ -516,6 +517,23 @@ public:
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
+
+ void printAsOperand(raw_ostream &OS, bool PrintType) const {
+ OS << getName();
+ }
+
+ void print(raw_ostream &OS) const {
+ // TODO: Only printing VPBB name for now since we only have dot printing
+ // support for VPInstructions/Recipes.
+ printAsOperand(OS, false);
+ }
+
+ /// Return true if it is legal to hoist instructions into this block.
+ bool isLegalToHoistInto() {
+ // There are currently no constraints that prevent an instruction to be
+ // hoisted into a VPBlockBase.
+ return true;
+ }
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
@@ -1037,6 +1055,12 @@ public:
EntryBlock->setParent(this);
}
+ // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
+ // specific interface of llvm::Function, instead of using
+ // GraphTraints::getEntryNode. We should add a new template parameter to
+ // DominatorTreeBase representing the Graph type.
+ VPBlockBase &front() const { return *Entry; }
+
const VPBlockBase *getExit() const { return Exit; }
VPBlockBase *getExit() { return Exit; }
@@ -1087,6 +1111,9 @@ private:
/// VPlan.
Value2VPValueTy Value2VPValue;
+ /// Holds the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo VPLInfo;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
@@ -1133,6 +1160,10 @@ public:
return Value2VPValue[V];
}
+ /// Return the VPLoopInfo analysis for this VPlan.
+ VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
+ const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {
return OS;
}
-//===--------------------------------------------------------------------===//
-// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs //
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
+//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes...
+// The following set of template specializations implement GraphTraits to treat
+// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
+// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the
+// VPBlockBase is a VPRegionBlock, this specialization provides access to its
+// successors/predecessors but not to the blocks inside the region.
template <> struct GraphTraits<VPBlockBase *> {
using NodeRef = VPBlockBase *;
@@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {
}
};
-// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a
-// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order
-// for a VPBlockBase is considered to be when traversing the predecessors of a
-// VPBlockBase instead of its successors.
+// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
+// of successors for the inverse traversal.
template <> struct GraphTraits<Inverse<VPBlockBase *>> {
using NodeRef = VPBlockBase *;
using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
- static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) {
- return B;
- }
+ static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
static inline ChildIteratorType child_begin(NodeRef N) {
return N->getPredecessors().begin();
@@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {
}
};
+// The following set of template specializations implement GraphTraits to
+// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
+// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
+// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
+// there won't be automatic recursion into other VPBlockBases that turn to be
+// VPRegionBlocks.
+
+template <>
+struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<const VPRegionBlock *>
+ : public GraphTraits<const VPBlockBase *> {
+ using GraphRef = const VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getEntry());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<Inverse<VPRegionBlock *>>
+ : public GraphTraits<Inverse<VPBlockBase *>> {
+ using GraphRef = VPRegionBlock *;
+ using nodes_iterator = df_iterator<NodeRef>;
+
+ static NodeRef getEntryNode(Inverse<GraphRef> N) {
+ return N.Graph->getExit();
+ }
+
+ static nodes_iterator nodes_begin(GraphRef N) {
+ return nodes_iterator::begin(N->getExit());
+ }
+
+ static nodes_iterator nodes_end(GraphRef N) {
+ // df_iterator::end() returns an empty iterator so the node used doesn't
+ // matter.
+ return nodes_iterator::end(N);
+ }
+};
+
//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
new file mode 100644
index 000000000000..1b81097b6d31
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -0,0 +1,41 @@
+//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements dominator tree analysis for a single level of a VPlan's
+/// H-CFG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
+
+#include "VPlan.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/IR/Dominators.h"
+
+namespace llvm {
+
+/// Template specialization of the standard LLVM dominator tree utility for
+/// VPBlockBases.
+using VPDominatorTree = DomTreeBase<VPBlockBase>;
+
+using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>;
+
+/// Template specializations of GraphTraits for VPDomTreeNode.
+template <>
+struct GraphTraits<VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {};
+
+template <>
+struct GraphTraits<const VPDomTreeNode *>
+ : public DomTreeGraphTraitsBase<const VPDomTreeNode,
+ VPDomTreeNode::const_iterator> {};
+} // namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 08129b74cddf..b6307acb9474 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
return TopRegion;
}
+VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
+ PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
+ return PCFGBuilder.buildPlainCFG();
+}
+
// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) {
+void VPlanHCFGBuilder::buildHierarchicalCFG() {
// Build Top Region enclosing the plain CFG and set it as VPlan entry.
- PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
- VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG();
+ VPRegionBlock *TopRegion = buildPlainCFG();
Plan.setEntry(TopRegion);
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
Verifier.verifyHierarchicalCFG(TopRegion);
+
+ // Compute plain CFG dom tree for VPLInfo.
+ VPDomTree.recalculate(*TopRegion);
+ LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
+ VPDomTree.print(dbgs()));
+
+ // Compute VPLInfo and keep it in Plan.
+ VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
+ VPLInfo.analyze(VPDomTree);
+ LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
+ VPLInfo.print(dbgs()));
}
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index c4e69843615a..3f11dcb5164d 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -26,14 +26,18 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
#include "VPlan.h"
+#include "VPlanDominatorTree.h"
#include "VPlanVerifier.h"
namespace llvm {
class Loop;
+class VPlanTestBase;
/// Main class to build the VPlan H-CFG for an incoming IR.
class VPlanHCFGBuilder {
+ friend VPlanTestBase;
+
private:
// The outermost loop of the input loop nest considered for vectorization.
Loop *TheLoop;
@@ -41,14 +45,27 @@ private:
// Loop Info analysis.
LoopInfo *LI;
+ // The VPlan that will contain the H-CFG we are building.
+ VPlan &Plan;
+
// VPlan verifier utility.
VPlanVerifier Verifier;
+ // Dominator analysis for VPlan plain CFG to be used in the
+ // construction of the H-CFG. This analysis is no longer valid once regions
+ // are introduced.
+ VPDominatorTree VPDomTree;
+
+ /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion)
+ /// enclosing the plain CFG.
+ VPRegionBlock *buildPlainCFG();
+
public:
- VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {}
+ VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
+ : TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update \p Plan accordingly.
- void buildHierarchicalCFG(VPlan &Plan);
+ /// Build H-CFG for TheLoop and update Plan accordingly.
+ void buildHierarchicalCFG();
};
} // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Hold analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+ friend class LoopInfoBase<VPBlockBase, VPLoop>;
+ explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H