diff options
Diffstat (limited to 'lib/Transforms/Vectorize/VPlan.h')
-rw-r--r-- | lib/Transforms/Vectorize/VPlan.h | 216 |
1 files changed, 209 insertions, 7 deletions
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 883e6f52369a..5c1b4a83c30e 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -38,6 +38,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include <algorithm> #include <cassert> @@ -52,12 +53,14 @@ class LoopVectorizationCostModel; class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -class InterleaveGroup; +template <class T> class InterleaveGroup; +class LoopInfo; class raw_ostream; class Value; class VPBasicBlock; class VPRegionBlock; class VPlan; +class VPlanSlp; /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: @@ -293,6 +296,10 @@ struct VPTransformState { /// of replication, maps the BasicBlock of the last replica created. SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB; + /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed + /// up at the end of vector code generation. + SmallVector<VPBasicBlock *, 8> VPBBsToFix; + CFGState() = default; } CFG; @@ -313,6 +320,9 @@ struct VPTransformState { /// Values they correspond to. VPValue2ValueTy VPValue2Value; + /// Hold the trip count of the scalar loop. + Value *TripCount = nullptr; + /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. InnerLoopVectorizer *ILV; @@ -600,10 +610,16 @@ public: /// the VPInstruction is also a single def-use vertex. class VPInstruction : public VPUser, public VPRecipeBase { friend class VPlanHCFGTransforms; + friend class VPlanSlp; public: /// VPlan opcodes, extending LLVM IR with idiomatics instructions. - enum { Not = Instruction::OtherOpsEnd + 1 }; + enum { + Not = Instruction::OtherOpsEnd + 1, + ICmpULE, + SLPLoad, + SLPStore, + }; private: typedef unsigned char OpcodeTy; @@ -613,6 +629,13 @@ private: /// modeled instruction. void generateInstruction(VPTransformState &State, unsigned Part); +protected: + Instruction *getUnderlyingInstr() { + return cast_or_null<Instruction>(getUnderlyingValue()); + } + + void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } + public: VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands) : VPUser(VPValue::VPInstructionSC, Operands), @@ -626,6 +649,11 @@ public: return V->getVPValueID() == VPValue::VPInstructionSC; } + VPInstruction *clone() const { + SmallVector<VPValue *, 2> Operands(operands()); + return new VPInstruction(Opcode, Operands); + } + /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *R) { return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC; @@ -643,6 +671,14 @@ public: /// Print the VPInstruction. void print(raw_ostream &O) const; + + /// Return true if this instruction may modify memory. + bool mayWriteToMemory() const { + // TODO: we can use attributes of the called function to rule out memory + // modifications. + return Opcode == Instruction::Store || Opcode == Instruction::Call || + Opcode == Instruction::Invoke || Opcode == SLPStore; + } }; /// VPWidenRecipe is a recipe for producing a copy of vector type for each @@ -764,11 +800,15 @@ public: /// or stores into one wide load/store and shuffles. class VPInterleaveRecipe : public VPRecipeBase { private: - const InterleaveGroup *IG; + const InterleaveGroup<Instruction> *IG; + std::unique_ptr<VPUser> User; public: - VPInterleaveRecipe(const InterleaveGroup *IG) - : VPRecipeBase(VPInterleaveSC), IG(IG) {} + VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask) + : VPRecipeBase(VPInterleaveSC), IG(IG) { + if (Mask) // Create a VPInstruction to register as a user of the mask. + User.reset(new VPUser({Mask})); + } ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -782,7 +822,7 @@ public: /// Print the recipe. void print(raw_ostream &O, const Twine &Indent) const override; - const InterleaveGroup *getInterleaveGroup() { return IG; } + const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } }; /// VPReplicateRecipe replicates a given instruction producing multiple scalar @@ -1107,6 +1147,10 @@ private: // (operators '==' and '<'). SmallPtrSet<VPValue *, 16> VPExternalDefs; + /// Represents the backedge taken count of the original loop, for folding + /// the tail. + VPValue *BackedgeTakenCount = nullptr; + /// Holds a mapping between Values and their corresponding VPValue inside /// VPlan. Value2VPValueTy Value2VPValue; @@ -1114,6 +1158,9 @@ private: /// Holds the VPLoopInfo analysis for this VPlan. VPLoopInfo VPLInfo; + /// Holds the condition bit values built during VPInstruction to VPRecipe transformation. + SmallVector<VPValue *, 4> VPCBVs; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1121,9 +1168,14 @@ public: if (Entry) VPBlockBase::deleteCFG(Entry); for (auto &MapEntry : Value2VPValue) - delete MapEntry.second; + if (MapEntry.second != BackedgeTakenCount) + delete MapEntry.second; + if (BackedgeTakenCount) + delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not. for (VPValue *Def : VPExternalDefs) delete Def; + for (VPValue *CBV : VPCBVs) + delete CBV; } /// Generate the IR code for this VPlan. @@ -1134,6 +1186,13 @@ public: VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; } + /// The backedge taken count of the original loop. + VPValue *getOrCreateBackedgeTakenCount() { + if (!BackedgeTakenCount) + BackedgeTakenCount = new VPValue(); + return BackedgeTakenCount; + } + void addVF(unsigned VF) { VFs.insert(VF); } bool hasVF(unsigned VF) { return VFs.count(VF); } @@ -1148,6 +1207,11 @@ public: VPExternalDefs.insert(VPVal); } + /// Add \p CBV to the vector of condition bit values. + void addCBV(VPValue *CBV) { + VPCBVs.push_back(CBV); + } + void addVPValue(Value *V) { assert(V && "Trying to add a null Value to VPlan"); assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); @@ -1429,6 +1493,144 @@ public: } }; +class VPInterleavedAccessInfo { +private: + DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *> + InterleaveGroupMap; + + /// Type for mapping of instruction based interleave groups to VPInstruction + /// interleave groups + using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *, + InterleaveGroup<VPInstruction> *>; + + /// Recursively \p Region and populate VPlan based interleave groups based on + /// \p IAI. + void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, + InterleavedAccessInfo &IAI); + /// Recursively traverse \p Block and populate VPlan based interleave groups + /// based on \p IAI. + void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, + InterleavedAccessInfo &IAI); + +public: + VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); + + ~VPInterleavedAccessInfo() { + SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) + delete Ptr; + } + + /// Get the interleave group that \p Instr belongs to. + /// + /// \returns nullptr if doesn't have such group. + InterleaveGroup<VPInstruction> * + getInterleaveGroup(VPInstruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } +}; + +/// Class that maps (parts of) an existing VPlan to trees of combined +/// VPInstructions. +class VPlanSlp { +private: + enum class OpMode { Failed, Load, Opcode }; + + /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as + /// DenseMap keys. + struct BundleDenseMapInfo { + static SmallVector<VPValue *, 4> getEmptyKey() { + return {reinterpret_cast<VPValue *>(-1)}; + } + + static SmallVector<VPValue *, 4> getTombstoneKey() { + return {reinterpret_cast<VPValue *>(-2)}; + } + + static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) { + return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); + } + + static bool isEqual(const SmallVector<VPValue *, 4> &LHS, + const SmallVector<VPValue *, 4> &RHS) { + return LHS == RHS; + } + }; + + /// Mapping of values in the original VPlan to a combined VPInstruction. + DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo> + BundleToCombined; + + VPInterleavedAccessInfo &IAI; + + /// Basic block to operate on. For now, only instructions in a single BB are + /// considered. + const VPBasicBlock &BB; + + /// Indicates whether we managed to combine all visited instructions or not. + bool CompletelySLP = true; + + /// Width of the widest combined bundle in bits. + unsigned WidestBundleBits = 0; + + using MultiNodeOpTy = + typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; + + // Input operand bundles for the current multi node. Each multi node operand + // bundle contains values not matching the multi node's opcode. They will + // be reordered in reorderMultiNodeOps, once we completed building a + // multi node. + SmallVector<MultiNodeOpTy, 4> MultiNodeOps; + + /// Indicates whether we are building a multi node currently. + bool MultiNodeActive = false; + + /// Check if we can vectorize Operands together. + bool areVectorizable(ArrayRef<VPValue *> Operands) const; + + /// Add combined instruction \p New for the bundle \p Operands. + void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New); + + /// Indicate we hit a bundle we failed to combine. Returns nullptr for now. + VPInstruction *markFailed(); + + /// Reorder operands in the multi node to maximize sequential memory access + /// and commutative operations. + SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps(); + + /// Choose the best candidate to use for the lane after \p Last. The set of + /// candidates to choose from are values with an opcode matching \p Last's + /// or loads consecutive to \p Last. + std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last, + SmallPtrSetImpl<VPValue *> &Candidates, + VPInterleavedAccessInfo &IAI); + + /// Print bundle \p Values to dbgs(). + void dumpBundle(ArrayRef<VPValue *> Values); + +public: + VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} + + ~VPlanSlp() { + for (auto &KV : BundleToCombined) + delete KV.second; + } + + /// Tries to build an SLP tree rooted at \p Operands and returns a + /// VPInstruction combining \p Operands, if they can be combined. + VPInstruction *buildGraph(ArrayRef<VPValue *> Operands); + + /// Return the width of the widest combined bundle in bits. + unsigned getWidestBundleBits() const { return WidestBundleBits; } + + /// Return true if all visited instruction can be combined. + bool isCompletelySLP() const { return CompletelySLP; } +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |