Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
-rw-r--r--   include/llvm/Analysis/TargetTransformInfo.h | 235
1 file changed, 195 insertions(+), 40 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 223175d17c2d..7574b811bc1c 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -28,6 +27,10 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include <functional>
 
 namespace llvm {
@@ -36,6 +39,8 @@ namespace Intrinsic {
 enum ID : unsigned;
 }
 
+class AssumptionCache;
+class BranchInst;
 class Function;
 class GlobalValue;
 class IntrinsicInst;
@@ -45,6 +50,7 @@ class SCEV;
 class ScalarEvolution;
 class StoreInst;
 class SwitchInst;
+class TargetLibraryInfo;
 class Type;
 class User;
 class Value;
@@ -73,6 +79,30 @@ struct MemIntrinsicInfo {
   }
 };
 
+/// Attributes of a target dependent hardware loop.
+struct HardwareLoopInfo {
+  HardwareLoopInfo() = delete;
+  HardwareLoopInfo(Loop *L) : L(L) {}
+  Loop *L = nullptr;
+  BasicBlock *ExitBlock = nullptr;
+  BranchInst *ExitBranch = nullptr;
+  const SCEV *ExitCount = nullptr;
+  IntegerType *CountType = nullptr;
+  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
+                                  // value in every iteration.
+  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
+                                  // another hardware loop?
+  bool CounterInReg = false;      // Should loop counter be updated in
+                                  // the loop via a phi?
+  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
+                                  // icmp ne zero on the loop counter value and
+                                  // produces an i1 to guard the loop entry.
+  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
+                               DominatorTree &DT, bool ForceNestedLoop = false,
+                               bool ForceHardwareLoopPHI = false);
+  bool canAnalyze(LoopInfo &LI);
+};
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -81,7 +111,7 @@ public:
   /// API below.
   ///
   /// This is used by targets to construct a TTI wrapping their target-specific
-  /// implementaion that encodes appropriate costs for their target.
+  /// implementation that encodes appropriate costs for their target.
   template <typename T> TargetTransformInfo(T Impl);
 
   /// Construct a baseline TTI object using a minimal implementation of
@@ -209,18 +239,21 @@ public:
   /// This is the most basic query for estimating call cost: it only knows the
   /// function type and (potentially) the number of arguments at the call site.
   /// The latter is only interesting for varargs function types.
-  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+  int getCallCost(FunctionType *FTy, int NumArgs = -1,
+                  const User *U = nullptr) const;
 
   /// Estimate the cost of calling a specific function when lowered.
   ///
   /// This overload adds the ability to reason about the particular function
   /// being called in the event it is a library call with special lowering.
-  int getCallCost(const Function *F, int NumArgs = -1) const;
+  int getCallCost(const Function *F, int NumArgs = -1,
+                  const User *U = nullptr) const;
 
   /// Estimate the cost of calling a specific function when lowered.
   ///
   /// This overload allows specifying a set of candidate argument values.
-  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+                  const User *U = nullptr) const;
 
   /// \returns A value by which our inlining threshold should be multiplied.
   /// This is primarily used to bump up the inlining threshold wholesale on
@@ -230,17 +263,35 @@ public:
   /// individual classes of instructions would be better.
   unsigned getInliningThresholdMultiplier() const;
 
+  /// \returns Vector bonus in percent.
+  ///
+  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+  /// and apply this bonus based on the percentage of vector instructions. A
+  /// bonus is applied if the vector instructions exceed 50% and half that
+  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
+  /// arbitrary and evolved over time by accident as much as because they are
+  /// principled bonuses.
+  /// FIXME: It would be nice to base the bonus values on something more
+  /// scientific. A target may have no bonus on vector instructions.
+  int getInlinerVectorBonusPercent() const;
+
   /// Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys) const;
+                       ArrayRef<Type *> ParamTys,
+                       const User *U = nullptr) const;
 
   /// Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments) const;
+                       ArrayRef<const Value *> Arguments,
+                       const User *U = nullptr) const;
+
+  /// \return the expected cost of a memcpy, which could e.g. depend on the
+  /// source/destination type and alignment and the number of bytes copied.
+  int getMemcpyCost(const Instruction *I) const;
 
   /// \return The estimated number of case clusters when lowering \p 'SI'.
   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
@@ -296,7 +347,7 @@ public:
 
   // Returns true for the target specific
   // set of operations which produce uniform result
-  // even taking non-unform arguments
+  // even taking non-uniform arguments
   bool isAlwaysUniform(const Value *V) const;
 
   /// Returns the address space ID for a target's 'flat' address space. Note
@@ -437,6 +488,13 @@ public:
   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                UnrollingPreferences &UP) const;
 
+  /// Query the target whether it would be profitable to convert the given loop
+  /// into a hardware loop.
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) const;
+
   /// @}
 
   /// \name Scalar Target Information
@@ -483,21 +541,40 @@ public:
   /// calculation for the instructions in a loop.
   bool canMacroFuseCmp() const;
 
+  /// Return true if the target can save a compare for loop count, for example
+  /// hardware loop saves a compare.
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+                  DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo) const;
+
   /// \return True if LSR should make efforts to create/preserve post-inc
   /// addressing mode expressions.
   bool shouldFavorPostInc() const;
 
-  /// Return true if the target supports masked load/store
-  /// AVX2 and AVX-512 targets allow masks for consecutive load and store
+  /// Return true if LSR should make efforts to generate indexed addressing
+  /// modes that operate across loop iterations.
+  bool shouldFavorBackedgeIndex(const Loop *L) const;
+
+  /// Return true if the target supports masked store.
   bool isLegalMaskedStore(Type *DataType) const;
+  /// Return true if the target supports masked load.
   bool isLegalMaskedLoad(Type *DataType) const;
 
-  /// Return true if the target supports masked gather/scatter
-  /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
-  /// bits scalar type.
+  /// Return true if the target supports nontemporal store.
+  bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+  /// Return true if the target supports nontemporal load.
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+
+  /// Return true if the target supports masked scatter.
   bool isLegalMaskedScatter(Type *DataType) const;
+  /// Return true if the target supports masked gather.
   bool isLegalMaskedGather(Type *DataType) const;
+  /// Return true if the target supports masked compress store.
+  bool isLegalMaskedCompressStore(Type *DataType) const;
+  /// Return true if the target supports masked expand load.
+  bool isLegalMaskedExpandLoad(Type *DataType) const;
 
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
@@ -576,17 +653,35 @@ public:
   /// Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
-  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
-  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+  /// Returns options for expansion of memcmp. IsZeroCmp is
+  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
   struct MemCmpExpansionOptions {
+    // Return true if memcmp expansion is enabled.
+    operator bool() const { return MaxNumLoads > 0; }
+
+    // Maximum number of load operations.
+    unsigned MaxNumLoads = 0;
+
     // The list of available load sizes (in bytes), sorted in decreasing order.
     SmallVector<unsigned, 8> LoadSizes;
+
+    // For memcmp expansion when the memcmp result is only compared equal or
+    // not-equal to 0, allow up to this number of load pairs per block. As an
+    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+    //   a0 = load2bytes &a[0]
+    //   b0 = load2bytes &b[0]
+    //   a2 = load1byte  &a[2]
+    //   b2 = load1byte  &b[2]
+    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+    unsigned NumLoadsPerBlock = 1;
+
     // Set to true to allow overlapping loads. For example, 7-byte compares can
     // be done with two 4-byte compares instead of 4+2+1-byte compares. This
     // requires all loads in LoadSizes to be doable in an unaligned way.
     bool AllowOverlappingLoads = false;
   };
-  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
+  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                               bool IsZeroCmp) const;
 
   /// Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
@@ -700,7 +795,7 @@ public:
   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 
   /// \return The minimum vectorization factor for types of given element
-  /// bit width, or 0 if there is no mimimum VF. The returned value only
+  /// bit width, or 0 if there is no minimum VF. The returned value only
   /// applies when shouldMaximizeVectorBandwidth returns true.
   unsigned getMinimumVF(unsigned ElemWidth) const;
 
@@ -1005,6 +1100,11 @@ public:
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
   bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+  /// \returns the size cost of rematerializing a GlobalValue address relative
+  /// to a stack reload.
+  unsigned getGISelRematGlobalCost() const;
+
   /// @}
 
 private:
@@ -1035,15 +1135,18 @@ public:
   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                          ArrayRef<const Value *> Operands) = 0;
   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
-  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
-  virtual int getCallCost(const Function *F, int NumArgs) = 0;
+  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
+  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
   virtual int getCallCost(const Function *F,
-                          ArrayRef<const Value *> Arguments) = 0;
+                          ArrayRef<const Value *> Arguments, const User *U) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
+  virtual int getInlinerVectorBonusPercent() = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<Type *> ParamTys) = 0;
+                               ArrayRef<Type *> ParamTys, const User *U) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<const Value *> Arguments) = 0;
+                               ArrayRef<const Value *> Arguments,
+                               const User *U) = 0;
+  virtual int getMemcpyCost(const Instruction *I) = 0;
   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                     unsigned &JTSize) = 0;
   virtual int
@@ -1055,6 +1158,10 @@ public:
   virtual bool isLoweredToCall(const Function *F) = 0;
   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                        UnrollingPreferences &UP) = 0;
+  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                        AssumptionCache &AC,
+                                        TargetLibraryInfo *LibInfo,
+                                        HardwareLoopInfo &HWLoopInfo) = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1065,11 +1172,19 @@ public:
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool canMacroFuseCmp() = 0;
+  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+                          TargetLibraryInfo *LibInfo) = 0;
   virtual bool shouldFavorPostInc() const = 0;
+  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
+  virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
+  virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
+  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
+  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
   virtual bool
   hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
@@ -1092,8 +1207,8 @@ public:
                                  unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const = 0;
+  virtual MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool enableMaskedInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -1210,6 +1325,7 @@ public:
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual unsigned getGISelRematGlobalCost() const = 0;
   virtual int getInstructionLatency(const Instruction *I) = 0;
 };
 
@@ -1235,26 +1351,33 @@ public:
   int getExtCost(const Instruction *I, const Value *Src) override {
     return Impl.getExtCost(I, Src);
   }
-  int getCallCost(FunctionType *FTy, int NumArgs) override {
-    return Impl.getCallCost(FTy, NumArgs);
+  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
+    return Impl.getCallCost(FTy, NumArgs, U);
   }
-  int getCallCost(const Function *F, int NumArgs) override {
-    return Impl.getCallCost(F, NumArgs);
+  int getCallCost(const Function *F, int NumArgs, const User *U) override {
+    return Impl.getCallCost(F, NumArgs, U);
   }
   int getCallCost(const Function *F,
-                  ArrayRef<const Value *> Arguments) override {
-    return Impl.getCallCost(F, Arguments);
+                  ArrayRef<const Value *> Arguments, const User *U) override {
+    return Impl.getCallCost(F, Arguments, U);
   }
   unsigned getInliningThresholdMultiplier() override {
     return Impl.getInliningThresholdMultiplier();
   }
+  int getInlinerVectorBonusPercent() override {
+    return Impl.getInlinerVectorBonusPercent();
+  }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys) override {
-    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
+                       ArrayRef<Type *> ParamTys,
+                       const User *U = nullptr) override {
+    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
   }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments) override {
-    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
+                       ArrayRef<const Value *> Arguments,
+                       const User *U = nullptr) override {
+    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+  }
+  int getMemcpyCost(const Instruction *I) override {
+    return Impl.getMemcpyCost(I);
   }
   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
     return Impl.getUserCost(U, Operands);
@@ -1279,6 +1402,12 @@ public:
                                UnrollingPreferences &UP) override {
     return Impl.getUnrollingPreferences(L, SE, UP);
   }
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) override {
+    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+  }
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
@@ -1299,21 +1428,42 @@ public:
   bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo) override {
+    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+  }
   bool shouldFavorPostInc() const override {
     return Impl.shouldFavorPostInc();
   }
+  bool shouldFavorBackedgeIndex(const Loop *L) const override {
+    return Impl.shouldFavorBackedgeIndex(L);
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
   bool isLegalMaskedLoad(Type *DataType) override {
     return Impl.isLegalMaskedLoad(DataType);
   }
+  bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+    return Impl.isLegalNTStore(DataType, Alignment);
+  }
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+    return Impl.isLegalNTLoad(DataType, Alignment);
+  }
   bool isLegalMaskedScatter(Type *DataType) override {
     return Impl.isLegalMaskedScatter(DataType);
   }
   bool isLegalMaskedGather(Type *DataType) override {
     return Impl.isLegalMaskedGather(DataType);
   }
+  bool isLegalMaskedCompressStore(Type *DataType) override {
+    return Impl.isLegalMaskedCompressStore(DataType);
+  }
+  bool isLegalMaskedExpandLoad(Type *DataType) override {
+    return Impl.isLegalMaskedExpandLoad(DataType);
+  }
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
@@ -1368,9 +1518,9 @@ public:
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  const MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const override {
-    return Impl.enableMemCmpExpansion(IsZeroCmp);
+  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                               bool IsZeroCmp) const override {
+    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
@@ -1617,6 +1767,11 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+
+  unsigned getGISelRematGlobalCost() const override {
+    return Impl.getGISelRematGlobalCost();
+  }
+
   int getInstructionLatency(const Instruction *I) override {
     return Impl.getInstructionLatency(I);
   }
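
The sketches below illustrate how the new hooks fit together; they are not part of the patch. First, a minimal sketch of a caller driving the hardware-loop query. The tryConvertToHardwareLoop() helper is hypothetical; only the HardwareLoopInfo members and the isHardwareLoopProfitable() hook come from the interface above, and the code assumes the usual LLVM analysis headers plus "llvm/Analysis/TargetTransformInfo.h".

  // Hypothetical driver; only HardwareLoopInfo and isHardwareLoopProfitable
  // are part of this patch.
  static bool tryConvertToHardwareLoop(Loop *L, ScalarEvolution &SE,
                                       LoopInfo &LI, DominatorTree &DT,
                                       AssumptionCache &AC,
                                       TargetLibraryInfo *LibInfo,
                                       const TargetTransformInfo &TTI) {
    HardwareLoopInfo HWLoopInfo(L);
    // Reject loops whose shape cannot be analyzed at all.
    if (!HWLoopInfo.canAnalyze(LI))
      return false;
    // Let the target decide, filling in fields such as CountType,
    // LoopDecrement, and IsNestingLegal along the way.
    if (!TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo))
      return false;
    // Verify the trip count is computable and the exit branch is usable.
    if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
      return false;
    // A real pass would now rewrite the loop using HWLoopInfo.ExitBranch,
    // HWLoopInfo.ExitCount, and HWLoopInfo.CountType.
    return true;
  }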
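Second, enableMemCmpExpansion() now returns MemCmpExpansionOptions by value, with operator bool standing in for the old null-pointer check. A sketch of the caller-side pattern, together with a hypothetical helper (invented for illustration) that counts the loads a greedy expansion would need:

  // Hypothetical helper: count the loads needed to cover Size bytes, taking
  // the largest legal load size first. LoadSizes is sorted in decreasing
  // order per the field's documentation above.
  static unsigned
  countGreedyLoads(const TargetTransformInfo::MemCmpExpansionOptions &Options,
                   uint64_t Size) {
    unsigned NumLoads = 0;
    for (unsigned LoadSize : Options.LoadSizes) {
      while (Size >= LoadSize) {
        Size -= LoadSize;
        ++NumLoads;
      }
    }
    return NumLoads;
  }

  // Hypothetical call site using the by-value interface.
  static bool shouldExpandMemCmp(const TargetTransformInfo &TTI, uint64_t Size,
                                 bool OptSize, bool IsZeroCmp) {
    auto Options = TTI.enableMemCmpExpansion(OptSize, IsZeroCmp);
    if (!Options) // operator bool(): expansion disabled while MaxNumLoads == 0.
      return false;
    return countGreedyLoads(Options, Size) <= Options.MaxNumLoads;
  }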
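Finally, getInlinerVectorBonusPercent() expresses the bonus as a percentage, so a caller would scale its base threshold with integer arithmetic along these lines. The combining formula is illustrative only; the actual inliner logic lives outside this header.

  // Illustrative arithmetic only. With a base threshold of 225 and a bonus
  // percent of 150, a fully vector-dense callee would see 225 + 337 = 562.
  static int thresholdWithVectorBonus(int Threshold,
                                      const TargetTransformInfo &TTI) {
    int VectorBonus = Threshold * TTI.getInlinerVectorBonusPercent() / 100;
    return Threshold + VectorBonus;
  }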