Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
 include/llvm/Analysis/TargetTransformInfo.h | 154 ++++++++++++++++++++++-----
 1 file changed, 134 insertions(+), 20 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 3913cc3f107c3..7570d22a803cb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -25,6 +25,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
@@ -34,7 +36,6 @@ namespace llvm {
class Function;
class GlobalValue;
class Loop;
-class PreservedAnalyses;
class Type;
class User;
class Value;
@@ -165,6 +166,14 @@ public:
/// This overload allows specifying a set of candidate argument values.
int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+ /// \returns A value by which our inlining threshold should be multiplied.
+ /// This is primarily used to bump up the inlining threshold wholesale on
+ /// targets where calls are unusually expensive.
+ ///
+ /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
+ /// individual classes of instructions would be better.
+ unsigned getInliningThresholdMultiplier() const;
+
/// \brief Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
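For context, the inliner is expected to scale its threshold by this factor; a minimal sketch of how a cost-model client might apply the new hook (the helper name is illustrative, not the actual inliner code):
// Sketch: scale a base inlining threshold by the target's multiplier before
// comparing it against a computed call-site cost. adjustThreshold() is a
// hypothetical helper, not LLVM API.
int adjustThreshold(int BaseThreshold, const TargetTransformInfo &TTI) {
  // getInliningThresholdMultiplier() is 1 by default; targets where calls
  // are unusually expensive can return a larger factor.
  return BaseThreshold * static_cast<int>(TTI.getInliningThresholdMultiplier());
}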
@@ -260,6 +269,10 @@ public:
// (set to UINT_MAX to disable). This does not apply in cases where the
// loop is being fully unrolled.
unsigned MaxCount;
+ /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
+ /// applies even if full unrolling is selected. This allows a target to fall
+ /// back to partial unrolling if the full unroll count exceeds
+ /// FullUnrollMaxCount.
+ unsigned FullUnrollMaxCount;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
@@ -267,9 +280,14 @@ public:
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
+ /// Allow generation of a loop remainder (extra iterations after unroll).
+ bool AllowRemainder;
/// Allow emitting expensive instructions (such as divisions) when computing
/// the trip count of a loop for runtime unrolling.
bool AllowExpensiveTripCount;
+ /// Apply loop unrolling to any kind of loop
+ /// (mainly loops that fail runtime unrolling).
+ bool Force;
};
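A target's getUnrollingPreferences override might populate the new fields roughly as follows (a sketch for a hypothetical target; the concrete values are illustrative only):
// Sketch: hypothetical target override using the new unrolling knobs.
void MyTargetTTIImpl::getUnrollingPreferences(Loop *L,
                                              TTI::UnrollingPreferences &UP) {
  UP.Partial = true;          // allow partial unrolling
  UP.Runtime = true;          // allow runtime unrolling
  UP.AllowRemainder = true;   // permit a remainder loop for leftover iterations
  UP.FullUnrollMaxCount = 16; // above this, fall back to partial unrolling
}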
/// \brief Get target-customized preferences for the generic loop unrolling
@@ -313,8 +331,7 @@ public:
unsigned AddrSpace = 0) const;
/// \brief Return true if the target supports masked load/store
- /// AVX2 and AVX-512 targets allow masks for consecutive load and store for
- /// 32 and 64 bit elements.
+ /// AVX2 and AVX-512 targets allow masks for consecutive load and store
bool isLegalMaskedStore(Type *DataType) const;
bool isLegalMaskedLoad(Type *DataType) const;
@@ -362,6 +379,20 @@ public:
/// \brief Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
+ /// \brief Indicate that it is potentially unsafe to automatically vectorize
+ /// floating-point operations because the semantics of vector and scalar
+ /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
+ /// does not support IEEE-754 denormal numbers, while depending on the
+ /// platform, scalar floating-point math does.
+ /// This applies to floating-point math operations and calls, not memory
+ /// operations, shuffles, or casts.
+ bool isFPVectorizationPotentiallyUnsafe() const;
+
+ /// \brief Determine if the target supports unaligned memory accesses.
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+ unsigned Alignment = 1,
+ bool *Fast = nullptr) const;
+
/// \brief Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
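A vectorization client could combine these queries before widening memory accesses or FP math; a minimal sketch (the helper name is made up for illustration):
// Sketch: decide whether a misaligned vector access of BitWidth bits is both
// legal and fast in the given address space. Not actual vectorizer code.
bool isCheapMisalignedAccess(const TargetTransformInfo &TTI, unsigned BitWidth,
                             unsigned AddrSpace, unsigned Align) {
  bool Fast = false;
  if (!TTI.allowsMisalignedMemoryAccesses(BitWidth, AddrSpace, Align, &Fast))
    return false; // misaligned access of this width is not supported at all
  return Fast;    // supported, and the target reports it as fast
}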
@@ -383,6 +414,16 @@ public:
Type *Ty) const;
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const;
+
+ /// \brief Return the expected cost of the given integer immediate when
+ /// optimising for size. This differs from the other integer immediate cost
+ /// functions in that it is subtarget agnostic. It is useful when, e.g., the
+ /// target is one ISA such as AArch32 but smaller encodings may be possible
+ /// with another such as Thumb. The return value is used as a penalty when
+ /// the total cost for a constant is calculated (the bigger the cost, the
+ /// more beneficial constant hoisting is).
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
/// @}
/// \name Vector Target Information
@@ -416,6 +457,27 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
+ /// \return The bitwidth of the largest vector type that should be used to
+ /// load/store in the given address space.
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+ /// \return The size of a cache line in bytes.
+ unsigned getCacheLineSize() const;
+
+ /// \return How far ahead of a load we should place the prefetch
+ /// instruction. This is currently measured in number of instructions.
+ unsigned getPrefetchDistance() const;
+
+ /// \return The minimum stride, in bytes, at which it makes sense to start
+ /// adding SW prefetches; some HW prefetchers can handle strided accesses up
+ /// to a certain constant stride. The default is 1, i.e. prefetch with any
+ /// stride.
+ unsigned getMinPrefetchStride() const;
+
+ /// \return The maximum number of iterations to prefetch ahead. If the
+ /// required number of iterations is more than this number, no prefetching is
+ /// performed.
+ unsigned getMaxPrefetchIterationsAhead() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
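Taken together, the cache and prefetch queries let a software-prefetching pass decide whether and how far ahead to prefetch. A rough sketch of how they might be combined (this is not the actual LoopDataPrefetch logic):
// Sketch: for a strided access, decide whether to emit SW prefetches and how
// many iterations ahead to aim them.
bool shouldPrefetch(const TargetTransformInfo &TTI, unsigned StrideBytes,
                    unsigned InstrsPerIteration, unsigned &ItersAhead) {
  if (InstrsPerIteration == 0 || StrideBytes < TTI.getMinPrefetchStride())
    return false; // the HW prefetcher is expected to cover this stride
  // Convert the prefetch distance (in instructions) into loop iterations.
  ItersAhead = TTI.getPrefetchDistance() / InstrsPerIteration;
  return ItersAhead <= TTI.getMaxPrefetchIterationsAhead();
}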
@@ -438,6 +500,11 @@ public:
/// zext, etc.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
+ /// \return The expected cost of a sign- or zero-extended vector extract. Use
+ /// -1 to indicate that there is no information about the index value.
+ int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+ unsigned Index = -1) const;
+
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br.
int getCFInstrCost(unsigned Opcode) const;
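This hook models patterns such as a sign- or zero-extend of an extractelement, which some targets (e.g. AArch64) can fold into a single lane-extracting extend. A query could look like this (the types are illustrative):
// Sketch: cost of extracting lane 1 of a <4 x i16> vector and sign-extending
// the result to i32 as one combined operation.
int costLaneExtend(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  VectorType *VecTy = VectorType::get(Type::getInt16Ty(Ctx), 4);
  Type *DstTy = Type::getInt32Ty(Ctx);
  return TTI.getExtractWithExtendCost(Instruction::SExt, DstTy, VecTy,
                                      /*Index=*/1);
}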
@@ -497,11 +564,11 @@ public:
/// \returns The cost of Intrinsic instructions. Types analysis only.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) const;
+ ArrayRef<Type *> Tys, FastMathFlags FMF) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) const;
+ ArrayRef<Value *> Args, FastMathFlags FMF) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
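Callers of getIntrinsicInstrCost are now expected to pass the fast-math flags of the call being costed, so a target can, for example, price a math intrinsic differently under relaxed FP semantics. A hedged sketch of such a call site:
// Sketch: cost an intrinsic call, forwarding its fast-math flags (if any) to
// the target's cost model. Not taken verbatim from an in-tree pass.
int costIntrinsicCall(const TargetTransformInfo &TTI, IntrinsicInst *II) {
  FastMathFlags FMF;
  if (auto *FPMO = dyn_cast<FPMathOperator>(II))
    FMF = FPMO->getFastMathFlags();
  SmallVector<Value *, 4> Args(II->arg_operands().begin(),
                               II->arg_operands().end());
  return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
                                   FMF);
}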
@@ -569,6 +636,7 @@ public:
virtual int getCallCost(const Function *F, int NumArgs) = 0;
virtual int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments) = 0;
+ virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -599,9 +667,16 @@ public:
virtual bool shouldBuildLookupTables() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
+ virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+ virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ unsigned AddressSpace,
+ unsigned Alignment,
+ bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
+ virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
@@ -609,6 +684,11 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+ virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
+ virtual unsigned getCacheLineSize() = 0;
+ virtual unsigned getPrefetchDistance() = 0;
+ virtual unsigned getMinPrefetchStride() = 0;
+ virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -618,6 +698,8 @@ public:
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+ virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy, unsigned Index) = 0;
virtual int getCFInstrCost(unsigned Opcode) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) = 0;
@@ -639,9 +721,11 @@ public:
virtual int getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) = 0;
+ ArrayRef<Type *> Tys,
+ FastMathFlags FMF) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) = 0;
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -684,6 +768,9 @@ public:
ArrayRef<const Value *> Arguments) override {
return Impl.getCallCost(F, Arguments);
}
+ unsigned getInliningThresholdMultiplier() override {
+ return Impl.getInliningThresholdMultiplier();
+ }
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
@@ -751,6 +838,14 @@ public:
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
+ bool isFPVectorizationPotentiallyUnsafe() override {
+ return Impl.isFPVectorizationPotentiallyUnsafe();
+ }
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ unsigned Alignment, bool *Fast) override {
+ return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ Alignment, Fast);
+ }
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
@@ -758,6 +853,10 @@ public:
int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
+ return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+ }
int getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}
@@ -775,6 +874,21 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
+
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
+ return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
+ }
+
+ unsigned getCacheLineSize() override {
+ return Impl.getCacheLineSize();
+ }
+ unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
+ unsigned getMinPrefetchStride() override {
+ return Impl.getMinPrefetchStride();
+ }
+ unsigned getMaxPrefetchIterationsAhead() override {
+ return Impl.getMaxPrefetchIterationsAhead();
+ }
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
@@ -793,6 +907,10 @@ public:
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
return Impl.getCastInstrCost(Opcode, Dst, Src);
}
+ int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+ unsigned Index) override {
+ return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+ }
int getCFInstrCost(unsigned Opcode) override {
return Impl.getCFInstrCost(Opcode);
}
@@ -826,13 +944,14 @@ public:
bool IsPairwiseForm) override {
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
+ FastMathFlags FMF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args);
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
@@ -876,16 +995,10 @@ TargetTransformInfo::TargetTransformInfo(T Impl)
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
-class TargetIRAnalysis {
+class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
typedef TargetTransformInfo Result;
- /// \brief Opaque, unique identifier for this analysis pass.
- static void *ID() { return (void *)&PassID; }
-
- /// \brief Provide access to a name for this pass for debugging purposes.
- static StringRef name() { return "TargetIRAnalysis"; }
-
/// \brief Default construct a target IR analysis.
///
/// This will use the module's datalayout to construct a baseline
@@ -912,9 +1025,10 @@ public:
return *this;
}
- Result run(const Function &F);
+ Result run(const Function &F, AnalysisManager<Function> &);
private:
+ friend AnalysisInfoMixin<TargetIRAnalysis>;
static char PassID;
/// \brief The callback used to produce a result.
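With TargetIRAnalysis now derived from AnalysisInfoMixin and run() taking an AnalysisManager, the analysis participates in the new pass manager like any other function analysis. A minimal usage sketch, assuming the analysis is registered with a FunctionAnalysisManager:
// Sketch: register TargetIRAnalysis with the new pass manager and query the
// resulting TargetTransformInfo inside a pass.
FunctionAnalysisManager FAM;
FAM.registerPass([] { return TargetIRAnalysis(); });
// ... later, inside SomePass::run(Function &F, FunctionAnalysisManager &AM):
//   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//   if (TTI.isFPVectorizationPotentiallyUnsafe())
//     /* be conservative about vectorizing FP math */;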