Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h | 191
1 file changed, 157 insertions(+), 34 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 24edd3826a2e..c20f20cfbe4d 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,21 +23,28 @@
 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

 #include "llvm/ADT/Optional.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
 #include <functional>

 namespace llvm {

+namespace Intrinsic {
+enum ID : unsigned;
+}
+
 class Function;
 class GlobalValue;
+class IntrinsicInst;
+class LoadInst;
 class Loop;
-class ScalarEvolution;
 class SCEV;
+class ScalarEvolution;
+class StoreInst;
+class SwitchInst;
 class Type;
 class User;
 class Value;
@@ -107,6 +114,37 @@ public:
   /// \name Generic Target Information
   /// @{

+  /// \brief The kind of cost model.
+  ///
+  /// There are several different cost models that can be customized by the
+  /// target. The normalization of each cost model may be target specific.
+  enum TargetCostKind {
+    TCK_RecipThroughput, ///< Reciprocal throughput.
+    TCK_Latency,         ///< The latency of instruction.
+    TCK_CodeSize         ///< Instruction code size.
+  };
+
+  /// \brief Query the cost of a specified instruction.
+  ///
+  /// Clients should use this interface to query the cost of an existing
+  /// instruction. The instruction must have a valid parent (basic block).
+  ///
+  /// Note, this method does not cache the cost calculation and it
+  /// can be expensive in some cases.
+  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
+    switch (kind){
+    case TCK_RecipThroughput:
+      return getInstructionThroughput(I);
+
+    case TCK_Latency:
+      return getInstructionLatency(I);
+
+    case TCK_CodeSize:
+      return getUserCost(I);
+    }
+    llvm_unreachable("Unknown instruction cost kind");
+  }
+
   /// \brief Underlying constants for 'cost' values in this interface.
   ///
   /// Many APIs in this interface return a cost. This enum defines the
@@ -382,6 +420,8 @@ public:
     bool UpperBound;
     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
     bool AllowPeeling;
+    /// Allow unrolling of all the iterations of the runtime loop remainder.
+    bool UnrollRemainder;
   };

   /// \brief Get target-customized preferences for the generic loop unrolling
@@ -420,10 +460,12 @@ public:
   /// this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
   /// mode is legal for a load/store of any legal type.
+  /// If target returns true in LSRWithInstrQueries(), I may be valid.
   /// TODO: Handle pre/postinc as well.
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace = 0) const;
+                             unsigned AddrSpace = 0,
+                             Instruction *I = nullptr) const;

   /// \brief Return true if LSR cost of C1 is lower than C1.
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -440,6 +482,20 @@ public:
   bool isLegalMaskedScatter(Type *DataType) const;
   bool isLegalMaskedGather(Type *DataType) const;

+  /// Return true if the target has a unified operation to calculate division
+  /// and remainder. If so, the additional implicit multiplication and
+  /// subtraction required to calculate a remainder from division are free. This
+  /// can enable more aggressive transformations for division and remainder than
+  /// would typically be allowed using throughput or size cost models.
+  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
+
+  /// Return true if the given instruction (assumed to be a memory access
+  /// instruction) has a volatile variant. If that's the case then we can avoid
+  /// addrspacecast to generic AS for volatile loads/stores. Default
+  /// implementation returns false, which prevents address space inference for
+  /// volatile loads/stores.
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
+
   /// Return true if target doesn't mind addresses in vectors.
   bool prefersVectorizedAddressing() const;

@@ -453,12 +509,12 @@ public:
                            bool HasBaseReg, int64_t Scale,
                            unsigned AddrSpace = 0) const;

-  /// \brief Return true if target supports the load / store
-  /// instruction with the given Offset on the form reg + Offset. It
-  /// may be that Offset is too big for a certain type (register
-  /// class).
-  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const;
-
+  /// \brief Return true if the loop strength reduce pass should make
+  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
+  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
+  /// immediate offset and no index register.
+  bool LSRWithInstrQueries() const;
+
   /// \brief Return true if it's free to truncate a value of type Ty1 to type
   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
@@ -498,8 +554,13 @@ public:
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;

-  /// \brief Enable inline expansion of memcmp
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const;
+  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
+  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+  struct MemCmpExpansionOptions {
+    // The list of available load sizes (in bytes), sorted in decreasing order.
+    SmallVector<unsigned, 8> LoadSizes;
+  };
+  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;

   /// \brief Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
@@ -525,6 +586,12 @@ public:
   /// \brief Return true if the hardware has a fast square-root instruction.
   bool haveFastSqrt(Type *Ty) const;

+  /// Return true if it is faster to check if a floating-point value is NaN
+  /// (or not-NaN) versus a comparison against a constant FP zero value.
+  /// Targets should override this if materializing a 0.0 for comparison is
+  /// generally as cheap as checking for ordered/unordered.
+  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
+
   /// \brief Return the expected cost of supporting the floating point operation
   /// of the specified type.
   int getFPOpCost(Type *Ty) const;
@@ -599,6 +666,22 @@ public:
   /// \return The size of a cache line in bytes.
   unsigned getCacheLineSize() const;

+  /// The possible cache levels
+  enum class CacheLevel {
+    L1D,   // The L1 data cache
+    L2D,   // The L2 data cache
+
+    // We currently do not model L3 caches, as their sizes differ widely between
+    // microarchitectures. Also, we currently do not have a use for L3 cache
+    // size modeling yet.
+  };
+
+  /// \return The size of the cache level in bytes, if available.
+  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
+
+  /// \return The associativity of the cache level, if available.
+  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
+
   /// \return How much before a load we should place the prefetch instruction.
   /// This is currently measured in number of instructions.
   unsigned getPrefetchDistance() const;
@@ -699,11 +782,14 @@ public:
   ///
   /// Pairwise:
   ///  (v0, v1, v2, v3)
-  ///  ((v0+v1), (v2, v3), undef, undef)
+  ///  ((v0+v1), (v2+v3), undef, undef)
   /// Split:
   ///  (v0, v1, v2, v3)
   ///  ((v0+v2), (v1+v3), undef, undef)
-  int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm) const;
+  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
+                             bool IsUnsigned) const;

   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
@@ -776,12 +862,6 @@ public:
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const;

-  /// \returns True if we want to test the new memcpy lowering functionality in
-  /// Transform/Utils.
-  /// Temporary. Will be removed once we move to the new functionality and
-  /// remove the old.
-  bool useWideIRMemcpyLoopLowering() const;
-
   /// \returns True if the two functions have compatible attributes for inlining
   /// purposes.
   bool areInlineCompatible(const Function *Caller,
@@ -838,6 +918,14 @@ public:
   /// @}

 private:
+  /// \brief Estimate the latency of specified instruction.
+  /// Returns 1 as the default value.
+  int getInstructionLatency(const Instruction *I) const;
+
+  /// \brief Returns the expected throughput cost of the instruction.
+  /// Returns -1 if the cost is unknown.
+  int getInstructionThroughput(const Instruction *I) const;
+
   /// \brief The abstract base class used to type erase specific TTI
   /// implementations.
   class Concept;
@@ -882,18 +970,21 @@ public:
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
-                                     unsigned AddrSpace) = 0;
+                                     unsigned AddrSpace,
+                                     Instruction *I) = 0;
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
+  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale, unsigned AddrSpace) = 0;
-  virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
+  virtual bool LSRWithInstrQueries() = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
   virtual bool isTypeLegal(Type *Ty) = 0;
@@ -907,7 +998,8 @@ public:
                                            unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0;
+  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -917,6 +1009,7 @@ public:
                                               bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
+  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                     Type *Ty) = 0;
@@ -931,6 +1024,8 @@ public:
   virtual bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
   virtual unsigned getCacheLineSize() = 0;
+  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
+  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
   virtual unsigned getPrefetchDistance() = 0;
   virtual unsigned getMinPrefetchStride() = 0;
   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
@@ -965,8 +1060,10 @@ public:
                                          ArrayRef<unsigned> Indices,
                                          unsigned Alignment,
                                          unsigned AddressSpace) = 0;
-  virtual int getReductionCost(unsigned Opcode, Type *Ty,
-                               bool IsPairwiseForm) = 0;
+  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                         bool IsPairwiseForm) = 0;
+  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                       unsigned ScalarizationCostPassed) = 0;
@@ -1009,6 +1106,7 @@ public:
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual int getInstructionLatency(const Instruction *I) = 0;
 };

 template <typename T>
@@ -1085,9 +1183,10 @@ public:
   }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) override {
+                             unsigned AddrSpace,
+                             Instruction *I) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                      Scale, AddrSpace);
+                                      Scale, AddrSpace, I);
   }
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) override {
@@ -1105,6 +1204,12 @@ public:
   bool isLegalMaskedGather(Type *DataType) override {
     return Impl.isLegalMaskedGather(DataType);
   }
+  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
+    return Impl.hasDivRemOp(DataType, IsSigned);
+  }
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
+    return Impl.hasVolatileVariant(I, AddrSpace);
+  }
   bool prefersVectorizedAddressing() override {
     return Impl.prefersVectorizedAddressing();
   }
@@ -1114,8 +1219,8 @@ public:
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
   }
-  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
-    return Impl.isFoldableMemAccessOffset(I, Offset);
+  bool LSRWithInstrQueries() override {
+    return Impl.LSRWithInstrQueries();
   }
   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
     return Impl.isTruncateFree(Ty1, Ty2);
   }
@@ -1148,8 +1253,9 @@ public:
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override {
-    return Impl.expandMemCmp(I, MaxLoadSize);
+  const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const override {
+    return Impl.enableMemCmpExpansion(IsZeroCmp);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
   }
@@ -1168,6 +1274,10 @@ public:
   }
   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

+  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
+    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
+  }
+
   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
@@ -1202,6 +1312,12 @@ public:
   unsigned getCacheLineSize() override {
     return Impl.getCacheLineSize();
   }
+  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+    return Impl.getCacheSize(Level);
+  }
+  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+    return Impl.getCacheAssociativity(Level);
+  }
   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
   unsigned getMinPrefetchStride() override {
     return Impl.getMinPrefetchStride();
@@ -1267,10 +1383,14 @@ public:
     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                            Alignment, AddressSpace);
   }
-  int getReductionCost(unsigned Opcode, Type *Ty,
-                       bool IsPairwiseForm) override {
-    return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm) override {
+    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
   }
+  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+                             bool IsPairwiseForm, bool IsUnsigned) override {
+    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+  }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
       FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
@@ -1360,6 +1480,9 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+  int getInstructionLatency(const Instruction *I) override {
+    return Impl.getInstructionLatency(I);
+  }
 };

 template <typename T>
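Note on the new cost-query entry point: getInstructionCost() in the first hunk above is a thin dispatcher over the three cost models (reciprocal throughput, latency, code size). As a minimal illustration of how a client could call it, here is a hedged sketch; the helper name estimateBlockCodeSize and the surrounding setup are invented for this example and are not part of the patch:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Sum the size-model cost of every instruction in a basic block.
// TCK_CodeSize routes to getUserCost(); TCK_RecipThroughput and TCK_Latency
// would select the throughput and latency models instead.
static int estimateBlockCodeSize(const llvm::BasicBlock &BB,
                                 const llvm::TargetTransformInfo &TTI) {
  int Cost = 0;
  for (const llvm::Instruction &I : BB)
    Cost += TTI.getInstructionCost(&I,
                                   llvm::TargetTransformInfo::TCK_CodeSize);
  return Cost;
}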

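Similarly, the memcmp-expansion hook changes from a bool query to returning a MemCmpExpansionOptions pointer: a target opts in by returning a non-null options struct listing its load sizes in decreasing order, and returning nullptr keeps expansion disabled. The class name MyTargetTTIImpl and the chosen sizes below are hypothetical; the sketch only illustrates the shape of an override against the interface added in this diff:

#include "llvm/Analysis/TargetTransformInfo.h"

class MyTargetTTIImpl {
public:
  // Hypothetical target-side override: advertise 8/4/2/1-byte loads for
  // inline memcmp expansion, regardless of whether the result is only
  // compared against zero (IsZeroCmp).
  const llvm::TargetTransformInfo::MemCmpExpansionOptions *
  enableMemCmpExpansion(bool IsZeroCmp) const {
    static const auto Options = [] {
      llvm::TargetTransformInfo::MemCmpExpansionOptions O;
      O.LoadSizes.push_back(8); // sizes must be sorted in decreasing order
      O.LoadSizes.push_back(4);
      O.LoadSizes.push_back(2);
      O.LoadSizes.push_back(1);
      return O;
    }();
    return &Options;
  }
};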