| Field | Value | Date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Scalar/LoopStrengthReduce.cpp | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 271 |
1 file changed, 165 insertions, 106 deletions
```diff
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3638da118cb7..953854c8b7b7 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -65,7 +65,9 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -80,13 +82,18 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
@@ -98,7 +105,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
@@ -107,8 +113,8 @@
 #include <cstdint>
 #include <cstdlib>
 #include <iterator>
+#include <limits>
 #include <map>
-#include <tuple>
 #include <utility>
 
 using namespace llvm;
@@ -131,7 +137,7 @@ static cl::opt<bool> EnablePhiElim(
 
 // The flag adds instruction count to solutions cost comparision.
 static cl::opt<bool> InsnsCost(
-  "lsr-insns-cost", cl::Hidden, cl::init(false),
+  "lsr-insns-cost", cl::Hidden, cl::init(true),
   cl::desc("Add instruction count to a LSR cost model"));
 
 // Flag to choose how to narrow complex lsr solution
@@ -160,15 +166,14 @@ namespace {
 
 struct MemAccessTy {
   /// Used in situations where the accessed memory type is unknown.
-  static const unsigned UnknownAddressSpace = ~0u;
+  static const unsigned UnknownAddressSpace =
+      std::numeric_limits<unsigned>::max();
 
-  Type *MemTy;
-  unsigned AddrSpace;
+  Type *MemTy = nullptr;
+  unsigned AddrSpace = UnknownAddressSpace;
 
-  MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
-
-  MemAccessTy(Type *Ty, unsigned AS) :
-    MemTy(Ty), AddrSpace(AS) {}
+  MemAccessTy() = default;
+  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
 
   bool operator==(MemAccessTy Other) const {
     return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
@@ -195,11 +200,11 @@ public:
 
 } // end anonymous namespace
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void RegSortData::print(raw_ostream &OS) const {
   OS << "[NumUses=" << UsedByIndices.count() << ']';
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void RegSortData::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -209,7 +214,7 @@ namespace {
 
 /// Map register candidates to information about how they are used.
 class RegUseTracker {
-  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
 
   RegUsesTy RegUsesMap;
   SmallVector<const SCEV *, 16> RegSequence;
@@ -225,8 +230,9 @@ public:
 
   void clear();
 
-  typedef SmallVectorImpl<const SCEV *>::iterator iterator;
-  typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+  using iterator = SmallVectorImpl<const SCEV *>::iterator;
+  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
+
   iterator begin() { return RegSequence.begin(); }
   iterator end()   { return RegSequence.end(); }
   const_iterator begin() const { return RegSequence.begin(); }
@@ -299,16 +305,16 @@ namespace {
 /// satisfying a use. It may include broken-out immediates and scaled registers.
 struct Formula {
   /// Global base address used for complex addressing.
-  GlobalValue *BaseGV;
+  GlobalValue *BaseGV = nullptr;
 
   /// Base offset for complex addressing.
-  int64_t BaseOffset;
+  int64_t BaseOffset = 0;
 
   /// Whether any complex addressing has a base register.
-  bool HasBaseReg;
+  bool HasBaseReg = false;
 
   /// The scale of any complex addressing.
-  int64_t Scale;
+  int64_t Scale = 0;
 
   /// The list of "base" registers for this use. When this is non-empty. The
   /// canonical representation of a formula is
@@ -328,16 +334,14 @@ struct Formula {
 
   /// The 'scaled' register for this use. This should be non-null when Scale is
   /// not zero.
-  const SCEV *ScaledReg;
+  const SCEV *ScaledReg = nullptr;
 
   /// An additional constant offset which added near the use. This requires a
   /// temporary register, but the offset itself can live in an add immediate
   /// field rather than a register.
-  int64_t UnfoldedOffset;
+  int64_t UnfoldedOffset = 0;
 
-  Formula()
-      : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
-        ScaledReg(nullptr), UnfoldedOffset(0) {}
+  Formula() = default;
 
   void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
 
@@ -562,6 +566,7 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
   return false;
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void Formula::print(raw_ostream &OS) const {
   bool First = true;
   if (BaseGV) {
@@ -598,7 +603,6 @@ void Formula::print(raw_ostream &OS) const {
   }
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void Formula::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -773,7 +777,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
 
 /// Returns true if the specified instruction is using the specified value as an
 /// address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+static bool isAddressUse(const TargetTransformInfo &TTI,
+                         Instruction *Inst, Value *OperandVal) {
   bool isAddress = isa<LoadInst>(Inst);
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
     if (SI->getPointerOperand() == OperandVal)
@@ -782,11 +787,24 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
     // Addressing modes can also be folded into prefetches and a variety
     // of intrinsics.
     switch (II->getIntrinsicID()) {
-      default: break;
-      case Intrinsic::prefetch:
-        if (II->getArgOperand(0) == OperandVal)
+    case Intrinsic::memset:
+    case Intrinsic::prefetch:
+      if (II->getArgOperand(0) == OperandVal)
+        isAddress = true;
+      break;
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      if (II->getArgOperand(0) == OperandVal ||
+          II->getArgOperand(1) == OperandVal)
+        isAddress = true;
+      break;
+    default: {
+      MemIntrinsicInfo IntrInfo;
+      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
+        if (IntrInfo.PtrVal == OperandVal)
           isAddress = true;
-        break;
+      }
+    }
     }
   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
     if (RMW->getPointerOperand() == OperandVal)
@@ -799,7 +817,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
 }
 
 /// Return the type of the memory being accessed.
-static MemAccessTy getAccessType(const Instruction *Inst) {
+static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
+                                 Instruction *Inst) {
   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
     AccessTy.MemTy = SI->getOperand(0)->getType();
@@ -810,6 +829,21 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
     AccessTy.AddrSpace = RMW->getPointerAddressSpace();
   } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
     AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::prefetch:
+      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
+      break;
+    default: {
+      MemIntrinsicInfo IntrInfo;
+      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
+        AccessTy.AddrSpace
+          = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
+      }
+
+      break;
+    }
+    }
   }
 
   // All pointers have the same requirements, so canonicalize them to an
@@ -948,6 +982,7 @@ class LSRUse;
 /// accurate cost model.
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  const LSRUse &LU, const Formula &F);
+
 // Get the cost of the scaling factor used in F for LU.
 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
                                      const LSRUse &LU, const Formula &F,
@@ -1013,30 +1048,30 @@ private:
                            ScalarEvolution &SE, DominatorTree &DT,
                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
 };
-  
+
 /// An operand value in an instruction which is to be replaced with some
 /// equivalent, possibly strength-reduced, replacement.
 struct LSRFixup {
   /// The instruction which will be updated.
-  Instruction *UserInst;
+  Instruction *UserInst = nullptr;
 
   /// The operand of the instruction which will be replaced. The operand may be
   /// used more than once; every instance will be replaced.
-  Value *OperandValToReplace;
+  Value *OperandValToReplace = nullptr;
 
   /// If this user is to use the post-incremented value of an induction
-  /// variable, this variable is non-null and holds the loop associated with the
+  /// variable, this set is non-empty and holds the loops associated with the
   /// induction variable.
   PostIncLoopSet PostIncLoops;
 
   /// A constant offset to be added to the LSRUse expression.  This allows
   /// multiple fixups to share the same LSRUse with different offsets, for
   /// example in an unrolled loop.
-  int64_t Offset;
+  int64_t Offset = 0;
 
-  bool isUseFullyOutsideLoop(const Loop *L) const;
+  LSRFixup() = default;
 
-  LSRFixup();
+  bool isUseFullyOutsideLoop(const Loop *L) const;
 
   void print(raw_ostream &OS) const;
   void dump() const;
@@ -1086,7 +1121,7 @@ public:
     // TODO: Add a generic icmp too?
   };
 
-  typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
+  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
 
   KindType Kind;
   MemAccessTy AccessTy;
@@ -1095,25 +1130,25 @@ public:
   SmallVector<LSRFixup, 8> Fixups;
 
   /// Keep track of the min and max offsets of the fixups.
-  int64_t MinOffset;
-  int64_t MaxOffset;
+  int64_t MinOffset = std::numeric_limits<int64_t>::max();
+  int64_t MaxOffset = std::numeric_limits<int64_t>::min();
 
   /// This records whether all of the fixups using this LSRUse are outside of
   /// the loop, in which case some special-case heuristics may be used.
-  bool AllFixupsOutsideLoop;
+  bool AllFixupsOutsideLoop = true;
 
   /// RigidFormula is set to true to guarantee that this use will be associated
   /// with a single formula--the one that initially matched. Some SCEV
   /// expressions cannot be expanded. This allows LSR to consider the registers
   /// used by those expressions without the need to expand them later after
   /// changing the formula.
-  bool RigidFormula;
+  bool RigidFormula = false;
 
   /// This records the widest use type for any fixup using this
   /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
   /// fixup widths to be equivalent, because the narrower one may be relying on
   /// the implicit truncation to truncate away bogus bits.
-  Type *WidestFixupType;
+  Type *WidestFixupType = nullptr;
 
   /// A list of ways to build a value that can satisfy this user.  After the
   /// list is populated, one of these is selected heuristically and used to
@@ -1123,10 +1158,7 @@ public:
   /// The set of register candidates used by all formulae in this LSRUse.
   SmallPtrSet<const SCEV *, 4> Regs;
 
-  LSRUse(KindType K, MemAccessTy AT)
-      : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
-        AllFixupsOutsideLoop(true), RigidFormula(false),
-        WidestFixupType(nullptr) {}
+  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
 
   LSRFixup &getNewFixup() {
     Fixups.push_back(LSRFixup());
@@ -1140,7 +1172,7 @@ public:
     if (f.Offset < MinOffset)
       MinOffset = f.Offset;
   }
-  
+
   bool HasFormulaWithSameRegs(const Formula &F) const;
   float getNotSelectedProbability(const SCEV *Reg) const;
   bool InsertFormula(const Formula &F, const Loop &L);
@@ -1153,6 +1185,12 @@ public:
 
 } // end anonymous namespace
 
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
+                                 GlobalValue *BaseGV, int64_t BaseOffset,
+                                 bool HasBaseReg, int64_t Scale,
+                                 Instruction *Fixup = nullptr);
+
 /// Tally up interesting quantities from the given register.
 void Cost::RateRegister(const SCEV *Reg,
                         SmallPtrSetImpl<const SCEV *> &Regs,
@@ -1280,8 +1318,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
     // Check with target if this offset with this instruction is
     // specifically not supported.
-    if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
-        !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
+    if (LU.Kind == LSRUse::Address && Offset != 0 &&
+        !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                              Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
       C.NumBaseAdds++;
   }
 
@@ -1325,14 +1364,14 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
 /// Set this cost to a losing value.
 void Cost::Lose() {
-  C.Insns = ~0u;
-  C.NumRegs = ~0u;
-  C.AddRecCost = ~0u;
-  C.NumIVMuls = ~0u;
-  C.NumBaseAdds = ~0u;
-  C.ImmCost = ~0u;
-  C.SetupCost = ~0u;
-  C.ScaleCost = ~0u;
+  C.Insns = std::numeric_limits<unsigned>::max();
+  C.NumRegs = std::numeric_limits<unsigned>::max();
+  C.AddRecCost = std::numeric_limits<unsigned>::max();
+  C.NumIVMuls = std::numeric_limits<unsigned>::max();
+  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
+  C.ImmCost = std::numeric_limits<unsigned>::max();
+  C.SetupCost = std::numeric_limits<unsigned>::max();
+  C.ScaleCost = std::numeric_limits<unsigned>::max();
 }
 
 /// Choose the lower cost.
@@ -1343,6 +1382,7 @@ bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
   return TTI.isLSRCostLess(C, Other.C);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void Cost::print(raw_ostream &OS) const {
   if (InsnsCost)
     OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
@@ -1363,16 +1403,11 @@ void Cost::print(raw_ostream &OS) const {
     OS << ", plus " << C.SetupCost << " setup cost";
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void Cost::dump() const {
   print(errs()); errs() << '\n';
 }
 #endif
 
-LSRFixup::LSRFixup()
-  : UserInst(nullptr), OperandValToReplace(nullptr),
-    Offset(0) {}
-
 /// Test whether this fixup always uses its value outside of the given loop.
 bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
   // PHI nodes use their value in their incoming blocks.
@@ -1387,6 +1422,7 @@ bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
   return !L->contains(UserInst);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LSRFixup::print(raw_ostream &OS) const {
   OS << "UserInst=";
   // Store is common and interesting enough to be worth special-casing.
@@ -1410,7 +1446,6 @@ void LSRFixup::print(raw_ostream &OS) const {
     OS << ", Offset=" << Offset;
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void LSRFixup::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -1493,6 +1528,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
       RegUses.dropRegister(S, LUIdx);
}
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LSRUse::print(raw_ostream &OS) const {
   OS << "LSR Use: Kind=";
   switch (Kind) {
@@ -1526,7 +1562,6 @@ void LSRUse::print(raw_ostream &OS) const {
     OS << ", widest fixup type: " << *WidestFixupType;
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void LSRUse::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -1535,11 +1570,12 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, int64_t BaseOffset,
-                                 bool HasBaseReg, int64_t Scale) {
+                                 bool HasBaseReg, int64_t Scale,
+                                 Instruction *Fixup/*= nullptr*/) {
   switch (Kind) {
   case LSRUse::Address:
     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
-                                     HasBaseReg, Scale, AccessTy.AddrSpace);
+                                     HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
 
   case LSRUse::ICmpZero:
     // There's not even a target hook for querying whether it would be legal to
@@ -1564,7 +1600,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
       // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
       // Offs is the ICmp immediate.
       if (Scale == 0)
-        // The cast does the right thing with INT64_MIN.
+        // The cast does the right thing with
+        // std::numeric_limits<int64_t>::min().
         BaseOffset = -(uint64_t)BaseOffset;
       return TTI.isLegalICmpImmediate(BaseOffset);
     }
@@ -1645,6 +1682,16 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
 
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  const LSRUse &LU, const Formula &F) {
+  // Target may want to look at the user instructions.
+  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+    for (const LSRFixup &Fixup : LU.Fixups)
+      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
+                                F.Scale, Fixup.UserInst))
+        return false;
+    return true;
+  }
+
   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
                               F.Scale);
@@ -1752,22 +1799,21 @@ struct IVInc {
   Value* IVOperand;
   const SCEV *IncExpr;
 
-  IVInc(Instruction *U, Value *O, const SCEV *E):
-    UserInst(U), IVOperand(O), IncExpr(E) {}
+  IVInc(Instruction *U, Value *O, const SCEV *E)
+      : UserInst(U), IVOperand(O), IncExpr(E) {}
 };
 
 // The list of IV increments in program order.  We typically add the head of a
 // chain without finding subsequent links.
 struct IVChain {
-  SmallVector<IVInc,1> Incs;
-  const SCEV *ExprBase;
-
-  IVChain() : ExprBase(nullptr) {}
+  SmallVector<IVInc, 1> Incs;
+  const SCEV *ExprBase = nullptr;
 
+  IVChain() = default;
   IVChain(const IVInc &Head, const SCEV *Base)
-    : Incs(1, Head), ExprBase(Base) {}
+      : Incs(1, Head), ExprBase(Base) {}
 
-  typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
 
   // Return the first increment in the chain.
   const_iterator begin() const {
@@ -1809,13 +1855,13 @@ class LSRInstance {
   LoopInfo &LI;
   const TargetTransformInfo &TTI;
   Loop *const L;
-  bool Changed;
+  bool Changed = false;
 
   /// This is the insert position that the current loop's induction variable
   /// increment should be placed. In simple loops, this is the latch block's
   /// terminator. But in more complicated cases, this is a position which will
   /// dominate all the in-loop post-increment users.
-  Instruction *IVIncInsertPos;
+  Instruction *IVIncInsertPos = nullptr;
 
   /// Interesting factors between use strides.
   ///
@@ -1861,7 +1907,7 @@ class LSRInstance {
   void CollectFixupsAndInitialFormulae();
 
   // Support for sharing of LSRUses between LSRFixups.
-  typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
+  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
   UseMapTy UseMap;
 
   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -2002,6 +2048,14 @@ void LSRInstance::OptimizeShadowIV() {
     if (!PH) continue;
     if (PH->getNumIncomingValues() != 2) continue;
 
+    // If the calculation in integers overflows, the result in FP type will
+    // differ. So we only can do this transformation if we are guaranteed to not
+    // deal with overflowing values
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
+    if (!AR) continue;
+    if (IsSigned && !AR->hasNoSignedWrap()) continue;
+    if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
+
     Type *SrcTy = PH->getType();
     int Mantissa = DestTy->getFPMantissaWidth();
     if (Mantissa == -1) continue;
@@ -2094,7 +2148,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
 /// unfortunately this can come up even for loops where the user didn't use
 /// a C do-while loop. For example, seemingly well-behaved top-test loops
 /// will commonly be lowered like this:
-//
+///
 ///   if (n > 0) {
 ///     i = 0;
 ///     do {
@@ -2128,7 +2182,6 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
 /// This function solves this problem by detecting this type of loop and
 /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
 /// the instructions for the maximum computation.
-///
 ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
   // Check that the loop matches the pattern we're looking for.
   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
@@ -2268,7 +2321,6 @@ LSRInstance::OptimizeLoopTermCond() {
 
   // Otherwise treat this as a rotated loop.
   for (BasicBlock *ExitingBlock : ExitingBlocks) {
-
     // Get the terminating condition for the loop if possible.  If we
     // can, we want to change it to use a post-incremented version of its
     // induction variable, to allow coalescing the live ranges for the IV into
@@ -2333,7 +2385,7 @@ LSRInstance::OptimizeLoopTermCond() {
                 C->getValue().isMinSignedValue())
               goto decline_post_inc;
             // Check for possible scaled-address reuse.
-            MemAccessTy AccessTy = getAccessType(UI->getUser());
+            MemAccessTy AccessTy = getAccessType(TTI, UI->getUser());
             int64_t Scale = C->getSExtValue();
             if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                           /*BaseOffset=*/0,
@@ -2941,7 +2993,7 @@ void LSRInstance::CollectChains() {
       // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
       if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
-        continue;
+          continue;
 
       // Remove this instruction from any NearUsers set it may be in.
       for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
@@ -3003,13 +3055,13 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
 static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                              Value *Operand, const TargetTransformInfo &TTI) {
   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
-  if (!IncConst || !isAddressUse(UserInst, Operand))
+  if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
     return false;
 
   if (IncConst->getAPInt().getMinSignedBits() > 64)
     return false;
 
-  MemAccessTy AccessTy = getAccessType(UserInst);
+  MemAccessTy AccessTy = getAccessType(TTI, UserInst);
   int64_t IncOffset = IncConst->getValue()->getSExtValue();
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
                         IncOffset, /*HaseBaseReg=*/false))
@@ -3136,14 +3188,14 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LSRUse::KindType Kind = LSRUse::Basic;
     MemAccessTy AccessTy;
-    if (isAddressUse(UserInst, U.getOperandValToReplace())) {
+    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
       Kind = LSRUse::Address;
-      AccessTy = getAccessType(UserInst);
+      AccessTy = getAccessType(TTI, UserInst);
     }
 
     const SCEV *S = IU.getExpr(U);
     PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
-    
+
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
     // (N - i == 0), and this allows (N - i) to be the expression that we work
     // with rather than just N or i, so we can consider the register
@@ -3432,7 +3484,6 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
                                                      JE = AddOps.end();
        J != JE; ++J) {
-
     // Loop-variant "unknown" values are uninteresting; we won't be able to
     // do anything meaningful with them.
     if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
@@ -3654,12 +3705,18 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
   // Don't do this if there is more than one offset.
   if (LU.MinOffset != LU.MaxOffset) return;
 
+  // Check if transformation is valid. It is illegal to multiply pointer.
+  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
+    return;
+  for (const SCEV *BaseReg : Base.BaseRegs)
+    if (BaseReg->getType()->isPointerTy())
+      return;
 
   assert(!Base.BaseGV && "ICmpZero use is not legal!");
 
   // Check each interesting stride.
   for (int64_t Factor : Factors) {
     // Check that the multiplication doesn't overflow.
-    if (Base.BaseOffset == INT64_MIN && Factor == -1)
+    if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
       continue;
     int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
     if (NewBaseOffset / Factor != Base.BaseOffset)
@@ -3671,7 +3728,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
 
     // Check that multiplying with the use offset doesn't overflow.
     int64_t Offset = LU.MinOffset;
-    if (Offset == INT64_MIN && Factor == -1)
+    if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
       continue;
     Offset = (uint64_t)Offset * Factor;
     if (Offset / Factor != LU.MinOffset)
@@ -3709,7 +3766,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
 
     // Check that multiplying with the unfolded offset doesn't overflow.
     if (F.UnfoldedOffset != 0) {
-      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+      if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
+          Factor == -1)
         continue;
       F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
       if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
@@ -3833,7 +3891,7 @@ struct WorkItem {
   const SCEV *OrigReg;
 
   WorkItem(size_t LI, int64_t I, const SCEV *R)
-    : LUIdx(LI), Imm(I), OrigReg(R) {}
+      : LUIdx(LI), Imm(I), OrigReg(R) {}
 
   void print(raw_ostream &OS) const;
   void dump() const;
@@ -3841,12 +3899,12 @@ struct WorkItem {
 
 } // end anonymous namespace
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void WorkItem::print(raw_ostream &OS) const {
   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
      << " , add offset " << Imm;
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void WorkItem::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -3856,7 +3914,8 @@ LLVM_DUMP_METHOD void WorkItem::dump() const {
 /// opportunities between them.
 void LSRInstance::GenerateCrossUseConstantOffsets() {
   // Group the registers by their value without any added constant offset.
-  typedef std::map<int64_t, const SCEV *> ImmMapTy;
+  using ImmMapTy = std::map<int64_t, const SCEV *>;
+
   DenseMap<const SCEV *, ImmMapTy> Map;
   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
   SmallVector<const SCEV *, 8> Sequence;
@@ -4060,8 +4119,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 
   // Collect the best formula for each unique set of shared registers. This
   // is reset for each use.
-  typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
-    BestFormulaeTy;
+  using BestFormulaeTy =
+      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
+
   BestFormulaeTy BestFormulae;
 
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
@@ -4148,7 +4208,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 }
 
 // This is a rough guess that seems to work fairly well.
-static const size_t ComplexityLimit = UINT16_MAX;
+static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
 
 /// Estimate the worst-case number of solutions the solver might have to
 /// consider. It almost never considers this many solutions because it prune the
@@ -4267,7 +4327,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
         LUThatHas->pushFixup(Fixup);
         DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
       }
-      
+
       // Delete formulae from the new use which are no longer legal.
       bool Any = false;
       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -4332,7 +4392,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
                   "from the Formulae with the same Scale and ScaledReg.\n");
 
   // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
-  typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy;
+  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
+
   BestFormulaeTy BestFormulae;
 #ifndef NDEBUG
   bool ChangedFormulae = false;
@@ -4454,7 +4515,6 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
 /// Use3:
 ///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
 ///  reg(c) + reg({b,+,1})          1 + 2/3
-
 void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
     return;
@@ -4549,7 +4609,6 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
   print_uses(dbgs()));
 }
 
-
 /// Pick a register which seems likely to be profitable, and then in any use
 /// which has any reference to that register, delete all formulae which do not
 /// reference that register.
@@ -5196,8 +5255,7 @@ void LSRInstance::ImplementSolution(
 LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                          DominatorTree &DT, LoopInfo &LI,
                          const TargetTransformInfo &TTI)
-    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
-      IVIncInsertPos(nullptr) {
+    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
   // If LoopSimplify form is not available, stay out of trouble.
   if (!L->isLoopSimplifyForm())
     return;
@@ -5302,6 +5360,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   ImplementSolution(Solution);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
   if (Factors.empty() && Types.empty()) return;
 
@@ -5352,7 +5411,6 @@ void LSRInstance::print(raw_ostream &OS) const {
   print_uses(OS);
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void LSRInstance::dump() const {
   print(errs()); errs() << '\n';
 }
@@ -5448,6 +5506,7 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
 }
 
 char LoopStrengthReduce::ID = 0;
+
 INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                       "Loop Strength Reduction", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
```
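A note on the overflow checks touched in `GenerateICmpZeroScales` above: the patch swaps the `INT64_MIN` macro for `std::numeric_limits<int64_t>::min()` but keeps the same multiply-then-divide-back pattern. Below is a minimal standalone sketch of that pattern; `scaleOffset` is a hypothetical helper written for illustration, not code from the patch.

```cpp
#include <cstdint>
#include <limits>

// Returns false when Offset * Factor is not representable in int64_t.
// Factor is assumed non-zero, as LSR's stride factors always are.
static bool scaleOffset(int64_t Offset, int64_t Factor, int64_t &Result) {
  // -INT64_MIN is not representable, and the divide-back check below
  // would itself overflow on this pair, so reject it up front.
  if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
    return false;
  // Multiply in uint64_t so overflow wraps instead of being undefined
  // behavior, then divide back: if the round trip changes the value,
  // the multiplication overflowed.
  int64_t Scaled = (uint64_t)Offset * Factor;
  if (Scaled / Factor != Offset)
    return false;
  Result = Scaled;
  return true;
}
```

The unsigned cast is the load-bearing detail: a signed `int64_t` multiplication that overflows is undefined behavior, while the `uint64_t` product wraps and can then be validated by the division.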

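The new early-exit added to `OptimizeShadowIV` only permits the integer-to-floating-point "shadow" IV rewrite when SCEV proves the add-recurrence cannot wrap (NSW for the signed case, NUW for the unsigned one). Here is a small standalone illustration of the divergence the guard prevents, using a deliberately narrow `int8_t` IV; this is a toy written for this note, not LLVM code.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int8_t I = 120;   // narrow integer IV, close to its maximum of 127
  double D = 120.0; // "shadow" FP IV stepping in lockstep
  for (int Step = 0; Step < 10; ++Step) {
    I += 1;   // wraps from 127 to -128 in two's-complement practice
    D += 1.0; // keeps counting up: 121, 122, ..., 130
  }
  // Prints "int IV = -126, shadow FP IV = 130": once the integer IV
  // wraps, the FP shadow no longer equals (double)I, so the rewrite
  // is only sound when wrapping is provably impossible.
  std::printf("int IV = %d, shadow FP IV = %.0f\n", I, D);
  return 0;
}
```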