author     Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Scalar/LoopStrengthReduce.cpp
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 271
1 file changed, 165 insertions(+), 106 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3638da118cb7..953854c8b7b7 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -65,7 +65,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -80,13 +82,18 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -98,7 +105,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -107,8 +113,8 @@
#include <cstdint>
#include <cstdlib>
#include <iterator>
+#include <limits>
#include <map>
-#include <tuple>
#include <utility>
using namespace llvm;
@@ -131,7 +137,7 @@ static cl::opt<bool> EnablePhiElim(
// The flag adds instruction count to the solution cost comparison.
static cl::opt<bool> InsnsCost(
- "lsr-insns-cost", cl::Hidden, cl::init(false),
+ "lsr-insns-cost", cl::Hidden, cl::init(true),
cl::desc("Add instruction count to a LSR cost model"));
// Flag to choose how to narrow complex lsr solution
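
This hunk flips the default of the hidden lsr-insns-cost flag from false to true, so instruction count now takes part in the LSR cost comparison by default. As a reminder of the cl::opt pattern involved, a minimal sketch with a hypothetical flag name:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // cl::Hidden keeps the flag out of -help; cl::init sets the default,
    // still overridable with e.g. -example-tuning-flag=false.
    static cl::opt<bool> ExampleTuningFlag(
        "example-tuning-flag", cl::Hidden, cl::init(true),
        cl::desc("Example of a hidden boolean tuning flag"));

    static unsigned exampleCost(unsigned Regs, unsigned Insns) {
      // The option object converts to bool at use sites.
      return ExampleTuningFlag ? Regs + Insns : Regs;
    }
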
@@ -160,15 +166,14 @@ namespace {
struct MemAccessTy {
/// Used in situations where the accessed memory type is unknown.
- static const unsigned UnknownAddressSpace = ~0u;
+ static const unsigned UnknownAddressSpace =
+ std::numeric_limits<unsigned>::max();
- Type *MemTy;
- unsigned AddrSpace;
+ Type *MemTy = nullptr;
+ unsigned AddrSpace = UnknownAddressSpace;
- MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
-
- MemAccessTy(Type *Ty, unsigned AS) :
- MemTy(Ty), AddrSpace(AS) {}
+ MemAccessTy() = default;
+ MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
bool operator==(MemAccessTy Other) const {
return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
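
A large share of this patch is mechanical C++11 modernization: constructors that only set defaults are replaced by in-class default member initializers plus "= default", exactly as MemAccessTy shows above. A minimal sketch of the idiom, with hypothetical names:

    #include <limits>

    // Before: defaults duplicated inside a user-written constructor.
    struct OldStyle {
      int *MemTy;
      unsigned AddrSpace;
      OldStyle() : MemTy(nullptr), AddrSpace(~0u) {}
    };

    // After: defaults sit on the members; the defaulted constructor
    // picks them up, and other constructors can still override them.
    struct NewStyle {
      int *MemTy = nullptr;
      unsigned AddrSpace = std::numeric_limits<unsigned>::max();
      NewStyle() = default;
      NewStyle(int *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
    };
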
@@ -195,11 +200,11 @@ public:
} // end anonymous namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
@@ -209,7 +214,7 @@ namespace {
/// Map register candidates to information about how they are used.
class RegUseTracker {
- typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
+ using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
RegUsesTy RegUsesMap;
SmallVector<const SCEV *, 16> RegSequence;
@@ -225,8 +230,9 @@ public:
void clear();
- typedef SmallVectorImpl<const SCEV *>::iterator iterator;
- typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ using iterator = SmallVectorImpl<const SCEV *>::iterator;
+ using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
+
iterator begin() { return RegSequence.begin(); }
iterator end() { return RegSequence.end(); }
const_iterator begin() const { return RegSequence.begin(); }
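
Likewise, the typedef declarations become using aliases; both spellings declare the same type, the alias form just reads left to right. A self-contained check, using standard-library types instead of LLVM's:

    #include <map>
    #include <string>
    #include <type_traits>

    typedef std::map<std::string, int> OldSpelling;
    using NewSpelling = std::map<std::string, int>;
    static_assert(std::is_same<OldSpelling, NewSpelling>::value,
                  "the two alias spellings name the same type");
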
@@ -299,16 +305,16 @@ namespace {
/// satisfying a use. It may include broken-out immediates and scaled registers.
struct Formula {
/// Global base address used for complex addressing.
- GlobalValue *BaseGV;
+ GlobalValue *BaseGV = nullptr;
/// Base offset for complex addressing.
- int64_t BaseOffset;
+ int64_t BaseOffset = 0;
/// Whether any complex addressing has a base register.
- bool HasBaseReg;
+ bool HasBaseReg = false;
/// The scale of any complex addressing.
- int64_t Scale;
+ int64_t Scale = 0;
/// The list of "base" registers for this use. When this is non-empty, the
/// canonical representation of a formula is
@@ -328,16 +334,14 @@ struct Formula {
/// The 'scaled' register for this use. This should be non-null when Scale is
/// not zero.
- const SCEV *ScaledReg;
+ const SCEV *ScaledReg = nullptr;
/// An additional constant offset which is added near the use. This requires a
/// temporary register, but the offset itself can live in an add immediate
/// field rather than a register.
- int64_t UnfoldedOffset;
+ int64_t UnfoldedOffset = 0;
- Formula()
- : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
- ScaledReg(nullptr), UnfoldedOffset(0) {}
+ Formula() = default;
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
@@ -562,6 +566,7 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
return false;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
bool First = true;
if (BaseGV) {
@@ -598,7 +603,6 @@ void Formula::print(raw_ostream &OS) const {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Formula::dump() const {
print(errs()); errs() << '\n';
}
@@ -773,7 +777,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
/// Returns true if the specified instruction is using the specified value as an
/// address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+static bool isAddressUse(const TargetTransformInfo &TTI,
+ Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getPointerOperand() == OperandVal)
@@ -782,11 +787,24 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::prefetch:
- if (II->getArgOperand(0) == OperandVal)
+ case Intrinsic::memset:
+ case Intrinsic::prefetch:
+ if (II->getArgOperand(0) == OperandVal)
+ isAddress = true;
+ break;
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ if (II->getArgOperand(0) == OperandVal ||
+ II->getArgOperand(1) == OperandVal)
+ isAddress = true;
+ break;
+ default: {
+ MemIntrinsicInfo IntrInfo;
+ if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
+ if (IntrInfo.PtrVal == OperandVal)
isAddress = true;
- break;
+ }
+ }
}
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
if (RMW->getPointerOperand() == OperandVal)
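
The rewritten switch recognizes memset, memcpy, memmove and prefetch directly, and in the default case asks the target whether an otherwise opaque intrinsic addresses memory. A condensed sketch of that query; the helper name is hypothetical, while getTgtMemIntrinsic and MemIntrinsicInfo are the TTI interfaces used above:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // True if the target reports that II accesses memory through Ptr.
    static bool isTargetMemIntrinsicAddress(const TargetTransformInfo &TTI,
                                            IntrinsicInst *II, Value *Ptr) {
      MemIntrinsicInfo IntrInfo;
      return TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal == Ptr;
    }
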
@@ -799,7 +817,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
}
/// Return the type of the memory being accessed.
-static MemAccessTy getAccessType(const Instruction *Inst) {
+static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
+ Instruction *Inst) {
MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
AccessTy.MemTy = SI->getOperand(0)->getType();
@@ -810,6 +829,21 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
AccessTy.AddrSpace = RMW->getPointerAddressSpace();
} else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::prefetch:
+ AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
+ break;
+ default: {
+ MemIntrinsicInfo IntrInfo;
+ if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
+ AccessTy.AddrSpace
+ = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
+ }
+
+ break;
+ }
+ }
}
// All pointers have the same requirements, so canonicalize them to an
@@ -948,6 +982,7 @@ class LSRUse;
/// accurate cost model.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
+
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F,
@@ -1013,30 +1048,30 @@ private:
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
-
+
/// An operand value in an instruction which is to be replaced with some
/// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
/// The instruction which will be updated.
- Instruction *UserInst;
+ Instruction *UserInst = nullptr;
/// The operand of the instruction which will be replaced. The operand may be
/// used more than once; every instance will be replaced.
- Value *OperandValToReplace;
+ Value *OperandValToReplace = nullptr;
/// If this user is to use the post-incremented value of an induction
- /// variable, this variable is non-null and holds the loop associated with the
+ /// variable, this set is non-empty and holds the loops associated with the
/// induction variable.
PostIncLoopSet PostIncLoops;
/// A constant offset to be added to the LSRUse expression. This allows
/// multiple fixups to share the same LSRUse with different offsets, for
/// example in an unrolled loop.
- int64_t Offset;
+ int64_t Offset = 0;
- bool isUseFullyOutsideLoop(const Loop *L) const;
+ LSRFixup() = default;
- LSRFixup();
+ bool isUseFullyOutsideLoop(const Loop *L) const;
void print(raw_ostream &OS) const;
void dump() const;
@@ -1086,7 +1121,7 @@ public:
// TODO: Add a generic icmp too?
};
- typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
+ using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
KindType Kind;
MemAccessTy AccessTy;
@@ -1095,25 +1130,25 @@ public:
SmallVector<LSRFixup, 8> Fixups;
/// Keep track of the min and max offsets of the fixups.
- int64_t MinOffset;
- int64_t MaxOffset;
+ int64_t MinOffset = std::numeric_limits<int64_t>::max();
+ int64_t MaxOffset = std::numeric_limits<int64_t>::min();
/// This records whether all of the fixups using this LSRUse are outside of
/// the loop, in which case some special-case heuristics may be used.
- bool AllFixupsOutsideLoop;
+ bool AllFixupsOutsideLoop = true;
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
/// used by those expressions without the need to expand them later after
/// changing the formula.
- bool RigidFormula;
+ bool RigidFormula = false;
/// This records the widest use type for any fixup using this
/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
/// fixup widths to be equivalent, because the narrower one may be relying on
/// the implicit truncation to truncate away bogus bits.
- Type *WidestFixupType;
+ Type *WidestFixupType = nullptr;
/// A list of ways to build a value that can satisfy this user. After the
/// list is populated, one of these is selected heuristically and used to
@@ -1123,10 +1158,7 @@ public:
/// The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, MemAccessTy AT)
- : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true), RigidFormula(false),
- WidestFixupType(nullptr) {}
+ LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
LSRFixup &getNewFixup() {
Fixups.push_back(LSRFixup());
@@ -1140,7 +1172,7 @@ public:
if (f.Offset < MinOffset)
MinOffset = f.Offset;
}
-
+
bool HasFormulaWithSameRegs(const Formula &F) const;
float getNotSelectedProbability(const SCEV *Reg) const;
bool InsertFormula(const Formula &F, const Loop &L);
@@ -1153,6 +1185,12 @@ public:
} // end anonymous namespace
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale,
+ Instruction *Fixup = nullptr);
+
/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
@@ -1280,8 +1318,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Check with target if this offset with this instruction is
// specifically not supported.
- if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
- !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
+ if (LU.Kind == LSRUse::Address && Offset != 0 &&
+ !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
C.NumBaseAdds++;
}
@@ -1325,14 +1364,14 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
/// Set this cost to a losing value.
void Cost::Lose() {
- C.Insns = ~0u;
- C.NumRegs = ~0u;
- C.AddRecCost = ~0u;
- C.NumIVMuls = ~0u;
- C.NumBaseAdds = ~0u;
- C.ImmCost = ~0u;
- C.SetupCost = ~0u;
- C.ScaleCost = ~0u;
+ C.Insns = std::numeric_limits<unsigned>::max();
+ C.NumRegs = std::numeric_limits<unsigned>::max();
+ C.AddRecCost = std::numeric_limits<unsigned>::max();
+ C.NumIVMuls = std::numeric_limits<unsigned>::max();
+ C.NumBaseAdds = std::numeric_limits<unsigned>::max();
+ C.ImmCost = std::numeric_limits<unsigned>::max();
+ C.SetupCost = std::numeric_limits<unsigned>::max();
+ C.ScaleCost = std::numeric_limits<unsigned>::max();
}
/// Choose the lower cost.
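
The ~0u to std::numeric_limits<unsigned>::max() rewrites in Cost::Lose() are value-preserving; a one-line check for the skeptical:

    #include <limits>
    static_assert(~0u == std::numeric_limits<unsigned>::max(),
                  "all-ones unsigned is the maximum unsigned value");
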
@@ -1343,6 +1382,7 @@ bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
return TTI.isLSRCostLess(C, Other.C);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::print(raw_ostream &OS) const {
if (InsnsCost)
OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
@@ -1363,16 +1403,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << C.SetupCost << " setup cost";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Cost::dump() const {
print(errs()); errs() << '\n';
}
#endif
-LSRFixup::LSRFixup()
- : UserInst(nullptr), OperandValToReplace(nullptr),
- Offset(0) {}
-
/// Test whether this fixup always uses its value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
@@ -1387,6 +1422,7 @@ bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
return !L->contains(UserInst);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing.
@@ -1410,7 +1446,6 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
@@ -1493,6 +1528,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
RegUses.dropRegister(S, LUIdx);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
OS << "LSR Use: Kind=";
switch (Kind) {
@@ -1526,7 +1562,6 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
@@ -1535,11 +1570,12 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale,
+ Instruction *Fixup/*= nullptr*/) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
- HasBaseReg, Scale, AccessTy.AddrSpace);
+ HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
@@ -1564,7 +1600,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
if (Scale == 0)
- // The cast does the right thing with INT64_MIN.
+ // The cast does the right thing with
+ // std::numeric_limits<int64_t>::min().
BaseOffset = -(uint64_t)BaseOffset;
return TTI.isLegalICmpImmediate(BaseOffset);
}
@@ -1645,6 +1682,16 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
+ // Target may want to look at the user instructions.
+ if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+ for (const LSRFixup &Fixup : LU.Fixups)
+ if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
+ F.Scale, Fixup.UserInst))
+ return false;
+ return true;
+ }
+
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
F.Scale);
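
This hunk is the behavioural heart of the patch: when a target opts in via TTI.LSRWithInstrQueries(), address-mode legality is evaluated per fixup, with the user instruction passed through the new trailing parameter of isLegalAddressingMode. A sketch of one such query, assuming the file-local MemAccessTy struct defined earlier:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"

    // The trailing Instruction* is the new optional context parameter;
    // passing nullptr preserves the old instruction-blind behaviour.
    static bool fixupIsFoldable(const llvm::TargetTransformInfo &TTI,
                                MemAccessTy AccessTy, int64_t Offset,
                                llvm::Instruction *UserInst) {
      return TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                       Offset, /*HasBaseReg=*/true,
                                       /*Scale=*/1, AccessTy.AddrSpace,
                                       UserInst);
    }
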
@@ -1752,22 +1799,21 @@ struct IVInc {
Value* IVOperand;
const SCEV *IncExpr;
- IVInc(Instruction *U, Value *O, const SCEV *E):
- UserInst(U), IVOperand(O), IncExpr(E) {}
+ IVInc(Instruction *U, Value *O, const SCEV *E)
+ : UserInst(U), IVOperand(O), IncExpr(E) {}
};
// The list of IV increments in program order. We typically add the head of a
// chain without finding subsequent links.
struct IVChain {
- SmallVector<IVInc,1> Incs;
- const SCEV *ExprBase;
-
- IVChain() : ExprBase(nullptr) {}
+ SmallVector<IVInc, 1> Incs;
+ const SCEV *ExprBase = nullptr;
+ IVChain() = default;
IVChain(const IVInc &Head, const SCEV *Base)
- : Incs(1, Head), ExprBase(Base) {}
+ : Incs(1, Head), ExprBase(Base) {}
- typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+ using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
// Return the first increment in the chain.
const_iterator begin() const {
@@ -1809,13 +1855,13 @@ class LSRInstance {
LoopInfo &LI;
const TargetTransformInfo &TTI;
Loop *const L;
- bool Changed;
+ bool Changed = false;
/// This is the insert position that the current loop's induction variable
/// increment should be placed. In simple loops, this is the latch block's
/// terminator. But in more complicated cases, this is a position which will
/// dominate all the in-loop post-increment users.
- Instruction *IVIncInsertPos;
+ Instruction *IVIncInsertPos = nullptr;
/// Interesting factors between use strides.
///
@@ -1861,7 +1907,7 @@ class LSRInstance {
void CollectFixupsAndInitialFormulae();
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
+ using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -2002,6 +2048,14 @@ void LSRInstance::OptimizeShadowIV() {
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
+ // If the calculation in integers overflows, the result in FP type will
+ // differ. So we can only do this transformation if we are guaranteed not
+ // to deal with overflowing values.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
+ if (!AR) continue;
+ if (IsSigned && !AR->hasNoSignedWrap()) continue;
+ if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
+
Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
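
The new nsw/nuw guard matters because an integer IV that wraps and its floating-point shadow diverge from the wrap point on; a tiny standalone illustration of the hazard, with values chosen only for demonstration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t IntIV = 126;  // narrow integer induction variable
      float FpIV = 126.0f; // its floating-point "shadow"
      for (int Step = 0; Step < 4; ++Step) {
        IntIV = int8_t(IntIV + 1); // wraps past 127 to -128
        FpIV += 1.0f;              // keeps counting: 127..130
      }
      std::printf("%d vs %.0f\n", IntIV, FpIV); // prints: -126 vs 130
    }
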
@@ -2094,7 +2148,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
-//
+///
/// if (n > 0) {
/// i = 0;
/// do {
@@ -2128,7 +2182,6 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
-///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
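
In C-level terms, the round trip this comment describes looks roughly as follows; a sketch of the shapes involved, not the pass's actual IR:

    // What the user wrote:          After indvars canonicalization:
    //
    //   for (i = 0; i < n; ++i)       if (n > 0) {
    //     p[i] = 0.0;                   i = 0; max = (n < 1 ? 1 : n);
    //                                   do { p[i] = 0.0;
    //                                   } while (++i != max);
    //                                 }
    //
    // Under the "n > 0" guard, max == n, so OptimizeMax can rewrite the
    // exit test "++i != max" back to "++i < n" and delete the max
    // computation.
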
@@ -2268,7 +2321,6 @@ LSRInstance::OptimizeLoopTermCond() {
// Otherwise treat this as a rotated loop.
for (BasicBlock *ExitingBlock : ExitingBlocks) {
-
// Get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
@@ -2333,7 +2385,7 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
- MemAccessTy AccessTy = getAccessType(UI->getUser());
+ MemAccessTy AccessTy = getAccessType(TTI, UI->getUser());
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
/*BaseOffset=*/0,
@@ -2941,7 +2993,7 @@ void LSRInstance::CollectChains() {
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
- continue;
+ continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
@@ -3003,13 +3055,13 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
- if (!IncConst || !isAddressUse(UserInst, Operand))
+ if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
return false;
if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
- MemAccessTy AccessTy = getAccessType(UserInst);
+ MemAccessTy AccessTy = getAccessType(TTI, UserInst);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
IncOffset, /*HasBaseReg=*/false))
@@ -3136,14 +3188,14 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LSRUse::KindType Kind = LSRUse::Basic;
MemAccessTy AccessTy;
- if (isAddressUse(UserInst, U.getOperandValToReplace())) {
+ if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
Kind = LSRUse::Address;
- AccessTy = getAccessType(UserInst);
+ AccessTy = getAccessType(TTI, UserInst);
}
const SCEV *S = IU.getExpr(U);
PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
-
+
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
@@ -3432,7 +3484,6 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end();
J != JE; ++J) {
-
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
@@ -3654,12 +3705,18 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
+ // Check if the transformation is valid. It is illegal to multiply a pointer.
+ if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
+ return;
+ for (const SCEV *BaseReg : Base.BaseRegs)
+ if (BaseReg->getType()->isPointerTy())
+ return;
assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (int64_t Factor : Factors) {
// Check that the multiplication doesn't overflow.
- if (Base.BaseOffset == INT64_MIN && Factor == -1)
+ if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
if (NewBaseOffset / Factor != Base.BaseOffset)
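
These checks screen INT64_MIN with Factor == -1 before applying the multiply-then-divide-back probe, since that pair both overflows and survives the round trip. A standalone sketch of the probe, assuming two's-complement wraparound through uint64_t (as LLVM does) and a non-zero Factor:

    #include <cstdint>
    #include <limits>

    static bool mulOverflows(int64_t A, int64_t Factor) {
      // INT64_MIN / -1 is itself undefined behaviour, and the wrapped
      // product maps back to INT64_MIN, so screen this pair first.
      if (A == std::numeric_limits<int64_t>::min() && Factor == -1)
        return true;
      int64_t Product = (int64_t)((uint64_t)A * (uint64_t)Factor);
      return Product / Factor != A; // divide back: mismatch means overflow
    }
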
@@ -3671,7 +3728,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
- if (Offset == INT64_MIN && Factor == -1)
+ if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
@@ -3709,7 +3766,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check that multiplying with the unfolded offset doesn't overflow.
if (F.UnfoldedOffset != 0) {
- if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
+ if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
+ Factor == -1)
continue;
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
@@ -3833,7 +3891,7 @@ struct WorkItem {
const SCEV *OrigReg;
WorkItem(size_t LI, int64_t I, const SCEV *R)
- : LUIdx(LI), Imm(I), OrigReg(R) {}
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
@@ -3841,12 +3899,12 @@ struct WorkItem {
} // end anonymous namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
<< " , add offset " << Imm;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
@@ -3856,7 +3914,8 @@ LLVM_DUMP_METHOD void WorkItem::dump() const {
/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
- typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ using ImmMapTy = std::map<int64_t, const SCEV *>;
+
DenseMap<const SCEV *, ImmMapTy> Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
@@ -4060,8 +4119,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
- BestFormulaeTy;
+ using BestFormulaeTy =
+ DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
+
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
@@ -4148,7 +4208,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
}
// This is a rough guess that seems to work fairly well.
-static const size_t ComplexityLimit = UINT16_MAX;
+static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes the
@@ -4267,7 +4327,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->pushFixup(Fixup);
DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
-
+
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -4332,7 +4392,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
"from the Formulae with the same Scale and ScaledReg.\n");
// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
- typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy;
+ using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
+
BestFormulaeTy BestFormulae;
#ifndef NDEBUG
bool ChangedFormulae = false;
@@ -4454,7 +4515,6 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
/// Use3:
/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
/// reg(c) + reg({b,+,1}) 1 + 2/3
-
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
@@ -4549,7 +4609,6 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
print_uses(dbgs()));
}
-
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -5196,8 +5255,7 @@ void LSRInstance::ImplementSolution(
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
const TargetTransformInfo &TTI)
- : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
- IVIncInsertPos(nullptr) {
+ : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -5302,6 +5360,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
ImplementSolution(Solution);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
if (Factors.empty() && Types.empty()) return;
@@ -5352,7 +5411,6 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
@@ -5448,6 +5506,7 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
}
char LoopStrengthReduce::ID = 0;
+
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)