Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
 lib/Transforms/Scalar/LoopStrengthReduce.cpp | 421 +++++++++++++++----------
 1 file changed, 249 insertions(+), 172 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 953854c8b7b7..fa83b48210bc 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -75,6 +75,8 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -105,8 +107,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -121,7 +123,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-reduce"
-/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
@@ -185,6 +187,8 @@ struct MemAccessTy {
unsigned AS = UnknownAddressSpace) {
return MemAccessTy(Type::getVoidTy(Ctx), AS);
}
+
+ Type *getType() { return MemTy; }
};
/// This class holds data which is used to order reuse candidates.
@@ -327,7 +331,7 @@ struct Formula {
/// #2 enforces that 1 * reg is reg.
/// #3 ensures invariant regs with respect to current loop can be combined
/// together in LSR codegen.
- /// This invariant can be temporarly broken while building a formula.
+ /// This invariant can be temporarily broken while building a formula.
/// However, every formula inserted into the LSRInstance must be in canonical
/// form.
SmallVector<const SCEV *, 4> BaseRegs;
@@ -442,7 +446,7 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
canonicalize(*L);
}
-/// \brief Check whether or not this formula statisfies the canonical
+/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
bool Formula::isCanonical(const Loop &L) const {
@@ -470,7 +474,7 @@ bool Formula::isCanonical(const Loop &L) const {
return I == BaseRegs.end();
}
-/// \brief Helper method to morph a formula into its canonical representation.
+/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
@@ -505,7 +509,7 @@ void Formula::canonicalize(const Loop &L) {
}
}
-/// \brief Get rid of the scale in the formula.
+/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
@@ -818,7 +822,7 @@ static bool isAddressUse(const TargetTransformInfo &TTI,
/// Return the type of the memory being accessed.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
- Instruction *Inst) {
+ Instruction *Inst, Value *OperandVal) {
MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
AccessTy.MemTy = SI->getOperand(0)->getType();
@@ -832,7 +836,14 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::prefetch:
+ case Intrinsic::memset:
AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
+ AccessTy.MemTy = OperandVal->getType();
+ break;
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
+ AccessTy.MemTy = OperandVal->getType();
break;
default: {
MemIntrinsicInfo IntrInfo;
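The new OperandVal parameter matters most for the intrinsics added in this hunk: memcpy and memmove carry two pointer arguments that may live in different address spaces, so the access type has to follow the specific operand LSR is rewriting rather than a fixed argument slot. A sketch of the distinction, with hypothetical IR in the comments:

    // Hypothetical call mixing address spaces (not from this patch):
    //   call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %dst,
    //                                        i8 addrspace(2)* %src, i64 %n, i1 0)
    // Rewriting %src must report addrspace(2); reading the intrinsic's first
    // argument (as the prefetch/memset path above does) would wrongly say 1.
    // Hence the updated call sites later in the patch pass the operand through:
    MemAccessTy AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());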
@@ -857,12 +868,11 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
+ for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
+ if (SE.isSCEVable(PN.getType()) &&
+ (SE.getEffectiveSCEVType(PN.getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
+ SE.getSCEV(&PN) == AR)
return true;
}
return false;
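The phis() range used here (and again in the CollectChains and GenerateIVChain hunks below) is the modern spelling of the removed idiom; both visit exactly the block's leading PHI nodes. A sketch of the equivalence, with BB and visit() as stand-ins:

    // Old idiom: walk from begin() while the instruction is still a PHI.
    for (BasicBlock::iterator I = BB->begin();
         PHINode *PN = dyn_cast<PHINode>(I); ++I)
      visit(*PN);
    // New idiom: same instructions, no manual cast-and-test loop.
    for (PHINode &PN : BB->phis())
      visit(PN);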
@@ -938,7 +948,7 @@ static bool isHighCostExpansion(const SCEV *S,
return true;
}
-/// If any of the instructions is the specified set are trivially dead, delete
+/// If any of the instructions in the specified set are trivially dead, delete
/// them and see if this makes any of their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
@@ -971,7 +981,7 @@ class LSRUse;
} // end anonymous namespace
-/// \brief Check if the addressing mode defined by \p F is completely
+/// Check if the addressing mode defined by \p F is completely
/// folded in \p LU at isel time.
/// This includes address-mode folding and special icmp tricks.
/// This function returns true if \p LU can accommodate what \p F
@@ -1041,12 +1051,14 @@ private:
void RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ const TargetTransformInfo &TTI);
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs);
+ SmallPtrSetImpl<const SCEV *> *LoserRegs,
+ const TargetTransformInfo &TTI);
};
/// An operand value in an instruction which is to be replaced with some
@@ -1195,7 +1207,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ const TargetTransformInfo &TTI) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// If this is an addrec for another loop, it should be an invariant
// with respect to L since L is the innermost loop (at least
@@ -1216,13 +1229,28 @@ void Cost::RateRegister(const SCEV *Reg,
++C.NumRegs;
return;
}
- C.AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+ unsigned LoopCost = 1;
+ if (TTI.shouldFavorPostInc()) {
+ const SCEV *LoopStep = AR->getStepRecurrence(SE);
+ if (isa<SCEVConstant>(LoopStep)) {
+ // Check if a post-indexed load/store can be used.
+ if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+ TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+ const SCEV *LoopStart = AR->getStart();
+ if (!isa<SCEVConstant>(LoopStart) &&
+ SE.isLoopInvariant(LoopStart, L))
+ LoopCost = 0;
+ }
+ }
+ }
+ C.AddRecCost += LoopCost;
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
- RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
if (isLoser())
return;
}
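The zero LoopCost models targets whose post-indexed addressing folds the induction update into the memory access itself (an ARM-style ldr r0, [r1], #4, say), making a constant-step AddRec with a loop-invariant, non-constant start effectively free. A hedged C-level sketch of the loop shape this rewards:

    // On a target where shouldFavorPostInc() holds and post-indexed loads
    // are legal, the pointer recurrence {p,+,4} costs no extra instruction:
    // the +4 rides along with each load.
    int sumArray(const int *p, int n) {
      int Sum = 0;
      for (int i = 0; i != n; ++i)
        Sum += p[i]; // load and pointer bump become one post-indexed access
      return Sum;
    }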
@@ -1250,13 +1278,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+ SmallPtrSetImpl<const SCEV *> *LoserRegs,
+ const TargetTransformInfo &TTI) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
if (Regs.insert(Reg).second) {
- RateRegister(Reg, Regs, L, SE, DT);
+ RateRegister(Reg, Regs, L, SE, DT, TTI);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
@@ -1280,7 +1309,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
if (isLoser())
return;
}
@@ -1289,7 +1318,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
if (isLoser())
return;
}
@@ -1344,14 +1373,15 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// If the ICmpZero formula ends with a nonzero value, it cannot be replaced
// by just an add or sub. We'll need to compare the final result of the AddRec.
- // That means we'll need an additional instruction.
+ // That means we'll need an additional instruction. But if the target can
+ // macro-fuse a compare with a branch, don't count this extra instruction.
// For -10 + {0, +, 1}:
// i = i + 1;
// cmp i, 10
//
// For {-10, +, 1}:
// i = i + 1;
- if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
+ if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
C.Insns++;
// Each new AddRec adds 1 instruction to calculation.
C.Insns += (C.AddRecCost - PrevAddRecCost);
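Macro-fusion refers to targets (notably modern x86 cores) that issue an adjacent compare-and-branch pair as a single operation, which makes the compare against a nonzero end value described above effectively free. Illustrative only, assuming such a target:

    // Loop control when counting up to a nonzero bound needs add+cmp+branch;
    // counting down to zero needs only the flag-setting add+branch. When the
    // cmp fuses with the branch, both forms cost the same, so no extra
    // C.Insns is charged.
    void body(int);
    void countUp(int n)   { for (int i = 0;  i != n; ++i) body(i); }
    void countDown(int n) { for (int i = -n; i != 0; ++i) body(i); } // n > 0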
@@ -1457,7 +1487,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- std::sort(Key.begin(), Key.end());
+ llvm::sort(Key.begin(), Key.end());
return Uniquifier.count(Key);
}
@@ -1481,7 +1511,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- std::sort(Key.begin(), Key.end());
+ llvm::sort(Key.begin(), Key.end());
if (!Uniquifier.insert(Key).second)
return false;
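The std::sort → llvm::sort swaps throughout this patch are behavior-preserving; the wrapper exists so that expensive-checks builds can shuffle the range before sorting, exposing any hidden dependence on how the host's std::sort happens to order equal keys (the very non-determinism the comments above deliberately tolerate). Usage is identical:

    // Drop-in replacement for std::sort; under LLVM_ENABLE_EXPENSIVE_CHECKS
    // the range is randomly shuffled first to flush out ordering assumptions.
    llvm::sort(Key.begin(), Key.end());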
@@ -2385,24 +2415,27 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
- MemAccessTy AccessTy = getAccessType(TTI, UI->getUser());
- int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
- /*BaseOffset=*/0,
- /*HasBaseReg=*/false, Scale,
- AccessTy.AddrSpace))
- goto decline_post_inc;
- Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
- /*BaseOffset=*/0,
- /*HasBaseReg=*/false, Scale,
- AccessTy.AddrSpace))
- goto decline_post_inc;
+ if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
+ MemAccessTy AccessTy = getAccessType(
+ TTI, UI->getUser(), UI->getOperandValToReplace());
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
+ goto decline_post_inc;
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
+ goto decline_post_inc;
+ }
}
}
- DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
- << *Cond << '\n');
+ LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
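The DEBUG → LLVM_DEBUG renames that begin in this hunk and continue through the rest of the patch are mechanical: the macro gained an LLVM_ prefix because the bare name DEBUG is too generic to claim in library headers. Semantics are unchanged; the body still runs only in asserts builds when the pass's DEBUG_TYPE is enabled:

    #define DEBUG_TYPE "loop-reduce"
    // Printed only with -debug or -debug-only=loop-reduce.
    LLVM_DEBUG(dbgs() << "message\n");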
@@ -2643,7 +2676,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
if (Types.size() == 1)
Types.clear();
- DEBUG(print_factors_and_types(dbgs()));
+ LLVM_DEBUG(print_factors_and_types(dbgs()));
}
/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
@@ -2667,7 +2700,7 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE,
return OI;
}
-/// IVChain logic must consistenctly peek base TruncInst operands, so wrap it in
+/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
@@ -2774,10 +2807,9 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
return false;
if (!Users.empty()) {
- DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
- for (Instruction *Inst : Users) {
- dbgs() << " " << *Inst << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
+ for (Instruction *Inst
+ : Users) { dbgs() << " " << *Inst << "\n"; });
return false;
}
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
@@ -2830,8 +2862,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
// the stride.
cost -= NumReusedIncrements;
- DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
- << "\n");
+ LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+ << "\n");
return cost < 0;
}
@@ -2884,7 +2916,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
if (isa<PHINode>(UserInst))
return;
if (NChains >= MaxChains && !StressIVChain) {
- DEBUG(dbgs() << "IV Chain Limit\n");
+ LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
LastIncExpr = OperExpr;
@@ -2897,11 +2929,11 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
OperExprBase));
ChainUsersVec.resize(NChains);
- DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
- << ") IV=" << *LastIncExpr << "\n");
+ LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
} else {
- DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
- << ") IV+" << *LastIncExpr << "\n");
+ LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
@@ -2971,7 +3003,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
- DEBUG(dbgs() << "Collecting IV Chains.\n");
+ LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
@@ -3013,15 +3045,14 @@ void LSRInstance::CollectChains() {
} // Continue walking down the instructions.
} // Continue walking down the domtree.
// Visit phi backedges to determine if the chain can generate the IV postinc.
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (!SE.isSCEVable(PN->getType()))
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
continue;
Instruction *IncV =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
- ChainInstruction(PN, IncV, ChainUsersVec);
+ ChainInstruction(&PN, IncV, ChainUsersVec);
}
// Remove any unprofitable chains.
unsigned ChainIdx = 0;
@@ -3041,10 +3072,10 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
- DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
+ LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
for (const IVInc &Inc : Chain) {
- DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
+ LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
@@ -3061,7 +3092,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
- MemAccessTy AccessTy = getAccessType(TTI, UserInst);
+ MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
IncOffset, /*HasBaseReg=*/false))
@@ -3101,11 +3132,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
if (IVOpIter == IVOpEnd) {
// Gracefully give up on this chain.
- DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
return;
}
- DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = nullptr;
@@ -3152,12 +3183,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
if (isa<PHINode>(Chain.tailUserInst())) {
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
- if (!isCompatibleIVType(Phi, IVSrc))
+ for (PHINode &Phi : L->getHeader()->phis()) {
+ if (!isCompatibleIVType(&Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
- Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ Phi.getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
@@ -3168,7 +3198,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
- Phi->replaceUsesOfWith(PostIncV, IVOper);
+ Phi.replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.emplace_back(PostIncV);
}
}
@@ -3182,7 +3212,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
find(UserInst->operands(), U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI)) {
- DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
+ LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
continue;
}
@@ -3190,7 +3220,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
MemAccessTy AccessTy;
if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
Kind = LSRUse::Address;
- AccessTy = getAccessType(TTI, UserInst);
+ AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
}
const SCEV *S = IU.getExpr(U);
@@ -3258,7 +3288,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
}
}
- DEBUG(print_fixups(dbgs()));
+ LLVM_DEBUG(print_fixups(dbgs()));
}
/// Insert a formula for the given expression into the given use, separating out
@@ -3467,12 +3497,45 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
return S;
}
-/// \brief Helper function for LSRInstance::GenerateReassociations.
+/// Return true if the SCEV represents a value that may end up as a
+/// post-increment operation.
+static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
+ LSRUse &LU, const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ if (LU.Kind != LSRUse::Address ||
+ !LU.AccessTy.getType()->isIntOrIntVectorTy())
+ return false;
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AR)
+ return false;
+ const SCEV *LoopStep = AR->getStepRecurrence(SE);
+ if (!isa<SCEVConstant>(LoopStep))
+ return false;
+ if (LU.AccessTy.getType()->getScalarSizeInBits() !=
+ LoopStep->getType()->getScalarSizeInBits())
+ return false;
+ // Check if a post-indexed load/store can be used.
+ if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+ TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+ const SCEV *LoopStart = AR->getStart();
+ if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
+ return true;
+ }
+ return false;
+}
+
+/// Helper function for LSRInstance::GenerateReassociations.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
unsigned Depth, size_t Idx,
bool IsScaledReg) {
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ // Don't generate reassociations for the base register of a value that
+ // may generate a post-increment operator. The reason is that the
+ // reassociations cause extra base+register formulae to be created,
+ // and possibly chosen, but the post-increment is more efficient.
+ if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
+ return;
SmallVector<const SCEV *, 8> AddOps;
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
if (Remainder)
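mayUsePostIncMode, added above, mirrors the RateRegister test: an address use of integer type whose AddRec has a constant step matching the access width and a loop-invariant, non-constant start. The guard at the top of GenerateReassociationsImpl then keeps LSR from splitting such a base register, since a reassociated base+offset formula can look cheaper to the solver yet forfeit the post-increment. A sketch of the protected shape:

    // The strided store's address stays a single AddRec {p,+,4}; splitting
    // it into base + {0,+,4} would invite a reg+reg formula instead of a
    // post-indexed store on targets that favor post-increment.
    void fill(int *p, int n, int v) {
      for (int i = 0; i != n; ++i)
        *p++ = v;
    }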
@@ -3545,7 +3608,12 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ // Add a term based on Log16(AddOps.size()) (i.e. Log2_32(AddOps.size()) >> 2),
+ // because Depth alone is not enough to bound compile time.
+ // This means that every time AddOps.size() exceeds 16^x, we add x
+ // to Depth.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
+ Depth + 1 + (Log2_32(AddOps.size()) >> 2));
}
}
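The extra depth term is floor(log16(AddOps.size())): Log2_32 returns the floor of the base-2 logarithm, and the right shift by two divides it by four. Worked values as a quick check:

    // AddOps.size() =   8 -> Log2_32 = 3 -> 3 >> 2 = 0  (recurse at Depth + 1)
    // AddOps.size() =  16 -> Log2_32 = 4 -> 4 >> 2 = 1  (recurse at Depth + 2)
    // AddOps.size() = 256 -> Log2_32 = 8 -> 8 >> 2 = 2  (recurse at Depth + 3)
    unsigned ExtraDepth = Log2_32(AddOps.size()) >> 2; // floor(log16(size))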
@@ -3599,7 +3667,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
}
}
-/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+/// Helper function for LSRInstance::GenerateSymbolicOffsets.
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, size_t Idx,
bool IsScaledReg) {
@@ -3631,7 +3699,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
/* IsScaledReg */ true);
}
-/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
@@ -3941,10 +4009,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (Imms.size() == 1)
continue;
- DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
- for (const auto &Entry : Imms)
- dbgs() << ' ' << Entry.first;
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (const auto &Entry
+ : Imms) dbgs()
+ << ' ' << Entry.first;
+ dbgs() << '\n');
// Examine each offset.
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
@@ -3956,7 +4025,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
- DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
+ << '\n');
continue;
}
@@ -4041,6 +4111,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
+ if (TTI.shouldFavorPostInc() &&
+ mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
+ continue;
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
@@ -4102,9 +4175,9 @@ LSRInstance::GenerateAllReuseFormulae() {
GenerateCrossUseConstantOffsets();
- DEBUG(dbgs() << "\n"
- "After generating reuse formulae:\n";
- print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
}
/// If there are multiple formulae with the same set of registers used
@@ -4126,7 +4199,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
+ dbgs() << '\n');
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
@@ -4150,8 +4224,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// as the basis of rediscovering the desired formula that uses an AddRec
// corresponding to the existing phi. Once all formulae have been
// generated, these initial losers may be pruned.
- DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
else {
SmallVector<const SCEV *, 4> Key;
@@ -4164,7 +4238,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
- std::sort(Key.begin(), Key.end());
+ llvm::sort(Key.begin(), Key.end());
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
@@ -4178,10 +4252,10 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
if (CostF.isLess(CostBest, TTI))
std::swap(F, Best);
- DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
- dbgs() << "\n"
- " in favor of formula "; Best.print(dbgs());
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula ";
+ Best.print(dbgs()); dbgs() << '\n');
}
#ifndef NDEBUG
ChangedFormulae = true;
@@ -4200,11 +4274,11 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
BestFormulae.clear();
}
- DEBUG(if (ChangedFormulae) {
- dbgs() << "\n"
- "After filtering out undesirable candidates:\n";
- print_uses(dbgs());
- });
+ LLVM_DEBUG(if (ChangedFormulae) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
}
// This is a rough guess that seems to work fairly well.
@@ -4233,11 +4307,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
- DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
- "which use a superset of registers used by other "
- "formulae.\n");
+ LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
@@ -4255,7 +4329,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
@@ -4270,8 +4345,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
@@ -4286,8 +4361,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
LU.RecomputeRegs(LUIdx, RegUses);
}
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
}
@@ -4297,9 +4371,10 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
- DEBUG(dbgs() << "The search space is too complex.\n"
- "Narrowing the search space by assuming that uses separated "
- "by a constant offset will use the same registers.\n");
+ LLVM_DEBUG(
+ dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
// This is especially useful for unrolled loops.
@@ -4317,7 +4392,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LU.Kind, LU.AccessTy))
continue;
- DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
@@ -4325,7 +4400,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (LSRFixup &Fixup : LU.Fixups) {
Fixup.Offset += F.BaseOffset;
LUThatHas->pushFixup(Fixup);
- DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
+ LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
// Delete formulae from the new use which are no longer legal.
@@ -4334,8 +4409,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LUThatHas->DeleteFormula(F);
--i;
--e;
@@ -4354,7 +4428,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
}
}
- DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
@@ -4362,15 +4436,14 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
- DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
- "undesirable dedicated registers.\n");
+ LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
FilterOutUndesirableDedicatedRegisters();
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
}
@@ -4381,15 +4454,16 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
/// The benefit is that it is more likely to find out a better solution
/// from a formulae set with more Scale and ScaledReg variations than
/// a formulae set with the same Scale and ScaledReg. The picking winner
-/// reg heurstic will often keep the formulae with the same Scale and
+/// reg heuristic will often keep the formulae with the same Scale and
/// ScaledReg and filter others, and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
- DEBUG(dbgs() << "The search space is too complex.\n"
- "Narrowing the search space by choosing the best Formula "
- "from the Formulae with the same Scale and ScaledReg.\n");
+ LLVM_DEBUG(
+ dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by choosing the best Formula "
+ "from the Formulae with the same Scale and ScaledReg.\n");
// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
@@ -4403,7 +4477,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
+ dbgs() << '\n');
// Return true if Formula FA is better than Formula FB.
auto IsBetterThan = [&](Formula &FA, Formula &FB) {
@@ -4447,10 +4522,10 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
Formula &Best = LU.Formulae[P.first->second];
if (IsBetterThan(F, Best))
std::swap(F, Best);
- DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
- dbgs() << "\n"
- " in favor of formula ";
- Best.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of formula ";
+ Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
ChangedFormulae = true;
#endif
@@ -4466,7 +4541,7 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
BestFormulae.clear();
}
- DEBUG(if (ChangedFormulae) {
+ LLVM_DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
@@ -4525,7 +4600,7 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
// Used in each formula of a solution (in example above this is reg(c)).
// We can skip them in calculations.
SmallPtrSet<const SCEV *, 4> UniqRegs;
- DEBUG(dbgs() << "The search space is too complex.\n");
+ LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
// Map each register to probability of not selecting
DenseMap <const SCEV *, float> RegNumMap;
@@ -4545,7 +4620,8 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
RegNumMap.insert(std::make_pair(Reg, PNotSel));
}
- DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
+ LLVM_DEBUG(
+ dbgs() << "Narrowing the search space by deleting costly formulas\n");
// Delete formulas where registers number expectation is high.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
@@ -4587,26 +4663,25 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
MinIdx = i;
}
}
- DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
- dbgs() << " with min reg num " << FMinRegNum << '\n');
+ LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
+ dbgs() << " with min reg num " << FMinRegNum << '\n');
if (MinIdx != 0)
std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
while (LU.Formulae.size() != 1) {
- DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
+ dbgs() << '\n');
LU.Formulae.pop_back();
}
LU.RecomputeRegs(LUIdx, RegUses);
assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
Formula &F = LU.Formulae[0];
- DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
// When we choose the formula, the regs become unique.
UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
if (F.ScaledReg)
UniqRegs.insert(F.ScaledReg);
}
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// Pick a register which seems likely to be profitable, and then in any use
@@ -4619,7 +4694,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
// Ok, we have too many of formulae on our hands to conveniently handle.
// Use a rough heuristic to thin out the list.
- DEBUG(dbgs() << "The search space is too complex.\n");
+ LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
@@ -4640,8 +4715,8 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
}
}
- DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
- << " will yield profitable reuse.\n");
+ LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
Taken.insert(Best);
// In any use with formulae which references this register, delete formulae
@@ -4654,7 +4729,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
if (!F.referencesReg(Best)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--e;
--i;
@@ -4668,8 +4743,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
LU.RecomputeRegs(LUIdx, RegUses);
}
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
}
@@ -4751,11 +4825,11 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
if (F.getNumRegs() == 1 && Workspace.size() == 1)
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
- DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ".\n Regs:";
- for (const SCEV *S : NewRegs)
- dbgs() << ' ' << *S;
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ".\n Regs:"; for (const SCEV *S
+ : NewRegs) dbgs()
+ << ' ' << *S;
+ dbgs() << '\n');
SolutionCost = NewCost;
Solution = Workspace;
@@ -4780,22 +4854,22 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
if (Solution.empty()) {
- DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
return;
}
// Ok, we've now made all our decisions.
- DEBUG(dbgs() << "\n"
- "The chosen solution requires "; SolutionCost.print(dbgs());
- dbgs() << ":\n";
- for (size_t i = 0, e = Uses.size(); i != e; ++i) {
- dbgs() << " ";
- Uses[i].print(dbgs());
- dbgs() << "\n"
- " ";
- Solution[i]->print(dbgs());
- dbgs() << '\n';
- });
+ LLVM_DEBUG(dbgs() << "\n"
+ "The chosen solution requires ";
+ SolutionCost.print(dbgs()); dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
@@ -4996,7 +5070,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -5269,7 +5343,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
for (const IVStrideUse &U : IU) {
if (++NumUsers > MaxIVUsers) {
(void)U;
- DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
+ LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
+ << "\n");
return;
}
// Bail out if we have a PHI on an EHPad that gets a value from a
@@ -5302,9 +5377,9 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
#endif // DEBUG
- DEBUG(dbgs() << "\nLSR on loop ";
- L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
- dbgs() << ":\n");
+ LLVM_DEBUG(dbgs() << "\nLSR on loop ";
+ L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
+ dbgs() << ":\n");
// First, perform some low-level loop optimizations.
OptimizeShadowIV();
@@ -5315,7 +5390,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Skip nested loops until we can model them better with formulae.
if (!L->empty()) {
- DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
@@ -5325,9 +5400,11 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
- assert(!Uses.empty() && "IVUsers reported at least one use");
- DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
- print_uses(dbgs()));
+ if (Uses.empty())
+ return;
+
+ LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.