Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 421
1 file changed, 249 insertions, 172 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 953854c8b7b7..fa83b48210bc 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -75,6 +75,8 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -105,8 +107,8 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -121,7 +123,7 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loop-reduce"
 
-/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
 /// bail out. This threshold is far beyond the number of users that LSR can
 /// conceivably solve, so it should not affect generated code, but catches the
 /// worst cases before LSR burns too much compile time and stack space.
@@ -185,6 +187,8 @@ struct MemAccessTy {
                              unsigned AS = UnknownAddressSpace) {
     return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
+
+  Type *getType() { return MemTy; }
 };
 
 /// This class holds data which is used to order reuse candidates.
@@ -327,7 +331,7 @@ struct Formula {
   /// #2 enforces that 1 * reg is reg.
   /// #3 ensures invariant regs with respect to current loop can be combined
   /// together in LSR codegen.
-  /// This invariant can be temporarly broken while building a formula.
+  /// This invariant can be temporarily broken while building a formula.
   /// However, every formula inserted into the LSRInstance must be in canonical
   /// form.
   SmallVector<const SCEV *, 4> BaseRegs;
@@ -442,7 +446,7 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
   canonicalize(*L);
 }
 
-/// \brief Check whether or not this formula statisfies the canonical
+/// Check whether or not this formula satisfies the canonical
 /// representation.
 /// \see Formula::BaseRegs.
 bool Formula::isCanonical(const Loop &L) const {
@@ -470,7 +474,7 @@ bool Formula::isCanonical(const Loop &L) const {
   return I == BaseRegs.end();
 }
 
-/// \brief Helper method to morph a formula into its canonical representation.
+/// Helper method to morph a formula into its canonical representation.
 /// \see Formula::BaseRegs.
 /// Every formula having more than one base register, must use the ScaledReg
 /// field. Otherwise, we would have to do special cases everywhere in LSR
@@ -505,7 +509,7 @@ void Formula::canonicalize(const Loop &L) {
   }
 }
 
-/// \brief Get rid of the scale in the formula.
+/// Get rid of the scale in the formula.
 /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
 /// \return true if it was possible to get rid of the scale, false otherwise.
 /// \note After this operation the formula may not be in the canonical form.
@@ -818,7 +822,7 @@ static bool isAddressUse(const TargetTransformInfo &TTI,
 
 /// Return the type of the memory being accessed.
 static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
-                                 Instruction *Inst) {
+                                 Instruction *Inst, Value *OperandVal) {
   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
     AccessTy.MemTy = SI->getOperand(0)->getType();
@@ -832,7 +836,14 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::prefetch:
+    case Intrinsic::memset:
       AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
+      AccessTy.MemTy = OperandVal->getType();
+      break;
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
+      AccessTy.MemTy = OperandVal->getType();
       break;
     default: {
       MemIntrinsicInfo IntrInfo;
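The hunk above is the heart of the functional change: memset, memcpy and memmove pointer operands are now treated as address uses, and for memcpy/memmove the access type is derived from the specific operand being rewritten, since source and destination may live in different address spaces. A minimal standalone model of that selection logic (the types and names below are illustrative stand-ins, not LLVM's API):

    #include <cassert>
    #include <cstdio>

    enum class Intrin { Prefetch, MemSet, MemCpy, MemMove };

    struct Operand { unsigned AddrSpace; }; // stand-in for llvm::Value
    struct Access  { unsigned AddrSpace; }; // stand-in for MemAccessTy

    Access getAccessTypeModel(Intrin ID, const Operand &Op0,
                              const Operand &Rewritten) {
      switch (ID) {
      case Intrin::Prefetch:
      case Intrin::MemSet:
        return {Op0.AddrSpace};       // single pointer operand: operand 0
      case Intrin::MemCpy:
      case Intrin::MemMove:
        return {Rewritten.AddrSpace}; // src and dst may differ: use the
                                      // operand actually being replaced
      }
      assert(false && "unhandled intrinsic");
      return {0};
    }

    int main() {
      Operand Dst{1}, Src{3};
      // Rewriting the source operand of a memcpy must report address space 3.
      std::printf("%u\n", getAccessTypeModel(Intrin::MemCpy, Dst, Src).AddrSpace);
    }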
@@ -857,12 +868,11 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
 
 /// Return true if this AddRec is already a phi in its loop.
 static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
-  for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
-       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    if (SE.isSCEVable(PN->getType()) &&
-        (SE.getEffectiveSCEVType(PN->getType()) ==
+  for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
+    if (SE.isSCEVable(PN.getType()) &&
+        (SE.getEffectiveSCEVType(PN.getType()) ==
          SE.getEffectiveSCEVType(AR->getType())) &&
-        SE.getSCEV(PN) == AR)
+        SE.getSCEV(&PN) == AR)
       return true;
   }
   return false;
@@ -938,7 +948,7 @@ static bool isHighCostExpansion(const SCEV *S,
   return true;
 }
 
-/// If any of the instructions is the specified set are trivially dead, delete
+/// If any of the instructions in the specified set are trivially dead, delete
 /// them and see if this makes any of their operands subsequently dead.
 static bool
 DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
@@ -971,7 +981,7 @@ class LSRUse;
 
 } // end anonymous namespace
 
-/// \brief Check if the addressing mode defined by \p F is completely
+/// Check if the addressing mode defined by \p F is completely
 /// folded in \p LU at isel time.
 /// This includes address-mode folding and special icmp tricks.
 /// This function returns true if \p LU can accommodate what \p F
@@ -1041,12 +1051,14 @@ private:
   void RateRegister(const SCEV *Reg,
                     SmallPtrSetImpl<const SCEV *> &Regs,
                     const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+                    ScalarEvolution &SE, DominatorTree &DT,
+                    const TargetTransformInfo &TTI);
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
                            const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const TargetTransformInfo &TTI);
 };
 
 /// An operand value in an instruction which is to be replaced with some
@@ -1195,7 +1207,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
 void Cost::RateRegister(const SCEV *Reg,
                         SmallPtrSetImpl<const SCEV *> &Regs,
                         const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT) {
+                        ScalarEvolution &SE, DominatorTree &DT,
+                        const TargetTransformInfo &TTI) {
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, it should be an invariant
     // with respect to L since L is the innermost loop (at least
@@ -1216,13 +1229,28 @@ void Cost::RateRegister(const SCEV *Reg,
       ++C.NumRegs;
       return;
     }
-    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+    unsigned LoopCost = 1;
+    if (TTI.shouldFavorPostInc()) {
+      const SCEV *LoopStep = AR->getStepRecurrence(SE);
+      if (isa<SCEVConstant>(LoopStep)) {
+        // Check if a post-indexed load/store can be used.
+        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+          const SCEV *LoopStart = AR->getStart();
+          if (!isa<SCEVConstant>(LoopStart) &&
+              SE.isLoopInvariant(LoopStart, L))
+            LoopCost = 0;
+        }
+      }
+    }
+    C.AddRecCost += LoopCost;
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
         if (isLoser())
           return;
       }
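The new LoopCost logic above rates a strided recurrence as free when the target reports that post-increment addressing is both preferred and legal for the access type. A simplified standalone model of that decision (the parameter names are stand-ins for the TTI hooks, not the real interface):

    #include <iostream>

    struct TargetModel {
      bool FavorsPostInc;           // models TTI.shouldFavorPostInc()
      bool PostIncLoadStoreLegal;   // models isIndexedLoad/StoreLegal(MIM_PostInc)
    };

    unsigned addRecCost(const TargetModel &TM, bool StepIsConstant,
                        bool StartIsConstant, bool StartIsLoopInvariant) {
      if (TM.FavorsPostInc && StepIsConstant && TM.PostIncLoadStoreLegal &&
          !StartIsConstant && StartIsLoopInvariant)
        return 0; // the increment comes for free with the memory access
      return 1;   // otherwise one instruction per loop iteration
    }

    int main() {
      TargetModel T{true, true}; // hypothetical post-inc-favoring target
      std::cout << addRecCost(T, true, false, true) << '\n';  // 0
      std::cout << addRecCost(T, false, false, true) << '\n'; // 1
    }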
@@ -1250,13 +1278,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
                                SmallPtrSetImpl<const SCEV *> &Regs,
                                const Loop *L,
                                ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const TargetTransformInfo &TTI) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(Reg, Regs, L, SE, DT, TTI);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -1280,7 +1309,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -1289,7 +1318,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -1344,14 +1373,15 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
   // If ICmpZero formula ends with not 0, it could not be replaced by
   // just add or sub. We'll need to compare final result of AddRec.
-  // That means we'll need an additional instruction.
+  // That means we'll need an additional instruction. But if the target can
+  // macro-fuse a compare with a branch, don't count this extra instruction.
   // For -10 + {0, +, 1}:
   //   i = i + 1;
   //   cmp i, 10
   //
   // For {-10, +, 1}:
   //   i = i + 1;
-  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
+  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
     C.Insns++;
   // Each new AddRec adds 1 instruction to calculation.
   C.Insns += (C.AddRecCost - PrevAddRecCost);
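Reduced to a standalone predicate, the instruction accounting this hunk changes looks roughly as follows (an assumed simplification of the real Cost bookkeeping): the final compare of an ICmpZero use is only charged when the end value is non-zero and the target cannot fuse the compare into the branch.

    #include <iostream>

    unsigned icmpZeroExtraInsns(bool EndsWithZero, bool CanMacroFuseCmp) {
      // "i = i + 1; cmp i, 10" needs the separate cmp; a zero end value
      // reuses the add's result directly, and cmp+branch fusion makes the
      // compare effectively free as well.
      return (!EndsWithZero && !CanMacroFuseCmp) ? 1 : 0;
    }

    int main() {
      std::cout << icmpZeroExtraInsns(false, false) << '\n'; // 1: count the cmp
      std::cout << icmpZeroExtraInsns(false, true) << '\n';  // 0: fused cmp+branch
      std::cout << icmpZeroExtraInsns(true, false) << '\n';  // 0: compare with zero
    }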
@@ -1457,7 +1487,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
   if (F.ScaledReg) Key.push_back(F.ScaledReg);
   // Unstable sort by host order ok, because this is only used for uniquifying.
-  std::sort(Key.begin(), Key.end());
+  llvm::sort(Key.begin(), Key.end());
   return Uniquifier.count(Key);
 }
 
@@ -1481,7 +1511,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
   if (F.ScaledReg) Key.push_back(F.ScaledReg);
   // Unstable sort by host order ok, because this is only used for uniquifying.
-  std::sort(Key.begin(), Key.end());
+  llvm::sort(Key.begin(), Key.end());
 
   if (!Uniquifier.insert(Key).second)
     return false;
@@ -2385,24 +2415,27 @@ LSRInstance::OptimizeLoopTermCond() {
                 C->getValue().isMinSignedValue())
               goto decline_post_inc;
             // Check for possible scaled-address reuse.
-            MemAccessTy AccessTy = getAccessType(TTI, UI->getUser());
-            int64_t Scale = C->getSExtValue();
-            if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
-                                          /*BaseOffset=*/0,
-                                          /*HasBaseReg=*/false, Scale,
-                                          AccessTy.AddrSpace))
-              goto decline_post_inc;
-            Scale = -Scale;
-            if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
-                                          /*BaseOffset=*/0,
-                                          /*HasBaseReg=*/false, Scale,
-                                          AccessTy.AddrSpace))
-              goto decline_post_inc;
+            if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
+              MemAccessTy AccessTy = getAccessType(
+                  TTI, UI->getUser(), UI->getOperandValToReplace());
+              int64_t Scale = C->getSExtValue();
+              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+                                            /*BaseOffset=*/0,
+                                            /*HasBaseReg=*/false, Scale,
+                                            AccessTy.AddrSpace))
+                goto decline_post_inc;
+              Scale = -Scale;
+              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+                                            /*BaseOffset=*/0,
+                                            /*HasBaseReg=*/false, Scale,
+                                            AccessTy.AddrSpace))
+                goto decline_post_inc;
+            }
           }
 
-        DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
-                     << *Cond << '\n');
+        LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+                          << *Cond << '\n');
 
     // It's possible for the setcc instruction to be anywhere in the loop, and
     // possible for it to have multiple users. If it is not immediately before
@@ -2643,7 +2676,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
   if (Types.size() == 1)
     Types.clear();
 
-  DEBUG(print_factors_and_types(dbgs()));
+  LLVM_DEBUG(print_factors_and_types(dbgs()));
 }
 
 /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
@@ -2667,7 +2700,7 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE,
   return OI;
 }
 
-/// IVChain logic must consistenctly peek base TruncInst operands, so wrap it in
+/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
 /// a convenient helper.
 static Value *getWideOperand(Value *Oper) {
   if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
@@ -2774,10 +2807,9 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
     return false;
 
   if (!Users.empty()) {
-    DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
-          for (Instruction *Inst : Users) {
-            dbgs() << " " << *Inst << "\n";
-          });
+    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
+               for (Instruction *Inst
+                    : Users) { dbgs() << " " << *Inst << "\n"; });
     return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
@@ -2830,8 +2862,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
   // the stride.
   cost -= NumReusedIncrements;
 
-  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
-               << "\n");
+  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+                    << "\n");
 
   return cost < 0;
 }
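For context, the profitability test whose debug output changed above boils down to a signed cost that is decremented once per increment whose stride can be reused; the chain is kept only when the total goes negative. A greatly simplified model (the real isProfitableChain weighs several more factors):

    #include <iostream>

    int chainCost(int NumIncrements, int NumReusedIncrements) {
      int cost = NumIncrements;    // one instruction per distinct increment
      cost -= NumReusedIncrements; // reused strides share an instruction
      return cost;
    }

    bool isProfitableChainModel(int NumIncrements, int NumReusedIncrements) {
      return chainCost(NumIncrements, NumReusedIncrements) < 0;
    }

    int main() {
      std::cout << isProfitableChainModel(3, 1) << '\n'; // 0: keep users separate
      std::cout << isProfitableChainModel(2, 3) << '\n'; // 1: the chain is a win
    }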
@@ -2884,7 +2916,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
   if (isa<PHINode>(UserInst))
     return;
   if (NChains >= MaxChains && !StressIVChain) {
-    DEBUG(dbgs() << "IV Chain Limit\n");
+    LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
     return;
   }
   LastIncExpr = OperExpr;
@@ -2897,11 +2929,11 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
     IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                  OperExprBase));
     ChainUsersVec.resize(NChains);
-    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
-                 << ") IV=" << *LastIncExpr << "\n");
+    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+                      << ") IV=" << *LastIncExpr << "\n");
   } else {
-    DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
-                 << ") IV+" << *LastIncExpr << "\n");
+    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+                      << ") IV+" << *LastIncExpr << "\n");
     // Add this IV user to the end of the chain.
     IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
   }
@@ -2971,7 +3003,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
 /// loop latch. This will discover chains on side paths, but requires
 /// maintaining multiple copies of the Chains state.
 void LSRInstance::CollectChains() {
-  DEBUG(dbgs() << "Collecting IV Chains.\n");
+  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
   SmallVector<ChainUsers, 8> ChainUsersVec;
 
   SmallVector<BasicBlock *,8> LatchPath;
@@ -3013,15 +3045,14 @@ void LSRInstance::CollectChains() {
     } // Continue walking down the instructions.
   } // Continue walking down the domtree.
   // Visit phi backedges to determine if the chain can generate the IV postinc.
-  for (BasicBlock::iterator I = L->getHeader()->begin();
-       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    if (!SE.isSCEVable(PN->getType()))
+  for (PHINode &PN : L->getHeader()->phis()) {
+    if (!SE.isSCEVable(PN.getType()))
       continue;
 
     Instruction *IncV =
-        dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
     if (IncV)
-      ChainInstruction(PN, IncV, ChainUsersVec);
+      ChainInstruction(&PN, IncV, ChainUsersVec);
   }
   // Remove any unprofitable chains.
   unsigned ChainIdx = 0;
@@ -3041,10 +3072,10 @@ void LSRInstance::CollectChains() {
 
 void LSRInstance::FinalizeChain(IVChain &Chain) {
   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
-  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
+  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
 
   for (const IVInc &Inc : Chain) {
-    DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
+    LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
     assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
     IVIncSet.insert(UseI);
@@ -3061,7 +3092,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
   if (IncConst->getAPInt().getMinSignedBits() > 64)
     return false;
 
-  MemAccessTy AccessTy = getAccessType(TTI, UserInst);
+  MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
   int64_t IncOffset = IncConst->getValue()->getSExtValue();
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
                         IncOffset, /*HaseBaseReg=*/false))
@@ -3101,11 +3132,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
     }
     if (IVOpIter == IVOpEnd) {
       // Gracefully give up on this chain.
-      DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+      LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
       return;
     }
 
-  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+  LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
   Type *IVTy = IVSrc->getType();
   Type *IntTy = SE.getEffectiveSCEVType(IVTy);
   const SCEV *LeftOverExpr = nullptr;
@@ -3152,12 +3183,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
   // If LSR created a new, wider phi, we may also replace its postinc. We only
   // do this if we also found a wide value for the head of the chain.
   if (isa<PHINode>(Chain.tailUserInst())) {
-    for (BasicBlock::iterator I = L->getHeader()->begin();
-         PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
-      if (!isCompatibleIVType(Phi, IVSrc))
+    for (PHINode &Phi : L->getHeader()->phis()) {
+      if (!isCompatibleIVType(&Phi, IVSrc))
         continue;
       Instruction *PostIncV = dyn_cast<Instruction>(
-          Phi->getIncomingValueForBlock(L->getLoopLatch()));
+          Phi.getIncomingValueForBlock(L->getLoopLatch()));
       if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
         continue;
       Value *IVOper = IVSrc;
@@ -3168,7 +3198,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
         Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
         IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
       }
-      Phi->replaceUsesOfWith(PostIncV, IVOper);
+      Phi.replaceUsesOfWith(PostIncV, IVOper);
       DeadInsts.emplace_back(PostIncV);
     }
   }
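The PHI-loop rewrites scattered through this patch all follow the same pattern, replacing the manual iterator-plus-dyn_cast termination trick with BasicBlock::phis(). Shown in isolation (a sketch that compiles against the LLVM headers):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Old style:
    //   for (BasicBlock::iterator I = BB.begin();
    //        PHINode *PN = dyn_cast<PHINode>(I); ++I) { ... }
    // New style: phis() yields exactly the leading PHI nodes and stops at
    // the first non-PHI instruction automatically.
    unsigned countPhis(llvm::BasicBlock &BB) {
      unsigned N = 0;
      for (llvm::PHINode &PN : BB.phis()) {
        (void)PN; // each PN is a PHI by construction; no dyn_cast needed
        ++N;
      }
      return N;
    }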
@@ -3182,7 +3212,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
         find(UserInst->operands(), U.getOperandValToReplace());
     assert(UseI != UserInst->op_end() && "cannot find IV operand");
     if (IVIncSet.count(UseI)) {
-      DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
+      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
       continue;
     }
 
@@ -3190,7 +3220,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     MemAccessTy AccessTy;
     if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
       Kind = LSRUse::Address;
-      AccessTy = getAccessType(TTI, UserInst);
+      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
     }
 
     const SCEV *S = IU.getExpr(U);
@@ -3258,7 +3288,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     }
   }
 
-  DEBUG(print_fixups(dbgs()));
+  LLVM_DEBUG(print_fixups(dbgs()));
 }
 
 /// Insert a formula for the given expression into the given use, separating out
@@ -3467,12 +3497,45 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
   return S;
 }
 
-/// \brief Helper function for LSRInstance::GenerateReassociations.
+/// Return true if the SCEV represents a value that may end up as a
+/// post-increment operation.
+static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
+                              LSRUse &LU, const SCEV *S, const Loop *L,
+                              ScalarEvolution &SE) {
+  if (LU.Kind != LSRUse::Address ||
+      !LU.AccessTy.getType()->isIntOrIntVectorTy())
+    return false;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR)
+    return false;
+  const SCEV *LoopStep = AR->getStepRecurrence(SE);
+  if (!isa<SCEVConstant>(LoopStep))
+    return false;
+  if (LU.AccessTy.getType()->getScalarSizeInBits() !=
+      LoopStep->getType()->getScalarSizeInBits())
    return false;
+  // Check if a post-indexed load/store can be used.
+  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+    const SCEV *LoopStart = AR->getStart();
+    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
+      return true;
+  }
+  return false;
+}
+
+/// Helper function for LSRInstance::GenerateReassociations.
 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base,
                                              unsigned Depth, size_t Idx,
                                              bool IsScaledReg) {
   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  // Don't generate reassociations for the base register of a value that
+  // may generate a post-increment operator. The reason is that the
+  // reassociations cause extra base+register formula to be created,
+  // and possibly chosen, but the post-increment is more efficient.
+  if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
+    return;
   SmallVector<const SCEV *, 8> AddOps;
   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
   if (Remainder)
@@ -3545,7 +3608,12 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
       if (InsertFormula(LU, LUIdx, F))
         // If that formula hadn't been seen before, recurse to find more like
         // it.
-        GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+        // Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
+        // Because just Depth is not enough to bound compile time.
+        // This means that every time AddOps.size() is greater 16^x we will add
+        // x to Depth.
+        GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
                               Depth + 1 + (Log2_32(AddOps.size()) >> 2));
     }
   }
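The new recursion bound can be checked by hand: Log2_32 is floor(log2), so each recursive step consumes 1 + (Log2_32(AddOps.size()) >> 2) units of Depth, i.e. one extra unit for every factor-of-16 growth in the operand count. A standalone verification (log2_32 below re-implements llvm::Log2_32):

    #include <cstdio>

    unsigned log2_32(unsigned n) { // floor(log2(n)), as llvm::Log2_32 computes
      unsigned r = 0;
      while (n >>= 1)
        ++r;
      return r;
    }

    int main() {
      const unsigned sizes[] = {2, 8, 16, 64, 256, 4096};
      // Prints steps 1 1 2 2 3 4: below 16 operands the step stays 1; each
      // further power of 16 charges one more unit of Depth per recursion.
      for (unsigned n : sizes)
        std::printf("AddOps.size()=%u -> Depth step %u\n",
                    n, 1 + (log2_32(n) >> 2));
    }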
@@ -3599,7 +3667,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
   }
 }
 
-/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+/// Helper function for LSRInstance::GenerateSymbolicOffsets.
 void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                               const Formula &Base, size_t Idx,
                                               bool IsScaledReg) {
@@ -3631,7 +3699,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                 /* IsScaledReg */ true);
 }
 
-/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+/// Helper function for LSRInstance::GenerateConstantOffsets.
 void LSRInstance::GenerateConstantOffsetsImpl(
     LSRUse &LU, unsigned LUIdx, const Formula &Base,
     const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
@@ -3941,10 +4009,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
     if (Imms.size() == 1)
       continue;
 
-    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
-          for (const auto &Entry : Imms)
-            dbgs() << ' ' << Entry.first;
-          dbgs() << '\n');
+    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+               for (const auto &Entry
+                    : Imms) dbgs()
+               << ' ' << Entry.first;
+               dbgs() << '\n');
 
     // Examine each offset.
     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
@@ -3956,7 +4025,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
 
       if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
-        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
+                          << '\n');
         continue;
       }
 
@@ -4041,6 +4111,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
           NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
           if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                           LU.Kind, LU.AccessTy, NewF)) {
+            if (TTI.shouldFavorPostInc() &&
+                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
+              continue;
            if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
              continue;
            NewF = F;
@@ -4102,9 +4175,9 @@ LSRInstance::GenerateAllReuseFormulae() {
 
   GenerateCrossUseConstantOffsets();
 
-  DEBUG(dbgs() << "\n"
-               "After generating reuse formulae:\n";
-        print_uses(dbgs()));
+  LLVM_DEBUG(dbgs() << "\n"
                        "After generating reuse formulae:\n";
+             print_uses(dbgs()));
 }
 
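As a concrete picture of the reuse GenerateCrossUseConstantOffsets is after (a hand-written illustration, not actual LSR output): two induction expressions that differ only by a constant, such as {p,+,16} and {p+8,+,16} below, can share a single register once the constant difference is folded into the memory operand as an immediate displacement.

    #include <cstddef>

    long sumPairs(const long *p, size_t n) { // n assumed even
      long s = 0;
      for (size_t i = 0; i != n; i += 2)
        // The two loads have addresses {p,+,16} and {p+8,+,16}; with
        // reg+imm addressing both use one pointer register, with byte
        // offsets 0 and 8 as immediates.
        s += p[i] + p[i + 1];
      return s;
    }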
 /// If there are multiple formulae with the same set of registers used
@@ -4126,7 +4199,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
-    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
+               dbgs() << '\n');
 
     bool Any = false;
     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
@@ -4150,8 +4224,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
        // as the basis of rediscovering the desired formula that uses an AddRec
        // corresponding to the existing phi. Once all formulae have been
        // generated, these initial losers may be pruned.
-        DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
-              dbgs() << "\n");
+        LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+                   dbgs() << "\n");
       } else {
        SmallVector<const SCEV *, 4> Key;
@@ -4164,7 +4238,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
-        std::sort(Key.begin(), Key.end());
+        llvm::sort(Key.begin(), Key.end());
 
        std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
@@ -4178,10 +4252,10 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
          CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
          if (CostF.isLess(CostBest, TTI))
            std::swap(F, Best);
-          DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
-                dbgs() << "\n"
-                          " in favor of formula "; Best.print(dbgs());
-                dbgs() << '\n');
+          LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+                     dbgs() << "\n"
+                               " in favor of formula ";
+                     Best.print(dbgs()); dbgs() << '\n');
 #ifndef NDEBUG
          ChangedFormulae = true;
 #endif
@@ -4200,11 +4274,11 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
   BestFormulae.clear();
   }
 
-  DEBUG(if (ChangedFormulae) {
-          dbgs() << "\n"
-                    "After filtering out undesirable candidates:\n";
-          print_uses(dbgs());
-        });
+  LLVM_DEBUG(if (ChangedFormulae) {
+    dbgs() << "\n"
+              "After filtering out undesirable candidates:\n";
+    print_uses(dbgs());
+  });
 }
 
 // This is a rough guess that seems to work fairly well.
@@ -4233,11 +4307,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
 /// register pressure); remove it to simplify the system.
 void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
-    DEBUG(dbgs() << "The search space is too complex.\n");
+    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
 
-    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
-                    "which use a superset of registers used by other "
-                    "formulae.\n");
+    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+                         "which use a superset of registers used by other "
+                         "formulae.\n");
 
     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
       LSRUse &LU = Uses[LUIdx];
@@ -4255,7 +4329,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                 (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
-              DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+              LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+                         dbgs() << '\n');
              LU.DeleteFormula(F);
              --i;
              --e;
@@ -4270,8 +4345,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
              NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                  (I - F.BaseRegs.begin()));
              if (LU.HasFormulaWithSameRegs(NewF)) {
-                DEBUG(dbgs() << " Deleting "; F.print(dbgs());
-                      dbgs() << '\n');
+                LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+                           dbgs() << '\n');
                LU.DeleteFormula(F);
                --i;
                --e;
@@ -4286,8 +4361,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
       LU.RecomputeRegs(LUIdx, RegUses);
     }
 
-    DEBUG(dbgs() << "After pre-selection:\n";
-          print_uses(dbgs()));
+    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
   }
 }
 
@@ -4297,9 +4371,10 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
     return;
 
-  DEBUG(dbgs() << "The search space is too complex.\n"
-                  "Narrowing the search space by assuming that uses separated "
-                  "by a constant offset will use the same registers.\n");
+  LLVM_DEBUG(
+      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");
 
   // This is especially useful for unrolled loops.
 
@@ -4317,7 +4392,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
                       LU.Kind, LU.AccessTy))
        continue;
 
-      DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+      LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
 
      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
 
@@ -4325,7 +4400,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
-        DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
+        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }
 
      // Delete formulae from the new use which are no longer legal.
@@ -4334,8 +4409,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
-          DEBUG(dbgs() << " Deleting "; F.print(dbgs());
-                dbgs() << '\n');
+          LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          --i;
          --e;
@@ -4354,7 +4428,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
     }
   }
 
-  DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
+  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
 }
 
 /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
@@ -4362,15 +4436,14 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
 /// eliminate.
 void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
-    DEBUG(dbgs() << "The search space is too complex.\n");
+    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
 
-    DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
-                    "undesirable dedicated registers.\n");
+    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+                         "undesirable dedicated registers.\n");
 
     FilterOutUndesirableDedicatedRegisters();
 
-    DEBUG(dbgs() << "After pre-selection:\n";
-          print_uses(dbgs()));
+    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
   }
 }
 
@@ -4381,15 +4454,16 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
 /// The benefit is that it is more likely to find out a better solution
 /// from a formulae set with more Scale and ScaledReg variations than
 /// a formulae set with the same Scale and ScaledReg. The picking winner
-/// reg heurstic will often keep the formulae with the same Scale and
+/// reg heuristic will often keep the formulae with the same Scale and
 /// ScaledReg and filter others, and we want to avoid that if possible.
 void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
     return;
 
-  DEBUG(dbgs() << "The search space is too complex.\n"
-                  "Narrowing the search space by choosing the best Formula "
-                  "from the Formulae with the same Scale and ScaledReg.\n");
+  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");
 
   // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
   using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
@@ -4403,7 +4477,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
 
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
-    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
+    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
+               dbgs() << '\n');
 
     // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
@@ -4447,10 +4522,10 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
          std::swap(F, Best);
-        DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
-              dbgs() << "\n"
-                        " in favor of formula ";
-              Best.print(dbgs()); dbgs() << '\n');
+        LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
+                   dbgs() << "\n"
+                             " in favor of formula ";
+                   Best.print(dbgs()); dbgs() << '\n');
 #ifndef NDEBUG
        ChangedFormulae = true;
 #endif
@@ -4466,7 +4541,7 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
     BestFormulae.clear();
   }
 
-  DEBUG(if (ChangedFormulae) {
+  LLVM_DEBUG(if (ChangedFormulae) {
     dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
     print_uses(dbgs());
@@ -4525,7 +4600,7 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
   // Used in each formula of a solution (in example above this is reg(c)).
   // We can skip them in calculations.
   SmallPtrSet<const SCEV *, 4> UniqRegs;
-  DEBUG(dbgs() << "The search space is too complex.\n");
+  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
 
   // Map each register to probability of not selecting
   DenseMap <const SCEV *, float> RegNumMap;
@@ -4545,7 +4620,8 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
     RegNumMap.insert(std::make_pair(Reg, PNotSel));
   }
 
-  DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
+  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");
 
   // Delete formulas where registers number expectation is high.
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
@@ -4587,26 +4663,25 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
        MinIdx = i;
      }
    }
-    DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
-          dbgs() << " with min reg num " << FMinRegNum << '\n');
+    LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
+               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
-      DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
-            dbgs() << '\n');
+      LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
+                 dbgs() << '\n');
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
-    DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
+    LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
   }
-  DEBUG(dbgs() << "After pre-selection:\n";
-        print_uses(dbgs()));
+  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
 }
 
 /// Pick a register which seems likely to be profitable, and then in any use
@@ -4619,7 +4694,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
   while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
     // Ok, we have too many of formulae on our hands to conveniently handle.
     // Use a rough heuristic to thin out the list.
-    DEBUG(dbgs() << "The search space is too complex.\n");
+    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
 
     // Pick the register which is used by the most LSRUses, which is likely
     // to be a good reuse register candidate.
@@ -4640,8 +4715,8 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
      }
    }
 
-    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
-                 << " will yield profitable reuse.\n");
+    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+                      << " will yield profitable reuse.\n");
    Taken.insert(Best);
 
    // In any use with formulae which references this register, delete formulae
@@ -4654,7 +4729,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
-          DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+          LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
          LU.DeleteFormula(F);
          --e;
          --i;
@@ -4668,8 +4743,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
      LU.RecomputeRegs(LUIdx, RegUses);
    }
 
-    DEBUG(dbgs() << "After pre-selection:\n";
-          print_uses(dbgs()));
+    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
   }
 }
 
@@ -4751,11 +4825,11 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
-        DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
-              dbgs() << ".\n Regs:";
-              for (const SCEV *S : NewRegs)
-                dbgs() << ' ' << *S;
-              dbgs() << '\n');
+        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+                   dbgs() << ".\n Regs:"; for (const SCEV *S
+                                                : NewRegs) dbgs()
+                                          << ' ' << *S;
+                   dbgs() << '\n');
 
        SolutionCost = NewCost;
        Solution = Workspace;
@@ -4780,22 +4854,22 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
                CurRegs, VisitedRegs);
   if (Solution.empty()) {
-    DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
     return;
   }
 
   // Ok, we've now made all our decisions.
-  DEBUG(dbgs() << "\n"
-                  "The chosen solution requires "; SolutionCost.print(dbgs());
-        dbgs() << ":\n";
-        for (size_t i = 0, e = Uses.size(); i != e; ++i) {
-          dbgs() << " ";
-          Uses[i].print(dbgs());
-          dbgs() << "\n"
-                    " ";
-          Solution[i]->print(dbgs());
-          dbgs() << '\n';
-        });
+  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
+             SolutionCost.print(dbgs()); dbgs() << ":\n";
+             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+               dbgs() << " ";
+               Uses[i].print(dbgs());
+               dbgs() << "\n"
                         " ";
+               Solution[i]->print(dbgs());
+               dbgs() << '\n';
+             });
 
   assert(Solution.size() == Uses.size() && "Malformed solution!");
 }
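The SolveRecurse/Solve pair whose debug output changed above is a depth-first branch-and-bound search over one formula per use. A simplified standalone model (an assumed reduction: the real cost depends on the register set shared across uses, whereas costs here are additive per use):

    #include <climits>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    using Cost = int;

    void solveRecurse(const std::vector<std::vector<Cost>> &UseFormulaCosts,
                      size_t UseIdx, Cost CurCost,
                      std::vector<size_t> &Workspace, Cost &BestCost,
                      std::vector<size_t> &Best) {
      if (UseIdx == UseFormulaCosts.size()) { // all uses assigned a formula
        if (CurCost < BestCost) { BestCost = CurCost; Best = Workspace; }
        return;
      }
      for (size_t F = 0; F != UseFormulaCosts[UseIdx].size(); ++F) {
        Cost NewCost = CurCost + UseFormulaCosts[UseIdx][F];
        if (NewCost >= BestCost) // prune: cannot beat the best solution
          continue;
        Workspace.push_back(F);
        solveRecurse(UseFormulaCosts, UseIdx + 1, NewCost, Workspace,
                     BestCost, Best);
        Workspace.pop_back();
      }
    }

    int main() {
      std::vector<std::vector<Cost>> Costs = {{3, 1}, {2, 5}, {4, 1}};
      std::vector<size_t> Workspace, Best;
      Cost BestCost = INT_MAX;
      solveRecurse(Costs, 0, 0, Workspace, BestCost, Best);
      std::printf("best cost %d via formulae %zu %zu %zu\n", BestCost,
                  Best[0], Best[1], Best[2]); // best cost 4 via formulae 1 0 1
    }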
@@ -4996,7 +5070,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
   // Unless the addressing mode will not be folded.
   if (!Ops.empty() && LU.Kind == LSRUse::Address &&
       isAMCompletelyFolded(TTI, LU, F)) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
     Ops.clear();
     Ops.push_back(SE.getUnknown(FullV));
   }
@@ -5269,7 +5343,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   for (const IVStrideUse &U : IU) {
     if (++NumUsers > MaxIVUsers) {
       (void)U;
-      DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
+      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
+                        << "\n");
       return;
     }
     // Bail out if we have a PHI on an EHPad that gets a value from a
@@ -5302,9 +5377,9 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   }
 #endif // DEBUG
 
-  DEBUG(dbgs() << "\nLSR on loop ";
-        L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
-        dbgs() << ":\n");
+  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
+             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
+             dbgs() << ":\n");
 
   // First, perform some low-level loop optimizations.
   OptimizeShadowIV();
@@ -5315,7 +5390,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
 
   // Skip nested loops until we can model them better with formulae.
   if (!L->empty()) {
-    DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
     return;
   }
 
@@ -5325,9 +5400,11 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   CollectFixupsAndInitialFormulae();
   CollectLoopInvariantFixupsAndFormulae();
 
-  assert(!Uses.empty() && "IVUsers reported at least one use");
-  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
-        print_uses(dbgs()));
+  if (Uses.empty())
+    return;
+
+  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+             print_uses(dbgs()));
 
   // Now use the reuse data to generate a bunch of interesting ways
   // to formulate the values needed for the uses.
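Most of the churn in this diff is the mechanical DEBUG -> LLVM_DEBUG rename; the macro's use is otherwise unchanged. A minimal sketch of the idiom (compiles against the LLVM headers; output appears only in assertion-enabled builds, gated at runtime by -debug or -debug-only=loop-reduce):

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "loop-reduce"

    void reportUses(unsigned NumUses) {
      // The entire statement compiles away in builds without assertions.
      LLVM_DEBUG(llvm::dbgs() << "LSR found " << NumUses << " uses\n");
    }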