src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2011-10-20 21:10:27 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2011-10-20 21:10:27 +0000
commit	30815c536baacc07e925f0aef23a5395883173dc (patch)
tree	2cbcf22585e99f8a87d12d5ff94f392c0d266819 /lib/Transforms/Scalar/LoopStrengthReduce.cpp
parent	411bd29eea3c360d5b48a18a17b5e87f5671af0e (diff)
download	src-30815c536baacc07e925f0aef23a5395883173dc.tar.gz src-30815c536baacc07e925f0aef23a5395883173dc.zip

vendor/llvm/llvm-r142614

Notes

Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')

-rw-r--r--

lib/Transforms/Scalar/LoopStrengthReduce.cpp

215

1 files changed, 160 insertions, 55 deletions

diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 509d0264f10b..3e122c2a866e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

@@ -70,12 +70,27 @@

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/DenseSet.h"

#include "llvm/Support/Debug.h"

+#include "llvm/Support/CommandLine.h"

#include "llvm/Support/ValueHandle.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Target/TargetLowering.h"

#include <algorithm>

using namespace llvm;

+namespace llvm {

+cl::opt<bool> EnableNested(

+ "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));

+cl::opt<bool> EnableRetry(

+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));

+// Temporary flag to clean up congruent phis after LSR phi expansion.

+// It's currently disabled until we can determine whether it's truly useful or

+// not. The flag should be removed after the v3.0 release.

+cl::opt<bool> EnablePhiElim(

+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));

namespace {

/// RegSortData - This class holds data which is used to order reuse candidates.

@@ -219,7 +234,7 @@ struct Formula {

void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

unsigned getNumRegs() const;

- const Type *getType() const;

+ Type *getType() const;

void DeleteBaseReg(const SCEV *&S);

@@ -319,7 +334,7 @@ unsigned Formula::getNumRegs() const {

/// getType - Return the type of this formula, if it has one, or null

/// otherwise. This type is meaningless except for the bit size.

-const Type *Formula::getType() const {

+Type *Formula::getType() const {

return !BaseRegs.empty() ? BaseRegs.front()->getType() :

ScaledReg ? ScaledReg->getType() :

AM.BaseGV ? AM.BaseGV->getType() :

@@ -397,7 +412,7 @@ void Formula::dump() const {

/// isAddRecSExtable - Return true if the given addrec can be sign-extended

/// without changing its value.

static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {

- const Type *WideTy =

+ Type *WideTy =

IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);

return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));

}

@@ -405,7 +420,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {

/// isAddSExtable - Return true if the given add can be sign-extended

/// without changing its value.

static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {

- const Type *WideTy =

+ Type *WideTy =

IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);

return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));

}

@@ -413,7 +428,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {

/// isMulSExtable - Return true if the given mul can be sign-extended

/// without changing its value.

static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {

- const Type *WideTy =

+ Type *WideTy =

IntegerType::get(SE.getContext(),

SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());

return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));

@@ -594,8 +609,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {

}

/// getAccessType - Return the type of the memory being accessed.

-static const Type *getAccessType(const Instruction *Inst) {

- const Type *AccessTy = Inst->getType();

+static Type *getAccessType(const Instruction *Inst) {

+ Type *AccessTy = Inst->getType();

if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))

AccessTy = SI->getOperand(0)->getType();

else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {

@@ -614,7 +629,7 @@ static const Type *getAccessType(const Instruction *Inst) {

// All pointers have the same requirements, so canonicalize them to an

// arbitrary pointer type to minimize variation.

- if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))

+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))

AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),

PTy->getAddressSpace());

@@ -670,6 +685,21 @@ public:

void Loose();

+#ifndef NDEBUG

+ // Once any of the metrics loses, they must all remain losers.

+ bool isValid() {

+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds

+ | ImmCost | SetupCost) != ~0u)

+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds

+ & ImmCost & SetupCost) == ~0u);

+ }

+#endif

+ bool isLoser() {

+ assert(isValid() && "invalid cost");

+ return NumRegs == ~0u;

+ }

void RateFormula(const Formula &F,

SmallPtrSet<const SCEV *, 16> &Regs,

const DenseSet<const SCEV *> &VisitedRegs,

@@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg,

if (AR->getLoop() == L)

AddRecCost += 1; /// TODO: This should be a function of the stride.

- // If this is an addrec for a loop that's already been visited by LSR,

- // don't second-guess its addrec phi nodes. LSR isn't currently smart

- // enough to reason about more than one loop at a time. Consider these

- // registers free and leave them alone.

- else if (L->contains(AR->getLoop()) ||

+ // If this is an addrec for another loop, don't second-guess its addrec phi

+ // nodes. LSR isn't currently smart enough to reason about more than one

+ // loop at a time. LSR has either already run on inner loops, will not run

+ // on other loops, and cannot be expected to change sibling loops. If the

+ // AddRec exists, consider its register free and leave it alone. Otherwise,

+ // do not consider this formula at all.

+ // FIXME: why do we need to generate such formulae?

+ else if (!EnableNested || L->contains(AR->getLoop()) ||

(!AR->getLoop()->contains(L) &&

DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {

for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();

- PHINode *PN = dyn_cast<PHINode>(I); ++I)

+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {

if (SE.isSCEVable(PN->getType()) &&

(SE.getEffectiveSCEVType(PN->getType()) ==

SE.getEffectiveSCEVType(AR->getType())) &&

SE.getSCEV(PN) == AR)

return;

+ }

+ if (!EnableNested) {

+ Loose();

+ return;

+ }

// If this isn't one of the addrecs that the loop already has, it

// would require a costly new phi and add. TODO: This isn't

// precisely modeled right now.

++NumBaseAdds;

- if (!Regs.count(AR->getStart()))

+ if (!Regs.count(AR->getStart())) {

RateRegister(AR->getStart(), Regs, L, SE, DT);

+ if (isLoser())

+ return;

+ }

}

// Add the step value register, if it needs one.

// TODO: The non-affine case isn't precisely modeled here.

- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))

- if (!Regs.count(AR->getStart()))

+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {

+ if (!Regs.count(AR->getOperand(1))) {

RateRegister(AR->getOperand(1), Regs, L, SE, DT);

+ if (isLoser())

+ return;

+ }

}

++NumRegs;

@@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F,

return;

}

RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);

+ if (isLoser())

+ return;

}

for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),

E = F.BaseRegs.end(); I != E; ++I) {

@@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F,

return;

}

RatePrimaryRegister(BaseReg, Regs, L, SE, DT);

+ if (isLoser())

+ return;

}

// Determine how many (unfolded) adds we'll need inside the loop.

@@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F,

else if (Offset != 0)

ImmCost += APInt(64, Offset, true).getMinSignedBits();

}

+ assert(isValid() && "invalid cost");

}

/// Loose - Set this cost to a losing value.

@@ -980,7 +1029,7 @@ public:

};

KindType Kind;

- const Type *AccessTy;

+ Type *AccessTy;

SmallVector<int64_t, 8> Offsets;

int64_t MinOffset;

@@ -995,7 +1044,7 @@ public:

/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different

/// max fixup widths to be equivalent, because the narrower one may be relying

/// on the implicit truncation to truncate away bogus bits.

- const Type *WidestFixupType;

+ Type *WidestFixupType;

/// Formulae - A list of ways to build a value that can satisfy this user.

/// After the list is populated, one of these is selected heuristically and

@@ -1005,7 +1054,7 @@ public:

/// Regs - The set of register candidates used by all formulae in this LSRUse.

SmallPtrSet<const SCEV *, 4> Regs;

- LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),

+ LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),

MinOffset(INT64_MAX),

MaxOffset(INT64_MIN),

AllFixupsOutsideLoop(true),

@@ -1127,7 +1176,7 @@ void LSRUse::dump() const {

/// be completely folded into the user instruction at isel time. This includes

/// address-mode folding and special icmp tricks.

static bool isLegalUse(const TargetLowering::AddrMode &AM,

- LSRUse::KindType Kind, const Type *AccessTy,

+ LSRUse::KindType Kind, Type *AccessTy,

const TargetLowering *TLI) {

switch (Kind) {

case LSRUse::Address:

@@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,

// If we have low-level target information, ask the target if it can fold an

// integer immediate on an icmp.

if (AM.BaseOffs != 0) {

- if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);

+ if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);

return false;

}

@@ -1176,7 +1225,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,

static bool isLegalUse(TargetLowering::AddrMode AM,

int64_t MinOffset, int64_t MaxOffset,

- LSRUse::KindType Kind, const Type *AccessTy,

+ LSRUse::KindType Kind, Type *AccessTy,

const TargetLowering *TLI) {

// Check for overflow.

if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=

@@ -1198,7 +1247,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM,

static bool isAlwaysFoldable(int64_t BaseOffs,

GlobalValue *BaseGV,

bool HasBaseReg,

- LSRUse::KindType Kind, const Type *AccessTy,

+ LSRUse::KindType Kind, Type *AccessTy,

const TargetLowering *TLI) {

// Fast-path: zero is always foldable.

if (BaseOffs == 0 && !BaseGV) return true;

@@ -1224,7 +1273,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,

static bool isAlwaysFoldable(const SCEV *S,

int64_t MinOffset, int64_t MaxOffset,

bool HasBaseReg,

- LSRUse::KindType Kind, const Type *AccessTy,

+ LSRUse::KindType Kind, Type *AccessTy,

const TargetLowering *TLI,

ScalarEvolution &SE) {

// Fast-path: zero is always foldable.

@@ -1299,7 +1348,7 @@ class LSRInstance {

SmallSetVector<int64_t, 8> Factors;

/// Types - Interesting use types, to facilitate truncation reuse.

- SmallSetVector<const Type *, 4> Types;

+ SmallSetVector<Type *, 4> Types;

/// Fixups - The list of operands which are to be replaced.

SmallVector<LSRFixup, 16> Fixups;

@@ -1330,11 +1379,11 @@ class LSRInstance {

UseMapTy UseMap;

bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,

- LSRUse::KindType Kind, const Type *AccessTy);

+ LSRUse::KindType Kind, Type *AccessTy);

std::pair<size_t, int64_t> getUse(const SCEV *&Expr,

LSRUse::KindType Kind,

- const Type *AccessTy);

+ Type *AccessTy);

void DeleteUse(LSRUse &LU, size_t LUIdx);

@@ -1426,7 +1475,8 @@ void LSRInstance::OptimizeShadowIV() {

IVUsers::const_iterator CandidateUI = UI;

++UI;

Instruction *ShadowUse = CandidateUI->getUser();

- const Type *DestTy = NULL;

+ Type *DestTy = NULL;

+ bool IsSigned = false;

/* If shadow use is a int->float cast then insert a second IV

to eliminate this cast.

@@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() {

for (unsigned i = 0; i < n; ++i, ++d)

foo(d);

- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))

+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {

+ IsSigned = false;

DestTy = UCast->getDestTy();

- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))

+ }

+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {

+ IsSigned = true;

DestTy = SCast->getDestTy();

+ }

if (!DestTy) continue;

if (TLI) {

@@ -1457,7 +1511,7 @@ void LSRInstance::OptimizeShadowIV() {

if (!PH) continue;

if (PH->getNumIncomingValues() != 2) continue;

- const Type *SrcTy = PH->getType();

+ Type *SrcTy = PH->getType();

int Mantissa = DestTy->getFPMantissaWidth();

if (Mantissa == -1) continue;

if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)

@@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() {

ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));

if (!Init) continue;

- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());

+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

+ (double)Init->getSExtValue() :

+ (double)Init->getZExtValue());

BinaryOperator *Incr =

dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));

@@ -1776,7 +1832,7 @@ LSRInstance::OptimizeLoopTermCond() {

if (!TLI)

goto decline_post_inc;

// Check for possible scaled-address reuse.

- const Type *AccessTy = getAccessType(UI->getUser());

+ Type *AccessTy = getAccessType(UI->getUser());

TargetLowering::AddrMode AM;

AM.Scale = C->getSExtValue();

if (TLI->isLegalAddressingMode(AM, AccessTy))

@@ -1840,10 +1896,10 @@ LSRInstance::OptimizeLoopTermCond() {

/// return true.

bool

LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,

- LSRUse::KindType Kind, const Type *AccessTy) {

+ LSRUse::KindType Kind, Type *AccessTy) {

int64_t NewMinOffset = LU.MinOffset;

int64_t NewMaxOffset = LU.MaxOffset;

- const Type *NewAccessTy = AccessTy;

+ Type *NewAccessTy = AccessTy;

// Check for a mismatched kind. It's tempting to collapse mismatched kinds to

// something conservative, however this can pessimize in the case that one of

@@ -1882,7 +1938,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,

/// Either reuse an existing use or create a new one, as needed.

std::pair<size_t, int64_t>

LSRInstance::getUse(const SCEV *&Expr,

- LSRUse::KindType Kind, const Type *AccessTy) {

+ LSRUse::KindType Kind, Type *AccessTy) {

const SCEV *Copy = Expr;

int64_t Offset = ExtractImmediate(Expr, SE);

@@ -2044,7 +2100,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {

LF.PostIncLoops = UI->getPostIncLoops();

LSRUse::KindType Kind = LSRUse::Basic;

- const Type *AccessTy = 0;

+ Type *AccessTy = 0;

if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {

Kind = LSRUse::Address;

AccessTy = getAccessType(LF.UserInst);

@@ -2464,7 +2520,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,

if (LU.Kind != LSRUse::ICmpZero) return;

// Determine the integer type for the base formula.

- const Type *IntTy = Base.getType();

+ Type *IntTy = Base.getType();

if (!IntTy) return;

if (SE.getTypeSizeInBits(IntTy) > 64) return;

@@ -2538,7 +2594,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,

/// scaled-offset address modes, for example.

void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {

// Determine the integer type for the base formula.

- const Type *IntTy = Base.getType();

+ Type *IntTy = Base.getType();

if (!IntTy) return;

// If this Formula already has a scaled register, we can't add another one.

@@ -2598,13 +2654,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {

if (Base.AM.BaseGV) return;

// Determine the integer type for the base formula.

- const Type *DstTy = Base.getType();

+ Type *DstTy = Base.getType();

if (!DstTy) return;

DstTy = SE.getEffectiveSCEVType(DstTy);

- for (SmallSetVector<const Type *, 4>::const_iterator

+ for (SmallSetVector<Type *, 4>::const_iterator

I = Types.begin(), E = Types.end(); I != E; ++I) {

- const Type *SrcTy = *I;

+ Type *SrcTy = *I;

if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {

Formula F = Base;

@@ -2741,7 +2797,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {

int64_t Imm = WI.Imm;

const SCEV *OrigReg = WI.OrigReg;

- const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());

+ Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());

const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));

unsigned BitWidth = SE.getTypeSizeInBits(IntTy);

@@ -3275,6 +3331,9 @@ retry:

skip:;

}

+ if (!EnableRetry && !AnySatisfiedReqRegs)

+ return;

// If none of the formulae had all of the required registers, relax the

// constraint so that we don't exclude all formulae.

if (!AnySatisfiedReqRegs) {

@@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {

// SolveRecurse does all the work.

SolveRecurse(Solution, SolutionCost, Workspace, CurCost,

CurRegs, VisitedRegs);

+ if (Solution.empty()) {

+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");

+ return;

+ }

// Ok, we've now made all our decisions.

DEBUG(dbgs() << "\n"

@@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,

// Don't insert instructions before PHI nodes.

while (isa<PHINode>(IP)) ++IP;

+ // Ignore landingpad instructions.

+ while (isa<LandingPadInst>(IP)) ++IP;

// Ignore debug intrinsics.

while (isa<DbgInfoIntrinsic>(IP)) ++IP;

@@ -3440,9 +3506,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,

Rewriter.setPostInc(LF.PostIncLoops);

// This is the type that the user actually needs.

- const Type *OpTy = LF.OperandValToReplace->getType();

+ Type *OpTy = LF.OperandValToReplace->getType();

// This will be the type that we'll initially expand to.

- const Type *Ty = F.getType();

+ Type *Ty = F.getType();

if (!Ty)

// No type known; just expand directly to the ultimate type.

Ty = OpTy;

@@ -3450,7 +3516,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,

// Expand directly to the ultimate type if it's the right size.

Ty = OpTy;

// This is the type to do integer arithmetic in.

- const Type *IntTy = SE.getEffectiveSCEVType(Ty);

+ Type *IntTy = SE.getEffectiveSCEVType(Ty);

// Build up a list of operands to add together to form the full base.

SmallVector<const SCEV *, 8> Ops;

@@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,

// The other interesting way of "folding" with an ICmpZero is to use a

// negated immediate.

if (!ICmpScaledV)

- ICmpScaledV = ConstantInt::get(IntTy, -Offset);

+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);

else {

Ops.push_back(SE.getUnknown(ICmpScaledV));

ICmpScaledV = ConstantInt::get(IntTy, Offset);

@@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,

// users.

if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&

!isa<IndirectBrInst>(BB->getTerminator())) {

- Loop *PNLoop = LI.getLoopFor(PN->getParent());

- if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {

+ BasicBlock *Parent = PN->getParent();

+ Loop *PNLoop = LI.getLoopFor(Parent);

+ if (!PNLoop || Parent != PNLoop->getHeader()) {

// Split the critical edge.

- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);

+ BasicBlock *NewBB = 0;

+ if (!Parent->isLandingPad()) {

+ NewBB = SplitCriticalEdge(BB, Parent, P,

+ /*MergeIdenticalEdges=*/true,

+ /*DontDeleteUselessPhis=*/true);

+ } else {

+ SmallVector<BasicBlock*, 2> NewBBs;

+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);

+ NewBB = NewBBs[0];

+ }

// If PN is outside of the loop and BB is in the loop, we want to

// move the block to be immediately before the PHI block, not

@@ -3637,7 +3713,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,

Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);

// If this is reuse-by-noop-cast, insert the noop cast.

- const Type *OpTy = LF.OperandValToReplace->getType();

+ Type *OpTy = LF.OperandValToReplace->getType();

if (FullV->getType() != OpTy)

FullV =

CastInst::Create(CastInst::getCastOpcode(FullV, false,

@@ -3667,7 +3743,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF,

Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);

// If this is reuse-by-noop-cast, insert the noop cast.

- const Type *OpTy = LF.OperandValToReplace->getType();

+ Type *OpTy = LF.OperandValToReplace->getType();

if (FullV->getType() != OpTy) {

Instruction *Cast =

CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),

@@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,

SCEVExpander Rewriter(SE, "lsr");

Rewriter.disableCanonicalMode();

+ Rewriter.enableLSRMode();

Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

// Expand the new value definitions and update the users.

@@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)

OptimizeShadowIV();

OptimizeLoopTermCond();

+ // If loop preparation eliminates all interesting IV users, bail.

+ if (IU.empty()) return;

+ // Skip nested loops until we can model them better with formulae.

+ if (!EnableNested && !L->empty()) {

+ if (EnablePhiElim) {

+ // Remove any extra phis created by processing inner loops.

+ SmallVector<WeakVH, 16> DeadInsts;

+ SCEVExpander Rewriter(SE, "lsr");

+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);

+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);

+ }

+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");

+ return;

+ }

// Start collecting data and preparing for the solver.

CollectInterestingTypesAndFactors();

CollectFixupsAndInitialFormulae();

@@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)

Types.clear();

RegUses.clear();

+ if (Solution.empty())

+ return;

#ifndef NDEBUG

// Formulae should be legal.

for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),

@@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)

// Now that we've decided what we want, make it so.

ImplementSolution(Solution, P);

+ if (EnablePhiElim) {

+ // Remove any extra phis created by processing inner loops.

+ SmallVector<WeakVH, 16> DeadInsts;

+ SCEVExpander Rewriter(SE, "lsr");

+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);

+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);

+ }

}

void LSRInstance::print_factors_and_types(raw_ostream &OS) const {

@@ -3793,7 +3898,7 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {

OS << '*' << *I;

}

- for (SmallSetVector<const Type *, 4>::const_iterator

+ for (SmallSetVector<Type *, 4>::const_iterator

I = Types.begin(), E = Types.end(); I != E; ++I) {

if (!First) OS << ", ";

First = false;