diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2016-08-17 19:41:29 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-08-17 19:41:29 +0000 |
| commit | 6c4bc1bd2772d4db151a13576558e1320c7c3b65 (patch) | |
| tree | 06f8f3845db41aed4b2b4228d561c3f3b5a09563 /contrib/llvm/lib/Transforms | |
| parent | 4bb0738ee7438ed572a4b9d8b609271b029de5b8 (diff) | |
| parent | a7fe922b98bb45be7dce7c1cfe668ec27eeddc74 (diff) | |
Notes
Diffstat (limited to 'contrib/llvm/lib/Transforms')
17 files changed, 254 insertions, 49 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index fff544085414..787f4342831d 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker { namespace llvm { template <> struct GraphTraits<ArgumentGraphNode *> { typedef ArgumentGraphNode NodeType; + typedef ArgumentGraphNode *NodeRef; typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; static inline NodeType *getEntryNode(NodeType *A) { return A; } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 310c29275faf..99b12d4db0d0 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/Evaluator.h" #include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL, // Instructions could multiply use V. while (UI != E && *UI == I) ++UI; - I->eraseFromParent(); + if (isInstructionTriviallyDead(I, TLI)) + I->eraseFromParent(); } } diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index cf5b76dc365b..df6a48e05d42 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); +static cl::opt<bool> EnableGVNHoist( + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental GVN Hoisting pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager( FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); - FPM.add(createGVNHoistPass()); + if(EnableGVNHoist) + FPM.add(createGVNHoistPass()); FPM.add(createLowerExpectIntrinsicPass()); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index d7eed790e2ab..8f1ff8ac0e66 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring + // decomposeBitTestICmp() might help. { - unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()); + unsigned BitWidth = + DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); APInt MinSignedValue = APInt::getSignBit(BitWidth); Value *X; const APInt *Y, *C; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 51c3262b5d14..377ccb9c37f7 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2830,7 +2830,8 @@ bool InstCombiner::run() { // Add operands to the worklist. replaceInstUsesWith(*I, C); ++NumConstProp; - eraseInstFromFunction(*I); + if (isInstructionTriviallyDead(I, TLI)) + eraseInstFromFunction(*I); MadeIRChange = true; continue; } @@ -2851,7 +2852,8 @@ bool InstCombiner::run() { // Add operands to the worklist. replaceInstUsesWith(*I, C); ++NumConstProp; - eraseInstFromFunction(*I); + if (isInstructionTriviallyDead(I, TLI)) + eraseInstFromFunction(*I); MadeIRChange = true; continue; } @@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; - Inst->eraseFromParent(); + if (isInstructionTriviallyDead(Inst, TLI)) + Inst->eraseFromParent(); continue; } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index dcb62d3ed1b5..41041c78db97 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -272,8 +272,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { return false; } - // Check if the global is in a GCOV counter array. - if (GV->getName().startswith("__llvm_gcov_ctr")) + // Check if the global is private gcov data. + if (GV->getName().startswith("__llvm_gcov") || + GV->getName().startswith("__llvm_gcda")) return false; } diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp index 88172d19fe5a..9e982194bac7 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constant.h" @@ -90,11 +91,13 @@ bool ConstantPropagation::runOnFunction(Function &F) { // Remove the dead instruction. WorkList.erase(I); - I->eraseFromParent(); + if (isInstructionTriviallyDead(I, TLI)) { + I->eraseFromParent(); + ++NumInstKilled; + } // We made a change to the function... Changed = true; - ++NumInstKilled; } } return Changed; diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 9d0ef42e0396..0b16e2703dc4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -582,6 +582,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // its simpler value. if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); + bool Killed = false; if (!Inst->use_empty()) { Inst->replaceAllUsesWith(V); Changed = true; @@ -589,11 +590,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (isInstructionTriviallyDead(Inst, &TLI)) { Inst->eraseFromParent(); Changed = true; + Killed = true; } - if (Changed) { + if (Changed) ++NumSimplify; + if (Killed) continue; - } } // If this is a simple instruction that we can value number, process it. diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 542cf38e43bb..e958563e2d10 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -815,6 +815,14 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) return; + // Check that `Cast` actually extends the induction variable (we rely on this + // later). This takes care of cases where `Cast` is extending a truncation of + // the narrow induction variable, and thus can end up being narrower than the + // "narrow" induction variable. + uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); + if (NarrowIVWidth >= Width) + return; + // Cast is either an sext or zext up to this point. // We should not widen an indvar if arithmetics on the wider indvar are more // expensive than those on the narrower indvar. We check only the cost of ADD diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index b9e717cf763e..d1769fc3ebb3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -758,7 +758,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); - I->eraseFromParent(); + if (isInstructionTriviallyDead(I, TLI)) + I->eraseFromParent(); Condition = SimpleVal; } } diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 2c0a70e44f57..cdd17fc516a8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -377,9 +377,11 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); - CurAST->deleteValue(&I); I.replaceAllUsesWith(C); - I.eraseFromParent(); + if (isInstructionTriviallyDead(&I, TLI)) { + CurAST->deleteValue(&I); + I.eraseFromParent(); + } continue; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 77c77eb7d798..70bd9d3cca95 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4442,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Determine an input position which will be dominated by the operands and // which will dominate the result. IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); + Rewriter.setInsertPoint(&*IP); // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. @@ -4473,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); } // Expand the ScaledReg portion. @@ -4491,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand ScaleReg as if it was part of the base regs. if (F.Scale == 1) Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP))); + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4508,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Unless the addressing mode will not be folded. if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); @@ -4524,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4534,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Flush the operand list to suppress SCEVExpander hoisting of both folded and // unfolded offsets. LSR assumes they both live next to their uses. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4570,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *FullS = Ops.empty() ? SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); - Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty); // We're done expanding now, so reset the rewriter. Rewriter.clearPostInc(); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index c5ca56360fc8..4f1052d81433 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -552,9 +553,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). + const DataLayout &DL = NewFunc->getParent()->getDataLayout(); + SmallSetVector<const Value *, 8> Worklist; for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) - if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN); + if (isa<PHINode>(VMap[PHIToResolve[Idx]])) + Worklist.insert(PHIToResolve[Idx]); + + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + const Value *OrigV = Worklist[Idx]; + auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); + if (!I) + continue; + + // See if this instruction simplifies. + Value *SimpleV = SimplifyInstruction(I, DL); + if (!SimpleV) + continue; + + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (const User *U : OrigV->users()) + Worklist.insert(cast<Instruction>(U)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // If the original instruction had no side effects, remove it. + if (isInstructionTriviallyDead(I)) + I->eraseFromParent(); + else + VMap[OrigV] = I; + } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1fbb19d2b8ad..e82c07fd7b59 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1294,6 +1294,13 @@ updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode, return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } +/// Return the result of AI->isStaticAlloca() if AI were moved to the entry +/// block. Allocas used in inalloca calls and allocas of dynamic array size +/// cannot be static. +static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { + return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); +} + /// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, @@ -1328,7 +1335,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, // Don't update static allocas, as they may get moved later. if (auto *AI = dyn_cast<AllocaInst>(BI)) - if (isa<Constant>(AI->getArraySize())) + if (allocaWouldBeStaticInEntry(AI)) continue; BI->setDebugLoc(TheCallDL); @@ -1626,7 +1633,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, continue; } - if (!isa<Constant>(AI->getArraySize())) + if (!allocaWouldBeStaticInEntry(AI)) continue; // Keep track of the static allocas that we inline into the caller. @@ -1635,7 +1642,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && - isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { + allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 9658966779b9..0d5a25b8ebc5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -64,6 +64,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, DominatorTree &DT, LoopInfo &LI) { SmallVector<Use *, 16> UsesToRewrite; SmallVector<BasicBlock *, 8> ExitBlocks; + SmallSetVector<PHINode *, 16> PHIsToRemove; PredIteratorCache PredCache; bool Changed = false; @@ -115,7 +116,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, SmallVector<PHINode *, 16> AddedPHIs; SmallVector<PHINode *, 8> PostProcessPHIs; - SSAUpdater SSAUpdate; + SmallVector<PHINode *, 4> InsertedPHIs; + SSAUpdater SSAUpdate(&InsertedPHIs); SSAUpdate.Initialize(I->getType(), I->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the @@ -184,6 +186,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UseToRewrite); + + // SSAUpdater might have inserted phi-nodes inside other loops. We'll need + // to post-process them to keep LCSSA form. + for (PHINode *InsertedPN : InsertedPHIs) { + if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent())) + if (!L->contains(OtherLoop)) + PostProcessPHIs.push_back(InsertedPN); + } } // Post process PHI instructions that were inserted into another disjoint @@ -196,13 +206,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, Worklist.push_back(PostProcessPN); } - // Remove PHI nodes that did not have any uses rewritten. + // Keep track of PHI nodes that we want to remove because they did not have + // any uses rewritten. for (PHINode *PN : AddedPHIs) if (PN->use_empty()) - PN->eraseFromParent(); + PHIsToRemove.insert(PN); Changed = true; } + // Remove PHI nodes that did not have any uses rewritten. + for (PHINode *PN : PHIsToRemove) { + assert (PN->use_empty() && "Trying to remove a phi with uses."); + PN->eraseFromParent(); + } return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index b3a928bf7753..2846e8f235b7 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -327,6 +327,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, else NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + SmallVector<BasicBlock *, 8> OuterLoopBlocks; + OuterLoopBlocks.push_back(NewBB); // Now that we know which blocks are in L and which need to be moved to // OuterLoop, move any blocks that need it. for (unsigned i = 0; i != L->getBlocks().size(); ++i) { @@ -334,12 +336,53 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (!BlocksInL.count(BB)) { // Move this block to the parent, updating the exit blocks sets L->removeBlockFromLoop(BB); - if ((*LI)[BB] == L) + if ((*LI)[BB] == L) { LI->changeLoopFor(BB, NewOuter); + OuterLoopBlocks.push_back(BB); + } --i; } } + // Split edges to exit blocks from the inner loop, if they emerged in the + // process of separating the outer one. + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (BasicBlock *ExitBlock : ExitBlockSet) { + if (any_of(predecessors(ExitBlock), + [L](BasicBlock *BB) { return !L->contains(BB); })) { + rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); + } + } + + if (PreserveLCSSA) { + // Fix LCSSA form for L. Some values, which previously were only used inside + // L, can now be used in NewOuter loop. We need to insert phi-nodes for them + // in corresponding exit blocks. + + // Go through all instructions in OuterLoopBlocks and check if they are + // using operands from the inner loop. In this case we'll need to fix LCSSA + // for these instructions. + SmallSetVector<Instruction *, 8> WorklistSet; + for (BasicBlock *OuterBB: OuterLoopBlocks) { + for (Instruction &I : *OuterBB) { + for (Value *Op : I.operands()) { + Instruction *OpI = dyn_cast<Instruction>(Op); + if (!OpI || !L->contains(OpI)) + continue; + WorklistSet.insert(OpI); + } + } + } + SmallVector<Instruction *, 8> Worklist(WorklistSet.begin(), + WorklistSet.end()); + formLCSSAForInstructions(Worklist, *DT, *LI); + assert(NewOuter->isRecursivelyLCSSAForm(*DT) && + "LCSSA is broken after separating nested loops!"); + } + return NewOuter; } @@ -541,17 +584,12 @@ ReprocessLoop: SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (BasicBlock *ExitBlock : ExitBlockSet) { - for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); - PI != PE; ++PI) - // Must be exactly this loop: no subloops, parent loops, or non-loop preds - // allowed. - if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) { - ++NumInserted; - Changed = true; - } - break; - } + if (any_of(predecessors(ExitBlock), + [L](BasicBlock *BB) { return !L->contains(BB); })) { + rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); + ++NumInserted; + Changed = true; + } } // If the header has more than two predecessors at this point (from the diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8b85e320d3b2..ee5733d20f4f 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -50,6 +50,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -220,6 +221,81 @@ class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizationRequirements; +// A traits type that is intended to be used in graph algorithms. The graph it +// models starts at the loop header, and traverses the BasicBlocks that are in +// the loop body, but not the loop header. Since the loop header is skipped, +// the back edges are excluded. +struct LoopBodyTraits { + using NodeRef = std::pair<const Loop *, BasicBlock *>; + + // This wraps a const Loop * into the iterator, so we know which edges to + // filter out. + class WrappedSuccIterator + : public iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits<succ_iterator>::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { + using BaseT = iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits<succ_iterator>::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>; + + const Loop *L; + + public: + WrappedSuccIterator(succ_iterator Begin, const Loop *L) + : BaseT(Begin), L(L) {} + + NodeRef operator*() const { return {L, *I}; } + }; + + struct LoopBodyFilter { + bool operator()(NodeRef N) const { + const Loop *L = N.first; + return N.second != L->getHeader() && L->contains(N.second); + } + }; + + using ChildIteratorType = + filter_iterator<WrappedSuccIterator, LoopBodyFilter>; + + static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; } + + static ChildIteratorType child_begin(NodeRef Node) { + return make_filter_range(make_range<WrappedSuccIterator>( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .begin(); + } + + static ChildIteratorType child_end(NodeRef Node) { + return make_filter_range(make_range<WrappedSuccIterator>( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .end(); + } +}; + +/// Returns true if the given loop body has a cycle, excluding the loop +/// itself. +static bool hasCyclesInLoopBody(const Loop &L) { + if (!L.empty()) + return true; + + for (const auto SCC : + make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L), + scc_iterator<Loop, LoopBodyTraits>::end(L))) { + if (SCC.size() > 1) { + DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n"); + DEBUG(L.dump()); + return true; + } + } + return false; +} + /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. class VectorizationReport : public LoopAccessReport { @@ -1782,12 +1858,14 @@ private: Instruction *UnsafeAlgebraInst; }; -static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) { - if (L.empty()) - return V.push_back(&L); - +static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) { + if (L.empty()) { + if (!hasCyclesInLoopBody(L)) + V.push_back(&L); + return; + } for (Loop *InnerL : L) - addInnerLoop(*InnerL, V); + addAcyclicInnerLoop(*InnerL, V); } /// The LoopVectorize Pass. @@ -4395,6 +4473,9 @@ bool LoopVectorizationLegality::canVectorize() { return false; } + // FIXME: The code is currently dead, since the loop gets sent to + // LoopVectorizationLegality is already an innermost loop. + // // We can only vectorize innermost loops. if (!TheLoop->empty()) { emitAnalysis(VectorizationReport() << "loop is not the innermost loop"); @@ -6639,7 +6720,7 @@ bool LoopVectorizePass::runImpl( SmallVector<Loop *, 8> Worklist; for (Loop *L : *LI) - addInnerLoop(*L, Worklist); + addAcyclicInnerLoop(*L, Worklist); LoopsAnalyzed += Worklist.size(); |
