diff options
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 143 |
1 files changed, 98 insertions, 45 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 0057b4ba7ce1..00d2fd2fdbac 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -70,6 +70,17 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + // Loop structure should be the following: + // Preheader + // PrologHeader + // ... + // PrologLatch + // PrologExit + // NewPreheader + // Header + // ... + // Latch + // LatchExit BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); @@ -83,14 +94,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, for (PHINode &PN : Succ->phis()) { // Add a new PHI node to the prolog end block and add the // appropriate incoming values. + // TODO: This code assumes that the PrologExit (or the LatchExit block for + // prolog loop) contains only one predecessor from the loop, i.e. the + // PrologLatch. When supporting multiple-exiting block loops, we can have + // two or more blocks that have the LatchExit as the target in the + // original loop. PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(&PN)) { + // Succ is loop header. NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); } else { + // Succ is LatchExit. NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); } @@ -124,7 +142,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, - PreserveLCSSA); + nullptr, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no @@ -143,7 +161,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, - PreserveLCSSA); + nullptr, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); @@ -257,7 +275,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, assert(Exit && "Loop must have a single exit block only"); // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, + SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); @@ -267,7 +285,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // Split the main loop exit to maintain canonicalization guarantees. SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; - SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, PreserveLCSSA); } @@ -380,6 +398,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; + MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely @@ -387,6 +406,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, if (UnrollRemainder) return NewLoop; + Optional<MDNode *> NewLoopID = makeFollowupLoopID( + LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); + if (NewLoopID.hasValue()) { + NewLoop->setLoopID(NewLoopID.getValue()); + + // Do not setLoopAlreadyUnrolled if loop attributes have been defined + // explicitly. + return NewLoop; + } + // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; @@ -525,10 +554,10 @@ static bool canProfitablyUnrollMultiExitLoop( bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, - bool UnrollRemainder, - LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, - bool PreserveLCSSA) { + bool UnrollRemainder, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, bool PreserveLCSSA, + Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -545,13 +574,27 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + + if (!LatchBR || LatchBR->isUnconditional()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + LLVM_DEBUG( + dbgs() + << "Loop latch not terminated by a conditional branch.\n"); + return false; + } + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); - // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be an exit block out of the loop. This needs - // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - assert(!L->contains(LatchExit) && - "one of the loop latch successors should be the exit block!"); + + if (L->contains(LatchExit)) { + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be an exit block out of the loop. + LLVM_DEBUG( + dbgs() + << "One of the loop latch successors must be the exit block.\n"); + return false; + } + // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = @@ -636,8 +679,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); - NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", - DT, LI, PreserveLCSSA); + NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, + nullptr, PreserveLCSSA); // NewExit gets its DebugLoc from LatchExit, which is not part of the // original Loop. // Fix this by setting Loop's DebugLoc to NewExit. @@ -762,10 +805,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the - // values from the cloned region. Also update the dominator info for - // OtherExits and their immediate successors, since we have new edges into - // OtherExits. - SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; + // values from the cloned region. for (auto *BB : OtherExits) { for (auto &II : *BB) { @@ -800,27 +840,30 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, "Breaks the definition of dedicated exits!"); } #endif - // Update the dominator info because the immediate dominator is no longer the - // header of the original Loop. BB has edges both from L and remainder code. - // Since the preheader determines which loop is run (L or directly jump to - // the remainder code), we set the immediate dominator as the preheader. - if (DT) { - DT->changeImmediateDominator(BB, PreHeader); - // Also update the IDom for immediate successors of BB. If the current - // IDom is the header, update the IDom to be the preheader because that is - // the nearest common dominator of all predecessors of SuccBB. We need to - // check for IDom being the header because successors of exit blocks can - // have edges from outside the loop, and we should not incorrectly update - // the IDom in that case. - for (BasicBlock *SuccBB: successors(BB)) - if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { - if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { - assert(!SuccBB->getSinglePredecessor() && - "BB should be the IDom then!"); - DT->changeImmediateDominator(SuccBB, PreHeader); - } - } + } + + // Update the immediate dominator of the exit blocks and blocks that are + // reachable from the exit blocks. This is needed because we now have paths + // from both the original loop and the remainder code reaching the exit + // blocks. While the IDom of these exit blocks were from the original loop, + // now the IDom is the preheader (which decides whether the original loop or + // remainder code should run). + if (DT && !L->getExitingBlock()) { + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + // NB! We have to examine the dom children of all loop blocks, not just + // those which are the IDom of the exit blocks. This is because blocks + // reachable from the exit blocks can have their IDom as the nearest common + // dominator of the exit blocks. + for (auto *BB : L->blocks()) { + auto *DomNodeBB = DT->getNode(BB); + for (auto *DomChild : DomNodeBB->getChildren()) { + auto *DomChildBB = DomChild->getBlock(); + if (!L->contains(LI->getLoopFor(DomChildBB))) + ChildrenToUpdate.push_back(DomChildBB); + } } + for (auto *BB : ChildrenToUpdate) + DT->changeImmediateDominator(BB, PreHeader); } // Loop structure should be the following: @@ -884,6 +927,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // of its parent loops, so the Scalar Evolution pass needs to be run again. SE->forgetTopmostLoop(L); + // Verify that the Dom Tree is correct. +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + if (DT) + assert(DT->verify(DominatorTree::VerificationLevel::Full)); +#endif + // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. @@ -897,16 +946,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } + auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); - UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, - /*Force*/ false, /*AllowRuntime*/ false, - /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, - /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, - /*ORE*/ nullptr, PreserveLCSSA); + UnrollResult = + UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, + /*Force*/ false, /*AllowRuntime*/ false, + /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, + /*ORE*/ nullptr, PreserveLCSSA); } + if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) + *ResultLoop = remainderLoop; NumRuntimeUnrolled++; return true; } |