diff options
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 143 |
1 files changed, 91 insertions, 52 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ad2b707e6b2..5170c68e2915 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -65,9 +65,11 @@ static cl::opt<bool> UnrollRuntimeMultiExit( /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, BasicBlock *PreHeader, - BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); @@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) - DT->changeImmediateDominator(Exit, PrologExit); + DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); } /// Connect the unrolling epilog code to the original loop. 
@@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, return nullptr; } +/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits +/// is populated with all the loop exit blocks other than the LatchExit block. +static bool +canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, + BasicBlock *LatchExit, bool PreserveLCSSA, + bool UseEpilogRemainder) { + + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!UnrollRuntimeMultiExit) + return false; + // Even if runtime multi exit is enabled, we currently have some correctness + // constraints in unrolling a multi-exit loop. + // We rely on LCSSA form being preserved when the exit blocks are transformed. + if (!PreserveLCSSA) + return false; + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + + // TODO: Support multiple exiting blocks jumping to the `LatchExit` when + // UnrollRuntimeMultiExit is true. This will need updating the logic in + // connectEpilog/connectProlog. + if (!LatchExit->getSinglePredecessor()) { + DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " + "predecessor.\n"); + return false; + } + // FIXME: We bail out of multi-exit unrolling when epilog loop is generated + // and L is an inner loop. This is because in presence of multiple exits, the + // outer loop is incorrect: we do not add the EpilogPreheader and exit to the + // outer loop. This is automatically handled in the prolog case, so we do not + // have that bug in prolog generation. + if (UseEpilogRemainder && L->getParentLoop()) + return false; + + // All constraints have been satisfied. + return true; +} + + + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. 
/// @@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { - // for now, only unroll loops that contain a single exit - if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) - return false; + DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + DEBUG(L->dump()); // Make sure the loop is in canonical form. - if (!L->isLoopSimplifyForm()) + if (!L->isLoopSimplifyForm()) { + DEBUG(dbgs() << "Not in simplify form!\n"); return false; + } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); - BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit && !UnrollRuntimeMultiExit) - return false; - // These are exit blocks other than the target of the latch exiting block. - SmallVector<BasicBlock *, 4> OtherExits; BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. - if (!LatchExit) { - assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " - "unrolling with epilog remainder only!"); - LatchExit = LatchBR->getSuccessor(ExitIndex); - // We rely on LCSSA form being preserved when the exit blocks are - // transformed. 
- if (!PreserveLCSSA) - return false; - // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This - // will need updating the logic in connectEpilog. - if (!LatchExit->getSinglePredecessor()) - return false; - SmallVector<BasicBlock *, 4> Exits; - L->getUniqueExitBlocks(Exits); - for (auto *BB : Exits) - if (BB != LatchExit) - OtherExits.push_back(BB); + // These are exit blocks other than the target of the latch exiting block. + SmallVector<BasicBlock *, 4> OtherExits; + bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( + L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. + if (!isMultiExitUnrollingEnabled && + (!L->getExitingBlock() || OtherExits.size())) { + DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " + "enabled!\n"); + return false; } - - assert(LatchExit && "Latch Exit should exist!"); - // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) + !BECountSC->getType()->isIntegerTy()) { + DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; + } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. 
const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa<SCEVCouldNotCompute>(TripCountSC)) + if (isa<SCEVCouldNotCompute>(TripCountSC)) { + DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; + } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; + } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. - if (Log2_32(Count) > BEWidth) + if (Log2_32(Count) > BEWidth) { + DEBUG(dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; + } // Loop structure is the following: // @@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; - if (!newVal) { - assert(isa<Constant>(Phi->getIncomingValue(i)) && - "VMap should exist for all values except constants!"); + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) newVal = Phi->getIncomingValue(i); - } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } @@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. 
- ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the |