diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
commit | 9df3605dea17e84f8183581f6103bd0c79e2a606 (patch) | |
tree | 70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Transforms/Utils/LoopUnrollRuntime.cpp | |
parent | 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff) |
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollRuntime.cpp | 130 |
1 files changed, 106 insertions, 24 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 5f85e17927fa2..9ad2b707e6b23 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -36,6 +36,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <algorithm> @@ -45,6 +46,10 @@ using namespace llvm; STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); +static cl::opt<bool> UnrollRuntimeMultiExit( + "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolling for loops with multiple exits, when " + "epilog is generated")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -285,15 +290,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. -/// -static void CloneLoopBlocks(Loop *L, Value *NewIter, - const bool CreateRemainderLoop, - const bool UseEpilogRemainder, - BasicBlock *InsertTop, BasicBlock *InsertBot, - BasicBlock *Preheader, - std::vector<BasicBlock *> &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI) { +/// Return the new cloned loop that is created when CreateRemainderLoop is true. +static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, + const bool UseEpilogRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, + std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -418,7 +421,10 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); + return NewLoop; } + else + return nullptr; } /// Insert code in the prolog/epilog code when unrolling a loop with a @@ -465,29 +471,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit - if (!L->getExitingBlock()) + if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) return false; - // Make sure the loop is in canonical form, and there is a single - // exit block only. + // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) return false; // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit) + if (!LatchExit && !UnrollRuntimeMultiExit) return false; + // These are exit blocks other than the target of the latch exiting block. + SmallVector<BasicBlock *, 4> OtherExits; + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be the single exit block out of the loop. This needs + // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - assert((LatchBR->getSuccessor(0) == LatchExit || - LatchBR->getSuccessor(1) == LatchExit) && - "one of the loop latch successors should be " - "the exit block!"); - (void)LatchBR; + assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + "one of the loop latch successors should be the exit block!"); + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!LatchExit) { + assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " + "unrolling with epilog remainder only!"); + LatchExit = LatchBR->getSuccessor(ExitIndex); + // We rely on LCSSA form being preserved when the exit blocks are + // transformed. + if (!PreserveLCSSA) + return false; + // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This + // will need updating the logic in connectEpilog. + if (!LatchExit->getSinglePredecessor()) + return false; + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + } + + assert(LatchExit && "Latch Exit should exist!"); + // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -495,7 +524,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). - const SCEV *BECountSC = SE->getBackedgeTakenCount(L); + // We calculate the backedge count by using getExitCount on the Latch block, + // which is proven to be the only exiting block in this loop. This is same as + // calculating getBackedgeTakenCount on the loop (which computes SCEV for all + // exiting blocks). + const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) return false; @@ -508,7 +541,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; - BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); @@ -650,8 +682,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; - CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + Loop *remainderLoop = CloneLoopBlocks( + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -659,6 +692,42 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, NewBlocks[0]->getIterator(), F->end()); + // Now the loop blocks are cloned and the other exiting blocks from the + // remainder are connected to the original Loop's exit blocks. The remaining + // work is to update the phi nodes in the original loop, and take in the + // values from the cloned region. Also update the dominator info for + // OtherExits, since we have new edges into OtherExits. + for (auto *BB : OtherExits) { + for (auto &II : *BB) { + + // Given we preserve LCSSA form, we know that the values used outside the + // loop will be used through these phi nodes at the exit blocks that are + // transformed below. + if (!isa<PHINode>(II)) + break; + PHINode *Phi = cast<PHINode>(&II); + unsigned oldNumOperands = Phi->getNumIncomingValues(); + // Add the incoming values from the remainder code to the end of the phi + // node. + for (unsigned i =0; i < oldNumOperands; i++){ + Value *newVal = VMap[Phi->getIncomingValue(i)]; + if (!newVal) { + assert(isa<Constant>(Phi->getIncomingValue(i)) && + "VMap should exist for all values except constants!"); + newVal = Phi->getIncomingValue(i); + } + Phi->addIncoming(newVal, + cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); + } + } + // Update the dominator info because the immediate dominator is no longer the + // header of the original Loop. BB has edges both from L and remainder code. + // Since the preheader determines which loop is run (L or directly jump to + // the remainder code), we set the immediate dominator as the preheader. + if (DT) + DT->changeImmediateDominator(BB, PreHeader); + } + // Loop structure should be the following: // Epilog Prolog // @@ -721,6 +790,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Canonicalize to LoopSimplifyForm both original and remainder loops. We + // cannot rely on the LoopUnrollPass to do this because it only does + // canonicalization for parent/subloops and not the sibling loops. + if (OtherExits.size() > 0) { + // Generate dedicated exit blocks for the original loop, to preserve + // LoopSimplifyForm. + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + // Generate dedicated exit blocks for the remainder loop if one exists, to + // preserve LoopSimplifyForm. + if (remainderLoop) + formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } |