diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Transforms/Utils/LoopUnrollPeel.cpp | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Diffstat (limited to 'lib/Transforms/Utils/LoopUnrollPeel.cpp')
-rw-r--r-- | lib/Transforms/Utils/LoopUnrollPeel.cpp | 210 |
1 files changed, 136 insertions, 74 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 151a285af4e9..005306cf1898 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -1,9 +1,8 @@ //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount( "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); +static cl::opt<bool> UnrollPeelMultiDeoptExit( + "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, + cl::desc("Allow peeling of loops with multiple deopt exits.")); + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). @@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) { if (!L->isLoopSimplifyForm()) return false; + if (UnrollPeelMultiDeoptExit) { + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueNonLatchExitBlocks(Exits); + + if (!Exits.empty()) { + // Latch's terminator is a conditional branch, Latch is exiting and + // all non Latch exits ends up with deoptimize. + const BasicBlock *Latch = L->getLoopLatch(); + const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator()); + return T && T->isConditional() && L->isLoopExiting(Latch) && + all_of(Exits, [](const BasicBlock *BB) { + return BB->getTerminatingDeoptimizeCall(); + }); + } + } + // Only peel loops that contain a single exit if (!L->getExitingBlock() || !L->getUniqueExitBlock()) return false; @@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight) { + if (!PeeledHeaderWeight) + return; // FIXME: Pick a more realistic distribution. // Currently the proportion of weight we assign to the fall-through // side of the branch drops linearly with the iteration number, and we use // a 0.9 fudge factor to make the drop-off less sharp... - if (PeeledHeaderWeight) { - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; - - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(LatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); - } + uint64_t FallThruWeight = + PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); + uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; + PeeledHeaderWeight -= ExitWeight; + + unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(LatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) + : MDB.createBranchWeights(FallThruWeight, ExitWeight); + LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); +} + +/// Initialize the weights. +/// +/// \param Header The header block. +/// \param LatchBR The latch branch. +/// \param AvgIters The average number of iterations we expect the loop to have. +/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. +/// \param[out] CurHeaderWeight The # of times the header is executed. +static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, + unsigned AvgIters, uint64_t &ExitWeight, + uint64_t &CurHeaderWeight) { + uint64_t TrueWeight, FalseWeight; + if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) + return; + unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; + ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; + // The # of times the loop body executes is the sum of the exit block + // is taken and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; +} + +/// Update the weights of original Latch block after peeling off all iterations. +/// +/// \param Header The header block. +/// \param LatchBR The latch branch. +/// \param ExitWeight The weight of the edge from Latch to Exit block. +/// \param CurHeaderWeight The # of time the header is executed. +static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, + uint64_t ExitWeight, uint64_t CurHeaderWeight) { + // Adjust the branch weights on the loop exit. + if (!ExitWeight) + return; + + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. + uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; + MDBuilder MDB(LatchBR->getContext()); + unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } /// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. -/// \param Exit The exit block of the original loop. +/// \param ExitEdges The exit edges of the original loop. /// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. /// \param LVMap A value-map that maps instructions from the original loop to /// instructions in the last peeled-off iteration. -static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *Exit, - SmallVectorImpl<BasicBlock *> &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - ValueToValueMapTy &LVMap, DominatorTree *DT, - LoopInfo *LI) { +static void cloneLoopBlocks( + Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, + SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges, + SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, + LoopInfo *LI) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); @@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, // iteration (for every other iteration) BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator()); - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - LatchBR->setSuccessor(HeaderIdx, InsertBot); - LatchBR->setSuccessor(1 - HeaderIdx, Exit); + for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx) + if (LatchBR->getSuccessor(idx) == Header) { + LatchBR->setSuccessor(idx, InsertBot); + break; + } if (DT) DT->changeImmediateDominator(InsertBot, NewLatch); @@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, // we've just created. Note that this must happen *after* the incoming // values are adjusted, since the value going out of the latch may also be // a value coming into the header. - for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - Value *LatchVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - LatchVal = VMap[LatchVal]; - PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch])); - } + for (auto Edge : ExitEdges) + for (PHINode &PHI : Edge.second->phis()) { + Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + LatchVal = VMap[LatchVal]; + PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); + } // LastValueMap is updated with the values for the current loop // which are used the next time this function is called. @@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Exit = L->getUniqueExitBlock(); + SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; + L->getExitEdges(ExitEdges); + + DenseMap<BasicBlock *, BasicBlock *> ExitIDom; + if (DT) { + assert(L->hasDedicatedExits() && "No dedicated exits?"); + for (auto Edge : ExitEdges) { + if (ExitIDom.count(Edge.second)) + continue; + BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + assert(L->contains(BB) && "IDom is not in a loop"); + ExitIDom[Edge.second] = BB; + } + } Function *F = Header->getParent(); @@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // newly created branches. BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - - uint64_t TrueWeight, FalseWeight; uint64_t ExitWeight = 0, CurHeaderWeight = 0; - if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { - ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the exit block - // weight and the # of times the backedges are taken. - CurHeaderWeight = TrueWeight + FalseWeight; - } + initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { @@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, else CurHeaderWeight = 1; - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, - NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); + cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, + LoopBlocks, VMap, LVMap, DT, LI); // Remap to use values from the current iteration instead of the // previous one. @@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // latter is the first cloned loop body, as original PreHeader dominates // the original loop body. if (Iter == 0) - DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch])); + for (auto Exit : ExitIDom) + DT->changeImmediateDominator(Exit.first, + cast<BasicBlock>(LVMap[Exit.second])); #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); #endif @@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, if (LatchInst && L->contains(LatchInst)) NewVal = LVMap[LatchInst]; - PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal); + PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - // Adjust the branch weights on the loop exit. - if (ExitWeight) { - // The backedge count is the difference of current header weight and - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; - MDBuilder MDB(LatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); - } + fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); - // If the loop is nested, we changed the parent loop, update SE. - if (Loop *ParentLoop = L->getParentLoop()) { - SE->forgetLoop(ParentLoop); + if (Loop *ParentLoop = L->getParentLoop()) + L = ParentLoop; - // FIXME: Incrementally update loop-simplify - simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA); - } else { - // FIXME: Incrementally update loop-simplify - simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); - } + // We modified the loop, update SE. + SE->forgetTopmostLoop(L); + + // Finally DomtTree must be correct. + assert(DT->verify(DominatorTree::VerificationLevel::Fast)); + + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA); NumPeeled++; |