diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp')
| -rw-r--r-- | contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 186 | 
1 files changed, 108 insertions, 78 deletions
| diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 6772511b5d5a..62e4fa295378 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -11,9 +11,6 @@  // actual pass or policy, but provides a single function to perform loop  // unrolling.  // -// It works best when loops have been canonicalized by the -indvars pass, -// allowing it to determine the trip counts of loops easily. -//  // The process of unrolling can produce extraneous basic blocks linked with  // unconditional branches.  This will be corrected in the future.  // @@ -24,6 +21,7 @@  #include "llvm/BasicBlock.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Support/Debug.h" @@ -31,6 +29,7 @@  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h"  using namespace llvm;  // TODO: Should these be here or in LoopUnroll? @@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I,  /// only has one predecessor, and that predecessor only has one successor.  /// The LoopInfo Analysis that is passed will be kept consistent.  /// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, +                                            LPPassManager *LPM) {    // Merge basic blocks into their predecessor if there is only one distinct    // pred, and if there is only one distinct successor of the predecessor, and    // if there are no PHI nodes. @@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {    std::string OldName = BB->getName();    // Erase basic block from the function... + +  // ScalarEvolution holds references to loop exit blocks. +  if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { +    if (Loop *L = LI->getLoopFor(BB)) +      SE->forgetLoop(L); +  }    LI->removeBlock(BB);    BB->eraseFromParent(); @@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {  /// branch instruction. However, if the trip count (and multiple) are not known,  /// loop unrolling will mostly produce more code that is no faster.  /// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. Note that UnrollLoop assumes that the loop counter test +/// terminates LatchBlock in order to remove unnecesssary instances of the +/// test. In other words, control may exit the loop prior to TripCount +/// iterations via an early branch, but control may not exit the loop from the +/// LatchBlock's terminator prior to TripCount iterations. +/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +///  /// The LoopInfo Analysis that is passed will be kept consistent.  ///  /// If a LoopPassManager is passed in, and the loop is fully removed, it will be  /// removed from the LoopPassManager as well. LPM can also be NULL. -bool llvm::UnrollLoop(Loop *L, unsigned Count, -                      LoopInfo *LI, LPPassManager *LPM) { +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, +                      unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {    BasicBlock *Preheader = L->getLoopPreheader();    if (!Preheader) {      DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n"); @@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,    BasicBlock *Header = L->getHeader();    BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); -   +    if (!BI || BI->isUnconditional()) {      // The loop-rotate pass can be helpful to avoid this in many cases.      DEBUG(dbgs() <<               "  Can't unroll; loop not terminated by a conditional branch.\n");      return false;    } -   +    if (Header->hasAddressTaken()) {      // The loop-rotate pass can be helpful to avoid this in many cases.      DEBUG(dbgs() << @@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,    // Notify ScalarEvolution that the loop will be substantially changed,    // if not outright eliminated. -  if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) +  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); +  if (SE)      SE->forgetLoop(L); -  // Find trip count -  unsigned TripCount = L->getSmallConstantTripCount(); -  // Find trip multiple if count is not available -  unsigned TripMultiple = 1; -  if (TripCount == 0) -    TripMultiple = L->getSmallConstantTripMultiple(); -    if (TripCount != 0)      DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");    if (TripMultiple != 1) @@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,    ValueToValueMapTy LastValueMap;    std::vector<PHINode*> OrigPHINode;    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { -    PHINode *PN = cast<PHINode>(I); -    OrigPHINode.push_back(PN); -    if (Instruction *I =  -                dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) -      if (L->contains(I)) -        LastValueMap[I] = I; +    OrigPHINode.push_back(cast<PHINode>(I));    }    std::vector<BasicBlock*> Headers; @@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,    Headers.push_back(Header);    Latches.push_back(LatchBlock); +  // The current on-the-fly SSA update requires blocks to be processed in +  // reverse postorder so that LastValueMap contains the correct value at each +  // exit. +  LoopBlocksDFS DFS(L); +  DFS.perform(LI); + +  // Stash the DFS iterators before adding blocks to the loop. +  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); +  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); +    for (unsigned It = 1; It != Count; ++It) {      std::vector<BasicBlock*> NewBlocks; -     -    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), -         E = LoopBlocks.end(); BB != E; ++BB) { + +    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {        ValueToValueMapTy VMap;        BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));        Header->getParent()->getBasicBlockList().push_back(New); @@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,        L->addBasicBlockToLoop(New, LI->getBase()); -      // Add phi entries for newly created values to all exit blocks except -      // the successor of the latch block.  The successor of the exit block will -      // be updated specially after unrolling all the way. -      if (*BB != LatchBlock) -        for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; -             ++SI) -          if (!L->contains(*SI)) -            for (BasicBlock::iterator BBI = (*SI)->begin(); -                 PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { -              Value *Incoming = phi->getIncomingValueForBlock(*BB); -              phi->addIncoming(Incoming, New); -            } - +      // Add phi entries for newly created values to all exit blocks. +      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); +           SI != SE; ++SI) { +        if (L->contains(*SI)) +          continue; +        for (BasicBlock::iterator BBI = (*SI)->begin(); +             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { +          Value *Incoming = phi->getIncomingValueForBlock(*BB); +          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); +          if (It != LastValueMap.end()) +            Incoming = It->second; +          phi->addIncoming(Incoming, New); +        } +      }        // Keep track of new headers and latches as we create them, so that        // we can insert the proper branches later.        if (*BB == Header)          Headers.push_back(New); -      if (*BB == LatchBlock) { +      if (*BB == LatchBlock)          Latches.push_back(New); -        // Also, clear out the new latch's back edge so that it doesn't look -        // like a new loop, so that it's amenable to being merged with adjacent -        // blocks later on. -        TerminatorInst *Term = New->getTerminator(); -        assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); -        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); -        Term->setSuccessor(!ContinueOnTrue, NULL); -      } -        NewBlocks.push_back(New);      } -     +      // Remap all instructions in the most recent iteration      for (unsigned i = 0; i < NewBlocks.size(); ++i)        for (BasicBlock::iterator I = NewBlocks[i]->begin(),             E = NewBlocks[i]->end(); I != E; ++I)          ::RemapInstruction(I, LastValueMap);    } -   -  // The latch block exits the loop.  If there are any PHI nodes in the -  // successor blocks, update them to use the appropriate values computed as the -  // last iteration of the loop. -  if (Count != 1) { -    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); -    for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock); -         SI != SE; ++SI) { -      for (BasicBlock::iterator BBI = (*SI)->begin(); -           PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { -        Value *InVal = PN->removeIncomingValue(LatchBlock, false); -        // If this value was defined in the loop, take the value defined by the -        // last iteration of the loop. -        if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { -          if (L->contains(InValI)) -            InVal = LastValueMap[InVal]; -        } -        PN->addIncoming(InVal, LastIterationBB); -      } -    } -  } -  // Now, if we're doing complete unrolling, loop over the PHI nodes in the -  // original block, setting them to their incoming values. -  if (CompletelyUnroll) { -    BasicBlock *Preheader = L->getLoopPreheader(); -    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { -      PHINode *PN = OrigPHINode[i]; +  // Loop over the PHI nodes in the original block, setting incoming values. +  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { +    PHINode *PN = OrigPHINode[i]; +    if (CompletelyUnroll) {        PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));        Header->getInstList().erase(PN);      } +    else if (Count > 1) { +      Value *InVal = PN->removeIncomingValue(LatchBlock, false); +      // If this value was defined in the loop, take the value defined by the +      // last iteration of the loop. +      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { +        if (L->contains(InValI)) +          InVal = LastValueMap[InVal]; +      } +      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); +      PN->addIncoming(InVal, Latches.back()); +    }    }    // Now that all the basic blocks for the unrolled iterations are in place, @@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,        // iteration.        Term->setSuccessor(!ContinueOnTrue, Dest);      } else { +      // Remove phi operands at this loop exit +      if (Dest != LoopExit) { +        BasicBlock *BB = Latches[i]; +        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); +             SI != SE; ++SI) { +          if (*SI == Headers[i]) +            continue; +          for (BasicBlock::iterator BBI = (*SI)->begin(); +               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { +            Phi->removeIncomingValue(BB, false); +          } +        } +      }        // Replace the conditional branch with an unconditional one.        BranchInst::Create(Dest, Term);        Term->eraseFromParent(); @@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,      BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());      if (Term->isUnconditional()) {        BasicBlock *Dest = Term->getSuccessor(0); -      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) +      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))          std::replace(Latches.begin(), Latches.end(), Dest, Fold);      }    } -   + +  // FIXME: Reconstruct dom info, because it is not preserved properly. +  // Incrementally updating domtree after loop unrolling would be easy. +  if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) +    DT->runOnFunction(*L->getHeader()->getParent()); + +  // Simplify any new induction variables in the partially unrolled loop. +  if (SE && !CompletelyUnroll) { +    SmallVector<WeakVH, 16> DeadInsts; +    simplifyLoopIVs(L, SE, LPM, DeadInsts); + +    // Aggressively clean up dead instructions that simplifyLoopIVs already +    // identified. Any remaining should be cleaned up below. +    while (!DeadInsts.empty()) +      if (Instruction *Inst = +          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) +        RecursivelyDeleteTriviallyDeadInstructions(Inst); +  } +    // At this point, the code is well formed.  We now do a quick sweep over the    // inserted code, doing constant propagation and dead code elimination as we    // go. | 
