1 files changed, 98 insertions, 45 deletions
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0057b4ba7ce1..00d2fd2fdbac 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -70,6 +70,17 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                           BasicBlock *PreHeader, BasicBlock *NewPreHeader,
                           ValueToValueMapTy &VMap, DominatorTree *DT,
                           LoopInfo *LI, bool PreserveLCSSA) {
+  // Loop structure should be the following:
+  // Preheader
+  //  PrologHeader
+  //  ...
+  //  PrologLatch
+  //  PrologExit
+  //   NewPreheader
+  //    Header
+  //    ...
+  //    Latch
+  //      LatchExit
   BasicBlock *Latch = L->getLoopLatch();
   assert(Latch && "Loop must have a latch");
   BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -83,14 +94,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
     for (PHINode &PN : Succ->phis()) {
       // Add a new PHI node to the prolog end block and add the
       // appropriate incoming values.
+      // TODO: This code assumes that the PrologExit (or the LatchExit block for
+      // prolog loop) contains only one predecessor from the loop, i.e. the
+      // PrologLatch. When supporting multiple-exiting block loops, we can have
+      // two or more blocks that have the LatchExit as the target in the
+      // original loop.
       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
                                        PrologExit->getFirstNonPHI());
       // Adding a value to the new PHI node from the original loop preheader.
       // This is the value that skips all the prolog code.
       if (L->contains(&PN)) {
+        // Succ is loop header.
         NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
                            PreHeader);
       } else {
+        // Succ is LatchExit.
         NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
       }
 
@@ -124,7 +142,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
         PrologExitPreds.push_back(PredBB);
 
     SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
-                           PreserveLCSSA);
+                           nullptr, PreserveLCSSA);
   }
 
   // Create a branch around the original loop, which is taken if there are no
@@ -143,7 +161,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
   // Split the exit to maintain loop canonicalization guarantees
   SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
   SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
-                         PreserveLCSSA);
+                         nullptr, PreserveLCSSA);
   // Add the branch to the exit block (around the unrolled loop)
   B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
   InsertPt->eraseFromParent();
@@ -257,7 +275,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
   assert(Exit && "Loop must have a single exit block only");
   // Split the epilogue exit to maintain loop canonicalization guarantees
   SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
-  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
                          PreserveLCSSA);
   // Add the branch to the exit block (around the unrolling loop)
   B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
@@ -267,7 +285,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
 
   // Split the main loop exit to maintain canonicalization guarantees.
   SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
-  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
                          PreserveLCSSA);
 }
 
@@ -380,6 +398,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
   }
   if (CreateRemainderLoop) {
     Loop *NewLoop = NewLoops[L];
+    MDNode *LoopID = NewLoop->getLoopID();
     assert(NewLoop && "L should have been cloned");
 
     // Only add loop metadata if the loop is not going to be completely
@@ -387,6 +406,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
     if (UnrollRemainder)
       return NewLoop;
 
+    Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+        LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+    if (NewLoopID.hasValue()) {
+      NewLoop->setLoopID(NewLoopID.getValue());
+
+      // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+      // explicitly.
+      return NewLoop;
+    }
+
     // Add unroll disable metadata to disable future unrolling for this loop.
     NewLoop->setLoopAlreadyUnrolled();
     return NewLoop;
@@ -525,10 +554,10 @@ static bool canProfitablyUnrollMultiExitLoop(
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
-                                      bool UnrollRemainder,
-                                      LoopInfo *LI, ScalarEvolution *SE,
-                                      DominatorTree *DT, AssumptionCache *AC,
-                                      bool PreserveLCSSA) {
+                                      bool UnrollRemainder, LoopInfo *LI,
+                                      ScalarEvolution *SE, DominatorTree *DT,
+                                      AssumptionCache *AC, bool PreserveLCSSA,
+                                      Loop **ResultLoop) {
   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   LLVM_DEBUG(L->dump());
   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -545,13 +574,27 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   BasicBlock *Header = L->getHeader();
 
   BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+
+  if (!LatchBR || LatchBR->isUnconditional()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    LLVM_DEBUG(
+        dbgs()
+        << "Loop latch not terminated by a conditional branch.\n");
+    return false;
+  }
+
   unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
   BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
-  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
-  // targets of the Latch be an exit block out of the loop. This needs
-  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
-  assert(!L->contains(LatchExit) &&
-         "one of the loop latch successors should be the exit block!");
+
+  if (L->contains(LatchExit)) {
+    // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+    // targets of the Latch be an exit block out of the loop.
+    LLVM_DEBUG(
+        dbgs()
+        << "One of the loop latch successors must be the exit block.\n");
+    return false;
+  }
+
   // These are exit blocks other than the target of the latch exiting block.
   SmallVector<BasicBlock *, 4> OtherExits;
   bool isMultiExitUnrollingEnabled =
@@ -636,8 +679,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
     NewPreHeader->setName(PreHeader->getName() + ".new");
     // Split LatchExit to create phi nodes from branch above.
     SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
-    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
-                                     DT, LI, PreserveLCSSA);
+    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+                                     nullptr, PreserveLCSSA);
     // NewExit gets its DebugLoc from LatchExit, which is not part of the
     // original Loop.
     // Fix this by setting Loop's DebugLoc to NewExit.
@@ -762,10 +805,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   // Now the loop blocks are cloned and the other exiting blocks from the
   // remainder are connected to the original Loop's exit blocks. The remaining
   // work is to update the phi nodes in the original loop, and take in the
-  // values from the cloned region. Also update the dominator info for
-  // OtherExits and their immediate successors, since we have new edges into
-  // OtherExits.
-  SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+  // values from the cloned region.
   for (auto *BB : OtherExits) {
    for (auto &II : *BB) {
 
@@ -800,27 +840,30 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
              "Breaks the definition of dedicated exits!");
     }
 #endif
-   // Update the dominator info because the immediate dominator is no longer the
-   // header of the original Loop. BB has edges both from L and remainder code.
-   // Since the preheader determines which loop is run (L or directly jump to
-   // the remainder code), we set the immediate dominator as the preheader.
-   if (DT) {
-     DT->changeImmediateDominator(BB, PreHeader);
-     // Also update the IDom for immediate successors of BB.  If the current
-     // IDom is the header, update the IDom to be the preheader because that is
-     // the nearest common dominator of all predecessors of SuccBB.  We need to
-     // check for IDom being the header because successors of exit blocks can
-     // have edges from outside the loop, and we should not incorrectly update
-     // the IDom in that case.
-     for (BasicBlock *SuccBB: successors(BB))
-       if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
-         if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
-           assert(!SuccBB->getSinglePredecessor() &&
-                  "BB should be the IDom then!");
-           DT->changeImmediateDominator(SuccBB, PreHeader);
-         }
-       }
+  }
+
+  // Update the immediate dominator of the exit blocks and blocks that are
+  // reachable from the exit blocks. This is needed because we now have paths
+  // from both the original loop and the remainder code reaching the exit
+  // blocks. While the IDom of these exit blocks were from the original loop,
+  // now the IDom is the preheader (which decides whether the original loop or
+  // remainder code should run).
+  if (DT && !L->getExitingBlock()) {
+    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+    // NB! We have to examine the dom children of all loop blocks, not just
+    // those which are the IDom of the exit blocks. This is because blocks
+    // reachable from the exit blocks can have their IDom as the nearest common
+    // dominator of the exit blocks.
+    for (auto *BB : L->blocks()) {
+      auto *DomNodeBB = DT->getNode(BB);
+      for (auto *DomChild : DomNodeBB->getChildren()) {
+        auto *DomChildBB = DomChild->getBlock();
+        if (!L->contains(LI->getLoopFor(DomChildBB)))
+          ChildrenToUpdate.push_back(DomChildBB);
+      }
     }
+    for (auto *BB : ChildrenToUpdate)
+      DT->changeImmediateDominator(BB, PreHeader);
   }
 
   // Loop structure should be the following:
@@ -884,6 +927,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   // of its parent loops, so the Scalar Evolution pass needs to be run again.
   SE->forgetTopmostLoop(L);
 
+  // Verify that the Dom Tree is correct.
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+  if (DT)
+    assert(DT->verify(DominatorTree::VerificationLevel::Full));
+#endif
+
   // Canonicalize to LoopSimplifyForm both original and remainder loops. We
   // cannot rely on the LoopUnrollPass to do this because it only does
   // canonicalization for parent/subloops and not the sibling loops.
@@ -897,16 +946,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
       formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
   }
 
+  auto UnrollResult = LoopUnrollResult::Unmodified;
   if (remainderLoop && UnrollRemainder) {
     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
-    UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
-               /*Force*/ false, /*AllowRuntime*/ false,
-               /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
-               /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
-               /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
-               /*ORE*/ nullptr, PreserveLCSSA);
+    UnrollResult =
+        UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+                   /*Force*/ false, /*AllowRuntime*/ false,
+                   /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+                   /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+                   /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+                   /*ORE*/ nullptr, PreserveLCSSA);
   }
 
+  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+    *ResultLoop = remainderLoop;
   NumRuntimeUnrolled++;
   return true;
 }