6 files changed, 244 insertions, 61 deletions
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 7fd77a082b822..c5c9b2c185d63 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -562,13 +562,27 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
   if (!MSSA)
     return false;
 
+  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
+  // read/write memory, then we can safely return true here.
+  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
+  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
+  // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
+  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
+  // with the default optimization pipeline.
+  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
+  if (!EarlierMA)
+    return true;
+  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
+  if (!LaterMA)
+    return true;
+
   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
   MemoryAccess *LaterDef =
       MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
-  return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
+  return MSSA->dominates(LaterDef, EarlierMA);
 }
 
 bool EarlyCSE::processNode(DomTreeNode *Node) {
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0fe72f3f73318..ea28705e684dc 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1168,6 +1168,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
                                  LI->isVolatile(), LI->getAlignment(),
                                  LI->getOrdering(), LI->getSyncScopeID(),
                                  UnavailablePred->getTerminator());
+    NewLoad->setDebugLoc(LI->getDebugLoc());
 
     // Transfer the old load's AA tags to the new load.
     AAMDNodes Tags;
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index a40c22c3fce98..99b4458ea0fa2 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -805,6 +805,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
   ConstantInt *One = ConstantInt::get(IndVarTy, 1);
   // TODO: generalize the predicates here to also match their unsigned variants.
   if (IsIncreasing) {
+    bool DecreasedRightValueByOne = false;
+    // Try to turn eq/ne predicates to those we can work with.
+    if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+      // while (++i != len) {         while (++i < len) {
+      //   ...                 --->     ...
+      // }                            }
+      Pred = ICmpInst::ICMP_SLT;
+    else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+             !CanBeSMin(SE, RightSCEV)) {
+      // while (true) {               while (true) {
+      //   if (++i == len)     --->     if (++i > len - 1)
+      //     break;                       break;
+      //   ...                          ...
+      // }                            }
+      Pred = ICmpInst::ICMP_SGT;
+      RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+      DecreasedRightValueByOne = true;
+    }
+
     bool FoundExpectedPred =
         (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
         (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
@@ -829,16 +848,41 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
         return None;
       }
 
-      IRBuilder<> B(Preheader->getTerminator());
-      RightValue = B.CreateAdd(RightValue, One);
+      // We need to increase the right value unless we have already decreased
+      // it virtually when we replaced EQ with SGT.
+      if (!DecreasedRightValueByOne) {
+        IRBuilder<> B(Preheader->getTerminator());
+        RightValue = B.CreateAdd(RightValue, One);
+      }
     } else {
       if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart,
                                        RightSCEV)) {
         FailureReason = "Induction variable start not bounded by upper limit";
         return None;
       }
+      assert(!DecreasedRightValueByOne &&
+             "Right value can be decreased only for LatchBrExitIdx == 0!");
     }
   } else {
+    bool IncreasedRightValueByOne = false;
+    // Try to turn eq/ne predicates to those we can work with.
+    if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+      // while (--i != len) {         while (--i > len) {
+      //   ...                 --->     ...
+      // }                            }
+      Pred = ICmpInst::ICMP_SGT;
+    else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+             !CanBeSMax(SE, RightSCEV)) {
+      // while (true) {               while (true) {
+      //   if (--i == len)     --->     if (--i < len + 1)
+      //     break;                       break;
+      //   ...                          ...
+      // }                            }
+      Pred = ICmpInst::ICMP_SLT;
+      RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+      IncreasedRightValueByOne = true;
+    }
+
     bool FoundExpectedPred =
         (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
         (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
@@ -863,14 +907,20 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
         return None;
       }
 
-      IRBuilder<> B(Preheader->getTerminator());
-      RightValue = B.CreateSub(RightValue, One);
+      // We need to decrease the right value unless we have already increased
+      // it virtually when we replaced EQ with SLT.
+      if (!IncreasedRightValueByOne) {
+        IRBuilder<> B(Preheader->getTerminator());
+        RightValue = B.CreateSub(RightValue, One);
+      }
     } else {
       if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart,
                                        RightSCEV)) {
         FailureReason = "Induction variable start not bounded by lower limit";
         return None;
       }
+      assert(!IncreasedRightValueByOne &&
+             "Right value can be increased only for LatchBrExitIdx == 0!");
     }
   }
 
@@ -922,14 +972,18 @@ LoopConstrainer::calculateSubRanges() const {
 
   bool Increasing = MainLoopStructure.IndVarIncreasing;
 
-  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
-  // range of values the induction variable takes.
+  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or
+  // [Smallest, GreatestSeen] is the range of values the induction variable
+  // takes.
 
-  const SCEV *Smallest = nullptr, *Greatest = nullptr;
+  const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
 
+  const SCEV *One = SE.getOne(Ty);
   if (Increasing) {
     Smallest = Start;
     Greatest = End;
+    // No overflow, because the range [Smallest, GreatestSeen] is not empty.
+    GreatestSeen = SE.getMinusSCEV(End, One);
   } else {
     // These two computations may sign-overflow.  Here is why that is okay:
     //
@@ -947,9 +1001,9 @@ LoopConstrainer::calculateSubRanges() const {
     //    will be an empty range.  Returning an empty range is always safe.
     //
 
-    const SCEV *One = SE.getOne(Ty);
     Smallest = SE.getAddExpr(End, One);
     Greatest = SE.getAddExpr(Start, One);
+    GreatestSeen = Start;
   }
 
   auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
@@ -964,7 +1018,7 @@ LoopConstrainer::calculateSubRanges() const {
     Result.LowLimit = Clamp(Range.getBegin());
 
   bool ProvablyNoPostLoop =
-      SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
+      SE.isKnownPredicate(ICmpInst::ICMP_SLT, GreatestSeen, Range.getEnd());
   if (!ProvablyNoPostLoop)
     Result.HighLimit = Clamp(Range.getEnd());
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ee3de51b13606..4056cc5cb3462 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -2168,11 +2168,19 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
   return false;
 }
 
-/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
+/// same BB in the form
 /// bb:
 ///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
-///   %s = select p, trueval, falseval
+///   %s = select %p, trueval, falseval
 ///
+/// or
+///
+/// bb:
+///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
+///   %c = cmp %p, 0
+///   %s = select %c, trueval, falseval
+//
 /// And expand the select into a branch structure. This later enables
 /// jump-threading over bb in this pass.
 ///
@@ -2186,44 +2194,54 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
   if (LoopHeaders.count(BB))
     return false;
 
-  // Look for a Phi/Select pair in the same basic block.  The Phi feeds the
-  // condition of the Select and at least one of the incoming values is a
-  // constant.
   for (BasicBlock::iterator BI = BB->begin();
        PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
-    unsigned NumPHIValues = PN->getNumIncomingValues();
-    if (NumPHIValues == 0 || !PN->hasOneUse())
-      continue;
-
-    SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
-    if (!SI || SI->getParent() != BB)
-      continue;
-
-    Value *Cond = SI->getCondition();
-    if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+    // Look for a Phi having at least one constant incoming value.
+    if (llvm::all_of(PN->incoming_values(),
+                     [](Value *V) { return !isa<ConstantInt>(V); }))
       continue;
 
-    bool HasConst = false;
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      if (PN->getIncomingBlock(i) == BB)
+    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
+      // Check if SI is in BB and use V as condition.
+      if (SI->getParent() != BB)
         return false;
-      if (isa<ConstantInt>(PN->getIncomingValue(i)))
-        HasConst = true;
-    }
+      Value *Cond = SI->getCondition();
+      return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
+    };
 
-    if (HasConst) {
-      // Expand the select.
-      TerminatorInst *Term =
-          SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
-      PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
-      NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
-      NewPN->addIncoming(SI->getFalseValue(), BB);
-      SI->replaceAllUsesWith(NewPN);
-      SI->eraseFromParent();
-      return true;
+    SelectInst *SI = nullptr;
+    for (Use &U : PN->uses()) {
+      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
+        // Look for a ICmp in BB that compares PN with a constant and is the
+        // condition of a Select.
+        if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
+            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
+          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
+            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
+              SI = SelectI;
+              break;
+            }
+      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
+        // Look for a Select in BB that uses PN as condtion.
+        if (isUnfoldCandidate(SelectI, U.get())) {
+          SI = SelectI;
+          break;
+        }
+      }
     }
+
+    if (!SI)
+      continue;
+    // Expand the select.
+    TerminatorInst *Term =
+        SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+    NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+    NewPN->addIncoming(SI->getFalseValue(), BB);
+    SI->replaceAllUsesWith(NewPN);
+    SI->eraseFromParent();
+    return true;
   }
-  
   return false;
 }
 
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 606136dc31a4b..2e0d8e0374c08 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -323,9 +324,10 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
 class LoopInterchangeLegality {
 public:
   LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
-                          LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
+                          LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
+                          OptimizationRemarkEmitter *ORE)
       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
-        PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
+        PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {}
 
   /// Check if the loops can be interchanged.
   bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -353,6 +355,8 @@ private:
   LoopInfo *LI;
   DominatorTree *DT;
   bool PreserveLCSSA;
+  /// Interface to emit optimization remarks.
+  OptimizationRemarkEmitter *ORE;
 
   bool InnerLoopHasReduction;
 };
@@ -361,8 +365,9 @@ private:
 /// loop.
 class LoopInterchangeProfitability {
 public:
-  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
-      : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
+  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+                               OptimizationRemarkEmitter *ORE)
+      : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
 
   /// Check if the loop interchange is profitable.
   bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -376,6 +381,8 @@ private:
 
   /// Scev analysis.
   ScalarEvolution *SE;
+  /// Interface to emit optimization remarks.
+  OptimizationRemarkEmitter *ORE;
 };
 
 /// LoopInterchangeTransform interchanges the loop.
@@ -422,6 +429,9 @@ struct LoopInterchange : public FunctionPass {
   DependenceInfo *DI;
   DominatorTree *DT;
   bool PreserveLCSSA;
+  /// Interface to emit optimization remarks.
+  OptimizationRemarkEmitter *ORE;
+
   LoopInterchange()
       : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
     initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
@@ -435,6 +445,7 @@ struct LoopInterchange : public FunctionPass {
     AU.addRequired<DependenceAnalysisWrapperPass>();
     AU.addRequiredID(LoopSimplifyID);
     AU.addRequiredID(LCSSAID);
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
   }
 
   bool runOnFunction(Function &F) override {
@@ -446,6 +457,7 @@ struct LoopInterchange : public FunctionPass {
     DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
     auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
     DT = DTWP ? &DTWP->getDomTree() : nullptr;
+    ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
     PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
     // Build up a worklist of loop pairs to analyze.
@@ -575,18 +587,23 @@ struct LoopInterchange : public FunctionPass {
     Loop *OuterLoop = LoopList[OuterLoopId];
 
     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
-                                PreserveLCSSA);
+                                PreserveLCSSA, ORE);
     if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
       DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
       return false;
     }
     DEBUG(dbgs() << "Loops are legal to interchange\n");
-    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
       DEBUG(dbgs() << "Interchanging loops not profitable\n");
       return false;
     }
 
+    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged",
+                                 InnerLoop->getStartLoc(),
+                                 InnerLoop->getHeader())
+              << "Loop interchanged with enclosing loop.");
+
     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
                                  LoopNestExit, LIL.hasInnerLoopReduction());
     LIT.transform();
@@ -760,6 +777,12 @@ bool LoopInterchangeLegality::currentLimitations() {
   if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
     DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
                  << "are supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedPHIInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with induction or reduction PHI nodes can be"
+                 " interchange currently.");
     return true;
   }
 
@@ -767,6 +790,12 @@ bool LoopInterchangeLegality::currentLimitations() {
   if (Inductions.size() != 1) {
     DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
                  << "Failed to interchange due to current limitation\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "MultiInductionInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with 1 induction variable can be "
+                 "interchanged currently.");
     return true;
   }
   if (Reductions.size() > 0)
@@ -777,6 +806,12 @@ bool LoopInterchangeLegality::currentLimitations() {
   if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
     DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
                  << "are supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedPHIOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with induction or reduction PHI nodes can be"
+                 " interchanged currently.");
     return true;
   }
 
@@ -785,18 +820,35 @@ bool LoopInterchangeLegality::currentLimitations() {
   if (!Reductions.empty()) {
     DEBUG(dbgs() << "Outer loops with reductions are not supported "
                  << "currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "ReductionsOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Outer loops with reductions cannot be interchangeed "
+                 "currently.");
     return true;
   }
   // TODO: Currently we handle only loops with 1 induction variable.
   if (Inductions.size() != 1) {
     DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
                  << "supported currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "MultiIndutionOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with 1 induction variable can be "
+                 "interchanged currently.");
     return true;
   }
 
   // TODO: Triangular loops are not handled for now.
   if (!isLoopStructureUnderstood(InnerInductionVar)) {
     DEBUG(dbgs() << "Loop structure not understood by pass\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedStructureInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Inner loop structure not understood currently.");
     return true;
   }
 
@@ -805,12 +857,24 @@ bool LoopInterchangeLegality::currentLimitations() {
       getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
   if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
     DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoLCSSAPHIOuter",
+                                       OuterLoop->getStartLoc(),
+                                       OuterLoop->getHeader())
+              << "Only outer loops with LCSSA PHIs can be interchange "
+                 "currently.");
     return true;
   }
 
   LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
   if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
     DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoLCSSAPHIOuterInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Only inner loops with LCSSA PHIs can be interchange "
+                 "currently.");
     return true;
   }
 
@@ -835,6 +899,11 @@ bool LoopInterchangeLegality::currentLimitations() {
   if (!InnerIndexVarInc) {
     DEBUG(dbgs() << "Did not find an instruction to increment the induction "
                  << "variable.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoIncrementInInner",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "The inner loop does not increment the induction variable.");
     return true;
   }
 
@@ -852,6 +921,12 @@ bool LoopInterchangeLegality::currentLimitations() {
     if (!I.isIdenticalTo(InnerIndexVarInc)) {
       DEBUG(dbgs() << "Found unsupported instructions between induction "
                    << "variable increment and branch.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "UnsupportedInsBetweenInduction",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Found unsupported instruction between induction variable "
+                 "increment and branch.");
       return true;
     }
 
@@ -862,6 +937,11 @@ bool LoopInterchangeLegality::currentLimitations() {
   // current limitation.
   if (!FoundInduction) {
     DEBUG(dbgs() << "Did not find the induction variable.\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NoIndutionVariable",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Did not find the induction variable.");
     return true;
   }
   return false;
@@ -875,6 +955,11 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
     DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
                  << " and OuterLoopId = " << OuterLoopId
                  << " due to dependence\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "Dependence",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Cannot interchange loops due to dependences.");
     return false;
   }
 
@@ -910,6 +995,12 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
   // Check if the loops are tightly nested.
   if (!tightlyNested(OuterLoop, InnerLoop)) {
     DEBUG(dbgs() << "Loops not tightly nested\n");
+    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                       "NotTightlyNested",
+                                       InnerLoop->getStartLoc(),
+                                       InnerLoop->getHeader())
+              << "Cannot interchange loops because they are not tightly "
+                 "nested.");
     return false;
   }
 
@@ -1005,9 +1096,18 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
 
   // It is not profitable as per current cache profitability model. But check if
   // we can move this loop outside to improve parallelism.
-  bool ImprovesPar =
-      isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
-  return ImprovesPar;
+  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
+    return true;
+
+  ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+                                     "InterchangeNotProfitable",
+                                     InnerLoop->getStartLoc(),
+                                     InnerLoop->getHeader())
+            << "Interchanging loops is too costly (cost="
+            << ore::NV("Cost", Cost) << ", threshold="
+            << ore::NV("Threshold", LoopInterchangeCostThreshold) <<
+            ") and it does not improve parallelism.");
+  return false;
 }
 
 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
@@ -1291,6 +1391,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 
 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
                     "Interchanges loops for cache reuse", false, false)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9397b87cdf563..90c5c243f4648 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -68,6 +68,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
@@ -90,16 +91,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
 /// If it contains any dynamic allocas, returns false.
 static bool canTRE(Function &F) {
   // Because of PR962, we don't TRE dynamic allocas.
-  for (auto &BB : F) {
-    for (auto &I : BB) {
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
-        if (!AI->isStaticAlloca())
-          return false;
-      }
-    }
-  }
-
-  return true;
+  return llvm::all_of(instructions(F), [](Instruction &I) {
+    auto *AI = dyn_cast<AllocaInst>(&I);
+    return !AI || AI->isStaticAlloca();
+  });
 }
 
 namespace {