diff options
Diffstat (limited to 'lib/Transforms/Scalar')
| -rw-r--r-- | lib/Transforms/Scalar/EarlyCSE.cpp | 16 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 1 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 72 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 82 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/LoopInterchange.cpp | 119 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/TailRecursionElimination.cpp | 15 | 
6 files changed, 244 insertions, 61 deletions
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 7fd77a082b822..c5c9b2c185d63 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -562,13 +562,27 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,    if (!MSSA)      return false; +  // If MemorySSA has determined that one of EarlierInst or LaterInst does not +  // read/write memory, then we can safely return true here. +  // FIXME: We could be more aggressive when checking doesNotAccessMemory(), +  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass +  // by also checking the MemorySSA MemoryAccess on the instruction.  Initial +  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled +  // with the default optimization pipeline. +  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst); +  if (!EarlierMA) +    return true; +  auto *LaterMA = MSSA->getMemoryAccess(LaterInst); +  if (!LaterMA) +    return true; +    // Since we know LaterDef dominates LaterInst and EarlierInst dominates    // LaterInst, if LaterDef dominates EarlierInst then it can't occur between    // EarlierInst and LaterInst and neither can any other write that potentially    // clobbers LaterInst.    MemoryAccess *LaterDef =        MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); -  return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst)); +  return MSSA->dominates(LaterDef, EarlierMA);  }  bool EarlyCSE::processNode(DomTreeNode *Node) { diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0fe72f3f73318..ea28705e684dc 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1168,6 +1168,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,                                   LI->isVolatile(), LI->getAlignment(),                                   LI->getOrdering(), LI->getSyncScopeID(),                                   UnavailablePred->getTerminator()); +    NewLoad->setDebugLoc(LI->getDebugLoc());      // Transfer the old load's AA tags to the new load.      AAMDNodes Tags; diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index a40c22c3fce98..99b4458ea0fa2 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -805,6 +805,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP    ConstantInt *One = ConstantInt::get(IndVarTy, 1);    // TODO: generalize the predicates here to also match their unsigned variants.    if (IsIncreasing) { +    bool DecreasedRightValueByOne = false; +    // Try to turn eq/ne predicates to those we can work with. +    if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) +      // while (++i != len) {         while (++i < len) { +      //   ...                 --->     ... +      // }                            } +      Pred = ICmpInst::ICMP_SLT; +    else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && +             !CanBeSMin(SE, RightSCEV)) { +      // while (true) {               while (true) { +      //   if (++i == len)     --->     if (++i > len - 1) +      //     break;                       break; +      //   ...                          ... +      // }                            } +      Pred = ICmpInst::ICMP_SGT; +      RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); +      DecreasedRightValueByOne = true; +    } +      bool FoundExpectedPred =          (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||          (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); @@ -829,16 +848,41 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP          return None;        } -      IRBuilder<> B(Preheader->getTerminator()); -      RightValue = B.CreateAdd(RightValue, One); +      // We need to increase the right value unless we have already decreased +      // it virtually when we replaced EQ with SGT. +      if (!DecreasedRightValueByOne) { +        IRBuilder<> B(Preheader->getTerminator()); +        RightValue = B.CreateAdd(RightValue, One); +      }      } else {        if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart,                                         RightSCEV)) {          FailureReason = "Induction variable start not bounded by upper limit";          return None;        } +      assert(!DecreasedRightValueByOne && +             "Right value can be decreased only for LatchBrExitIdx == 0!");      }    } else { +    bool IncreasedRightValueByOne = false; +    // Try to turn eq/ne predicates to those we can work with. +    if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) +      // while (--i != len) {         while (--i > len) { +      //   ...                 --->     ... +      // }                            } +      Pred = ICmpInst::ICMP_SGT; +    else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && +             !CanBeSMax(SE, RightSCEV)) { +      // while (true) {               while (true) { +      //   if (--i == len)     --->     if (--i < len + 1) +      //     break;                       break; +      //   ...                          ... +      // }                            } +      Pred = ICmpInst::ICMP_SLT; +      RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); +      IncreasedRightValueByOne = true; +    } +      bool FoundExpectedPred =          (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||          (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); @@ -863,14 +907,20 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP          return None;        } -      IRBuilder<> B(Preheader->getTerminator()); -      RightValue = B.CreateSub(RightValue, One); +      // We need to decrease the right value unless we have already increased +      // it virtually when we replaced EQ with SLT. +      if (!IncreasedRightValueByOne) { +        IRBuilder<> B(Preheader->getTerminator()); +        RightValue = B.CreateSub(RightValue, One); +      }      } else {        if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart,                                         RightSCEV)) {          FailureReason = "Induction variable start not bounded by lower limit";          return None;        } +      assert(!IncreasedRightValueByOne && +             "Right value can be increased only for LatchBrExitIdx == 0!");      }    } @@ -922,14 +972,18 @@ LoopConstrainer::calculateSubRanges() const {    bool Increasing = MainLoopStructure.IndVarIncreasing; -  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the -  // range of values the induction variable takes. +  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or +  // [Smallest, GreatestSeen] is the range of values the induction variable +  // takes. -  const SCEV *Smallest = nullptr, *Greatest = nullptr; +  const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr; +  const SCEV *One = SE.getOne(Ty);    if (Increasing) {      Smallest = Start;      Greatest = End; +    // No overflow, because the range [Smallest, GreatestSeen] is not empty. +    GreatestSeen = SE.getMinusSCEV(End, One);    } else {      // These two computations may sign-overflow.  Here is why that is okay:      // @@ -947,9 +1001,9 @@ LoopConstrainer::calculateSubRanges() const {      //    will be an empty range.  Returning an empty range is always safe.      // -    const SCEV *One = SE.getOne(Ty);      Smallest = SE.getAddExpr(End, One);      Greatest = SE.getAddExpr(Start, One); +    GreatestSeen = Start;    }    auto Clamp = [this, Smallest, Greatest](const SCEV *S) { @@ -964,7 +1018,7 @@ LoopConstrainer::calculateSubRanges() const {      Result.LowLimit = Clamp(Range.getBegin());    bool ProvablyNoPostLoop = -      SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); +      SE.isKnownPredicate(ICmpInst::ICMP_SLT, GreatestSeen, Range.getEnd());    if (!ProvablyNoPostLoop)      Result.HighLimit = Clamp(Range.getEnd()); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index ee3de51b13606..4056cc5cb3462 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -2168,11 +2168,19 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {    return false;  } -/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the +/// same BB in the form  /// bb:  ///   %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... -///   %s = select p, trueval, falseval +///   %s = select %p, trueval, falseval  /// +/// or +/// +/// bb: +///   %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... +///   %c = cmp %p, 0 +///   %s = select %c, trueval, falseval +//  /// And expand the select into a branch structure. This later enables  /// jump-threading over bb in this pass.  /// @@ -2186,44 +2194,54 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {    if (LoopHeaders.count(BB))      return false; -  // Look for a Phi/Select pair in the same basic block.  The Phi feeds the -  // condition of the Select and at least one of the incoming values is a -  // constant.    for (BasicBlock::iterator BI = BB->begin();         PHINode *PN = dyn_cast<PHINode>(BI); ++BI) { -    unsigned NumPHIValues = PN->getNumIncomingValues(); -    if (NumPHIValues == 0 || !PN->hasOneUse()) -      continue; - -    SelectInst *SI = dyn_cast<SelectInst>(PN->user_back()); -    if (!SI || SI->getParent() != BB) -      continue; - -    Value *Cond = SI->getCondition(); -    if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1)) +    // Look for a Phi having at least one constant incoming value. +    if (llvm::all_of(PN->incoming_values(), +                     [](Value *V) { return !isa<ConstantInt>(V); }))        continue; -    bool HasConst = false; -    for (unsigned i = 0; i != NumPHIValues; ++i) { -      if (PN->getIncomingBlock(i) == BB) +    auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) { +      // Check if SI is in BB and use V as condition. +      if (SI->getParent() != BB)          return false; -      if (isa<ConstantInt>(PN->getIncomingValue(i))) -        HasConst = true; -    } +      Value *Cond = SI->getCondition(); +      return (Cond && Cond == V && Cond->getType()->isIntegerTy(1)); +    }; -    if (HasConst) { -      // Expand the select. -      TerminatorInst *Term = -          SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); -      PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); -      NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); -      NewPN->addIncoming(SI->getFalseValue(), BB); -      SI->replaceAllUsesWith(NewPN); -      SI->eraseFromParent(); -      return true; +    SelectInst *SI = nullptr; +    for (Use &U : PN->uses()) { +      if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) { +        // Look for a ICmp in BB that compares PN with a constant and is the +        // condition of a Select. +        if (Cmp->getParent() == BB && Cmp->hasOneUse() && +            isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo()))) +          if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back())) +            if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) { +              SI = SelectI; +              break; +            } +      } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) { +        // Look for a Select in BB that uses PN as condtion. +        if (isUnfoldCandidate(SelectI, U.get())) { +          SI = SelectI; +          break; +        } +      }      } + +    if (!SI) +      continue; +    // Expand the select. +    TerminatorInst *Term = +        SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); +    PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); +    NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); +    NewPN->addIncoming(SI->getFalseValue(), BB); +    SI->replaceAllUsesWith(NewPN); +    SI->eraseFromParent(); +    return true;    } -      return false;  } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 606136dc31a4b..2e0d8e0374c08 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -22,6 +22,7 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopIterator.h"  #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Analysis/ScalarEvolutionExpander.h"  #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -323,9 +324,10 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {  class LoopInterchangeLegality {  public:    LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, -                          LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) +                          LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, +                          OptimizationRemarkEmitter *ORE)        : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), -        PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} +        PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {}    /// Check if the loops can be interchanged.    bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -353,6 +355,8 @@ private:    LoopInfo *LI;    DominatorTree *DT;    bool PreserveLCSSA; +  /// Interface to emit optimization remarks. +  OptimizationRemarkEmitter *ORE;    bool InnerLoopHasReduction;  }; @@ -361,8 +365,9 @@ private:  /// loop.  class LoopInterchangeProfitability {  public: -  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) -      : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} +  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, +                               OptimizationRemarkEmitter *ORE) +      : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}    /// Check if the loop interchange is profitable.    bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, @@ -376,6 +381,8 @@ private:    /// Scev analysis.    ScalarEvolution *SE; +  /// Interface to emit optimization remarks. +  OptimizationRemarkEmitter *ORE;  };  /// LoopInterchangeTransform interchanges the loop. @@ -422,6 +429,9 @@ struct LoopInterchange : public FunctionPass {    DependenceInfo *DI;    DominatorTree *DT;    bool PreserveLCSSA; +  /// Interface to emit optimization remarks. +  OptimizationRemarkEmitter *ORE; +    LoopInterchange()        : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {      initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); @@ -435,6 +445,7 @@ struct LoopInterchange : public FunctionPass {      AU.addRequired<DependenceAnalysisWrapperPass>();      AU.addRequiredID(LoopSimplifyID);      AU.addRequiredID(LCSSAID); +    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();    }    bool runOnFunction(Function &F) override { @@ -446,6 +457,7 @@ struct LoopInterchange : public FunctionPass {      DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();      auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();      DT = DTWP ? &DTWP->getDomTree() : nullptr; +    ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();      PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);      // Build up a worklist of loop pairs to analyze. @@ -575,18 +587,23 @@ struct LoopInterchange : public FunctionPass {      Loop *OuterLoop = LoopList[OuterLoopId];      LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, -                                PreserveLCSSA); +                                PreserveLCSSA, ORE);      if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {        DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");        return false;      }      DEBUG(dbgs() << "Loops are legal to interchange\n"); -    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); +    LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);      if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {        DEBUG(dbgs() << "Interchanging loops not profitable\n");        return false;      } +    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged", +                                 InnerLoop->getStartLoc(), +                                 InnerLoop->getHeader()) +              << "Loop interchanged with enclosing loop."); +      LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,                                   LoopNestExit, LIL.hasInnerLoopReduction());      LIT.transform(); @@ -760,6 +777,12 @@ bool LoopInterchangeLegality::currentLimitations() {    if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {      DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "                   << "are supported currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "UnsupportedPHIInner", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Only inner loops with induction or reduction PHI nodes can be" +                 " interchange currently.");      return true;    } @@ -767,6 +790,12 @@ bool LoopInterchangeLegality::currentLimitations() {    if (Inductions.size() != 1) {      DEBUG(dbgs() << "We currently only support loops with 1 induction variable."                   << "Failed to interchange due to current limitation\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "MultiInductionInner", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Only inner loops with 1 induction variable can be " +                 "interchanged currently.");      return true;    }    if (Reductions.size() > 0) @@ -777,6 +806,12 @@ bool LoopInterchangeLegality::currentLimitations() {    if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {      DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "                   << "are supported currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "UnsupportedPHIOuter", +                                       OuterLoop->getStartLoc(), +                                       OuterLoop->getHeader()) +              << "Only outer loops with induction or reduction PHI nodes can be" +                 " interchanged currently.");      return true;    } @@ -785,18 +820,35 @@ bool LoopInterchangeLegality::currentLimitations() {    if (!Reductions.empty()) {      DEBUG(dbgs() << "Outer loops with reductions are not supported "                   << "currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "ReductionsOuter", +                                       OuterLoop->getStartLoc(), +                                       OuterLoop->getHeader()) +              << "Outer loops with reductions cannot be interchangeed " +                 "currently.");      return true;    }    // TODO: Currently we handle only loops with 1 induction variable.    if (Inductions.size() != 1) {      DEBUG(dbgs() << "Loops with more than 1 induction variables are not "                   << "supported currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "MultiIndutionOuter", +                                       OuterLoop->getStartLoc(), +                                       OuterLoop->getHeader()) +              << "Only outer loops with 1 induction variable can be " +                 "interchanged currently.");      return true;    }    // TODO: Triangular loops are not handled for now.    if (!isLoopStructureUnderstood(InnerInductionVar)) {      DEBUG(dbgs() << "Loop structure not understood by pass\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "UnsupportedStructureInner", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Inner loop structure not understood currently.");      return true;    } @@ -805,12 +857,24 @@ bool LoopInterchangeLegality::currentLimitations() {        getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);    if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {      DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "NoLCSSAPHIOuter", +                                       OuterLoop->getStartLoc(), +                                       OuterLoop->getHeader()) +              << "Only outer loops with LCSSA PHIs can be interchange " +                 "currently.");      return true;    }    LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);    if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {      DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "NoLCSSAPHIOuterInner", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Only inner loops with LCSSA PHIs can be interchange " +                 "currently.");      return true;    } @@ -835,6 +899,11 @@ bool LoopInterchangeLegality::currentLimitations() {    if (!InnerIndexVarInc) {      DEBUG(dbgs() << "Did not find an instruction to increment the induction "                   << "variable.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "NoIncrementInInner", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "The inner loop does not increment the induction variable.");      return true;    } @@ -852,6 +921,12 @@ bool LoopInterchangeLegality::currentLimitations() {      if (!I.isIdenticalTo(InnerIndexVarInc)) {        DEBUG(dbgs() << "Found unsupported instructions between induction "                     << "variable increment and branch.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "UnsupportedInsBetweenInduction", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Found unsupported instruction between induction variable " +                 "increment and branch.");        return true;      } @@ -862,6 +937,11 @@ bool LoopInterchangeLegality::currentLimitations() {    // current limitation.    if (!FoundInduction) {      DEBUG(dbgs() << "Did not find the induction variable.\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "NoIndutionVariable", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Did not find the induction variable.");      return true;    }    return false; @@ -875,6 +955,11 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,      DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId                   << " and OuterLoopId = " << OuterLoopId                   << " due to dependence\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "Dependence", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Cannot interchange loops due to dependences.");      return false;    } @@ -910,6 +995,12 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,    // Check if the loops are tightly nested.    if (!tightlyNested(OuterLoop, InnerLoop)) {      DEBUG(dbgs() << "Loops not tightly nested\n"); +    ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                       "NotTightlyNested", +                                       InnerLoop->getStartLoc(), +                                       InnerLoop->getHeader()) +              << "Cannot interchange loops because they are not tightly " +                 "nested.");      return false;    } @@ -1005,9 +1096,18 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,    // It is not profitable as per current cache profitability model. But check if    // we can move this loop outside to improve parallelism. -  bool ImprovesPar = -      isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix); -  return ImprovesPar; +  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) +    return true; + +  ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, +                                     "InterchangeNotProfitable", +                                     InnerLoop->getStartLoc(), +                                     InnerLoop->getHeader()) +            << "Interchanging loops is too costly (cost=" +            << ore::NV("Cost", Cost) << ", threshold=" +            << ore::NV("Threshold", LoopInterchangeCostThreshold) << +            ") and it does not improve parallelism."); +  return false;  }  void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, @@ -1291,6 +1391,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)  INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",                      "Interchanges loops for cache reuse", false, false) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 9397b87cdf563..90c5c243f4648 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -68,6 +68,7 @@  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/Module.h" @@ -90,16 +91,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");  /// If it contains any dynamic allocas, returns false.  static bool canTRE(Function &F) {    // Because of PR962, we don't TRE dynamic allocas. -  for (auto &BB : F) { -    for (auto &I : BB) { -      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { -        if (!AI->isStaticAlloca()) -          return false; -      } -    } -  } - -  return true; +  return llvm::all_of(instructions(F), [](Instruction &I) { +    auto *AI = dyn_cast<AllocaInst>(&I); +    return !AI || AI->isStaticAlloca(); +  });  }  namespace {  | 
