diff options
Diffstat (limited to 'lib/Transforms/Scalar/TailRecursionElimination.cpp')
-rw-r--r-- | lib/Transforms/Scalar/TailRecursionElimination.cpp | 86 |
1 files changed, 58 insertions, 28 deletions
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index f8cd6c17a5a6..0f6db21f73b6 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -61,6 +61,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -68,6 +69,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -124,6 +127,12 @@ struct AllocaDerivedValueTracker { case Instruction::Call: case Instruction::Invoke: { CallSite CS(I); + // If the alloca-derived argument is passed byval it is not an escape + // point, or a use of an alloca. Calling with byval copies the contents + // of the alloca into argument registers or stack slots, which exist + // beyond the lifetime of the current frame. + if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U))) + continue; bool IsNocapture = CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); callUsesLocalStack(CS, IsNocapture); @@ -488,12 +497,10 @@ static CallInst *findTRECandidate(Instruction *TI, return CI; } -static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, - BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl<PHINode *> &ArgumentPHIs, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool eliminateRecursiveTailCall( + CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -566,7 +573,8 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); - BranchInst::Create(OldEntry, NewEntry); + BranchInst *BI = BranchInst::Create(OldEntry, NewEntry); + BI->setDebugLoc(CI->getDebugLoc()); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. @@ -592,6 +600,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } + // The entry block was changed from OldEntry to NewEntry. + // The forward DominatorTree needs to be recalculated when the EntryBB is + // changed. In this corner-case we recalculate the entire tree. + DTU.recalculate(*NewEntry->getParent()); } // If this function has self recursive calls in the tail position where some @@ -667,6 +679,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. + DTU.insertEdge(BB, OldEntry); ++NumEliminated; return true; } @@ -675,7 +688,7 @@ static bool foldReturnAndProcessPred( BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, - AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) { + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { bool Change = false; // Make sure this block is a trivial return block. @@ -689,7 +702,7 @@ static bool foldReturnAndProcessPred( SmallVector<BranchInst*, 8> UncondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *Pred = *PI; - TerminatorInst *PTI = Pred->getTerminator(); + Instruction *PTI = Pred->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) if (BI->isUnconditional()) UncondBranchPreds.push_back(BI); @@ -701,17 +714,17 @@ static bool foldReturnAndProcessPred( if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){ LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); + ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); // Cleanup: if all predecessors of BB have been eliminated by // FoldReturnIntoUncondBranch, delete it. It is important to empty it, // because the ret instruction in there is still using a value which // eliminateRecursiveTailCall will attempt to remove. if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - BB->eraseFromParent(); + DTU.deleteBB(BB); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); ++NumRetDuped; Change = true; } @@ -720,24 +733,23 @@ static bool foldReturnAndProcessPred( return Change; } -static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl<PHINode *> &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI, - AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { +static bool processReturningBlock( + ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, + SmallVectorImpl<PHINode *> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI, + AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) { CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); if (!CI) return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, AA, ORE); + ArgumentPHIs, AA, ORE, DTU); } static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, + DomTreeUpdater &DTU) { if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") return false; @@ -772,11 +784,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall, - TTI, AA, ORE); + TTI, AA, ORE, DTU); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = foldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall, TTI, AA, ORE); + Change = foldReturnAndProcessPred( + BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall, TTI, AA, ORE, DTU); MadeChange |= Change; } } @@ -809,16 +821,27 @@ struct TailCallElim : public FunctionPass { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<PostDominatorTreeWrapperPass>(); } bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + return eliminateTailRecursion( F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), &getAnalysis<AAResultsWrapperPass>().getAAResults(), - &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE()); + &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU); } }; } @@ -842,12 +865,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F, TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); AliasAnalysis &AA = AM.getResult<AAManager>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); - - bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE); + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F); + // There is no noticable performance difference here between Lazy and Eager + // UpdateStrategy based on some test results. It is feasible to switch the + // UpdateStrategy to Lazy if we find it profitable later. + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager); + bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<PostDominatorTreeAnalysis>(); return PA; } |