summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/TailRecursionElimination.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/TailRecursionElimination.cpp')
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp86
1 files changed, 58 insertions, 28 deletions
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f8cd6c17a5a6..0f6db21f73b6 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -61,6 +61,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -68,6 +69,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -124,6 +127,12 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
+ // If the alloca-derived argument is passed byval it is not an escape
+ // point, or a use of an alloca. Calling with byval copies the contents
+ // of the alloca into argument registers or stack slots, which exist
+ // beyond the lifetime of the current frame.
+ if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U)))
+ continue;
bool IsNocapture =
CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
@@ -488,12 +497,10 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI;
}
-static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
- BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool eliminateRecursiveTailCall(
+ CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -566,7 +573,8 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
OldEntry->setName("tailrecurse");
- BranchInst::Create(OldEntry, NewEntry);
+ BranchInst *BI = BranchInst::Create(OldEntry, NewEntry);
+ BI->setDebugLoc(CI->getDebugLoc());
// If this tail call is marked 'tail' and if there are any allocas in the
// entry block, move them up to the new entry block.
@@ -592,6 +600,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
+ // The entry block was changed from OldEntry to NewEntry.
+ // The forward DominatorTree needs to be recalculated when the EntryBB is
+ // changed. In this corner-case we recalculate the entire tree.
+ DTU.recalculate(*NewEntry->getParent());
}
// If this function has self recursive calls in the tail position where some
@@ -667,6 +679,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
+ DTU.insertEdge(BB, OldEntry);
++NumEliminated;
return true;
}
@@ -675,7 +688,7 @@ static bool foldReturnAndProcessPred(
BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
- AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) {
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
bool Change = false;
// Make sure this block is a trivial return block.
@@ -689,7 +702,7 @@ static bool foldReturnAndProcessPred(
SmallVector<BranchInst*, 8> UncondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
if (BI->isUnconditional())
UncondBranchPreds.push_back(BI);
@@ -701,17 +714,17 @@ static bool foldReturnAndProcessPred(
if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
+ ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
// Cleanup: if all predecessors of BB have been eliminated by
// FoldReturnIntoUncondBranch, delete it. It is important to empty it,
// because the ret instruction in there is still using a value which
// eliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->eraseFromParent();
+ DTU.deleteBB(BB);
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
++NumRetDuped;
Change = true;
}
@@ -720,24 +733,23 @@ static bool foldReturnAndProcessPred(
return Change;
}
-static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool processReturningBlock(
+ ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
}
static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE,
+ DomTreeUpdater &DTU) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
@@ -772,11 +784,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall,
- TTI, AA, ORE);
+ TTI, AA, ORE, DTU);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = foldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall, TTI, AA, ORE);
+ Change = foldReturnAndProcessPred(
+ BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall, TTI, AA, ORE, DTU);
MadeChange |= Change;
}
}
@@ -809,16 +821,27 @@ struct TailCallElim : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+ // There is no noticable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+
return eliminateTailRecursion(
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
}
};
}
@@ -842,12 +865,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AliasAnalysis &AA = AM.getResult<AAManager>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
+ // There is no noticable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+ bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
return PA;
}