path: root/lib/Transforms/Utils/InlineFunction.cpp
author     Dimitry Andric <dim@FreeBSD.org>   2016-07-23 20:41:05 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2016-07-23 20:41:05 +0000
commit     01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch)
tree       4def12e759965de927d963ac65840d663ef9d1ea  /lib/Transforms/Utils/InlineFunction.cpp
parent     f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff)
Diffstat (limited to 'lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp  190
1 file changed, 157 insertions(+), 33 deletions(-)
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 79282a2a703b..1fbb19d2b8ad 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -427,6 +427,17 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
+ // We do not need to (and in fact, cannot) convert possibly throwing calls
+ // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
+ // invokes. The caller's "segment" of the deoptimization continuation
+ // attached to the newly inlined @llvm.experimental.deoptimize
+ // (resp. @llvm.experimental.guard) call should contain the exception
+ // handling logic, if any.
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
+ F->getIntrinsicID() == Intrinsic::experimental_guard)
+ continue;
+
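
[For illustration, not part of the diff: the check added above, factored into a hypothetical file-local helper. The name isInlinerSpecialIntrinsic is invented here; the API calls are the same ones the new code uses.]

  // Sketch only: hypothetical helper equivalent to the inline check above.
  static bool isInlinerSpecialIntrinsic(const CallInst *CI) {
    // Deoptimize and guard intrinsic calls are left as plain calls; the
    // caller's segment of the deoptimization continuation carries any
    // exception handling logic.
    if (const Function *F = CI->getCalledFunction())
      return F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
             F->getIntrinsicID() == Intrinsic::experimental_guard;
    return false;
  }
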
if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
// This call is nested inside a funclet. If that funclet has an unwind
// destination within the inlinee, then unwinding out of this call would
@@ -677,6 +688,34 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
+/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
+/// that metadata should be propagated to all memory-accessing cloned
+/// instructions.
+static void PropagateParallelLoopAccessMetadata(CallSite CS,
+ ValueToValueMapTy &VMap) {
+ MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ if (!M)
+ return;
+
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ M = MDNode::concatenate(PM, M);
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+}
+
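
[For illustration, not part of the diff: the loop above reduced to what happens to one cloned instruction NI once the call site's !llvm.mem.parallel_loop_access node M has been found. The helper name is invented; both arguments are assumed non-null.]

  // Sketch only: per-instruction step of the propagation above.
  static void propagateParallelAccessToInstruction(Instruction *NI, MDNode *M) {
    if (MDNode *PM =
            NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
      // The cloned instruction already lists some parallel loops; append the
      // call site's list so it stays parallel with respect to both.
      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MDNode::concatenate(PM, M));
    } else if (NI->mayReadOrWriteMemory()) {
      // Memory-accessing instruction with no prior annotation: inherit the
      // call site's list directly.
      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
    }
  }
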
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
@@ -693,12 +732,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// inter-procedural alias analysis passes. We can revisit this if it becomes
// an efficiency or overhead problem.
- for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end();
- I != IE; ++I)
- for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) {
- if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope))
+ for (const BasicBlock &I : *CalledFunc)
+ for (const Instruction &J : I) {
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
MD.insert(M);
- if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias))
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
MD.insert(M);
}
@@ -720,20 +758,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// the noalias scopes and the lists of those scopes.
SmallVector<TempMDTuple, 16> DummyNodes;
DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
- for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
- I != IE; ++I) {
+ for (const MDNode *I : MD) {
DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
- MDMap[*I].reset(DummyNodes.back().get());
+ MDMap[I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
// metadata references with either a dummy node or an already-created new
// node.
- for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
- I != IE; ++I) {
+ for (const MDNode *I : MD) {
SmallVector<Metadata *, 4> NewOps;
- for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) {
- const Metadata *V = (*I)->getOperand(i);
+ for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
+ const Metadata *V = I->getOperand(i);
if (const MDNode *M = dyn_cast<MDNode>(V))
NewOps.push_back(MDMap[M]);
else
@@ -741,7 +777,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
}
MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
- MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+ MDTuple *TempM = cast<MDTuple>(MDMap[I]);
assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
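
[For illustration, not part of the diff: the hunks above clone scope metadata in two phases, first mapping every original node to a temporary placeholder, then building the real replacement and forwarding the placeholder's uses to it. A minimal single-node sketch; the helper name is invented, and in this trivial form the placeholder is redundant, paying off only when several interreferencing scope lists are cloned in one pass.]

  // Sketch only: the temporary-node idiom in isolation.
  static MDNode *cloneViaTemporary(const MDNode *Orig, LLVMContext &Ctx) {
    // Phase 1: a placeholder that other cloned nodes can point at meanwhile.
    TempMDTuple Temp = MDTuple::getTemporary(Ctx, None);

    // Phase 2: build the real node. Operands are copied verbatim here; the
    // real loop above remaps each MDNode operand through MDMap instead.
    SmallVector<Metadata *, 4> NewOps(Orig->op_begin(), Orig->op_end());
    MDNode *NewM = MDNode::get(Ctx, NewOps);

    // Phase 3: forward any uses of the placeholder to the real node.
    Temp->replaceAllUsesWith(NewM);
    return NewM;
  }
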
@@ -801,10 +837,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
const Function *CalledFunc = CS.getCalledFunction();
SmallVector<const Argument *, 4> NoAliasArgs;
- for (const Argument &I : CalledFunc->args()) {
- if (I.hasNoAliasAttr() && !I.hasNUses(0))
- NoAliasArgs.push_back(&I);
- }
+ for (const Argument &Arg : CalledFunc->args())
+ if (Arg.hasNoAliasAttr() && !Arg.use_empty())
+ NoAliasArgs.push_back(&Arg);
if (NoAliasArgs.empty())
return;
@@ -885,17 +920,16 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
IsArgMemOnlyCall = true;
}
- for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(),
- AE = ICS.arg_end(); AI != AE; ++AI) {
+ for (Value *Arg : ICS.args()) {
// We need to check the underlying objects of all arguments, not just
// the pointer arguments, because we might be passing pointers as
// integers, etc.
// However, if we know that the call only accesses pointer arguments,
// then we only need to check the pointer arguments.
- if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy())
+ if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
continue;
- PtrArgs.push_back(*AI);
+ PtrArgs.push_back(Arg);
}
}
@@ -913,9 +947,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
SmallVector<Metadata *, 4> Scopes, NoAliases;
SmallSetVector<const Argument *, 4> NAPtrArgs;
- for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
+ for (const Value *V : PtrArgs) {
SmallVector<Value *, 4> Objects;
- GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
+ GetUnderlyingObjects(const_cast<Value*>(V),
Objects, DL, /* LI = */ nullptr);
for (Value *O : Objects)
@@ -1228,7 +1262,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
/// Rebuild the entire inlined-at chain for this instruction so that the top of
/// the chain now is inlined-at the new call site.
static DebugLoc
-updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
+updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
+ LLVMContext &Ctx,
DenseMap<const DILocation *, DILocation *> &IANodes) {
SmallVector<DILocation *, 3> InlinedAtLocations;
DILocation *Last = InlinedAtNode;
@@ -1249,8 +1284,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
// Starting from the top, rebuild the nodes to point to the new inlined-at
// location (then rebuilding the rest of the chain behind it) and update the
// map of already-constructed inlined-at nodes.
- for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(),
- InlinedAtLocations.rend())) {
+ for (const DILocation *MD : reverse(InlinedAtLocations)) {
Last = IANodes[MD] = DILocation::getDistinct(
Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
}
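
[For illustration, not part of the diff: a sketch of how fixupLineNumbers (next hunk) consumes updateInlinedAtInfo. IANodes memoizes rebuilt nodes, so instructions sharing an inlined-at tail reuse the same distinct DILocations instead of rebuilding the whole chain each time. Callee and InlinedAtNode are assumed here: the cloned function body being fixed up and the distinct DILocation built from the call site's debug location.]

  // Sketch only: apply the rebuilt inlined-at chain to every cloned instruction.
  DenseMap<const DILocation *, DILocation *> IANodes;
  for (Instruction &I : instructions(Callee)) { // needs llvm/IR/InstIterator.h
    const DebugLoc &DL = I.getDebugLoc();
    if (!DL)
      continue; // no debug location on this instruction
    I.setDebugLoc(
        updateInlinedAtInfo(DL, InlinedAtNode, I.getContext(), IANodes));
  }
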
@@ -1264,7 +1298,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
- DebugLoc TheCallDL = TheCall->getDebugLoc();
+ const DebugLoc &TheCallDL = TheCall->getDebugLoc();
if (!TheCallDL)
return;
@@ -1422,6 +1456,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ // Determine if we are dealing with a call in an EHPad which does not unwind
+ // to caller.
+ bool EHPadForCallUnwindsLocally = false;
+ if (CallSiteEHPad && CS.isCall()) {
+ UnwindDestMemoTy FuncletUnwindMap;
+ Value *CallSiteUnwindDestToken =
+ getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
+
+ EHPadForCallUnwindsLocally =
+ CallSiteUnwindDestToken &&
+ !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
Function::iterator LastBlock = --Caller->end();
@@ -1552,6 +1599,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
+ // Propagate llvm.mem.parallel_loop_access if necessary.
+ PropagateParallelLoopAccessMetadata(CS, VMap);
+
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
if (IFI.ACT)
@@ -1602,7 +1652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
}
- bool InlinedMustTailCalls = false;
+ bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
if (InlinedFunctionInfo.ContainsCalls) {
CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1615,6 +1665,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!CI)
continue;
+ if (Function *F = CI->getCalledFunction())
+ InlinedDeoptimizeCalls |=
+ F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
// We need to reduce the strength of any inlined tail calls. For
// musttail, we have to avoid introducing potential unbounded stack
// growth. For example, if functions 'f' and 'g' are mutually recursive
@@ -1677,11 +1731,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
builder.CreateLifetimeStart(AI, AllocaSize);
for (ReturnInst *RI : Returns) {
- // Don't insert llvm.lifetime.end calls between a musttail call and a
- // return. The return kills all local allocas.
+ // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
+ // call and a return. The return kills all local allocas.
if (InlinedMustTailCalls &&
RI->getParent()->getTerminatingMustTailCall())
continue;
+ if (InlinedDeoptimizeCalls &&
+ RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
}
@@ -1702,10 +1759,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
for (ReturnInst *RI : Returns) {
- // Don't insert llvm.stackrestore calls between a musttail call and a
- // return. The return will restore the stack pointer.
+ // Don't insert llvm.stackrestore calls between a musttail or deoptimize
+ // call and a return. The return will restore the stack pointer.
if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
continue;
+ if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
}
@@ -1758,7 +1817,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
else
NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
- NewInst->setDebugLoc(I->getDebugLoc());
NewInst->takeName(I);
I->replaceAllUsesWith(NewInst);
I->eraseFromParent();
@@ -1766,6 +1824,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
OpBundles.clear();
}
+ // It is problematic if the inlinee has a cleanupret which unwinds to
+ // caller and we inline it into a call site which doesn't unwind but into
+ // an EH pad that does. Such an edge must be dynamically unreachable.
+ // As such, we replace the cleanupret with unreachable.
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
+ if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
+ changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+
Instruction *I = BB->getFirstNonPHI();
if (!I->isEHPad())
continue;
@@ -1781,6 +1847,64 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ if (InlinedDeoptimizeCalls) {
+ // We need to at least remove the deoptimizing returns from the Return set,
+ // so that the control flow from those returns does not get merged into the
+ // caller (but terminate it instead). If the caller's return type does not
+ // match the callee's return type, we also need to change the return type of
+ // the intrinsic.
+ if (Caller->getReturnType() == TheCall->getType()) {
+ auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+ return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+ });
+ Returns.erase(NewEnd, Returns.end());
+ } else {
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+ Caller->getParent(), Intrinsic::experimental_deoptimize,
+ {Caller->getReturnType()});
+
+ for (ReturnInst *RI : Returns) {
+ CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+ if (!DeoptCall) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+
+ // The calling convention on the deoptimize call itself may be bogus,
+ // since the code we're inlining may have undefined behavior (and may
+ // never actually execute at runtime); but all
+ // @llvm.experimental.deoptimize declarations have to have the same
+ // calling convention in a well-formed module.
+ auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
+ NewDeoptIntrinsic->setCallingConv(CallingConv);
+ auto *CurBB = RI->getParent();
+ RI->eraseFromParent();
+
+ SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
+ DeoptCall->arg_end());
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ DeoptCall->getOperandBundlesAsDefs(OpBundles);
+ DeoptCall->eraseFromParent();
+ assert(!OpBundles.empty() &&
+ "Expected at least the deopt operand bundle");
+
+ IRBuilder<> Builder(CurBB);
+ CallInst *NewDeoptCall =
+ Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+ NewDeoptCall->setCallingConv(CallingConv);
+ if (NewDeoptCall->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(NewDeoptCall);
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+ }
+
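
[For illustration, not part of the diff: the tail the loop above leaves at the end of each deoptimizing return block, written as a hypothetical helper; every API call is one the hunk itself uses. The intrinsic declaration has to be re-obtained with the caller's return type because @llvm.experimental.deoptimize is overloaded on its return type, and its result (if any) directly becomes the caller's return value.]

  // Sketch only: emit "deoptimize, then return its result" at the end of BB.
  static void emitDeoptimizeAndReturn(BasicBlock *BB, Function *DeoptDecl,
                                      ArrayRef<Value *> Args,
                                      ArrayRef<OperandBundleDef> Bundles,
                                      CallingConv::ID CC) {
    IRBuilder<> Builder(BB);
    CallInst *Deopt = Builder.CreateCall(DeoptDecl, Args, Bundles);
    // Keep the calling convention consistent with the original declaration.
    Deopt->setCallingConv(CC);
    if (Deopt->getType()->isVoidTy())
      Builder.CreateRetVoid();
    else
      Builder.CreateRet(Deopt);
  }
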
// Handle any inlined musttail call sites. In order for a new call site to be
// musttail, the source of the clone and the inlined call site must have been
// musttail. Therefore it's safe to return without merging control into the