author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000
commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) |
tree | 4def12e759965de927d963ac65840d663ef9d1ea /lib/Transforms/Utils/InlineFunction.cpp |
parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) |
download | src-01095a5d43bbfde13731688ddcf6048ebb8b7721.tar.gz src-01095a5d43bbfde13731688ddcf6048ebb8b7721.zip |
Diffstat (limited to 'lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 190 |
1 file changed, 157 insertions, 33 deletions
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 79282a2a703b..1fbb19d2b8ad 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -427,6 +427,17 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
     if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
       continue;
 
+    // We do not need to (and in fact, cannot) convert possibly throwing calls
+    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
+    // invokes. The caller's "segment" of the deoptimization continuation
+    // attached to the newly inlined @llvm.experimental_deoptimize
+    // (resp. @llvm.experimental.guard) call should contain the exception
+    // handling logic, if any.
+    if (auto *F = CI->getCalledFunction())
+      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
+          F->getIntrinsicID() == Intrinsic::experimental_guard)
+        continue;
+
     if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
       // This call is nested inside a funclet. If that funclet has an unwind
       // destination within the inlinee, then unwinding out of this call would
@@ -677,6 +688,34 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
     UnwindDest->removePredecessor(InvokeBB);
 }
 
+/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
+/// that metadata should be propagated to all memory-accessing cloned
+/// instructions.
+static void PropagateParallelLoopAccessMetadata(CallSite CS,
+                                                ValueToValueMapTy &VMap) {
+  MDNode *M =
+      CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+  if (!M)
+    return;
+
+  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+       VMI != VMIE; ++VMI) {
+    if (!VMI->second)
+      continue;
+
+    Instruction *NI = dyn_cast<Instruction>(VMI->second);
+    if (!NI)
+      continue;
+
+    if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+      M = MDNode::concatenate(PM, M);
+      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+    } else if (NI->mayReadOrWriteMemory()) {
+      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+    }
+  }
+}
+
 /// When inlining a function that contains noalias scope metadata,
 /// this metadata needs to be cloned so that the inlined blocks
 /// have different "unqiue scopes" at every call site. Were this not done, then
@@ -693,12 +732,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
   // inter-procedural alias analysis passes. We can revisit this if it becomes
   // an efficiency or overhead problem.
 
-  for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end();
-       I != IE; ++I)
-    for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) {
-      if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope))
+  for (const BasicBlock &I : *CalledFunc)
+    for (const Instruction &J : I) {
+      if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
         MD.insert(M);
-      if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias))
+      if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
         MD.insert(M);
     }
 
@@ -720,20 +758,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
   // the noalias scopes and the lists of those scopes.
   SmallVector<TempMDTuple, 16> DummyNodes;
   DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
-  for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
-       I != IE; ++I) {
+  for (const MDNode *I : MD) {
     DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
-    MDMap[*I].reset(DummyNodes.back().get());
+    MDMap[I].reset(DummyNodes.back().get());
   }
 
   // Create new metadata nodes to replace the dummy nodes, replacing old
   // metadata references with either a dummy node or an already-created new
   // node.
-  for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
-       I != IE; ++I) {
+  for (const MDNode *I : MD) {
     SmallVector<Metadata *, 4> NewOps;
-    for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) {
-      const Metadata *V = (*I)->getOperand(i);
+    for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
+      const Metadata *V = I->getOperand(i);
       if (const MDNode *M = dyn_cast<MDNode>(V))
         NewOps.push_back(MDMap[M]);
       else
@@ -741,7 +777,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
     }
 
     MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
-    MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
     assert(TempM->isTemporary() && "Expected temporary node");
 
     TempM->replaceAllUsesWith(NewM);
@@ -801,10 +837,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
   const Function *CalledFunc = CS.getCalledFunction();
   SmallVector<const Argument *, 4> NoAliasArgs;
 
-  for (const Argument &I : CalledFunc->args()) {
-    if (I.hasNoAliasAttr() && !I.hasNUses(0))
-      NoAliasArgs.push_back(&I);
-  }
+  for (const Argument &Arg : CalledFunc->args())
+    if (Arg.hasNoAliasAttr() && !Arg.use_empty())
+      NoAliasArgs.push_back(&Arg);
 
   if (NoAliasArgs.empty())
     return;
@@ -885,17 +920,16 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
           IsArgMemOnlyCall = true;
       }
 
-      for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(),
-           AE = ICS.arg_end(); AI != AE; ++AI) {
+      for (Value *Arg : ICS.args()) {
         // We need to check the underlying objects of all arguments, not just
         // the pointer arguments, because we might be passing pointers as
         // integers, etc.
         // However, if we know that the call only accesses pointer arguments,
        // then we only need to check the pointer arguments.
-        if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy())
+        if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
           continue;
 
-        PtrArgs.push_back(*AI);
+        PtrArgs.push_back(Arg);
       }
     }
 
@@ -913,9 +947,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
     SmallVector<Metadata *, 4> Scopes, NoAliases;
     SmallSetVector<const Argument *, 4> NAPtrArgs;
-    for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
+    for (const Value *V : PtrArgs) {
       SmallVector<Value *, 4> Objects;
-      GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
+      GetUnderlyingObjects(const_cast<Value*>(V),
                            Objects, DL, /* LI = */ nullptr);
 
       for (Value *O : Objects)
@@ -1228,7 +1262,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
 /// Rebuild the entire inlined-at chain for this instruction so that the top of
 /// the chain now is inlined-at the new call site.
 static DebugLoc
-updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
+updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
+                    LLVMContext &Ctx,
                     DenseMap<const DILocation *, DILocation *> &IANodes) {
   SmallVector<DILocation *, 3> InlinedAtLocations;
   DILocation *Last = InlinedAtNode;
@@ -1249,8 +1284,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
   // Starting from the top, rebuild the nodes to point to the new inlined-at
   // location (then rebuilding the rest of the chain behind it) and update the
   // map of already-constructed inlined-at nodes.
-  for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(),
-                                         InlinedAtLocations.rend())) {
+  for (const DILocation *MD : reverse(InlinedAtLocations)) {
     Last = IANodes[MD] = DILocation::getDistinct(
         Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
   }
@@ -1264,7 +1298,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
 /// to encode location where these instructions are inlined.
 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                              Instruction *TheCall) {
-  DebugLoc TheCallDL = TheCall->getDebugLoc();
+  const DebugLoc &TheCallDL = TheCall->getDebugLoc();
   if (!TheCallDL)
     return;
 
@@ -1422,6 +1456,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
 
+  // Determine if we are dealing with a call in an EHPad which does not unwind
+  // to caller.
+  bool EHPadForCallUnwindsLocally = false;
+  if (CallSiteEHPad && CS.isCall()) {
+    UnwindDestMemoTy FuncletUnwindMap;
+    Value *CallSiteUnwindDestToken =
+        getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
+
+    EHPadForCallUnwindsLocally =
+        CallSiteUnwindDestToken &&
+        !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
+  }
+
   // Get an iterator to the last basic block in the function, which will have
   // the new function inlined after it.
   Function::iterator LastBlock = --Caller->end();
@@ -1552,6 +1599,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Add noalias metadata if necessary.
    AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
 
+    // Propagate llvm.mem.parallel_loop_access if necessary.
+    PropagateParallelLoopAccessMetadata(CS, VMap);
+
    // FIXME: We could register any cloned assumptions instead of clearing the
    // whole function's cache.
    if (IFI.ACT)
@@ -1602,7 +1652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
   }
 
-  bool InlinedMustTailCalls = false;
+  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
   if (InlinedFunctionInfo.ContainsCalls) {
     CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
     if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1615,6 +1665,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       if (!CI)
         continue;
 
+      if (Function *F = CI->getCalledFunction())
+        InlinedDeoptimizeCalls |=
+            F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
       // We need to reduce the strength of any inlined tail calls. For
       // musttail, we have to avoid introducing potential unbounded stack
       // growth. For example, if functions 'f' and 'g' are mutually recursive
@@ -1677,11 +1731,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       builder.CreateLifetimeStart(AI, AllocaSize);
       for (ReturnInst *RI : Returns) {
-        // Don't insert llvm.lifetime.end calls between a musttail call and a
-        // return. The return kills all local allocas.
+        // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
+        // call and a return. The return kills all local allocas.
         if (InlinedMustTailCalls &&
             RI->getParent()->getTerminatingMustTailCall())
           continue;
+        if (InlinedDeoptimizeCalls &&
+            RI->getParent()->getTerminatingDeoptimizeCall())
+          continue;
         IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
       }
     }
@@ -1702,10 +1759,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Insert a call to llvm.stackrestore before any return instructions in the
     // inlined function.
     for (ReturnInst *RI : Returns) {
-      // Don't insert llvm.stackrestore calls between a musttail call and a
-      // return. The return will restore the stack pointer.
+      // Don't insert llvm.stackrestore calls between a musttail or deoptimize
+      // call and a return. The return will restore the stack pointer.
       if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
         continue;
+      if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
+        continue;
       IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
     }
   }
@@ -1758,7 +1817,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
       else
         NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
-      NewInst->setDebugLoc(I->getDebugLoc());
       NewInst->takeName(I);
       I->replaceAllUsesWith(NewInst);
       I->eraseFromParent();
@@ -1766,6 +1824,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       OpBundles.clear();
     }
 
+    // It is problematic if the inlinee has a cleanupret which unwinds to
+    // caller and we inline it into a call site which doesn't unwind but into
+    // an EH pad that does. Such an edge must be dynamically unreachable.
+    // As such, we replace the cleanupret with unreachable.
+    if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
+      if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
+        changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+
     Instruction *I = BB->getFirstNonPHI();
     if (!I->isEHPad())
       continue;
@@ -1781,6 +1847,64 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
 
+  if (InlinedDeoptimizeCalls) {
+    // We need to at least remove the deoptimizing returns from the Return set,
+    // so that the control flow from those returns does not get merged into the
+    // caller (but terminate it instead). If the caller's return type does not
+    // match the callee's return type, we also need to change the return type of
+    // the intrinsic.
+    if (Caller->getReturnType() == TheCall->getType()) {
+      auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+      });
+      Returns.erase(NewEnd, Returns.end());
+    } else {
+      SmallVector<ReturnInst *, 8> NormalReturns;
+      Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+          Caller->getParent(), Intrinsic::experimental_deoptimize,
+          {Caller->getReturnType()});
+
+      for (ReturnInst *RI : Returns) {
+        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+        if (!DeoptCall) {
+          NormalReturns.push_back(RI);
+          continue;
+        }
+
+        // The calling convention on the deoptimize call itself may be bogus,
+        // since the code we're inlining may have undefined behavior (and may
+        // never actually execute at runtime); but all
+        // @llvm.experimental.deoptimize declarations have to have the same
+        // calling convention in a well-formed module.
+        auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
+        NewDeoptIntrinsic->setCallingConv(CallingConv);
+        auto *CurBB = RI->getParent();
+        RI->eraseFromParent();
+
+        SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
+                                         DeoptCall->arg_end());
+
+        SmallVector<OperandBundleDef, 1> OpBundles;
+        DeoptCall->getOperandBundlesAsDefs(OpBundles);
+        DeoptCall->eraseFromParent();
+        assert(!OpBundles.empty() &&
+               "Expected at least the deopt operand bundle");
+
+        IRBuilder<> Builder(CurBB);
+        CallInst *NewDeoptCall =
+            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+        NewDeoptCall->setCallingConv(CallingConv);
+        if (NewDeoptCall->getType()->isVoidTy())
+          Builder.CreateRetVoid();
+        else
+          Builder.CreateRet(NewDeoptCall);
+      }
+
+      // Leave behind the normal returns so we can merge control flow.
+      std::swap(Returns, NormalReturns);
+    }
+  }
+
+  // Handle any inlined musttail call sites. In order for a new call site to be
+  // musttail, the source of the clone and the inlined call site must have been
+  // musttail. Therefore it's safe to return without merging control into the
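
To illustrate the new deoptimize handling above, here is a minimal, hypothetical LLVM IR sketch; the functions @callee and @caller, the deopt state operand, and the constants are invented for illustration and are not part of the commit. A block that ends in a call to @llvm.experimental.deoptimize must be immediately followed by a ret of that call's value, and inlining has to preserve that shape in the caller.

declare i32 @llvm.experimental.deoptimize.i32(...)

define i32 @callee(i32 %x) {
entry:
  %is.neg = icmp slt i32 %x, 0
  br i1 %is.neg, label %deopt, label %normal

deopt:                    ; rare path: bail out through the deoptimization continuation
  %rv = call i32 (...) @llvm.experimental.deoptimize.i32(i32 -1) [ "deopt"(i32 %x) ]
  ret i32 %rv

normal:
  %inc = add i32 %x, 1
  ret i32 %inc
}

define i32 @caller(i32 %x) {
entry:
  %r = call i32 @callee(i32 %x)
  ret i32 %r
}

When @callee is inlined into @caller here, the cloned %deopt block still ends in a deoptimize call followed by a return, and since both functions return i32 the inliner only needs to drop that return from the Returns set (the remove_if over Returns in the hunk above) so the path is not merged back into @caller's normal control flow. If the return types differed, the else branch would instead rebuild the call through a @llvm.experimental.deoptimize declaration re-typed with the caller's return type via Intrinsic::getDeclaration and emit a matching ret or ret void.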