Diffstat (limited to 'llvm/lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 279
1 file changed, 201 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0ac8fa537f4e..792aa8208f27 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -27,8 +27,9 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Argument.h"
@@ -44,6 +45,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -61,6 +63,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -543,9 +546,16 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
     // instructions require no special handling.
     CallInst *CI = dyn_cast<CallInst>(I);
 
-    if (!CI || CI->doesNotThrow() || CI->isInlineAsm())
+    if (!CI || CI->doesNotThrow())
       continue;
 
+    if (CI->isInlineAsm()) {
+      InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
+      if (!IA->canThrow()) {
+        continue;
+      }
+    }
+
     // We do not need to (and in fact, cannot) convert possibly throwing calls
     // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
     // invokes. The caller's "segment" of the deoptimization continuation
@@ -780,7 +790,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
 /// When inlining a call site that has !llvm.mem.parallel_loop_access,
 /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
 /// be propagated to all memory-accessing cloned instructions.
-static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
+static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
+                                      Function::iterator FEnd) {
   MDNode *MemParallelLoopAccess =
       CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
   MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
@@ -789,41 +800,33 @@
   if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
     return;
 
-  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
-       VMI != VMIE; ++VMI) {
-    // Check that key is an instruction, to skip the Argument mapping, which
-    // points to an instruction in the original function, not the inlined one.
-    if (!VMI->second || !isa<Instruction>(VMI->first))
-      continue;
-
-    Instruction *NI = dyn_cast<Instruction>(VMI->second);
-    if (!NI)
-      continue;
-
-    // This metadata is only relevant for instructions that access memory.
-    if (!NI->mayReadOrWriteMemory())
-      continue;
+  for (BasicBlock &BB : make_range(FStart, FEnd)) {
+    for (Instruction &I : BB) {
+      // This metadata is only relevant for instructions that access memory.
+      if (!I.mayReadOrWriteMemory())
+        continue;
 
-    if (MemParallelLoopAccess) {
-      // TODO: This probably should not overwrite MemParalleLoopAccess.
-      MemParallelLoopAccess = MDNode::concatenate(
-          NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access),
-          MemParallelLoopAccess);
-      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+      if (MemParallelLoopAccess) {
+        // TODO: This probably should not overwrite MemParalleLoopAccess.
+        MemParallelLoopAccess = MDNode::concatenate(
+            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
+            MemParallelLoopAccess);
+        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                       MemParallelLoopAccess);
-    }
+      }
 
-    if (AccessGroup)
-      NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
-          NI->getMetadata(LLVMContext::MD_access_group), AccessGroup));
+      if (AccessGroup)
+        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
+            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
 
-    if (AliasScope)
-      NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
-          NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope));
+      if (AliasScope)
+        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
+            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
 
-    if (NoAlias)
-      NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
-          NI->getMetadata(LLVMContext::MD_noalias), NoAlias));
+      if (NoAlias)
+        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
+            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
+    }
   }
 }
 
@@ -844,9 +847,9 @@ public:
   /// subsequent remap() calls.
   void clone();
 
-  /// Remap instructions in the given VMap from the original to the cloned
+  /// Remap instructions in the given range from the original to the cloned
   /// metadata.
-  void remap(ValueToValueMapTy &VMap);
+  void remap(Function::iterator FStart, Function::iterator FEnd);
 };
 
 ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
@@ -907,28 +910,27 @@ void ScopedAliasMetadataDeepCloner::clone() {
   }
 }
 
-void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
+void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
+                                          Function::iterator FEnd) {
   if (MDMap.empty())
     return; // Nothing to do.
 
-  for (auto Entry : VMap) {
-    // Check that key is an instruction, to skip the Argument mapping, which
-    // points to an instruction in the original function, not the inlined one.
-    if (!Entry->second || !isa<Instruction>(Entry->first))
-      continue;
-
-    Instruction *I = dyn_cast<Instruction>(Entry->second);
-    if (!I)
-      continue;
-
-    if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
-      I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
-
-    if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
-      I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
-
-    if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
-      Decl->setScopeList(MDMap[Decl->getScopeList()]);
+  for (BasicBlock &BB : make_range(FStart, FEnd)) {
+    for (Instruction &I : BB) {
+      // TODO: The null checks for the MDMap.lookup() results should no longer
+      // be necessary.
+      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+        if (MDNode *MNew = MDMap.lookup(M))
+          I.setMetadata(LLVMContext::MD_alias_scope, MNew);
+
+      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+        if (MDNode *MNew = MDMap.lookup(M))
+          I.setMetadata(LLVMContext::MD_noalias, MNew);
+
+      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+          Decl->setScopeList(MNew);
+    }
   }
 }
 
@@ -937,7 +939,8 @@ void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
 /// parameters with noalias metadata specifying the new scope, and tag all
 /// non-derived loads, stores and memory intrinsics with the new alias scopes.
 static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
-                                  const DataLayout &DL, AAResults *CalleeAAR) {
+                                  const DataLayout &DL, AAResults *CalleeAAR,
+                                  ClonedCodeInfo &InlinedFunctionInfo) {
   if (!EnableNoAliasConversion)
     return;
 
@@ -1007,7 +1010,7 @@
       continue;
 
     Instruction *NI = dyn_cast<Instruction>(VMI->second);
-    if (!NI)
+    if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
       continue;
 
     bool IsArgMemOnlyCall = false, IsFuncCall = false;
@@ -1033,6 +1036,11 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
         IsFuncCall = true;
         if (CalleeAAR) {
           FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
+
+          // We'll retain this knowledge without additional metadata.
+          if (AAResults::onlyAccessesInaccessibleMem(MRB))
+            continue;
+
           if (AAResults::onlyAccessesArgPointees(MRB))
             IsArgMemOnlyCall = true;
         }
@@ -1288,7 +1296,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
 
       CallInst *NewAsmp =
           IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align);
-      AC->registerAssumption(NewAsmp);
+      AC->registerAssumption(cast<AssumeInst>(NewAsmp));
     }
   }
 }
@@ -1512,9 +1520,11 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
        BI != BE; ++BI) {
     // Loop metadata needs to be updated so that the start and end locs
     // reference inlined-at locations.
-    auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes](
-                                 const DILocation &Loc) -> DILocation * {
-      return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get();
+    auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
+                              &IANodes](Metadata *MD) -> Metadata * {
+      if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+        return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
+      return MD;
     };
     updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
 
@@ -1644,6 +1654,99 @@ void llvm::updateProfileCallee(
   }
 }
 
+/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
+/// result is implicitly consumed by a call to retainRV or claimRV immediately
+/// after the call. This function inlines the retainRV/claimRV calls.
+///
+/// There are three cases to consider:
+///
+/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
+///    object in the callee return block, the autoreleaseRV call and the
+///    retainRV/claimRV call in the caller cancel out. If the call in the caller
+///    is a claimRV call, a call to objc_release is emitted.
+///
+/// 2. If there is a call in the callee return block that doesn't have operand
+///    bundle "clang.arc.attachedcall", the operand bundle on the original call
+///    is transferred to the call in the callee.
+///
+/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
+///    a retainRV call.
+static void
+inlineRetainOrClaimRVCalls(CallBase &CB,
+                           const SmallVectorImpl<ReturnInst *> &Returns) {
+  Module *Mod = CB.getModule();
+  bool IsRetainRV = objcarc::hasAttachedCallOpBundle(&CB, true),
+       IsClaimRV = !IsRetainRV;
+
+  for (auto *RI : Returns) {
+    Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
+    BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse());
+    BasicBlock::reverse_iterator EI = RI->getParent()->rend();
+    bool InsertRetainCall = IsRetainRV;
+    IRBuilder<> Builder(RI->getContext());
+
+    // Walk backwards through the basic block looking for either a matching
+    // autoreleaseRV call or an unannotated call.
+    for (; I != EI;) {
+      auto CurI = I++;
+
+      // Ignore casts.
+      if (isa<CastInst>(*CurI))
+        continue;
+
+      if (auto *II = dyn_cast<IntrinsicInst>(&*CurI)) {
+        if (II->getIntrinsicID() == Intrinsic::objc_autoreleaseReturnValue &&
+            II->hasNUses(0) &&
+            objcarc::GetRCIdentityRoot(II->getOperand(0)) == RetOpnd) {
+          // If we've found a matching authoreleaseRV call:
+          // - If claimRV is attached to the call, insert a call to objc_release
+          //   and erase the autoreleaseRV call.
+          // - If retainRV is attached to the call, just erase the autoreleaseRV
+          //   call.
+          if (IsClaimRV) {
+            Builder.SetInsertPoint(II);
+            Function *IFn =
+                Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
+            Value *BC =
+                Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+            Builder.CreateCall(IFn, BC, "");
+          }
+          II->eraseFromParent();
+          InsertRetainCall = false;
+        }
+      } else if (auto *CI = dyn_cast<CallInst>(&*CurI)) {
+        if (objcarc::GetRCIdentityRoot(CI) == RetOpnd &&
+            !objcarc::hasAttachedCallOpBundle(CI)) {
+          // If we've found an unannotated call that defines RetOpnd, add a
+          // "clang.arc.attachedcall" operand bundle.
+          Value *BundleArgs[] = {ConstantInt::get(
+              Builder.getInt64Ty(),
+              objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
+          OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
+          auto *NewCall = CallBase::addOperandBundle(
+              CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
+          NewCall->copyMetadata(*CI);
+          CI->replaceAllUsesWith(NewCall);
+          CI->eraseFromParent();
+          InsertRetainCall = false;
+        }
+      }
+
+      break;
+    }
+
+    if (InsertRetainCall) {
+      // The retainRV is attached to the call and we've failed to find a
+      // matching autoreleaseRV or an annotated call in the callee. Emit a call
+      // to objc_retain.
+      Builder.SetInsertPoint(RI);
+      Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
+      Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+      Builder.CreateCall(IFn, BC, "");
+    }
+  }
+}
+
 /// This function inlines the called function into the basic block of the
 /// caller. This returns false if it is not possible to inline this call.
 /// The program is still in a well defined state if this occurs though.
@@ -1681,6 +1784,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
       // ... and "funclet" operand bundles.
       if (Tag == LLVMContext::OB_funclet)
         continue;
+      if (Tag == LLVMContext::OB_clang_arc_attachedcall)
+        continue;
 
       return InlineResult::failure("unsupported operand bundle");
     }
@@ -1843,17 +1948,27 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
     // happy with whatever the cloner can do.
     CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                               /*ModuleLevelChanges=*/false, Returns, ".i",
-                              &InlinedFunctionInfo, &CB);
+                              &InlinedFunctionInfo);
     // Remember the first block that is newly cloned over.
     FirstNewBlock = LastBlock; ++FirstNewBlock;
 
-    if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
-      // Update the BFI of blocks cloned into the caller.
-      updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
-                      CalledFunc->front());
-
-    updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
-                      IFI.PSI, IFI.CallerBFI);
+    // Insert retainRV/clainRV runtime calls.
+    if (objcarc::hasAttachedCallOpBundle(&CB))
+      inlineRetainOrClaimRVCalls(CB, Returns);
+
+    // Updated caller/callee profiles only when requested. For sample loader
+    // inlining, the context-sensitive inlinee profile doesn't need to be
+    // subtracted from callee profile, and the inlined clone also doesn't need
+    // to be scaled based on call site count.
+    if (IFI.UpdateProfile) {
+      if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+        // Update the BFI of blocks cloned into the caller.
+        updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+                        CalledFunc->front());
+
+      updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
+                        IFI.PSI, IFI.CallerBFI);
+    }
 
     // Inject byval arguments initialization.
     for (std::pair<Value*, Value*> &Init : ByValInit)
@@ -1920,26 +2035,25 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
     // Now clone the inlined noalias scope metadata.
     SAMetadataCloner.clone();
-    SAMetadataCloner.remap(VMap);
+    SAMetadataCloner.remap(FirstNewBlock, Caller->end());
 
     // Add noalias metadata if necessary.
-    AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
+    AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
 
     // Clone return attributes on the callsite into the calls within the inlined
    // function which feed into its return value.
     AddReturnAttributes(CB, VMap);
 
     // Propagate metadata on the callsite if necessary.
-    PropagateCallSiteMetadata(CB, VMap);
+    PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
 
     // Register any cloned assumptions.
    if (IFI.GetAssumptionCache)
       for (BasicBlock &NewBlock :
            make_range(FirstNewBlock->getIterator(), Caller->end()))
         for (Instruction &I : NewBlock)
-          if (auto *II = dyn_cast<IntrinsicInst>(&I))
-            if (II->getIntrinsicID() == Intrinsic::assume)
-              IFI.GetAssumptionCache(*Caller).registerAssumption(II);
+          if (auto *II = dyn_cast<AssumeInst>(&I))
+            IFI.GetAssumptionCache(*Caller).registerAssumption(II);
   }
 
 // If there are any alloca instructions in the block that used to be the entry
@@ -2076,7 +2190,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
   // Leave lifetime markers for the static alloca's, scoping them to the
   // function we just inlined.
-  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
+  // We need to insert lifetime intrinsics even at O0 to avoid invalid
+  // access caused by multithreaded coroutines. The check
+  // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
+  if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
+      !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(&FirstNewBlock->front());
     for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
       AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -2209,7 +2327,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
       // As such, we replace the cleanupret with unreachable.
       if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
         if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
-          changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+          changeToUnreachable(CleanupRet);
 
       Instruction *I = BB->getFirstNonPHI();
       if (!I->isEHPad())
@@ -2263,6 +2381,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
       SmallVector<OperandBundleDef, 1> OpBundles;
       DeoptCall->getOperandBundlesAsDefs(OpBundles);
+      auto DeoptAttributes = DeoptCall->getAttributes();
       DeoptCall->eraseFromParent();
       assert(!OpBundles.empty() &&
              "Expected at least the deopt operand bundle");
@@ -2271,6 +2390,7 @@
       CallInst *NewDeoptCall =
           Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
       NewDeoptCall->setCallingConv(CallingConv);
+      NewDeoptCall->setAttributes(DeoptAttributes);
       if (NewDeoptCall->getType()->isVoidTy())
         Builder.CreateRetVoid();
       else
@@ -2323,14 +2443,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
   // before we splice the inlined code into the CFG and lose track of which
   // blocks were actually inlined, collect the call sites. We only do this if
   // call graph updates weren't requested, as those provide value handle based
-  // tracking of inlined call sites instead.
+  // tracking of inlined call sites instead. Calls to intrinsics are not
+  // collected because they are not inlineable.
   if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
     // Otherwise just collect the raw call sites that were inlined.
     for (BasicBlock &NewBB :
          make_range(FirstNewBlock->getIterator(), Caller->end()))
       for (Instruction &I : NewBB)
         if (auto *CB = dyn_cast<CallBase>(&I))
-          IFI.InlinedCallSites.push_back(CB);
+          if (!(CB->getCalledFunction() &&
+                CB->getCalledFunction()->isIntrinsic()))
+            IFI.InlinedCallSites.push_back(CB);
   }
 
   // If we cloned in _exactly one_ basic block, and if that block ends in a
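
The centerpiece of this diff, inlineRetainOrClaimRVCalls(), consumes call sites carrying a "clang.arc.attachedcall" operand bundle. For reference, here is a minimal sketch (not part of the commit) of how such an annotated call site could be constructed, assuming the objcarc::getAttachedCallOperandBundleEnum() helper and the i64 bundle operand exactly as they appear in the hunks above; the function and variable names are hypothetical.

// Sketch only: build a call annotated with the "clang.arc.attachedcall"
// operand bundle that inlineRetainOrClaimRVCalls() later pairs with a
// retainRV or claimRV. Names (emitAnnotatedCall, Builder, Callee, Args)
// are invented for illustration.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

CallInst *emitAnnotatedCall(IRBuilder<> &Builder, FunctionCallee Callee,
                            ArrayRef<Value *> Args, bool IsRetainRV) {
  // At this revision the bundle carries an i64 selecting retainRV vs.
  // claimRV, mirroring the BundleArgs construction in the diff above.
  Value *BundleArgs[] = {ConstantInt::get(
      Builder.getInt64Ty(),
      objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
  OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
  return Builder.CreateCall(Callee, Args, {OB});
}

After inlining, the bundle is resolved against the callee's return blocks: a matching autoreleaseRV cancels it, an unannotated defining call inherits it, and otherwise an explicit objc_retain is emitted, per the three cases in the doc comment above.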
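Separately, two knobs of llvm::InlineFunction() change behavior in this diff: profile updates are now gated on IFI.UpdateProfile, and lifetime markers are forced for pre-split coroutine callers even when InsertLifetime is false. Below is a small caller-side sketch, assuming the InlineFunction()/InlineFunctionInfo declarations in llvm/Transforms/Utils/Cloning.h at this revision; the driver function and its names are invented.

// Sketch only: inline every direct call to Callee inside Caller, showing
// where IFI.UpdateProfile and InsertLifetime plug in.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

static bool inlineAllCallsTo(Function &Caller, Function &Callee,
                             AAResults *CalleeAAR) {
  // Collect call sites up front: InlineFunction splices new blocks into
  // Caller, so we must not inline while iterating over its instructions.
  SmallVector<CallBase *, 8> Sites;
  for (Instruction &I : instructions(Caller))
    if (auto *CB = dyn_cast<CallBase>(&I))
      if (CB->getCalledFunction() == &Callee)
        Sites.push_back(CB);

  InlineFunctionInfo IFI; // No CG/PSI/BFI supplied: nothing to keep updated.
  // A sample-loader-style client disables profile updates entirely, which is
  // exactly what the IFI.UpdateProfile gate in the diff above implements.
  IFI.UpdateProfile = false;

  bool Changed = false;
  for (CallBase *CB : Sites)
    // InsertLifetime=true requests lifetime markers for the callee's static
    // allocas; per the coroutine hunk above, pre-split coroutine callers get
    // them even when this flag is false.
    Changed |= InlineFunction(*CB, IFI, CalleeAAR, /*InsertLifetime=*/true)
                   .isSuccess();
  return Changed;
}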