diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-06-16 21:03:24 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-16 21:03:24 +0000 |
commit | 7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (patch) | |
tree | 99ec531924f6078534b100ab9d7696abce848099 /lib/Transforms/IPO | |
parent | 7ab83427af0f77b59941ceba41d509d7d097b065 (diff) |
Notes
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r-- | lib/Transforms/IPO/CrossDSOCFI.cpp | 11 | ||||
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 53 | ||||
-rw-r--r-- | lib/Transforms/IPO/LowerTypeTests.cpp | 166 | ||||
-rw-r--r-- | lib/Transforms/IPO/PartialInlining.cpp | 414 | ||||
-rw-r--r-- | lib/Transforms/IPO/PassManagerBuilder.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 50 |
6 files changed, 467 insertions, 233 deletions
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index 1b111de061576..d94aa5da85601 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) { } } + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto Func : CfiFunctionsMD->operands()) { + assert(Func->getNumOperands() >= 2); + for (unsigned I = 2; I < Func->getNumOperands(); ++I) + if (ConstantInt *TypeId = + extractNumericTypeId(cast<MDNode>(Func->getOperand(I).get()))) + TypeIds.insert(TypeId->getZExtValue()); + } + } + LLVMContext &Ctx = M.getContext(); Constant *C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index c0dfeede05c5a..ad89e40661c67 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -523,40 +523,47 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (!Callee || Callee->isDeclaration()) continue; - // If this call site is dead and it is to a readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { - DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() - << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. - CG[Caller]->removeCallEdgeFor(CS); - CS.getInstruction()->eraseFromParent(); - ++NumCallsDeleted; - } else { + Instruction *Instr = CS.getInstruction(); + + bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + + int InlineHistoryID; + if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. - int InlineHistoryID = CallSites[CSi].second; + InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; + } + // FIXME for new PM: because of the old PM we currently generate ORE and + // in turn BFI on demand. With the new PM, the ORE dependency should + // just become a regular analysis dependency. + OptimizationRemarkEmitter ORE(Caller); + + // If the policy determines that we should inline this function, + // delete the call instead. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (IsTriviallyDead) { + DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + Instr->eraseFromParent(); + ++NumCallsDeleted; + } else { // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + DebugLoc DLoc = Instr->getDebugLoc(); BasicBlock *Block = CS.getParent(); - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS, GetInlineCost, ORE)) - continue; // Attempt to inline the function. using namespace ore; diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 90896d285f5af..b406c22c69d7a 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -206,17 +207,26 @@ struct ByteArrayInfo { class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { GlobalObject *GO; size_t NTypes; + // For functions: true if this is a definition (either in the merged module or + // in one of the thinlto modules). + bool IsDefinition; + // For functions: true if this function is either defined or used in a thinlto + // module and its jumptable entry needs to be exported to thinlto backends. + bool IsExported; friend TrailingObjects; size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; } public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, + bool IsDefinition, bool IsExported, ArrayRef<MDNode *> Types) { auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate( totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); + GTM->IsDefinition = IsDefinition; + GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects<MDNode *>()); return GTM; @@ -224,6 +234,12 @@ public: GlobalObject *getGlobal() const { return GO; } + bool isDefinition() const { + return IsDefinition; + } + bool isExported() const { + return IsExported; + } ArrayRef<MDNode *> types() const { return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes); } @@ -294,6 +310,7 @@ class LowerTypeTestsModule { void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); + void importFunction(Function *F, bool isDefinition); BitSetInfo buildBitSet(Metadata *TypeId, @@ -820,6 +837,41 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { CI->eraseFromParent(); } +// ThinLTO backend: the function F has a jump table entry; update this module +// accordingly. isDefinition describes the type of the jump table entry. +void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { + assert(F->getType()->getAddressSpace() == 0); + + // Declaration of a local function - nothing to do. + if (F->isDeclarationForLinker() && isDefinition) + return; + + GlobalValue::VisibilityTypes Visibility = F->getVisibility(); + std::string Name = F->getName(); + Function *FDecl; + + if (F->isDeclarationForLinker() && !isDefinition) { + // Declaration of an external function. + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name + ".cfi_jt", &M); + FDecl->setVisibility(GlobalValue::HiddenVisibility); + } else { + // Definition. + assert(isDefinition); + F->setName(Name + ".cfi"); + F->setLinkage(GlobalValue::ExternalLinkage); + F->setVisibility(GlobalValue::HiddenVisibility); + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name, &M); + FDecl->setVisibility(Visibility); + } + + if (F->isWeakForLinker()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl); + else + F->replaceAllUsesWith(FDecl); +} + void LowerTypeTestsModule::lowerTypeTestCalls( ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { @@ -1143,7 +1195,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // arithmetic that we normally use for globals. // FIXME: find a better way to represent the jumptable in the IR. - assert(!Functions.empty()); // Build a simple layout based on the regular layout of jump tables. @@ -1167,6 +1218,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // references to the original functions with references to the aliases. for (unsigned I = 0; I != Functions.size(); ++I) { Function *F = cast<Function>(Functions[I]->getGlobal()); + bool IsDefinition = Functions[I]->isDefinition(); Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( ConstantExpr::getInBoundsGetElementPtr( @@ -1174,7 +1226,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); - if (F->isDeclarationForLinker()) { + if (Functions[I]->isExported()) { + if (IsDefinition) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsDefinition) { if (F->isWeakForLinker()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr); else @@ -1182,9 +1245,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( } else { assert(F->getType()->getAddressSpace() == 0); - GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0, - F->getLinkage(), "", - CombinedGlobalElemPtr, &M); + GlobalAlias *FAlias = GlobalAlias::create( + F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); FAlias->setVisibility(F->getVisibility()); FAlias->takeName(F); if (FAlias->hasName()) @@ -1353,15 +1415,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool LowerTypeTestsModule::lower() { Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); - if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary) + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary && + !ImportSummary) return false; if (ImportSummary) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast<CallInst>((*UI++).getUser()); - importTypeTest(CI); + if (TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast<CallInst>((*UI++).getUser()); + importTypeTest(CI); + } + } + + SmallVector<Function *, 8> Defs; + SmallVector<Function *, 8> Decls; + for (auto &F : M) { + // CFI functions are either external, or promoted. A local function may + // have the same name, but it's not the one we are looking for. + if (F.hasLocalLinkage()) + continue; + if (ImportSummary->cfiFunctionDefs().count(F.getName())) + Defs.push_back(&F); + else if (ImportSummary->cfiFunctionDecls().count(F.getName())) + Decls.push_back(&F); } + + for (auto F : Defs) + importFunction(F, /*isDefinition*/ true); + for (auto F : Decls) + importFunction(F, /*isDefinition*/ false); + return true; } @@ -1387,6 +1471,58 @@ bool LowerTypeTestsModule::lower() { llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo; unsigned I = 0; SmallVector<MDNode *, 2> Types; + + struct ExportedFunctionInfo { + CfiFunctionLinkage Linkage; + MDNode *FuncMD; // {name, linkage, type[, type...]} + }; + DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions; + if (ExportSummary) { + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto FuncMD : CfiFunctionsMD->operands()) { + assert(FuncMD->getNumOperands() >= 2); + StringRef FunctionName = + cast<MDString>(FuncMD->getOperand(0))->getString(); + if (!ExportSummary->isGUIDLive(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(FunctionName)))) + continue; + CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>( + cast<ConstantAsMetadata>(FuncMD->getOperand(1)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}}); + if (!P.second && P.first->second.Linkage != CFL_Definition) + P.first->second = {Linkage, FuncMD}; + } + + for (const auto &P : ExportedFunctions) { + StringRef FunctionName = P.first; + CfiFunctionLinkage Linkage = P.second.Linkage; + MDNode *FuncMD = P.second.FuncMD; + Function *F = M.getFunction(FunctionName); + if (!F) + F = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalVariable::ExternalLinkage, FunctionName, &M); + + if (Linkage == CFL_Definition) + F->eraseMetadata(LLVMContext::MD_type); + + if (F->isDeclaration()) { + if (Linkage == CFL_WeakDeclaration) + F->setLinkage(GlobalValue::ExternalWeakLinkage); + + SmallVector<MDNode *, 2> Types; + for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) + F->addMetadata(LLVMContext::MD_type, + *cast<MDNode>(FuncMD->getOperand(I).get())); + } + } + } + } + for (GlobalObject &GO : M.global_objects()) { if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker()) continue; @@ -1396,7 +1532,15 @@ bool LowerTypeTestsModule::lower() { if (Types.empty()) continue; - auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types); + bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsExported = false; + if (isa<Function>(GO) && ExportedFunctions.count(GO.getName())) { + IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition; + IsExported = true; + } + + auto *GTM = + GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)]; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index ea805efc66b79..8840435af6421 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -103,6 +103,35 @@ struct PartialInlinerImpl { bool run(Module &M); Function *unswitchFunction(Function *F); + // This class speculatively clones the the function to be partial inlined. + // At the end of partial inlining, the remaining callsites to the cloned + // function that are not partially inlined will be fixed up to reference + // the original function, and the cloned function will be erased. + struct FunctionCloner { + FunctionCloner(Function *F, FunctionOutliningInfo *OI); + ~FunctionCloner(); + + // Prepare for function outlining: making sure there is only + // one incoming edge from the extracted/outlined region to + // the return block. + void NormalizeReturnBlock(); + + // Do function outlining: + Function *doFunctionOutlining(); + + Function *OrigFunc = nullptr; + Function *ClonedFunc = nullptr; + Function *OutlinedFunc = nullptr; + BasicBlock *OutliningCallBB = nullptr; + // ClonedFunc is inlined in one of its callers after function + // outlining. + bool IsFunctionInlined = false; + // The cost of the region to be outlined. + int OutlinedRegionCost = 0; + std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr; + std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr; + }; + private: int NumPartialInlining = 0; std::function<AssumptionCache &(Function &)> *GetAssumptionCache; @@ -114,27 +143,18 @@ private: // The result is no larger than 1 and is represented using BP. // (Note that the outlined region's 'head' block can only have incoming // edges from the guarding entry blocks). - BranchProbability getOutliningCallBBRelativeFreq(Function *F, - FunctionOutliningInfo *OI, - Function *DuplicateFunction, - BlockFrequencyInfo *BFI, - BasicBlock *OutliningCallBB); + BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); // Return true if the callee of CS should be partially inlined with // profit. - bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, - BasicBlock *OutliningCallBB, - int OutliningCallOverhead, + bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); // Try to inline DuplicateFunction (cloned from F with call to // the OutlinedFunction into its callers. Return true // if there is any successful inlining. - bool tryPartialInline(Function *DuplicateFunction, - Function *F, /*orignal function */ - FunctionOutliningInfo *OI, Function *OutlinedFunction, - BlockFrequencyInfo *CalleeBFI); + bool tryPartialInline(FunctionCloner &Cloner); // Compute the mapping from use site of DuplicationFunction to the enclosing // BB's profile count. @@ -146,7 +166,7 @@ private: NumPartialInlining >= MaxNumPartialInlining); } - CallSite getCallSite(User *U) { + static CallSite getCallSite(User *U) { CallSite CS; if (CallInst *CI = dyn_cast<CallInst>(U)) CS = CallSite(CI); @@ -157,7 +177,7 @@ private: return CS; } - CallSite getOneCallSiteTo(Function *F) { + static CallSite getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); return getCallSite(User); } @@ -171,20 +191,15 @@ private: // Returns the costs associated with function outlining: // - The first value is the non-weighted runtime cost for making the call - // to the outlined function 'OutlinedFunction', including the addtional - // setup cost in the outlined function itself; + // to the outlined function, including the addtional setup cost in the + // outlined function itself; // - The second value is the estimated size of the new call sequence in - // basic block 'OutliningCallBB'; - // - The third value is the estimated size of the original code from - // function 'F' that is extracted into the outlined function. - std::tuple<int, int, int> - computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo, - Function *OutlinedFunction, - BasicBlock *OutliningCallBB); + // basic block Cloner.OutliningCallBB; + std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner); // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to // approximate both the size and runtime cost (Note that in the current // inline cost analysis, there is no clear distinction there either). - int computeBBInlineCost(BasicBlock *BB); + static int computeBBInlineCost(BasicBlock *BB); std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); @@ -396,19 +411,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { return false; } -BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( - Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction, - BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) { +BranchProbability +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { auto EntryFreq = - BFI->getBlockFreq(&DuplicateFunction->getEntryBlock()); - auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB); + Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); + auto OutliningCallFreq = + Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); - if (hasProfileData(F, OI)) + if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get())) return OutlineRegionRelFreq; // When profile data is not available, we need to be conservative in @@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB, - int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { + CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; if (SkipCostAnalysis) return true; Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); + assert(Callee == Cloner.ClonedFunc); + Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, @@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialInline( if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) - << NV("Callee", F) + << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"); return false; } if (IC.isNever()) { ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"); return false; @@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialInline( if (!IC) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); return false; } const DataLayout &DL = Caller->getParent()->getDataLayout(); + // The savings of eliminating the call: int NonWeightedSavings = getCallsiteCost(CS, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); - auto RelativeFreq = - getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB); - auto NormWeightedRcost = - BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq; - // Weighted saving is smaller than weighted cost, return false - if (NormWeightedSavings < NormWeightedRcost) { + if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit( OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" - << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency()) + << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) << ", savings=" << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" << " of making the outlined call is too high"); @@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialInline( } ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) - << NV("Callee", F) << " can be partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); @@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { return InlineCost; } -std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( - Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, - BasicBlock *OutliningCallBB) { - // First compute the cost of the outlined region 'OI' in the original - // function 'F'. - // FIXME: The code extractor (outliner) can now do code sinking/hoisting - // to reduce outlining cost. The hoisted/sunk code currently do not - // incur any runtime cost so it is still OK to compare the outlined - // function cost with the outlined region in the original function. - // If this ever changes, we will need to introduce new extractor api - // to pass the information. - int OutlinedRegionCost = 0; - for (BasicBlock &BB : *F) { - if (&BB != OI->ReturnBlock && - // Assuming Entry set is small -- do a linear search here: - std::find(OI->Entries.begin(), OI->Entries.end(), &BB) == - OI->Entries.end()) { - OutlinedRegionCost += computeBBInlineCost(&BB); - } - } +std::tuple<int, int> +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { // Now compute the cost of the call sequence to the outlined function // 'OutlinedFunction' in BB 'OutliningCallBB': - int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB); + int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); // Now compute the cost of the extracted/outlined function itself: int OutlinedFunctionCost = 0; - for (BasicBlock &BB : *OutlinedFunction) { + for (BasicBlock &BB : *Cloner.OutlinedFunc) { OutlinedFunctionCost += computeBBInlineCost(&BB); } - assert(OutlinedFunctionCost >= OutlinedRegionCost && + assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); // The code extractor introduces a new root and exit stub blocks with // additional unconditional branches. Those branches will be eliminated // later with bb layout. The cost should be adjusted accordingly: OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; - int OutliningRuntimeOverhead = OutliningFuncCallCost + - (OutlinedFunctionCost - OutlinedRegionCost) + - ExtraOutliningPenalty; + int OutliningRuntimeOverhead = + OutliningFuncCallCost + + (OutlinedFunctionCost - Cloner.OutlinedRegionCost) + + ExtraOutliningPenalty; - return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, - OutlinedRegionCost); + return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead); } // Create the callsite to profile count map which is @@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( } } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - - if (F->hasAddressTaken()) - return nullptr; - - // Let inliner handle it - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return nullptr; - - if (F->hasFnAttribute(Attribute::NoInline)) - return nullptr; - - if (PSI->isFunctionEntryCold(F)) - return nullptr; - - if (F->user_begin() == F->user_end()) - return nullptr; - - std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F); - - if (!OI) - return nullptr; +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, + FunctionOutliningInfo *OI) + : OrigFunc(F) { + ClonedOI = llvm::make_unique<FunctionOutliningInfo>(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; - Function *DuplicateFunction = CloneFunction(F, VMap); - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]); - DenseSet<BasicBlock *> NewEntries; + ClonedFunc = CloneFunction(F, VMap); + + ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]); + ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]); for (BasicBlock *BB : OI->Entries) { - NewEntries.insert(cast<BasicBlock>(VMap[BB])); + ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB])); + } + for (BasicBlock *E : OI->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + ClonedOI->ReturnBlockPreds.push_back(NewE); } - // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. - F->replaceAllUsesWith(DuplicateFunction); + F->replaceAllUsesWith(ClonedFunc); +} + +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); @@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { } return FirstPhi; }; + // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. - BasicBlock *PreReturn = NewReturnBlock; + BasicBlock *PreReturn = ClonedOI->ReturnBlock; // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); - unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size(); + unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); + + if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) + return; + auto IsTrivialPhi = [](PHINode *PN) -> Value * { Value *CommonValue = PN->getIncomingValue(0); if (all_of(PN->incoming_values(), @@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { return nullptr; }; - if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { - - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - SmallVector<Instruction *, 4> DeadPhis; - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast<PHINode>(I); - if (!OldPhi) - break; - - PHINode *RetPhi = - PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); + ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( + ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &ClonedOI->ReturnBlock->front(); + SmallVector<Instruction *, 4> DeadPhis; + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) + break; - RetPhi->addIncoming(&*I, PreReturn); - for (BasicBlock *E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast<BasicBlock>(VMap[E]); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); - OldPhi->removeIncomingValue(NewE); - } + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); - // After incoming values splitting, the old phi may become trivial. - // Keeping the trivial phi can introduce definition inside the outline - // region which is live-out, causing necessary overhead (load, store - // arg passing etc). - if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { - OldPhi->replaceAllUsesWith(OldPhiVal); - DeadPhis.push_back(OldPhi); - } - - ++I; + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); + OldPhi->removeIncomingValue(E); } + // After incoming values splitting, the old phi may become trivial. + // Keeping the trivial phi can introduce definition inside the outline + // region which is live-out, causing necessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); + } + ++I; + } for (auto *DP : DeadPhis) DP->eraseFromParent(); - for (auto E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast<BasicBlock>(VMap[E]); - NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + for (auto E : ClonedOI->ReturnBlockPreds) { + E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); } - } +} +Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { // Returns true if the block is to be partial inlined into the caller // (i.e. not to be extracted to the out of line function) - auto ToBeInlined = [&](BasicBlock *BB) { - return BB == NewReturnBlock || NewEntries.count(BB); + auto ToBeInlined = [&, this](BasicBlock *BB) { + return BB == ClonedOI->ReturnBlock || + (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) != + ClonedOI->Entries.end()); }; + // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; - ToExtract.push_back(NewNonReturnBlock); - for (BasicBlock &BB : *DuplicateFunction) - if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) + ToExtract.push_back(ClonedOI->NonReturnBlock); + OutlinedRegionCost += + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock); + for (BasicBlock &BB : *ClonedFunc) + if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) { ToExtract.push_back(&BB); + // FIXME: the code extractor may hoist/sink more code + // into the outlined function which may make the outlining + // overhead (the difference of the outlined function cost + // and OutliningRegionCost) look larger. + OutlinedRegionCost += computeBBInlineCost(&BB); + } // The CodeExtractor needs a dominator tree. DominatorTree DT; - DT.recalculate(*DuplicateFunction); + DT.recalculate(*ClonedFunc); // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. LoopInfo LI(DT); - BranchProbabilityInfo BPI(*DuplicateFunction, LI); - BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); // Extract the body of the if. - Function *OutlinedFunction = - CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI) - .extractCodeRegion(); + OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI) + .extractCodeRegion(); + + if (OutlinedFunc) { + OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) + .getInstruction() + ->getParent(); + assert(OutliningCallBB->getParent() == ClonedFunc); + } - bool AnyInline = - tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI); + return OutlinedFunc; +} +PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - DuplicateFunction->replaceAllUsesWith(F); - DuplicateFunction->eraseFromParent(); + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + if (!IsFunctionInlined) { + // Remove the function that is speculatively created if there is no + // reference. + if (OutlinedFunc) + OutlinedFunc->eraseFromParent(); + } +} + +Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + if (F->hasAddressTaken()) + return nullptr; + + // Let inliner handle it + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return nullptr; + + if (F->hasFnAttribute(Attribute::NoInline)) + return nullptr; + + if (PSI->isFunctionEntryCold(F)) + return nullptr; + + if (F->user_begin() == F->user_end()) + return nullptr; + + std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F); + + if (!OI) + return nullptr; + + FunctionCloner Cloner(F, OI.get()); + Cloner.NormalizeReturnBlock(); + Function *OutlinedFunction = Cloner.doFunctionOutlining(); + + bool AnyInline = tryPartialInline(Cloner); if (AnyInline) return OutlinedFunction; - // Remove the function that is speculatively created: - if (OutlinedFunction) - OutlinedFunction->eraseFromParent(); - return nullptr; } -bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, - Function *F, - FunctionOutliningInfo *OI, - Function *OutlinedFunction, - BlockFrequencyInfo *CalleeBFI) { - if (OutlinedFunction == nullptr) - return false; - +bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { int NonWeightedRcost; int SizeCost; - int OutlinedRegionSizeCost; - auto OutliningCallBB = - getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent(); + if (Cloner.OutlinedFunc == nullptr) + return false; + + std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner); - std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) = - computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB); + auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); + auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; // The call sequence to the outlined function is larger than the original // outlined region size, it does not increase the chances of inlining - // 'F' with outlining (The inliner usies the size increase to model the - // the cost of inlining a callee). - if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) { - OptimizationRemarkEmitter ORE(F); + // the function with outlining (The inliner usies the size increase to + // model the cost of inlining a callee). + if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { + OptimizationRemarkEmitter ORE(Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; - std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction); + std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) - << ore::NV("Function", F) + << ore::NV("Function", Cloner.OrigFunc) << " not partially inlined into callers (Original Size = " - << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost) + << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) << ", Size of call sequence to outlined function = " << ore::NV("NewSize", SizeCost) << ")"); return false; } - assert(F->user_begin() == F->user_end() && + assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() && "F's users should all be replaced!"); - std::vector<User *> Users(DuplicateFunction->user_begin(), - DuplicateFunction->user_end()); + + std::vector<User *> Users(Cloner.ClonedFunc->user_begin(), + Cloner.ClonedFunc->user_end()); DenseMap<User *, uint64_t> CallSiteToProfCountMap; - if (F->getEntryCount()) - computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap); + if (Cloner.OrigFunc->getEntryCount()) + computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - auto CalleeEntryCount = F->getEntryCount(); + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + bool AnyInline = false; for (User *User : Users) { CallSite CS = getCallSite(User); @@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, OptimizationRemarkEmitter ORE(CS.getCaller()); - if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB, - NonWeightedRcost, ORE)) + if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) continue; ORE.emit( OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()) - << ore::NV("Callee", F) << " partially inlined into " + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); @@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, NumPartialInlined++; } - if (AnyInline && CalleeEntryCount) - F->setEntryCount(CalleeEntryCountV); + if (AnyInline) { + Cloner.IsFunctionInlined = true; + if (CalleeEntryCount) + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + } return AnyInline; } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 16fba32e98056..4bc64ab698ff9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -141,6 +141,10 @@ static cl::opt<int> PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); +static cl::opt<bool> EnableEarlyCSEMemSSA( + "enable-earlycse-memssa", cl::init(false), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)")); + static cl::opt<bool> EnableGVNHoist( "enable-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass (default = off)")); @@ -308,7 +312,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index a7bcc7cc55325..802f470ffe1fb 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -32,7 +32,8 @@ namespace { // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. -void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { +void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, + SetVector<GlobalValue *> &PromoteExtra) { DenseMap<const Comdat *, Comdat *> RenamedComdats; for (auto &ExportGV : ExportM.global_values()) { if (!ExportGV.hasLocalLinkage()) @@ -40,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { auto Name = ExportGV.getName(); GlobalValue *ImportGV = ImportM.getNamedValue(Name); - if (!ImportGV || ImportGV->use_empty()) + if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV)) continue; std::string NewName = (Name + ModuleId).str(); @@ -53,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { ExportGV.setLinkage(GlobalValue::ExternalLinkage); ExportGV.setVisibility(GlobalValue::HiddenVisibility); - ImportGV->setName(NewName); - ImportGV->setVisibility(GlobalValue::HiddenVisibility); + if (ImportGV) { + ImportGV->setName(NewName); + ImportGV->setVisibility(GlobalValue::HiddenVisibility); + } } if (!RenamedComdats.empty()) @@ -296,6 +299,11 @@ void splitAndWriteThinLTOBitcode( F.setComdat(nullptr); } + SetVector<GlobalValue *> CfiFunctions; + for (auto &F : M) + if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) + CfiFunctions.insert(&F); + // Remove all globals with type metadata, globals with comdats that live in // MergedM, and aliases pointing to such globals from the thin LTO module. filterModule(&M, [&](const GlobalValue *GV) { @@ -308,11 +316,39 @@ void splitAndWriteThinLTOBitcode( return true; }); - promoteInternals(*MergedM, M, ModuleId); - promoteInternals(M, *MergedM, ModuleId); + promoteInternals(*MergedM, M, ModuleId, CfiFunctions); + promoteInternals(M, *MergedM, ModuleId, CfiFunctions); + + SmallVector<MDNode *, 8> CfiFunctionMDs; + for (auto V : CfiFunctions) { + Function &F = *cast<Function>(V); + SmallVector<MDNode *, 2> Types; + F.getMetadata(LLVMContext::MD_type, Types); + + auto &Ctx = MergedM->getContext(); + SmallVector<Metadata *, 4> Elts; + Elts.push_back(MDString::get(Ctx, F.getName())); + CfiFunctionLinkage Linkage; + if (!F.isDeclarationForLinker()) + Linkage = CFL_Definition; + else if (F.isWeakForLinker()) + Linkage = CFL_WeakDeclaration; + else + Linkage = CFL_Declaration; + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); + for (auto Type : Types) + Elts.push_back(Type); + CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); + } - simplifyExternals(*MergedM); + if(!CfiFunctionMDs.empty()) { + NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); + for (auto MD : CfiFunctionMDs) + NMD->addOperand(MD); + } + simplifyExternals(*MergedM); // FIXME: Try to re-use BSI and PFI from the original module here. ProfileSummaryInfo PSI(M); |