summaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-16 21:03:24 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-16 21:03:24 +0000
commit7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (patch)
tree99ec531924f6078534b100ab9d7696abce848099 /lib/Transforms/IPO
parent7ab83427af0f77b59941ceba41d509d7d097b065 (diff)
Notes
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--lib/Transforms/IPO/CrossDSOCFI.cpp11
-rw-r--r--lib/Transforms/IPO/Inliner.cpp53
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp166
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp414
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp6
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp50
6 files changed, 467 insertions, 233 deletions
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 1b111de061576..d94aa5da85601 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
}
}
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto Func : CfiFunctionsMD->operands()) {
+ assert(Func->getNumOperands() >= 2);
+ for (unsigned I = 2; I < Func->getNumOperands(); ++I)
+ if (ConstantInt *TypeId =
+ extractNumericTypeId(cast<MDNode>(Func->getOperand(I).get())))
+ TypeIds.insert(TypeId->getZExtValue());
+ }
+ }
+
LLVMContext &Ctx = M.getContext();
Constant *C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index c0dfeede05c5a..ad89e40661c67 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -523,40 +523,47 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (!Callee || Callee->isDeclaration())
continue;
- // If this call site is dead and it is to a readonly function, we should
- // just delete the call instead of trying to inline it, regardless of
- // size. This happens because IPSCCP propagates the result out of the
- // call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
- DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction()
- << "\n");
- // Update the call graph by deleting the edge from Callee to Caller.
- CG[Caller]->removeCallEdgeFor(CS);
- CS.getInstruction()->eraseFromParent();
- ++NumCallsDeleted;
- } else {
+ Instruction *Instr = CS.getInstruction();
+
+ bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+
+ int InlineHistoryID;
+ if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- int InlineHistoryID = CallSites[CSi].second;
+ InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
+ }
+ // FIXME for new PM: because of the old PM we currently generate ORE and
+ // in turn BFI on demand. With the new PM, the ORE dependency should
+ // just become a regular analysis dependency.
+ OptimizationRemarkEmitter ORE(Caller);
+
+ // If the policy determines that we should inline this function,
+ // delete the call instead.
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (IsTriviallyDead) {
+ DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ Instr->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ DebugLoc DLoc = Instr->getDebugLoc();
BasicBlock *Block = CS.getParent();
- // FIXME for new PM: because of the old PM we currently generate ORE and
- // in turn BFI on demand. With the new PM, the ORE dependency should
- // just become a regular analysis dependency.
- OptimizationRemarkEmitter ORE(Caller);
-
- // If the policy determines that we should inline this function,
- // try to do so.
- if (!shouldInline(CS, GetInlineCost, ORE))
- continue;
// Attempt to inline the function.
using namespace ore;
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 90896d285f5af..b406c22c69d7a 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -206,17 +207,26 @@ struct ByteArrayInfo {
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GlobalObject *GO;
size_t NTypes;
+ // For functions: true if this is a definition (either in the merged module or
+ // in one of the thinlto modules).
+ bool IsDefinition;
+ // For functions: true if this function is either defined or used in a thinlto
+ // module and its jumptable entry needs to be exported to thinlto backends.
+ bool IsExported;
friend TrailingObjects;
size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
public:
static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
+ bool IsDefinition, bool IsExported,
ArrayRef<MDNode *> Types) {
auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
GTM->GO = GO;
GTM->NTypes = Types.size();
+ GTM->IsDefinition = IsDefinition;
+ GTM->IsExported = IsExported;
std::uninitialized_copy(Types.begin(), Types.end(),
GTM->getTrailingObjects<MDNode *>());
return GTM;
@@ -224,6 +234,12 @@ public:
GlobalObject *getGlobal() const {
return GO;
}
+ bool isDefinition() const {
+ return IsDefinition;
+ }
+ bool isExported() const {
+ return IsExported;
+ }
ArrayRef<MDNode *> types() const {
return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
}
@@ -294,6 +310,7 @@ class LowerTypeTestsModule {
void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
TypeIdLowering importTypeId(StringRef TypeId);
void importTypeTest(CallInst *CI);
+ void importFunction(Function *F, bool isDefinition);
BitSetInfo
buildBitSet(Metadata *TypeId,
@@ -820,6 +837,41 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
CI->eraseFromParent();
}
+// ThinLTO backend: the function F has a jump table entry; update this module
+// accordingly. isDefinition describes the type of the jump table entry.
+void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
+ assert(F->getType()->getAddressSpace() == 0);
+
+ // Declaration of a local function - nothing to do.
+ if (F->isDeclarationForLinker() && isDefinition)
+ return;
+
+ GlobalValue::VisibilityTypes Visibility = F->getVisibility();
+ std::string Name = F->getName();
+ Function *FDecl;
+
+ if (F->isDeclarationForLinker() && !isDefinition) {
+ // Declaration of an external function.
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name + ".cfi_jt", &M);
+ FDecl->setVisibility(GlobalValue::HiddenVisibility);
+ } else {
+ // Definition.
+ assert(isDefinition);
+ F->setName(Name + ".cfi");
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name, &M);
+ FDecl->setVisibility(Visibility);
+ }
+
+ if (F->isWeakForLinker())
+ replaceWeakDeclarationWithJumpTablePtr(F, FDecl);
+ else
+ F->replaceAllUsesWith(FDecl);
+}
+
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
@@ -1143,7 +1195,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// arithmetic that we normally use for globals.
// FIXME: find a better way to represent the jumptable in the IR.
-
assert(!Functions.empty());
// Build a simple layout based on the regular layout of jump tables.
@@ -1167,6 +1218,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// references to the original functions with references to the aliases.
for (unsigned I = 0; I != Functions.size(); ++I) {
Function *F = cast<Function>(Functions[I]->getGlobal());
+ bool IsDefinition = Functions[I]->isDefinition();
Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
ConstantExpr::getInBoundsGetElementPtr(
@@ -1174,7 +1226,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (F->isDeclarationForLinker()) {
+ if (Functions[I]->isExported()) {
+ if (IsDefinition) {
+ ExportSummary->cfiFunctionDefs().insert(F->getName());
+ } else {
+ GlobalAlias *JtAlias = GlobalAlias::create(
+ F->getValueType(), 0, GlobalValue::ExternalLinkage,
+ F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+ JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ ExportSummary->cfiFunctionDecls().insert(F->getName());
+ }
+ }
+ if (!IsDefinition) {
if (F->isWeakForLinker())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
else
@@ -1182,9 +1245,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
} else {
assert(F->getType()->getAddressSpace() == 0);
- GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0,
- F->getLinkage(), "",
- CombinedGlobalElemPtr, &M);
+ GlobalAlias *FAlias = GlobalAlias::create(
+ F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M);
FAlias->setVisibility(F->getVisibility());
FAlias->takeName(F);
if (FAlias->hasName())
@@ -1353,15 +1415,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary)
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary &&
+ !ImportSummary)
return false;
if (ImportSummary) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
- importTypeTest(CI);
+ if (TypeTestFunc) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ importTypeTest(CI);
+ }
+ }
+
+ SmallVector<Function *, 8> Defs;
+ SmallVector<Function *, 8> Decls;
+ for (auto &F : M) {
+ // CFI functions are either external, or promoted. A local function may
+ // have the same name, but it's not the one we are looking for.
+ if (F.hasLocalLinkage())
+ continue;
+ if (ImportSummary->cfiFunctionDefs().count(F.getName()))
+ Defs.push_back(&F);
+ else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
+ Decls.push_back(&F);
}
+
+ for (auto F : Defs)
+ importFunction(F, /*isDefinition*/ true);
+ for (auto F : Decls)
+ importFunction(F, /*isDefinition*/ false);
+
return true;
}
@@ -1387,6 +1471,58 @@ bool LowerTypeTestsModule::lower() {
llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
+
+ struct ExportedFunctionInfo {
+ CfiFunctionLinkage Linkage;
+ MDNode *FuncMD; // {name, linkage, type[, type...]}
+ };
+ DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
+ if (ExportSummary) {
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto FuncMD : CfiFunctionsMD->operands()) {
+ assert(FuncMD->getNumOperands() >= 2);
+ StringRef FunctionName =
+ cast<MDString>(FuncMD->getOperand(0))->getString();
+ if (!ExportSummary->isGUIDLive(GlobalValue::getGUID(
+ GlobalValue::dropLLVMManglingEscape(FunctionName))))
+ continue;
+ CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
+ cast<ConstantAsMetadata>(FuncMD->getOperand(1))
+ ->getValue()
+ ->getUniqueInteger()
+ .getZExtValue());
+ auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
+ if (!P.second && P.first->second.Linkage != CFL_Definition)
+ P.first->second = {Linkage, FuncMD};
+ }
+
+ for (const auto &P : ExportedFunctions) {
+ StringRef FunctionName = P.first;
+ CfiFunctionLinkage Linkage = P.second.Linkage;
+ MDNode *FuncMD = P.second.FuncMD;
+ Function *F = M.getFunction(FunctionName);
+ if (!F)
+ F = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalVariable::ExternalLinkage, FunctionName, &M);
+
+ if (Linkage == CFL_Definition)
+ F->eraseMetadata(LLVMContext::MD_type);
+
+ if (F->isDeclaration()) {
+ if (Linkage == CFL_WeakDeclaration)
+ F->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+ SmallVector<MDNode *, 2> Types;
+ for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
+ F->addMetadata(LLVMContext::MD_type,
+ *cast<MDNode>(FuncMD->getOperand(I).get()));
+ }
+ }
+ }
+ }
+
for (GlobalObject &GO : M.global_objects()) {
if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
continue;
@@ -1396,7 +1532,15 @@ bool LowerTypeTestsModule::lower() {
if (Types.empty())
continue;
- auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types);
+ bool IsDefinition = !GO.isDeclarationForLinker();
+ bool IsExported = false;
+ if (isa<Function>(GO) && ExportedFunctions.count(GO.getName())) {
+ IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition;
+ IsExported = true;
+ }
+
+ auto *GTM =
+ GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ea805efc66b79..8840435af6421 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -103,6 +103,35 @@ struct PartialInlinerImpl {
bool run(Module &M);
Function *unswitchFunction(Function *F);
+ // This class speculatively clones the function to be partially inlined.
+ // At the end of partial inlining, the remaining callsites to the cloned
+ // function that are not partially inlined will be fixed up to reference
+ // the original function, and the cloned function will be erased.
+ struct FunctionCloner {
+ FunctionCloner(Function *F, FunctionOutliningInfo *OI);
+ ~FunctionCloner();
+
+ // Prepare for function outlining: making sure there is only
+ // one incoming edge from the extracted/outlined region to
+ // the return block.
+ void NormalizeReturnBlock();
+
+ // Do function outlining:
+ Function *doFunctionOutlining();
+
+ Function *OrigFunc = nullptr;
+ Function *ClonedFunc = nullptr;
+ Function *OutlinedFunc = nullptr;
+ BasicBlock *OutliningCallBB = nullptr;
+ // ClonedFunc is inlined in one of its callers after function
+ // outlining.
+ bool IsFunctionInlined = false;
+ // The cost of the region to be outlined.
+ int OutlinedRegionCost = 0;
+ std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
+ };
+
private:
int NumPartialInlining = 0;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
@@ -114,27 +143,18 @@ private:
// The result is no larger than 1 and is represented using BP.
// (Note that the outlined region's 'head' block can only have incoming
// edges from the guarding entry blocks).
- BranchProbability getOutliningCallBBRelativeFreq(Function *F,
- FunctionOutliningInfo *OI,
- Function *DuplicateFunction,
- BlockFrequencyInfo *BFI,
- BasicBlock *OutliningCallBB);
+ BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
// Return true if the callee of CS should be partially inlined with
// profit.
- bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
- BlockFrequencyInfo *CalleeBFI,
- BasicBlock *OutliningCallBB,
- int OutliningCallOverhead,
+ bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
+ BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE);
// Try to inline DuplicateFunction (cloned from F with call to
// the OutlinedFunction into its callers. Return true
// if there is any successful inlining.
- bool tryPartialInline(Function *DuplicateFunction,
- Function *F, /*orignal function */
- FunctionOutliningInfo *OI, Function *OutlinedFunction,
- BlockFrequencyInfo *CalleeBFI);
+ bool tryPartialInline(FunctionCloner &Cloner);
// Compute the mapping from use site of DuplicationFunction to the enclosing
// BB's profile count.
@@ -146,7 +166,7 @@ private:
NumPartialInlining >= MaxNumPartialInlining);
}
- CallSite getCallSite(User *U) {
+ static CallSite getCallSite(User *U) {
CallSite CS;
if (CallInst *CI = dyn_cast<CallInst>(U))
CS = CallSite(CI);
@@ -157,7 +177,7 @@ private:
return CS;
}
- CallSite getOneCallSiteTo(Function *F) {
+ static CallSite getOneCallSiteTo(Function *F) {
User *User = *F->user_begin();
return getCallSite(User);
}
@@ -171,20 +191,15 @@ private:
// Returns the costs associated with function outlining:
// - The first value is the non-weighted runtime cost for making the call
- // to the outlined function 'OutlinedFunction', including the addtional
- // setup cost in the outlined function itself;
+ // to the outlined function, including the additional setup cost in the
+ // outlined function itself;
// - The second value is the estimated size of the new call sequence in
- // basic block 'OutliningCallBB';
- // - The third value is the estimated size of the original code from
- // function 'F' that is extracted into the outlined function.
- std::tuple<int, int, int>
- computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
- Function *OutlinedFunction,
- BasicBlock *OutliningCallBB);
+ // basic block Cloner.OutliningCallBB;
+ std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
- int computeBBInlineCost(BasicBlock *BB);
+ static int computeBBInlineCost(BasicBlock *BB);
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
@@ -396,19 +411,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
return false;
}
-BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
- Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
- BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
+BranchProbability
+PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
auto EntryFreq =
- BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
- auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
+ Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
+ auto OutliningCallFreq =
+ Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
auto OutlineRegionRelFreq =
BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
EntryFreq.getFrequency());
- if (hasProfileData(F, OI))
+ if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
@@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
}
bool PartialInlinerImpl::shouldPartialInline(
- CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
- BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
- int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
+ CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
+ OptimizationRemarkEmitter &ORE) {
+
using namespace ore;
if (SkipCostAnalysis)
return true;
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ assert(Callee == Cloner.ClonedFunc);
+
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
@@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialInline(
if (IC.isAlways()) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
- << NV("Callee", F)
+ << NV("Callee", Cloner.OrigFunc)
<< " should always be fully inlined, not partially");
return false;
}
if (IC.isNever()) {
ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller)
<< " because it should never be inlined (cost=never)");
return false;
@@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialInline(
if (!IC) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
}
const DataLayout &DL = Caller->getParent()->getDataLayout();
+
// The savings of eliminating the call:
int NonWeightedSavings = getCallsiteCost(CS, DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
- auto RelativeFreq =
- getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
- auto NormWeightedRcost =
- BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
-
// Weighted saving is smaller than weighted cost, return false
- if (NormWeightedSavings < NormWeightedRcost) {
+ if (NormWeightedSavings < WeightedOutliningRcost) {
ORE.emit(
OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
- << NV("Callee", F) << " not partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " runtime overhead (overhead="
- << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
+ << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
<< ", savings="
<< NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
<< " of making the outlined call is too high");
@@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialInline(
}
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
- << NV("Callee", F) << " can be partially inlined into "
+ << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
<< " (threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
@@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
return InlineCost;
}
-std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
- Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
- BasicBlock *OutliningCallBB) {
- // First compute the cost of the outlined region 'OI' in the original
- // function 'F'.
- // FIXME: The code extractor (outliner) can now do code sinking/hoisting
- // to reduce outlining cost. The hoisted/sunk code currently do not
- // incur any runtime cost so it is still OK to compare the outlined
- // function cost with the outlined region in the original function.
- // If this ever changes, we will need to introduce new extractor api
- // to pass the information.
- int OutlinedRegionCost = 0;
- for (BasicBlock &BB : *F) {
- if (&BB != OI->ReturnBlock &&
- // Assuming Entry set is small -- do a linear search here:
- std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
- OI->Entries.end()) {
- OutlinedRegionCost += computeBBInlineCost(&BB);
- }
- }
+std::tuple<int, int>
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
// Now compute the cost of the call sequence to the outlined function
// 'OutlinedFunction' in BB 'OutliningCallBB':
- int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
+ int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
// Now compute the cost of the extracted/outlined function itself:
int OutlinedFunctionCost = 0;
- for (BasicBlock &BB : *OutlinedFunction) {
+ for (BasicBlock &BB : *Cloner.OutlinedFunc) {
OutlinedFunctionCost += computeBBInlineCost(&BB);
}
- assert(OutlinedFunctionCost >= OutlinedRegionCost &&
+ assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
// The code extractor introduces a new root and exit stub blocks with
// additional unconditional branches. Those branches will be eliminated
// later with bb layout. The cost should be adjusted accordingly:
OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
- int OutliningRuntimeOverhead = OutliningFuncCallCost +
- (OutlinedFunctionCost - OutlinedRegionCost) +
- ExtraOutliningPenalty;
+ int OutliningRuntimeOverhead =
+ OutliningFuncCallCost +
+ (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
+ ExtraOutliningPenalty;
- return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
- OutlinedRegionCost);
+ return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
}
// Create the callsite to profile count map which is
@@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
}
}
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
-
- if (F->hasAddressTaken())
- return nullptr;
-
- // Let inliner handle it
- if (F->hasFnAttribute(Attribute::AlwaysInline))
- return nullptr;
-
- if (F->hasFnAttribute(Attribute::NoInline))
- return nullptr;
-
- if (PSI->isFunctionEntryCold(F))
- return nullptr;
-
- if (F->user_begin() == F->user_end())
- return nullptr;
-
- std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
-
- if (!OI)
- return nullptr;
+PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
+ FunctionOutliningInfo *OI)
+ : OrigFunc(F) {
+ ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
- Function *DuplicateFunction = CloneFunction(F, VMap);
- BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
- BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
- DenseSet<BasicBlock *> NewEntries;
+ ClonedFunc = CloneFunction(F, VMap);
+
+ ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
+ ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
for (BasicBlock *BB : OI->Entries) {
- NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+ ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
+ }
+ for (BasicBlock *E : OI->ReturnBlockPreds) {
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+ ClonedOI->ReturnBlockPreds.push_back(NewE);
}
-
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
- F->replaceAllUsesWith(DuplicateFunction);
+ F->replaceAllUsesWith(ClonedFunc);
+}
+
+void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
auto getFirstPHI = [](BasicBlock *BB) {
BasicBlock::iterator I = BB->begin();
@@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
}
return FirstPhi;
};
+
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
- BasicBlock *PreReturn = NewReturnBlock;
+ BasicBlock *PreReturn = ClonedOI->ReturnBlock;
// only split block when necessary:
PHINode *FirstPhi = getFirstPHI(PreReturn);
- unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+ unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
+
+ if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
+ return;
+
auto IsTrivialPhi = [](PHINode *PN) -> Value * {
Value *CommonValue = PN->getIncomingValue(0);
if (all_of(PN->incoming_values(),
@@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
return nullptr;
};
- if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
-
- NewReturnBlock = NewReturnBlock->splitBasicBlock(
- NewReturnBlock->getFirstNonPHI()->getIterator());
- BasicBlock::iterator I = PreReturn->begin();
- Instruction *Ins = &NewReturnBlock->front();
- SmallVector<Instruction *, 4> DeadPhis;
- while (I != PreReturn->end()) {
- PHINode *OldPhi = dyn_cast<PHINode>(I);
- if (!OldPhi)
- break;
-
- PHINode *RetPhi =
- PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
- OldPhi->replaceAllUsesWith(RetPhi);
- Ins = NewReturnBlock->getFirstNonPHI();
+ ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
+ ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock::iterator I = PreReturn->begin();
+ Instruction *Ins = &ClonedOI->ReturnBlock->front();
+ SmallVector<Instruction *, 4> DeadPhis;
+ while (I != PreReturn->end()) {
+ PHINode *OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi)
+ break;
- RetPhi->addIncoming(&*I, PreReturn);
- for (BasicBlock *E : OI->ReturnBlockPreds) {
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
- RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
- OldPhi->removeIncomingValue(NewE);
- }
+ PHINode *RetPhi =
+ PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+ OldPhi->replaceAllUsesWith(RetPhi);
+ Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
- // After incoming values splitting, the old phi may become trivial.
- // Keeping the trivial phi can introduce definition inside the outline
- // region which is live-out, causing necessary overhead (load, store
- // arg passing etc).
- if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
- OldPhi->replaceAllUsesWith(OldPhiVal);
- DeadPhis.push_back(OldPhi);
- }
-
- ++I;
+ RetPhi->addIncoming(&*I, PreReturn);
+ for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
+ RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
+ OldPhi->removeIncomingValue(E);
}
+ // After incoming values splitting, the old phi may become trivial.
+ // Keeping the trivial phi can introduce definition inside the outline
+ // region which is live-out, causing necessary overhead (load, store
+ // arg passing etc).
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+ OldPhi->replaceAllUsesWith(OldPhiVal);
+ DeadPhis.push_back(OldPhi);
+ }
+ ++I;
+ }
for (auto *DP : DeadPhis)
DP->eraseFromParent();
- for (auto E : OI->ReturnBlockPreds) {
- BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
- NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+ for (auto E : ClonedOI->ReturnBlockPreds) {
+ E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
- }
+}
+Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
// Returns true if the block is to be partial inlined into the caller
// (i.e. not to be extracted to the out of line function)
- auto ToBeInlined = [&](BasicBlock *BB) {
- return BB == NewReturnBlock || NewEntries.count(BB);
+ auto ToBeInlined = [&, this](BasicBlock *BB) {
+ return BB == ClonedOI->ReturnBlock ||
+ (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
+ ClonedOI->Entries.end());
};
+
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
- ToExtract.push_back(NewNonReturnBlock);
- for (BasicBlock &BB : *DuplicateFunction)
- if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
+ ToExtract.push_back(ClonedOI->NonReturnBlock);
+ OutlinedRegionCost +=
+ PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+ for (BasicBlock &BB : *ClonedFunc)
+ if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
+ // FIXME: the code extractor may hoist/sink more code
+ // into the outlined function which may make the outlining
+ // overhead (the difference of the outlined function cost
+ // and OutliningRegionCost) look larger.
+ OutlinedRegionCost += computeBBInlineCost(&BB);
+ }
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
- DT.recalculate(*DuplicateFunction);
+ DT.recalculate(*ClonedFunc);
// Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
LoopInfo LI(DT);
- BranchProbabilityInfo BPI(*DuplicateFunction, LI);
- BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
// Extract the body of the if.
- Function *OutlinedFunction =
- CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
- .extractCodeRegion();
+ OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
+ ClonedFuncBFI.get(), &BPI)
+ .extractCodeRegion();
+
+ if (OutlinedFunc) {
+ OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
+ .getInstruction()
+ ->getParent();
+ assert(OutliningCallBB->getParent() == ClonedFunc);
+ }
- bool AnyInline =
- tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
+ return OutlinedFunc;
+}
+PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
- DuplicateFunction->replaceAllUsesWith(F);
- DuplicateFunction->eraseFromParent();
+ ClonedFunc->replaceAllUsesWith(OrigFunc);
+ ClonedFunc->eraseFromParent();
+ if (!IsFunctionInlined) {
+ // Remove the function that is speculatively created if there is no
+ // reference.
+ if (OutlinedFunc)
+ OutlinedFunc->eraseFromParent();
+ }
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+ if (F->hasAddressTaken())
+ return nullptr;
+
+ // Let inliner handle it
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return nullptr;
+
+ if (F->hasFnAttribute(Attribute::NoInline))
+ return nullptr;
+
+ if (PSI->isFunctionEntryCold(F))
+ return nullptr;
+
+ if (F->user_begin() == F->user_end())
+ return nullptr;
+
+ std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
+
+ if (!OI)
+ return nullptr;
+
+ FunctionCloner Cloner(F, OI.get());
+ Cloner.NormalizeReturnBlock();
+ Function *OutlinedFunction = Cloner.doFunctionOutlining();
+
+ bool AnyInline = tryPartialInline(Cloner);
if (AnyInline)
return OutlinedFunction;
- // Remove the function that is speculatively created:
- if (OutlinedFunction)
- OutlinedFunction->eraseFromParent();
-
return nullptr;
}
-bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
- Function *F,
- FunctionOutliningInfo *OI,
- Function *OutlinedFunction,
- BlockFrequencyInfo *CalleeBFI) {
- if (OutlinedFunction == nullptr)
- return false;
-
+bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
int NonWeightedRcost;
int SizeCost;
- int OutlinedRegionSizeCost;
- auto OutliningCallBB =
- getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
+ if (Cloner.OutlinedFunc == nullptr)
+ return false;
+
+ std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
- std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
- computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
+ auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+ auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
// The call sequence to the outlined function is larger than the original
// outlined region size, it does not increase the chances of inlining
- // 'F' with outlining (The inliner usies the size increase to model the
- // the cost of inlining a callee).
- if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
- OptimizationRemarkEmitter ORE(F);
+ // the function with outlining (The inliner uses the size increase to
+ // model the cost of inlining a callee).
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
+ OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
- std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
+ std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
- << ore::NV("Function", F)
+ << ore::NV("Function", Cloner.OrigFunc)
<< " not partially inlined into callers (Original Size = "
- << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
+ << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
<< ", Size of call sequence to outlined function = "
<< ore::NV("NewSize", SizeCost) << ")");
return false;
}
- assert(F->user_begin() == F->user_end() &&
+ assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
"F's users should all be replaced!");
- std::vector<User *> Users(DuplicateFunction->user_begin(),
- DuplicateFunction->user_end());
+
+ std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
+ Cloner.ClonedFunc->user_end());
DenseMap<User *, uint64_t> CallSiteToProfCountMap;
- if (F->getEntryCount())
- computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
+ if (Cloner.OrigFunc->getEntryCount())
+ computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
- auto CalleeEntryCount = F->getEntryCount();
+ auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
+
bool AnyInline = false;
for (User *User : Users) {
CallSite CS = getCallSite(User);
@@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
OptimizationRemarkEmitter ORE(CS.getCaller());
- if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
- NonWeightedRcost, ORE))
+ if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
continue;
ORE.emit(
OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
- << ore::NV("Callee", F) << " partially inlined into "
+ << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
<< ore::NV("Caller", CS.getCaller()));
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
@@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
NumPartialInlined++;
}
- if (AnyInline && CalleeEntryCount)
- F->setEntryCount(CalleeEntryCountV);
+ if (AnyInline) {
+ Cloner.IsFunctionInlined = true;
+ if (CalleeEntryCount)
+ Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
+ }
return AnyInline;
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 16fba32e98056..4bc64ab698ff9 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,6 +141,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
+static cl::opt<bool> EnableEarlyCSEMemSSA(
+ "enable-earlycse-memssa", cl::init(false), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)"));
+
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass (default = off)"));
@@ -308,7 +312,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index a7bcc7cc55325..802f470ffe1fb 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -32,7 +32,8 @@ namespace {
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
-void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
+ SetVector<GlobalValue *> &PromoteExtra) {
DenseMap<const Comdat *, Comdat *> RenamedComdats;
for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
@@ -40,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
auto Name = ExportGV.getName();
GlobalValue *ImportGV = ImportM.getNamedValue(Name);
- if (!ImportGV || ImportGV->use_empty())
+ if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV))
continue;
std::string NewName = (Name + ModuleId).str();
@@ -53,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
- ImportGV->setName(NewName);
- ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ if (ImportGV) {
+ ImportGV->setName(NewName);
+ ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ }
}
if (!RenamedComdats.empty())
@@ -296,6 +299,11 @@ void splitAndWriteThinLTOBitcode(
F.setComdat(nullptr);
}
+ SetVector<GlobalValue *> CfiFunctions;
+ for (auto &F : M)
+ if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
+ CfiFunctions.insert(&F);
+
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
@@ -308,11 +316,39 @@ void splitAndWriteThinLTOBitcode(
return true;
});
- promoteInternals(*MergedM, M, ModuleId);
- promoteInternals(M, *MergedM, ModuleId);
+ promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
+ promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
+
+ SmallVector<MDNode *, 8> CfiFunctionMDs;
+ for (auto V : CfiFunctions) {
+ Function &F = *cast<Function>(V);
+ SmallVector<MDNode *, 2> Types;
+ F.getMetadata(LLVMContext::MD_type, Types);
+
+ auto &Ctx = MergedM->getContext();
+ SmallVector<Metadata *, 4> Elts;
+ Elts.push_back(MDString::get(Ctx, F.getName()));
+ CfiFunctionLinkage Linkage;
+ if (!F.isDeclarationForLinker())
+ Linkage = CFL_Definition;
+ else if (F.isWeakForLinker())
+ Linkage = CFL_WeakDeclaration;
+ else
+ Linkage = CFL_Declaration;
+ Elts.push_back(ConstantAsMetadata::get(
+ llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
+ for (auto Type : Types)
+ Elts.push_back(Type);
+ CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
+ }
- simplifyExternals(*MergedM);
+ if(!CfiFunctionMDs.empty()) {
+ NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
+ for (auto MD : CfiFunctionMDs)
+ NMD->addOperand(MD);
+ }
+ simplifyExternals(*MergedM);
// FIXME: Try to re-use BSI and PFI from the original module here.
ProfileSummaryInfo PSI(M);