author    Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
commit    eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree      44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/IPO/WholeProgramDevirt.cpp
parent    b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib/Transforms/IPO/WholeProgramDevirt.cpp')
-rw-r--r--  lib/Transforms/IPO/WholeProgramDevirt.cpp  274
1 file changed, 215 insertions(+), 59 deletions(-)
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 5fbb001216a3..d65da2504db4 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -111,6 +111,12 @@ static cl::opt<std::string> ClWriteSummary(
cl::desc("Write summary to given YAML file after running pass"),
cl::Hidden);
+static cl::opt<unsigned>
+ ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
+ cl::init(10), cl::ZeroOrMore,
+ cl::desc("Maximum number of call targets per "
+ "call site to enable branch funnels"));
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
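
For reference, a standalone reduction of the cl::opt pattern added above (the option name and wrapper function here are hypothetical; only the cl::opt API itself is real):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hidden unsigned option with a default, mirroring ClThreshold above.
    // A cl::opt<unsigned> converts implicitly to unsigned at its use sites.
    static cl::opt<unsigned>
        FunnelThreshold("example-branch-funnel-threshold", cl::Hidden,
                        cl::init(10),
                        cl::desc("Maximum number of call targets per call site"));

    // Hypothetical predicate showing how the threshold gates an optimization,
    // as tryICallBranchFunnel does below with TargetsForSlot.size().
    static bool withinFunnelThreshold(size_t NumTargets) {
      return NumTargets <= FunnelThreshold;
    }
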
@@ -281,24 +287,11 @@ struct VirtualCallSite {
DebugLoc DLoc = CS->getDebugLoc();
BasicBlock *Block = CS.getParent();
- // In the new pass manager, we can request the optimization
- // remark emitter pass on a per-function-basis, which the
- // OREGetter will do for us.
- // In the old pass manager, this is harder, so we just build
- // a optimization remark emitter on the fly, when we need it.
- std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
- OptimizationRemarkEmitter *ORE;
- if (OREGetter)
- ORE = &OREGetter(F);
- else {
- OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
- ORE = OwnedORE.get();
- }
-
using namespace ore;
- ORE->emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
- << NV("Optimization", OptName) << ": devirtualized a call to "
- << NV("FunctionName", TargetName));
+ OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
+ << NV("Optimization", OptName)
+ << ": devirtualized a call to "
+ << NV("FunctionName", TargetName));
}
void replaceAndErase(
@@ -329,12 +322,17 @@ struct CallSiteInfo {
/// cases we are directly operating on the call sites at the IR level.
std::vector<VirtualCallSite> CallSites;
+ /// Whether all call sites represented by this CallSiteInfo, including those
+ /// in summaries, have been devirtualized. This starts off as true because a
+ /// default constructed CallSiteInfo represents no call sites.
+ bool AllCallSitesDevirted = true;
+
// These fields are used during the export phase of ThinLTO and reflect
// information collected from function summaries.
/// Whether any function summary contains an llvm.assume(llvm.type.test) for
/// this slot.
- bool SummaryHasTypeTestAssumeUsers;
+ bool SummaryHasTypeTestAssumeUsers = false;
/// CFI-specific: a vector containing the list of function summaries that use
/// the llvm.type.checked.load intrinsic and therefore will require
@@ -350,8 +348,22 @@ struct CallSiteInfo {
!SummaryTypeCheckedLoadUsers.empty();
}
- /// As explained in the comment for SummaryTypeCheckedLoadUsers.
- void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+ void markSummaryHasTypeTestAssumeUsers() {
+ SummaryHasTypeTestAssumeUsers = true;
+ AllCallSitesDevirted = false;
+ }
+
+ void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+ SummaryTypeCheckedLoadUsers.push_back(FS);
+ AllCallSitesDevirted = false;
+ }
+
+ void markDevirt() {
+ AllCallSitesDevirted = true;
+
+ // As explained in the comment for SummaryTypeCheckedLoadUsers.
+ SummaryTypeCheckedLoadUsers.clear();
+ }
};
// Call site information collected for a specific VTableSlot.
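
A reduced model of the devirtualization-tracking invariant these changes introduce (method names follow the patch; bodies are simplified down to the flag updates alone):

    struct CallSiteInfoModel {
      // Vacuously true: a default-constructed object represents no call sites.
      bool AllCallSitesDevirted = true;

      // Any newly observed call site or summary user clears the flag...
      void addCallSite() { AllCallSitesDevirted = false; }
      void markSummaryHasTypeTestAssumeUsers() { AllCallSitesDevirted = false; }
      void addSummaryTypeCheckedLoadUser() { AllCallSitesDevirted = false; }

      // ...and only a successful devirtualization of the slot sets it again.
      void markDevirt() { AllCallSitesDevirted = true; }
    };
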
@@ -386,7 +398,9 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
unsigned *NumUnsafeUses) {
- findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+ auto &CSI = findCallSiteInfo(CS);
+ CSI.AllCallSitesDevirted = false;
+ CSI.CallSites.push_back({VTable, CS, NumUnsafeUses});
}
struct DevirtModule {
@@ -451,6 +465,12 @@ struct DevirtModule {
VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res);
+ void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
+ bool &IsExported);
+ void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot);
+
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<uint64_t> Args);
@@ -484,6 +504,8 @@ struct DevirtModule {
StringRef Name, IntegerType *IntTy,
uint32_t Storage);
+ Constant *getMemberAddr(const TypeMemberInfo *M);
+
void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
Constant *UniqueMemberAddr);
bool tryUniqueRetValOpt(unsigned BitWidth,
@@ -539,7 +561,16 @@ struct WholeProgramDevirt : public ModulePass {
if (skipModule(M))
return false;
- auto OREGetter = function_ref<OptimizationRemarkEmitter &(Function *)>();
+ // In the new pass manager, we can request the optimization
+ // remark emitter pass on a per-function-basis, which the
+ // OREGetter will do for us.
+ // In the old pass manager, this is harder, so we just build
+ // an optimization remark emitter on the fly, when we need it.
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
+ ORE = make_unique<OptimizationRemarkEmitter>(F);
+ return *ORE;
+ };
if (UseCommandLine)
return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter);
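
The lambda above keeps the emitter alive in the enclosing scope; a minimal standalone sketch of that lifetime pattern (Emitter and the int key are stand-ins for OptimizationRemarkEmitter and Function *):

    #include <memory>

    struct Emitter {
      explicit Emitter(int Key) : Key(Key) {}
      int Key;
    };

    int demo() {
      // The owned object lives here, outside the lambda, so the returned
      // reference stays valid until the next call replaces it.
      std::unique_ptr<Emitter> Owned;
      auto Getter = [&](int Key) -> Emitter & {
        Owned = std::make_unique<Emitter>(Key); // destroys the previous emitter
        return *Owned;
      };
      return Getter(42).Key;
    }
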
@@ -580,7 +611,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
- if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run())
+ if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+ .run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -588,7 +620,7 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
- ModuleSummaryIndex Summary;
+ ModuleSummaryIndex Summary(/*HaveGVs=*/false);
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
@@ -730,10 +762,9 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
if (VCallSite.NumUnsafeUses)
--*VCallSite.NumUnsafeUses;
}
- if (CSInfo.isExported()) {
+ if (CSInfo.isExported())
IsExported = true;
- CSInfo.markDevirt();
- }
+ CSInfo.markDevirt();
};
Apply(SlotInfo.CSInfo);
for (auto &P : SlotInfo.ConstCSInfo)
@@ -789,6 +820,133 @@ bool DevirtModule::trySingleImplDevirt(
return true;
}
+void DevirtModule::tryICallBranchFunnel(
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot) {
+ Triple T(M.getTargetTriple());
+ if (T.getArch() != Triple::x86_64)
+ return;
+
+ if (TargetsForSlot.size() > ClThreshold)
+ return;
+
+ bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
+ if (!HasNonDevirt)
+ for (auto &P : SlotInfo.ConstCSInfo)
+ if (!P.second.AllCallSitesDevirted) {
+ HasNonDevirt = true;
+ break;
+ }
+
+ if (!HasNonDevirt)
+ return;
+
+ FunctionType *FT =
+ FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
+ Function *JT;
+ if (isa<MDString>(Slot.TypeID)) {
+ JT = Function::Create(FT, Function::ExternalLinkage,
+ getGlobalName(Slot, {}, "branch_funnel"), &M);
+ JT->setVisibility(GlobalValue::HiddenVisibility);
+ } else {
+ JT = Function::Create(FT, Function::InternalLinkage, "branch_funnel", &M);
+ }
+ JT->addAttribute(1, Attribute::Nest);
+
+ std::vector<Value *> JTArgs;
+ JTArgs.push_back(JT->arg_begin());
+ for (auto &T : TargetsForSlot) {
+ JTArgs.push_back(getMemberAddr(T.TM));
+ JTArgs.push_back(T.Fn);
+ }
+
+ BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
+ Constant *Intr =
+ Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
+
+ auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
+ CI->setTailCallKind(CallInst::TCK_MustTail);
+ ReturnInst::Create(M.getContext(), nullptr, BB);
+
+ bool IsExported = false;
+ applyICallBranchFunnel(SlotInfo, JT, IsExported);
+ if (IsExported)
+ Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
+}
+
+void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
+ Constant *JT, bool &IsExported) {
+ auto Apply = [&](CallSiteInfo &CSInfo) {
+ if (CSInfo.isExported())
+ IsExported = true;
+ if (CSInfo.AllCallSitesDevirted)
+ return;
+ for (auto &&VCallSite : CSInfo.CallSites) {
+ CallSite CS = VCallSite.CS;
+
+ // Jump tables are only profitable if the retpoline mitigation is enabled.
+ Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
+ if (FSAttr.hasAttribute(Attribute::None) ||
+ !FSAttr.getValueAsString().contains("+retpoline"))
+ continue;
+
+ if (RemarksEnabled)
+ VCallSite.emitRemark("branch-funnel", JT->getName(), OREGetter);
+
+ // Pass the address of the vtable in the nest register, which is r10 on
+ // x86_64.
+ std::vector<Type *> NewArgs;
+ NewArgs.push_back(Int8PtrTy);
+ for (Type *T : CS.getFunctionType()->params())
+ NewArgs.push_back(T);
+ PointerType *NewFT = PointerType::getUnqual(
+ FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
+ CS.getFunctionType()->isVarArg()));
+
+ IRBuilder<> IRB(CS.getInstruction());
+ std::vector<Value *> Args;
+ Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
+ for (unsigned I = 0; I != CS.getNumArgOperands(); ++I)
+ Args.push_back(CS.getArgOperand(I));
+
+ CallSite NewCS;
+ if (CS.isCall())
+ NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+ else
+ NewCS = IRB.CreateInvoke(
+ IRB.CreateBitCast(JT, NewFT),
+ cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
+ cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
+ NewCS.setCallingConv(CS.getCallingConv());
+
+ AttributeList Attrs = CS.getAttributes();
+ std::vector<AttributeSet> NewArgAttrs;
+ NewArgAttrs.push_back(AttributeSet::get(
+ M.getContext(), ArrayRef<Attribute>{Attribute::get(
+ M.getContext(), Attribute::Nest)}));
+ for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
+ NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+ NewCS.setAttributes(
+ AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
+ Attrs.getRetAttributes(), NewArgAttrs));
+
+ CS->replaceAllUsesWith(NewCS.getInstruction());
+ CS->eraseFromParent();
+
+ // This use is no longer unsafe.
+ if (VCallSite.NumUnsafeUses)
+ --*VCallSite.NumUnsafeUses;
+ }
+ // Don't mark as devirtualized because there may be callers compiled without
+ // retpoline mitigation, which would mean that they are lowered to
+ // llvm.type.test and therefore require an llvm.type.test resolution for the
+ // type identifier.
+ };
+ Apply(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ Apply(P.second);
+}
+
bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<uint64_t> Args) {
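
For intuition, here is what the generated funnel computes at runtime, written as a hand-coded C++ analogue for two targets. The real code instead emits a single musttail call to the llvm.icall.branch.funnel intrinsic, which the x86-64 backend lowers to a compare-and-branch chain; all names below are illustrative:

    using VFn = int (*)(void *Obj, int Arg);

    int branch_funnel_analogue(void *VTable, // arrives in the nest register (r10)
                               void *Addr0, VFn Fn0,
                               void *Addr1, VFn Fn1,
                               void *Obj, int Arg) {
      if (VTable == Addr0)
        return Fn0(Obj, Arg); // musttail in the real lowering
      if (VTable == Addr1)
        return Fn1(Obj, Arg);
      __builtin_unreachable(); // whole-program: the target list is exhaustive
    }
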
@@ -909,7 +1067,7 @@ Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
// We only need to set metadata if the global is newly created, in which
// case it would not have hidden visibility.
- if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
+ if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
return C;
auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
@@ -941,6 +1099,12 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
CSInfo.markDevirt();
}
+Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
+ Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
+ return ConstantExpr::getGetElementPtr(Int8Ty, C,
+ ConstantInt::get(Int64Ty, M->Offset));
+}
+
bool DevirtModule::tryUniqueRetValOpt(
unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
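
The new helper centralizes the member-address constant previously built inline in tryUniqueRetValOpt; the constant it folds to is plain byte arithmetic, sketched here outside the LLVM constant system (illustrative function, not part of the patch):

    #include <cstdint>

    // getMemberAddr yields: getelementptr(i8, bitcast(GV to i8*), Offset),
    // i.e. the address Offset bytes into the vtable global:
    static const uint8_t *memberAddr(const uint8_t *VTableGlobal,
                                     uint64_t Offset) {
      return VTableGlobal + Offset;
    }
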
@@ -960,12 +1124,7 @@ bool DevirtModule::tryUniqueRetValOpt(
// checked for a uniform return value in tryUniformRetValOpt.
assert(UniqueMember);
- Constant *UniqueMemberAddr =
- ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
- UniqueMemberAddr = ConstantExpr::getGetElementPtr(
- Int8Ty, UniqueMemberAddr,
- ConstantInt::get(Int64Ty, UniqueMember->Offset));
-
+ Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
if (CSInfo.isExported()) {
Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
Res->Info = IsOne;
@@ -1352,6 +1511,14 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
break;
}
}
+
+ if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
+ auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+ Type::getVoidTy(M.getContext()));
+ bool IsExported = false;
+ applyICallBranchFunnel(SlotInfo, JT, IsExported);
+ assert(!IsExported);
+ }
}
void DevirtModule::removeRedundantTypeTests() {
@@ -1421,14 +1588,13 @@ bool DevirtModule::run() {
// FIXME: Only add live functions.
for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
- CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
- true;
+ CallSlots[{MD, VF.Offset}]
+ .CSInfo.markSummaryHasTypeTestAssumeUsers();
}
}
for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
- CallSlots[{MD, VF.Offset}]
- .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+ CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
}
}
for (const FunctionSummary::ConstVCall &VC :
@@ -1436,7 +1602,7 @@ bool DevirtModule::run() {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
- .SummaryHasTypeTestAssumeUsers = true;
+ .markSummaryHasTypeTestAssumeUsers();
}
}
for (const FunctionSummary::ConstVCall &VC :
@@ -1444,7 +1610,7 @@ bool DevirtModule::run() {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
- .SummaryTypeCheckedLoadUsers.push_back(FS);
+ .addSummaryTypeCheckedLoadUser(FS);
}
}
}
@@ -1468,9 +1634,12 @@ bool DevirtModule::run() {
cast<MDString>(S.first.TypeID)->getString())
.WPDRes[S.first.ByteOffset];
- if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
- tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
- DidVirtualConstProp = true;
+ if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+ DidVirtualConstProp |=
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
+
+ tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
+ }
// Collect functions devirtualized at least for one call site for stats.
if (RemarksEnabled)
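
A reduced model of the reworked control flow above: virtual constant propagation no longer gates anything via short-circuit, and branch funnels are attempted whenever single-implementation devirtualization fails (hypothetical booleans stand in for the real calls):

    static bool runOneSlot(bool SingleImplSucceeded, bool ConstPropSucceeded) {
      bool DidVirtualConstProp = false;
      if (!SingleImplSucceeded) {
        // The |= form records success without gating the funnel attempt on it.
        DidVirtualConstProp |= ConstPropSucceeded;
        // tryICallBranchFunnel(...) would run here unconditionally.
      }
      return DidVirtualConstProp;
    }
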
@@ -1499,23 +1668,10 @@ bool DevirtModule::run() {
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
- // In the new pass manager, we can request the optimization
- // remark emitter pass on a per-function-basis, which the
- // OREGetter will do for us.
- // In the old pass manager, this is harder, so we just build
- // a optimization remark emitter on the fly, when we need it.
- std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
- OptimizationRemarkEmitter *ORE;
- if (OREGetter)
- ORE = &OREGetter(F);
- else {
- OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
- ORE = OwnedORE.get();
- }
-
using namespace ore;
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
- << "devirtualized " << NV("FunctionName", F->getName()));
+ OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
+ << "devirtualized "
+ << NV("FunctionName", F->getName()));
}
}