| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
| commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
| tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Transforms/IPO/WholeProgramDevirt.cpp | |
| parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) | |
Diffstat (limited to 'lib/Transforms/IPO/WholeProgramDevirt.cpp')
-rw-r--r-- | lib/Transforms/IPO/WholeProgramDevirt.cpp | 274
1 file changed, 215 insertions(+), 59 deletions(-)
```diff
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 5fbb001216a3..d65da2504db4 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -111,6 +111,12 @@ static cl::opt<std::string> ClWriteSummary(
     cl::desc("Write summary to given YAML file after running pass"),
     cl::Hidden);
 
+static cl::opt<unsigned>
+    ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
+                cl::init(10), cl::ZeroOrMore,
+                cl::desc("Maximum number of call targets per "
+                         "call site to enable branch funnels"));
+
 // Find the minimum offset that we may store a value of size Size bits at. If
 // IsAfter is set, look for an offset before the object, otherwise look for an
 // offset after the object.
@@ -281,24 +287,11 @@ struct VirtualCallSite {
     DebugLoc DLoc = CS->getDebugLoc();
     BasicBlock *Block = CS.getParent();
 
-    // In the new pass manager, we can request the optimization
-    // remark emitter pass on a per-function-basis, which the
-    // OREGetter will do for us.
-    // In the old pass manager, this is harder, so we just build
-    // a optimization remark emitter on the fly, when we need it.
-    std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
-    OptimizationRemarkEmitter *ORE;
-    if (OREGetter)
-      ORE = &OREGetter(F);
-    else {
-      OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
-      ORE = OwnedORE.get();
-    }
-
     using namespace ore;
-    ORE->emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
-              << NV("Optimization", OptName) << ": devirtualized a call to "
-              << NV("FunctionName", TargetName));
+    OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
+                      << NV("Optimization", OptName)
+                      << ": devirtualized a call to "
+                      << NV("FunctionName", TargetName));
   }
 
   void replaceAndErase(
@@ -329,12 +322,17 @@ struct CallSiteInfo {
   /// cases we are directly operating on the call sites at the IR level.
   std::vector<VirtualCallSite> CallSites;
 
+  /// Whether all call sites represented by this CallSiteInfo, including those
+  /// in summaries, have been devirtualized. This starts off as true because a
+  /// default constructed CallSiteInfo represents no call sites.
+  bool AllCallSitesDevirted = true;
+
   // These fields are used during the export phase of ThinLTO and reflect
   // information collected from function summaries.
 
   /// Whether any function summary contains an llvm.assume(llvm.type.test) for
   /// this slot.
-  bool SummaryHasTypeTestAssumeUsers;
+  bool SummaryHasTypeTestAssumeUsers = false;
 
   /// CFI-specific: a vector containing the list of function summaries that use
   /// the llvm.type.checked.load intrinsic and therefore will require
@@ -350,8 +348,22 @@ struct CallSiteInfo {
            !SummaryTypeCheckedLoadUsers.empty();
   }
 
-  /// As explained in the comment for SummaryTypeCheckedLoadUsers.
-  void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+  void markSummaryHasTypeTestAssumeUsers() {
+    SummaryHasTypeTestAssumeUsers = true;
+    AllCallSitesDevirted = false;
+  }
+
+  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+    SummaryTypeCheckedLoadUsers.push_back(FS);
+    AllCallSitesDevirted = false;
+  }
+
+  void markDevirt() {
+    AllCallSitesDevirted = true;
+
+    // As explained in the comment for SummaryTypeCheckedLoadUsers.
+    SummaryTypeCheckedLoadUsers.clear();
+  }
 };
 
 // Call site information collected for a specific VTableSlot.
```
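Note: the `AllCallSitesDevirted` flag added above is what later gates branch funnel creation: a funnel is only built for a slot that still has call sites no earlier optimization handled. The sketch below is a minimal standalone model of that bookkeeping, not the actual class; `FunctionSummaryStub` is an illustrative stand-in for `llvm::FunctionSummary`, and the `addCallSite` helper mirrors the `VTableSlotInfo::addCallSite` change in the next hunk.

```cpp
// Minimal standalone model of the devirtualization bookkeeping above; the
// real CallSiteInfo lives in WholeProgramDevirt.cpp and carries more state.
#include <cassert>
#include <vector>

struct FunctionSummaryStub {};  // stand-in for llvm::FunctionSummary

struct CallSiteInfoModel {
  // True for a default-constructed object: no call sites means nothing is
  // left to devirtualize.
  bool AllCallSitesDevirted = true;
  bool SummaryHasTypeTestAssumeUsers = false;
  std::vector<FunctionSummaryStub *> SummaryTypeCheckedLoadUsers;

  // Any newly recorded use makes the slot "not fully devirtualized".
  void addCallSite() { AllCallSitesDevirted = false; }
  void markSummaryHasTypeTestAssumeUsers() {
    SummaryHasTypeTestAssumeUsers = true;
    AllCallSitesDevirted = false;
  }
  void addSummaryTypeCheckedLoadUser(FunctionSummaryStub *FS) {
    SummaryTypeCheckedLoadUsers.push_back(FS);
    AllCallSitesDevirted = false;
  }

  // A successful optimization resets the flag and drops the checked-load
  // users, which no longer need an llvm.type.checked.load resolution.
  void markDevirt() {
    AllCallSitesDevirted = true;
    SummaryTypeCheckedLoadUsers.clear();
  }
};

int main() {
  CallSiteInfoModel CSI;
  assert(CSI.AllCallSitesDevirted);   // vacuously true while empty
  CSI.addCallSite();
  assert(!CSI.AllCallSitesDevirted);  // pending work for this slot
  CSI.markDevirt();
  assert(CSI.AllCallSitesDevirted);   // a branch funnel can now be skipped
}
```

The invariant is deliberately conservative: any newly recorded use flips the flag to false, and only an explicit `markDevirt()` restores it.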
```diff
@@ -386,7 +398,9 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
 
 void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
                                  unsigned *NumUnsafeUses) {
-  findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+  auto &CSI = findCallSiteInfo(CS);
+  CSI.AllCallSitesDevirted = false;
+  CSI.CallSites.push_back({VTable, CS, NumUnsafeUses});
 }
 
 struct DevirtModule {
@@ -451,6 +465,12 @@ struct DevirtModule {
                             VTableSlotInfo &SlotInfo,
                             WholeProgramDevirtResolution *Res);
 
+  void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
+                              bool &IsExported);
+  void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+                            VTableSlotInfo &SlotInfo,
+                            WholeProgramDevirtResolution *Res, VTableSlot Slot);
+
   bool tryEvaluateFunctionsWithArgs(
       MutableArrayRef<VirtualCallTarget> TargetsForSlot,
       ArrayRef<uint64_t> Args);
@@ -484,6 +504,8 @@ struct DevirtModule {
                            StringRef Name, IntegerType *IntTy,
                            uint32_t Storage);
 
+  Constant *getMemberAddr(const TypeMemberInfo *M);
+
   void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
                             Constant *UniqueMemberAddr);
   bool tryUniqueRetValOpt(unsigned BitWidth,
@@ -539,7 +561,16 @@ struct WholeProgramDevirt : public ModulePass {
     if (skipModule(M))
       return false;
 
-    auto OREGetter = function_ref<OptimizationRemarkEmitter &(Function *)>();
+    // In the new pass manager, we can request the optimization
+    // remark emitter pass on a per-function-basis, which the
+    // OREGetter will do for us.
+    // In the old pass manager, this is harder, so we just build
+    // an optimization remark emitter on the fly, when we need it.
+    std::unique_ptr<OptimizationRemarkEmitter> ORE;
+    auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
+      ORE = make_unique<OptimizationRemarkEmitter>(F);
+      return *ORE;
+    };
 
     if (UseCommandLine)
       return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter);
@@ -580,7 +611,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
   auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
   };
-  if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run())
+  if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+           .run())
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
@@ -588,7 +620,7 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
 bool DevirtModule::runForTesting(
     Module &M, function_ref<AAResults &(Function &)> AARGetter,
     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
-  ModuleSummaryIndex Summary;
+  ModuleSummaryIndex Summary(/*HaveGVs=*/false);
 
   // Handle the command-line summary arguments. This code is for testing
   // purposes only, so we handle errors directly.
```
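Note: after these hunks both pass managers supply a non-null `OREGetter`, which is why the `if (OREGetter)` fallbacks are deleted elsewhere in the patch. The legacy path keeps the emitter alive through a `unique_ptr` captured by reference in the lambda. Below is a hedged sketch of that ownership pattern, using illustrative names (`Remarker`, `getRemarker`) rather than LLVM's API.

```cpp
#include <memory>

struct Function;  // opaque stand-in for llvm::Function; only pointers used

// Stand-in for llvm::OptimizationRemarkEmitter; illustrative only.
struct Remarker {
  explicit Remarker(Function *F) : F(F) {}
  Function *F;
};

int main() {
  // The lambda owns the current emitter via the captured unique_ptr, so the
  // returned reference stays valid until the next request replaces it --
  // cheap enough for the legacy pass manager's on-demand use.
  std::unique_ptr<Remarker> ORE;
  auto getRemarker = [&](Function *F) -> Remarker & {
    ORE = std::make_unique<Remarker>(F);
    return *ORE;
  };

  Function *F = nullptr;  // no real function needed for the sketch
  Remarker &R = getRemarker(F);
  (void)R;
}
```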
```diff
@@ -730,10 +762,9 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
       if (VCallSite.NumUnsafeUses)
         --*VCallSite.NumUnsafeUses;
     }
-    if (CSInfo.isExported()) {
+    if (CSInfo.isExported())
       IsExported = true;
-      CSInfo.markDevirt();
-    }
+    CSInfo.markDevirt();
   };
   Apply(SlotInfo.CSInfo);
   for (auto &P : SlotInfo.ConstCSInfo)
@@ -789,6 +820,133 @@ bool DevirtModule::trySingleImplDevirt(
   return true;
 }
 
+void DevirtModule::tryICallBranchFunnel(
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
+  Triple T(M.getTargetTriple());
+  if (T.getArch() != Triple::x86_64)
+    return;
+
+  if (TargetsForSlot.size() > ClThreshold)
+    return;
+
+  bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
+  if (!HasNonDevirt)
+    for (auto &P : SlotInfo.ConstCSInfo)
+      if (!P.second.AllCallSitesDevirted) {
+        HasNonDevirt = true;
+        break;
+      }
+
+  if (!HasNonDevirt)
+    return;
+
+  FunctionType *FT =
+      FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
+  Function *JT;
+  if (isa<MDString>(Slot.TypeID)) {
+    JT = Function::Create(FT, Function::ExternalLinkage,
+                          getGlobalName(Slot, {}, "branch_funnel"), &M);
+    JT->setVisibility(GlobalValue::HiddenVisibility);
+  } else {
+    JT = Function::Create(FT, Function::InternalLinkage, "branch_funnel", &M);
+  }
+  JT->addAttribute(1, Attribute::Nest);
+
+  std::vector<Value *> JTArgs;
+  JTArgs.push_back(JT->arg_begin());
+  for (auto &T : TargetsForSlot) {
+    JTArgs.push_back(getMemberAddr(T.TM));
+    JTArgs.push_back(T.Fn);
+  }
+
+  BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
+  Constant *Intr =
+      Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
+
+  auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
+  CI->setTailCallKind(CallInst::TCK_MustTail);
+  ReturnInst::Create(M.getContext(), nullptr, BB);
+
+  bool IsExported = false;
+  applyICallBranchFunnel(SlotInfo, JT, IsExported);
+  if (IsExported)
+    Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
+}
+
+void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
+                                          Constant *JT, bool &IsExported) {
+  auto Apply = [&](CallSiteInfo &CSInfo) {
+    if (CSInfo.isExported())
+      IsExported = true;
+    if (CSInfo.AllCallSitesDevirted)
+      return;
+    for (auto &&VCallSite : CSInfo.CallSites) {
+      CallSite CS = VCallSite.CS;
+
+      // Jump tables are only profitable if the retpoline mitigation is enabled.
+      Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
+      if (FSAttr.hasAttribute(Attribute::None) ||
+          !FSAttr.getValueAsString().contains("+retpoline"))
+        continue;
+
+      if (RemarksEnabled)
+        VCallSite.emitRemark("branch-funnel", JT->getName(), OREGetter);
+
+      // Pass the address of the vtable in the nest register, which is r10 on
+      // x86_64.
+      std::vector<Type *> NewArgs;
+      NewArgs.push_back(Int8PtrTy);
+      for (Type *T : CS.getFunctionType()->params())
+        NewArgs.push_back(T);
+      PointerType *NewFT = PointerType::getUnqual(
+          FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
+                            CS.getFunctionType()->isVarArg()));
+
+      IRBuilder<> IRB(CS.getInstruction());
+      std::vector<Value *> Args;
+      Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
+      for (unsigned I = 0; I != CS.getNumArgOperands(); ++I)
+        Args.push_back(CS.getArgOperand(I));
+
+      CallSite NewCS;
+      if (CS.isCall())
+        NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+      else
+        NewCS = IRB.CreateInvoke(
+            IRB.CreateBitCast(JT, NewFT),
+            cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
+            cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
+      NewCS.setCallingConv(CS.getCallingConv());
+
+      AttributeList Attrs = CS.getAttributes();
+      std::vector<AttributeSet> NewArgAttrs;
+      NewArgAttrs.push_back(AttributeSet::get(
+          M.getContext(), ArrayRef<Attribute>{Attribute::get(
+                              M.getContext(), Attribute::Nest)}));
+      for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
+        NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+      NewCS.setAttributes(
+          AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
+                             Attrs.getRetAttributes(), NewArgAttrs));
+
+      CS->replaceAllUsesWith(NewCS.getInstruction());
+      CS->eraseFromParent();
+
+      // This use is no longer unsafe.
+      if (VCallSite.NumUnsafeUses)
+        --*VCallSite.NumUnsafeUses;
+    }
+    // Don't mark as devirtualized because there may be callers compiled without
+    // retpoline mitigation, which would mean that they are lowered to
+    // llvm.type.test and therefore require an llvm.type.test resolution for the
+    // type identifier.
+  };
+  Apply(SlotInfo.CSInfo);
+  for (auto &P : SlotInfo.ConstCSInfo)
+    Apply(P.second);
+}
+
 bool DevirtModule::tryEvaluateFunctionsWithArgs(
     MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     ArrayRef<uint64_t> Args) {
@@ -909,7 +1067,7 @@ Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
 
   // We only need to set metadata if the global is newly created, in which
   // case it would not have hidden visibility.
-  if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
+  if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
     return C;
 
   auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
@@ -941,6 +1099,12 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
   CSInfo.markDevirt();
 }
 
+Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
+  Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
+  return ConstantExpr::getGetElementPtr(Int8Ty, C,
+                                        ConstantInt::get(Int64Ty, M->Offset));
+}
+
 bool DevirtModule::tryUniqueRetValOpt(
     unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
@@ -960,12 +1124,7 @@ bool DevirtModule::tryUniqueRetValOpt(
       // checked for a uniform return value in tryUniformRetValOpt.
       assert(UniqueMember);
 
-      Constant *UniqueMemberAddr =
-          ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
-      UniqueMemberAddr = ConstantExpr::getGetElementPtr(
-          Int8Ty, UniqueMemberAddr,
-          ConstantInt::get(Int64Ty, UniqueMember->Offset));
-
+      Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
       if (CSInfo.isExported()) {
         Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
         Res->Info = IsOne;
@@ -1352,6 +1511,14 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
       break;
     }
   }
+
+  if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
+    auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+                                     Type::getVoidTy(M.getContext()));
+    bool IsExported = false;
+    applyICallBranchFunnel(SlotInfo, JT, IsExported);
+    assert(!IsExported);
+  }
 }
 
 void DevirtModule::removeRedundantTypeTests() {
@@ -1421,14 +1588,13 @@ bool DevirtModule::run() {
       // FIXME: Only add live functions.
       for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
         for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-          CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
-              true;
+          CallSlots[{MD, VF.Offset}]
+              .CSInfo.markSummaryHasTypeTestAssumeUsers();
         }
       }
       for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
         for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-          CallSlots[{MD, VF.Offset}]
-              .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+          CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
         }
       }
       for (const FunctionSummary::ConstVCall &VC :
@@ -1436,7 +1602,7 @@ bool DevirtModule::run() {
         for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
           CallSlots[{MD, VC.VFunc.Offset}]
               .ConstCSInfo[VC.Args]
-              .SummaryHasTypeTestAssumeUsers = true;
+              .markSummaryHasTypeTestAssumeUsers();
         }
       }
       for (const FunctionSummary::ConstVCall &VC :
@@ -1444,7 +1610,7 @@ bool DevirtModule::run() {
         for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
           CallSlots[{MD, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
-              .SummaryTypeCheckedLoadUsers.push_back(FS);
+              .addSummaryTypeCheckedLoadUser(FS);
         }
       }
     }
@@ -1468,9 +1634,12 @@ bool DevirtModule::run() {
                   cast<MDString>(S.first.TypeID)->getString())
                   .WPDRes[S.first.ByteOffset];
 
-      if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
-          tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
-        DidVirtualConstProp = true;
+      if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+        DidVirtualConstProp |=
+            tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
+
+        tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
+      }
 
       // Collect functions devirtualized at least for one call site for stats.
       if (RemarksEnabled)
@@ -1499,23 +1668,10 @@ bool DevirtModule::run() {
     for (const auto &DT : DevirtTargets) {
       Function *F = DT.second;
 
-      // In the new pass manager, we can request the optimization
-      // remark emitter pass on a per-function-basis, which the
-      // OREGetter will do for us.
-      // In the old pass manager, this is harder, so we just build
-      // a optimization remark emitter on the fly, when we need it.
-      std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
-      OptimizationRemarkEmitter *ORE;
-      if (OREGetter)
-        ORE = &OREGetter(F);
-      else {
-        OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
-        ORE = OwnedORE.get();
-      }
-
       using namespace ore;
-      ORE->emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
-                << "devirtualized " << NV("FunctionName", F->getName()));
+      OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
+                        << "devirtualized "
+                        << NV("FunctionName", F->getName()));
     }
   }
```
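For reference, the thunk `tryICallBranchFunnel` creates is a single `musttail` call to the `llvm.icall.branch.funnel` intrinsic, which the backend expands into address comparisons and direct tail calls; `applyICallBranchFunnel` rewrites callers built with `+retpoline` to call the thunk with the vtable address prepended as the `nest` argument (r10 on x86-64). The C++ below is a conceptual model of the expanded funnel, not code the pass emits; all names (`VTableA`, `fooA`, `SlotOffset`) are hypothetical.

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical vtables and final overriders; in the pass these come from
// TypeMemberInfo (vtable global plus byte offset) and VirtualCallTarget::Fn.
alignas(8) const char VTableA[32] = {};
alignas(8) const char VTableB[32] = {};
constexpr std::ptrdiff_t SlotOffset = 16;
int fooA(void *, int x) { return x + 1; }
int fooB(void *, int x) { return x + 2; }

// The vtable pointer plays the role of the 'nest' argument (r10 on x86-64),
// leaving the normal argument registers exactly as the original call set them.
int branch_funnel(const char *vtable, void *obj, int x) {
  // Compare against each member's known address; on a match, make a direct
  // (retpoline-free) tail call to that implementation.
  if (vtable == VTableA + SlotOffset)
    return fooA(obj, x);
  if (vtable == VTableB + SlotOffset)
    return fooB(obj, x);
  __builtin_unreachable();  // whole-program view: no other targets exist
}

int main() {
  // A +retpoline caller is rewritten to call branch_funnel with the vtable
  // address prepended, instead of making an indirect call through the slot.
  std::printf("%d\n", branch_funnel(VTableA + SlotOffset, nullptr, 41));  // 42
}
```

Because every comparison targets a known constant, the lowered funnel contains no indirect branch for Spectre v2 to exploit, while callers compiled without retpoline keep the ordinary virtual call and its `llvm.type.test` resolution (hence the "don't mark as devirtualized" comment in the hunk above).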