Diffstat (limited to 'llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r--  llvm/lib/Transforms/IPO/Inliner.cpp  617
1 file changed, 226 insertions(+), 391 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 4b72261131c1..7d2260f4c169 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,16 +29,16 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -57,8 +58,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -77,11 +80,6 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
-// This weirdly named statistic tracks the number of times that, when attempting
-// to inline a function A into B, we analyze the callers of B in order to see
-// if those would be more profitable and blocked inline steps.
-STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-
/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
@@ -112,14 +110,6 @@ static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
-/// Flag to add inline messages as callsite attributes 'inline-remark'.
-static cl::opt<bool>
- InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
- cl::Hidden,
- cl::desc("Enable adding inline-remark attribute to"
- " callsites processed by inliner but decided"
- " to be not inlined"));
-
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -158,13 +148,13 @@ using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
-static void mergeInlinedArrayAllocas(
- Function *Caller, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
+static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
SmallPtrSet<AllocaInst *, 16> UsedAllocas;
- // When processing our SCC, check to see if CS was inlined from some other
- // call site. For example, if we're processing "A" in this code:
+ // When processing our SCC, check to see if the call site was inlined from
+ // some other call site. For example, if we're processing "A" in this code:
// A() { B() }
// B() { x = alloca ... C() }
// C() { y = alloca ... }
@@ -180,7 +170,7 @@ static void mergeInlinedArrayAllocas(
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
- for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
+ for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
++AllocaNo) {
AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
@@ -201,8 +191,8 @@ static void mergeInlinedArrayAllocas(
// function. Also, AllocasForType can be empty of course!
bool MergedAwayAlloca = false;
for (AllocaInst *AvailableAlloca : AllocasForType) {
- unsigned Align1 = AI->getAlignment(),
- Align2 = AvailableAlloca->getAlignment();
+ Align Align1 = AI->getAlign();
+ Align Align2 = AvailableAlloca->getAlign();
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -229,18 +219,8 @@ static void mergeInlinedArrayAllocas(
AI->replaceAllUsesWith(AvailableAlloca);
- if (Align1 != Align2) {
- if (!Align1 || !Align2) {
- const DataLayout &DL = Caller->getParent()->getDataLayout();
- unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
-
- Align1 = Align1 ? Align1 : TypeAlign;
- Align2 = Align2 ? Align2 : TypeAlign;
- }
-
- if (Align1 > Align2)
- AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
- }
+ if (Align1 > Align2)
+ AvailableAlloca->setAlignment(AI->getAlign());
AI->eraseFromParent();
MergedAwayAlloca = true;
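A note on the alignment change above: llvm::Align is always a valid power of two, so the old special case where an alignment of zero meant "use the ABI type alignment" disappears. A minimal sketch of the resulting merge rule (illustrative helper, not part of the patch):

    // Keep whichever of the two allocas' alignments is larger before
    // replacing all uses of AI with AvailableAlloca.
    static void keepLargerAlign(AllocaInst *AI, AllocaInst *AvailableAlloca) {
      if (AI->getAlign() > AvailableAlloca->getAlign())
        AvailableAlloca->setAlignment(AI->getAlign());
    }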
@@ -271,20 +251,20 @@ static void mergeInlinedArrayAllocas(
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static InlineResult InlineCallIfPossible(
- CallSite CS, InlineFunctionInfo &IFI,
+static InlineResult inlineCallIfPossible(
+ CallBase &CB, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
+ Function *Callee = CB.getCalledFunction();
+ Function *Caller = CB.getCaller();
AAResults &AAR = AARGetter(*Callee);
// Try to inline the function. Get the list of static allocas that were
// inlined.
- InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
- if (!IR)
+ InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime);
+ if (!IR.isSuccess())
return IR;
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
@@ -298,188 +278,9 @@ static InlineResult InlineCallIfPossible(
return IR; // success
}
-/// Return true if inlining of CS can block the caller from being
-/// inlined which is proved to be more beneficial. \p IC is the
-/// estimated inline cost associated with callsite \p CS.
-/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
-/// caller if \p CS is suppressed for inlining.
-static bool
-shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
- int &TotalSecondaryCost,
- function_ref<InlineCost(CallSite CS)> GetInlineCost) {
- // For now we only handle local or inline functions.
- if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
- return false;
- // If the cost of inlining CS is non-positive, it is not going to prevent the
- // caller from being inlined into its callers and hence we don't need to
- // defer.
- if (IC.getCost() <= 0)
- return false;
- // Try to detect the case where the current inlining candidate caller (call
- // it B) is a static or linkonce-ODR function and is an inlining candidate
- // elsewhere, and the current candidate callee (call it C) is large enough
- // that inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B into
- // its callers.
- //
- // This only applies to static and linkonce-ODR functions because those are
- // expected to be available for inlining in the translation units where they
- // are used. Thus we will always have the opportunity to make local inlining
- // decisions. Importantly the linkonce-ODR linkage covers inline functions
- // and templates in C++.
- //
- // FIXME: All of this logic should be sunk into getInlineCost. It relies on
- // the internal implementation of the inline cost metrics rather than
- // treating them as truly abstract units etc.
- TotalSecondaryCost = 0;
- // The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - 1;
- // If the caller has local linkage and can be inlined to all its callers, we
- // can apply a huge negative bonus to TotalSecondaryCost.
- bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
- // This bool tracks what happens if we DO inline C into B.
- bool inliningPreventsSomeOuterInline = false;
- for (User *U : Caller->users()) {
- // If the caller will not be removed (either because it does not have a
- // local linkage or because the LastCallToStaticBonus has been already
- // applied), then we can exit the loop early.
- if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
- return false;
- CallSite CS2(U);
-
- // If this isn't a call to Caller (it could be some other sort
- // of reference) skip it. Such references will prevent the caller
- // from being removed.
- if (!CS2 || CS2.getCalledFunction() != Caller) {
- ApplyLastCallBonus = false;
- continue;
- }
-
- InlineCost IC2 = GetInlineCost(CS2);
- ++NumCallerCallersAnalyzed;
- if (!IC2) {
- ApplyLastCallBonus = false;
- continue;
- }
- if (IC2.isAlways())
- continue;
-
- // See if inlining of the original callsite would erase the cost delta of
- // this callsite. We subtract off the penalty for the call instruction,
- // which we would be deleting.
- if (IC2.getCostDelta() <= CandidateCost) {
- inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += IC2.getCost();
- }
- }
- // If all outer calls to Caller would get inlined, the cost for the last
- // one is set very low by getInlineCost, in anticipation that Caller will
- // be removed entirely. We did not account for this above unless there
- // is only one caller of Caller.
- if (ApplyLastCallBonus)
- TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
-
- if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
- return true;
-
- return false;
-}
-
-static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
- const ore::NV &Arg) {
- return R << Arg.Val;
-}
-
-template <class RemarkT>
-RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
- using namespace ore;
- if (IC.isAlways()) {
- R << "(cost=always)";
- } else if (IC.isNever()) {
- R << "(cost=never)";
- } else {
- R << "(cost=" << ore::NV("Cost", IC.getCost())
- << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
- }
- if (const char *Reason = IC.getReason())
- R << ": " << ore::NV("Reason", Reason);
- return R;
-}
-
-static std::string inlineCostStr(const InlineCost &IC) {
- std::stringstream Remark;
- Remark << IC;
- return Remark.str();
-}
-
-/// Return the cost only if the inliner should attempt to inline at the given
-/// CallSite. If we return the cost, we will emit an optimisation remark later
-/// using that cost, so we won't do so from this function.
-static Optional<InlineCost>
-shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
- OptimizationRemarkEmitter &ORE) {
- using namespace ore;
-
- InlineCost IC = GetInlineCost(CS);
- Instruction *Call = CS.getInstruction();
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
-
- if (IC.isAlways()) {
- LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- return IC;
- }
-
- if (IC.isNever()) {
- LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because it should never be inlined "
- << IC;
- });
- return IC;
- }
-
- if (!IC) {
- LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline " << IC;
- });
- return IC;
- }
-
- int TotalSecondaryCost = 0;
- if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
- << " Cost = " << IC.getCost()
- << ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
- Call)
- << "Not inlining. Cost of inlining " << NV("Callee", Callee)
- << " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts";
- });
-
- // IC does not bool() to false, so get an InlineCost that will.
- // This will not be inspected to make an error message.
- return None;
- }
-
- LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << '\n');
- return IC;
-}
-
/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
-static bool InlineHistoryIncludes(
+static bool inlineHistoryIncludes(
Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
while (InlineHistoryID != -1) {
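For orientation, the inline history is a parent-pointer chain: each entry pairs an inlined callee with the history ID of the call site it came from, and -1 marks the root. A sketch of the walk this function performs (shape assumed from the code above):

    // Returns true if F already appears on the inline chain ending at ID,
    // i.e. inlining through this call site again would recurse forever.
    static bool historyIncludes(
        Function *F, int ID,
        const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
      while (ID != -1) {
        assert((unsigned)ID < InlineHistory.size() && "invalid history ID");
        if (InlineHistory[ID].first == F)
          return true;
        ID = InlineHistory[ID].second; // Step toward the root.
      }
      return false;
    }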
@@ -504,33 +305,13 @@ bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
return inlineCalls(SCC);
}
-static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
- const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC) {
- ORE.emit([&]() {
- bool AlwaysInline = IC.isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
- << ore::NV("Callee", &Callee) << " inlined into "
- << ore::NV("Caller", &Caller) << " with " << IC;
- });
-}
-
-static void setInlineRemark(CallSite &CS, StringRef message) {
- if (!InlineRemarkAttribute)
- return;
-
- Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
- CS.addAttribute(AttributeList::FunctionIndex, attr);
-}
-
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
ProfileSummaryInfo *PSI,
- std::function<TargetLibraryInfo &(Function &)> GetTLI,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
bool InsertLifetime,
- function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ function_ref<InlineCost(CallBase &CB)> GetInlineCost,
function_ref<AAResults &(Function &)> AARGetter,
ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
SmallPtrSet<Function *, 8> SCCFunctions;
@@ -545,7 +326,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- SmallVector<std::pair<CallSite, int>, 16> CallSites;
+ SmallVector<std::pair<CallBase *, int>, 16> CallSites;
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
@@ -561,31 +342,31 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
OptimizationRemarkEmitter ORE(F);
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- CallSite CS(cast<Value>(&I));
+ auto *CB = dyn_cast<CallBase>(&I);
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
- if (!CS || isa<IntrinsicInst>(I))
+ if (!CB || isa<IntrinsicInst>(I))
continue;
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
- if (Function *Callee = CS.getCalledFunction())
+ if (Function *Callee = CB->getCalledFunction())
if (Callee->isDeclaration()) {
using namespace ore;
- setInlineRemark(CS, "unavailable definition");
+ setInlineRemark(*CB, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", CS.getCaller())
+ << NV("Caller", CB->getCaller())
<< " because its definition is unavailable"
<< setIsVerbose();
});
continue;
}
- CallSites.push_back(std::make_pair(CS, -1));
+ CallSites.push_back(std::make_pair(CB, -1));
}
}
@@ -598,13 +379,13 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Now that we have all of the call sites, move the ones to functions in the
// current SCC to the end of the list.
unsigned FirstCallInSCC = CallSites.size();
- for (unsigned i = 0; i < FirstCallInSCC; ++i)
- if (Function *F = CallSites[i].first.getCalledFunction())
+ for (unsigned I = 0; I < FirstCallInSCC; ++I)
+ if (Function *F = CallSites[I].first->getCalledFunction())
if (SCCFunctions.count(F))
- std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+ std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
+ InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -616,31 +397,28 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// calls to become direct calls.
// CallSites may be modified inside, so a range-based for loop cannot be used.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
- CallSite CS = CallSites[CSi].first;
+ auto &P = CallSites[CSi];
+ CallBase &CB = *P.first;
+ const int InlineHistoryID = P.second;
- Function *Caller = CS.getCaller();
- Function *Callee = CS.getCalledFunction();
+ Function *Caller = CB.getCaller();
+ Function *Callee = CB.getCalledFunction();
// We can only inline direct calls to non-declarations.
if (!Callee || Callee->isDeclaration())
continue;
- Instruction *Instr = CS.getInstruction();
-
- bool IsTriviallyDead =
- isInstructionTriviallyDead(Instr, &GetTLI(*Caller));
+ bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));
- int InlineHistoryID;
if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the inline path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- setInlineRemark(CS, "recursive");
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CB, "recursive");
continue;
}
}
@@ -650,56 +428,49 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// just become a regular analysis dependency.
OptimizationRemarkEmitter ORE(Caller);
- Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
+ auto OIC = shouldInline(CB, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!OIC.hasValue()) {
- setInlineRemark(CS, "deferred");
- continue;
- }
-
- if (!OIC.getValue()) {
- // shouldInline() call returned a negative inline cost that explains
- // why this callsite should not be inlined.
- setInlineRemark(CS, inlineCostStr(*OIC));
+ if (!OIC)
continue;
- }
// If this call site is dead and it is to a readonly function, we should
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
if (IsTriviallyDead) {
- LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
- setInlineRemark(CS, "trivially dead");
- CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
- Instr->eraseFromParent();
+ setInlineRemark(CB, "trivially dead");
+ CG[Caller]->removeCallEdgeFor(CB);
+ CB.eraseFromParent();
++NumCallsDeleted;
} else {
- // Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS->getDebugLoc();
- BasicBlock *Block = CS.getParent();
+ // Get DebugLoc to report. CB will be invalid after Inliner.
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *Block = CB.getParent();
// Attempt to inline the function.
using namespace ore;
- InlineResult IR = InlineCallIfPossible(
- CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
+ InlineResult IR = inlineCallIfPossible(
+ CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
InsertLifetime, AARGetter, ImportedFunctionsStats);
- if (!IR) {
- setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
+ if (!IR.isSuccess()) {
+ setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
+ inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
Block)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
+ << NV("Caller", Caller) << ": "
+ << NV("Reason", IR.getFailureReason());
});
continue;
}
++NumInlined;
- emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -709,8 +480,23 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
- for (Value *Ptr : InlineInfo.InlinedCalls)
- CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+#ifndef NDEBUG
+ // Make sure there are no duplicates in the inline candidates. This
+ // could happen when a callsite is simplified to reusing the return
+ // value of another callsite during function cloning, thus the other
+ // callsite will be reconsidered here.
+ DenseSet<CallBase *> DbgCallSites;
+ for (auto &II : CallSites)
+ DbgCallSites.insert(II.first);
+#endif
+
+ for (Value *Ptr : InlineInfo.InlinedCalls) {
+#ifndef NDEBUG
+ assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
+#endif
+ CallSites.push_back(
+ std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
+ }
}
}
@@ -759,7 +545,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
+ GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
};
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
@@ -767,7 +553,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
};
return inlineCallsImpl(
SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
- [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this),
+ [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
ImportedFunctionsStats);
}
@@ -870,16 +656,47 @@ InlinerPass::~InlinerPass() {
}
}
+InlineAdvisor &
+InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
+ FunctionAnalysisManager &FAM, Module &M) {
+ auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IAA) {
+ // It should still be possible to run the inliner as a stand-alone SCC pass,
+ // for test scenarios. In that case, we default to the
+ // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass
+ // runs. It also uses just the default InlineParams.
+ // In this case, we need to use the provided FAM, which is valid for the
+ // duration of the inliner pass, and thus the lifetime of the owned advisor.
+ // The one we would get from the MAM can be invalidated as a result of the
+ // inliner's activity.
+ OwnedDefaultAdvisor.emplace(FAM, getInlineParams());
+ return *OwnedDefaultAdvisor;
+ }
+ assert(IAA->getAdvisor() &&
+ "Expected a present InlineAdvisorAnalysis also have an "
+ "InlineAdvisor initialized");
+ return *IAA->getAdvisor();
+}
+
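Later in run(), the advisor is driven by a query/record protocol: the pass asks for advice per call site, then reports what actually happened. A condensed sketch assembled from the changes below (control flow simplified; getAdvice returning an owning pointer is assumed from the auto in run()):

    auto Advice = Advisor.getAdvice(*CB);       // query
    if (!Advice->isInliningRecommended()) {
      Advice->recordUnattemptedInlining();      // outcome: skipped
    } else {
      InlineResult IR = InlineFunction(*CB, IFI);
      if (!IR.isSuccess())
        Advice->recordUnsuccessfulInlining(IR); // outcome: failed
      else if (CalleeWasDeleted)
        Advice->recordInliningWithCalleeDeleted();
      else
        Advice->recordInlining();               // outcome: inlined
    }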
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- const ModuleAnalysisManager &MAM =
- AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+ const auto &MAMProxy =
+ AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
bool Changed = false;
assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
Module &M = *InitialC.begin()->getFunction().getParent();
- ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+ ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+ .getManager();
+
+ InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
+ Advisor.onPassEntry();
+
+ auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
if (!ImportedFunctionsStats &&
InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
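The make_scope_exit guard above (from the ScopeExit.h include added at the top of this patch) runs its callback when the returned object is destroyed, so onPassExit fires on every exit path from run(). The idiom in isolation:

    #include "llvm/ADT/ScopeExit.h"

    void example() {
      auto Guard = llvm::make_scope_exit([] { /* cleanup */ });
      // ... any early return below still runs the cleanup when Guard
      // goes out of scope ...
    }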
@@ -912,11 +729,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally making a single function grow in a super linear fashion.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
+ SmallVector<std::pair<CallBase *, int>, 16> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -928,17 +741,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// FIXME: Using instructions sequence is a really bad way to do this.
// Instead we should do an actual RPO walk of the function body.
for (Instruction &I : instructions(N.getFunction()))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction()) {
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
+ Calls.push_back({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
- setInlineRemark(CS, "unavailable definition");
+ setInlineRemark(*CB, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", CS.getCaller())
+ << NV("Caller", CB->getCaller())
<< " because its definition is unavailable"
<< setIsVerbose();
});
@@ -969,68 +782,41 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Loop forward over all of the calls. Note that we cannot cache the size as
// inlining can introduce new calls that need to be processed.
- for (int i = 0; i < (int)Calls.size(); ++i) {
+ for (int I = 0; I < (int)Calls.size(); ++I) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls[i].first.getCaller();
+ Function &F = *Calls[I].first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasOptNone()) {
- setInlineRemark(Calls[i].first, "optnone attribute");
+ if (!Calls[I].first->getCalledFunction()->hasFnAttribute(
+ Attribute::AlwaysInline) &&
+ F.hasOptNone()) {
+ setInlineRemark(*Calls[I].first, "optnone attribute");
continue;
}
LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
- // Get a FunctionAnalysisManager via a proxy for this particular node. We
- // do this each time we visit a node as the SCC may have changed and as
- // we're going to mutate this particular function we want to make sure the
- // proxy is in place to forward any invalidation events. We can use the
- // manager we get here for looking up results for functions other than this
- // node however because those functions aren't going to be mutated by this
- // pass.
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
- .getManager();
-
- // Get the remarks emission analysis for the caller.
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
- return FAM.getResult<BlockFrequencyAnalysis>(F);
- };
-
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- bool RemarksEnabled =
- Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
- DEBUG_TYPE);
- return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
- CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
- RemarksEnabled ? &ORE : nullptr);
- };
- // Now process as many calls as we have within this caller in the sequnece.
+ // Now process as many calls as we have within this caller in the sequence.
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
- int InlineHistoryID;
- CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls[i];
- Function &Callee = *CS.getCalledFunction();
+ for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) {
+ auto &P = Calls[I];
+ CallBase *CB = P.first;
+ const int InlineHistoryID = P.second;
+ Function &Callee = *CB->getCalledFunction();
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
- setInlineRemark(CS, "recursive");
+ inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(*CB, "recursive");
continue;
}
@@ -1044,62 +830,53 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
- setInlineRemark(CS, "recursive SCC split");
+ setInlineRemark(*CB, "recursive SCC split");
continue;
}
- Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
+ auto Advice = Advisor.getAdvice(*CB);
// Check whether we want to inline this callsite.
- if (!OIC.hasValue()) {
- setInlineRemark(CS, "deferred");
- continue;
- }
-
- if (!OIC.getValue()) {
- // shouldInline() call returned a negative inline cost that explains
- // why this callsite should not be inlined.
- setInlineRemark(CS, inlineCostStr(*OIC));
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
continue;
}
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
- /*cg=*/nullptr, &GetAssumptionCache, PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ /*cg=*/nullptr, GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- // Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS->getDebugLoc();
- BasicBlock *Block = CS.getParent();
-
- using namespace ore;
-
- InlineResult IR = InlineFunction(CS, IFI);
- if (!IR) {
- setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", &Callee) << " will not be inlined into "
- << NV("Caller", &F) << ": " << NV("Reason", IR.message);
- });
+ InlineResult IR = InlineFunction(*CB, IFI);
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
continue;
}
+
DidInline = true;
InlinedCallees.insert(&Callee);
-
++NumInlined;
- emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
-
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back({&Callee, InlineHistoryID});
- for (CallSite &CS : reverse(IFI.InlinedCallSites))
- if (Function *NewCallee = CS.getCalledFunction())
+
+ for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (!NewCallee) {
+ // Try to promote an indirect (virtual) call without waiting for
+ // the post-inline cleanup and the next DevirtSCCRepeatedPass
+ // iteration because the next iteration may not happen and we may
+ // miss inlining it.
+ if (tryPromoteCall(*ICB))
+ NewCallee = ICB->getCalledFunction();
+ }
+ if (NewCallee)
if (!NewCallee->isDeclaration())
- Calls.push_back({CS, NewHistoryID});
+ Calls.push_back({ICB, NewHistoryID});
+ }
}
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
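For context, tryPromoteCall (from the newly included CallPromotionUtils.h) succeeds when it can prove the target of an indirect call, for example by tracing a vtable load to a unique implementation, and mutates the call to be direct in place; that is why getCalledFunction() is re-read afterwards. A hedged micro-example of the pattern (Worklist and the surrounding names are illustrative):

    if (!ICB->getCalledFunction() && tryPromoteCall(*ICB)) {
      // The call now names its target directly; queue it if it has a body.
      if (Function *Impl = ICB->getCalledFunction())
        if (!Impl->isDeclaration())
          Worklist.push_back({ICB, NewHistoryID});
    }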
@@ -1112,15 +889,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// dead. In that case, we can drop the body of the function eagerly
// which may reduce the number of callers of other functions to one,
// changing inline cost thresholds.
+ bool CalleeWasDeleted = false;
if (Callee.hasLocalLinkage()) {
// To check this we also need to nuke any dead constant uses (perhaps
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
Calls.erase(
- std::remove_if(Calls.begin() + i + 1, Calls.end(),
- [&Callee](const std::pair<CallSite, int> &Call) {
- return Call.first.getCaller() == &Callee;
+ std::remove_if(Calls.begin() + I + 1, Calls.end(),
+ [&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
}),
Calls.end());
// Clear the body and queue the function itself for deletion when we
@@ -1131,13 +909,18 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
"Cannot put cause a function to become dead twice!");
DeadFunctions.push_back(&Callee);
+ CalleeWasDeleted = true;
}
}
+ if (CalleeWasDeleted)
+ Advice->recordInliningWithCalleeDeleted();
+ else
+ Advice->recordInlining();
}
// Back the call index up by one to put us in a good position to go around
// the outer loop.
- --i;
+ --I;
if (!DidInline)
continue;
@@ -1163,8 +946,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// essentially do all of the same things as a function pass and we can
// re-use the exact same logic for updating the call graph to reflect the
// change.
+
+ // Inside the update, we also update the FunctionAnalysisManager in the
+ // proxy for this particular SCC. We do this as the SCC may have changed and
+ // as we're going to mutate this particular function we want to make sure
+ // the proxy is in place to forward any invalidation events.
LazyCallGraph::SCC *OldC = C;
- C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR, FAM);
LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
@@ -1208,11 +996,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there. Also, cclear out any cached analyses.
+ // function there. Also, clear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
- .getManager();
FAM.clear(*DeadF, DeadF->getName());
AM.clear(DeadC, DeadC.getName());
auto &DeadRC = DeadC.getOuterRefSCC();
@@ -1224,7 +1009,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InvalidatedRefSCCs.insert(&DeadRC);
// And delete the actual function from the module.
- M.getFunctionList().erase(DeadF);
+ // The Advisor may use Function pointers to efficiently index various
+ // internal maps, e.g. for memoization. Function cleanup passes like
+ // argument promotion create new functions. It is possible for a new
+ // function to be allocated at the address of a deleted function. We could
+ // index using names, but that's inefficient. Alternatively, we let the
+ // Advisor free the functions when it sees fit.
+ DeadF->getBasicBlockList().clear();
+ M.getFunctionList().remove(DeadF);
+
++NumDeleted;
}
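The erase-to-remove switch matters because ilist erase() unlinks and destroys the Function, allowing its address to be recycled, while remove() only unlinks it. A side-by-side sketch of the two calls (the comments carry the distinction):

    M.getFunctionList().erase(DeadF);    // old: unlink and delete; a later
                                         // Function may reuse the address.
    DeadF->getBasicBlockList().clear();  // new: drop the body eagerly,
    M.getFunctionList().remove(DeadF);   // then unlink only, keeping the
                                         // address unique until the advisor
                                         // releases the object.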
@@ -1237,3 +1030,45 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
return PA;
}
+
+ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
+ bool Debugging,
+ InliningAdvisorMode Mode,
+ unsigned MaxDevirtIterations)
+ : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
+ PM(Debugging), MPM(Debugging) {
+ // Run the inliner first. The theory is that we are walking bottom-up and so
+ // the callees have already been fully optimized, and we want to inline them
+ // into the callers so that our optimizations can reflect that.
+ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
+ // because it makes profile annotation in the backend inaccurate.
+ PM.addPass(InlinerPass());
+}
+
+PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
+ if (!IAA.tryCreate(Params, Mode)) {
+ M.getContext().emitError(
+ "Could not setup Inlining Advisor for the requested "
+ "mode and/or options");
+ return PreservedAnalyses::all();
+ }
+
+ // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+ // to detect when we devirtualize indirect calls and iterate the SCC passes
+ // in that case to try and catch knock-on inlining or function attrs
+ // opportunities. Then we add it to the module pipeline by walking the SCCs
+ // in postorder (or bottom-up).
+ // If MaxDevirtIterations is 0, we just don't use the devirtualization
+ // wrapper.
+ if (MaxDevirtIterations == 0)
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
+ else
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+ auto Ret = MPM.run(M, MAM);
+
+ IAA.clear();
+ return Ret;
+}
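A hedged usage sketch of the new wrapper in a module pipeline (the parameter values are illustrative, not prescribed by this patch; the analysis manager must have InlineAdvisorAnalysis registered, as run() requires):

    ModulePassManager MPM;
    MPM.addPass(ModuleInlinerWrapperPass(getInlineParams(),
                                         /*Debugging=*/false,
                                         InliningAdvisorMode::Default,
                                         /*MaxDevirtIterations=*/4));
    MPM.run(M, MAM);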