aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/Inliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp111
1 files changed, 75 insertions, 36 deletions
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..4d32266eb9ea 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -14,21 +14,21 @@
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
@@ -37,11 +37,9 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -67,8 +65,6 @@
#include <algorithm>
#include <cassert>
#include <functional>
-#include <sstream>
-#include <tuple>
#include <utility>
#include <vector>
@@ -92,11 +88,28 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
+static cl::opt<int> IntraSCCCostMultiplier(
+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+ cl::desc(
+ "Cost multiplier to multiply onto inlined call sites where the "
+ "new call was previously an intra-SCC call (not relevant when the "
+ "original call was already intra-SCC). This can accumulate over "
+ "multiple inlinings (e.g. if a call site already had a cost "
+ "multiplier and one of its inlined calls was also subject to "
+ "this, the inlined call would have the original multiplier "
+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+ "inlining through a child SCC which can cause terrible compile times"));
+
/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
cl::init(false), cl::Hidden);
+/// Allows printing the contents of the advisor after each SCC inliner pass.
+static cl::opt<bool>
+ EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
+ cl::init(false), cl::Hidden);
+
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
static cl::opt<std::string> CGSCCInlineReplayFile(
@@ -150,10 +163,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
"<Line Number>:<Column Number>.<Discriminator> (default)")),
cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
-static cl::opt<bool> InlineEnablePriorityOrder(
- "inline-enable-priority-order", cl::Hidden, cl::init(false),
- cl::desc("Enable the priority inline order for the inliner"));
-
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -708,8 +717,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
- OwnedAdvisor =
- std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
+ M, FAM, getInlineParams(),
+ InlineContext{LTOPhase, InlinePass::CGSCCInliner});
if (!CGSCCInlineReplayFile.empty())
OwnedAdvisor = getReplayInlineAdvisor(
@@ -718,7 +728,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
{CGSCCInlineReplayFormat}},
- /*EmitRemarks=*/true);
+ /*EmitRemarks=*/true,
+ InlineContext{LTOPhase,
+ InlinePass::ReplayCGSCCInliner});
return *OwnedAdvisor;
}
@@ -744,7 +756,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
.getManager();
InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
- Advisor.onPassEntry();
+ Advisor.onPassEntry(&InitialC);
auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); });
@@ -773,12 +785,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally making a single function grow in a super linear fashion.
- std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
- if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
- else
- Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
- assert(Calls != nullptr && "Expected an initialized InlineOrder");
+ DefaultInlineOrder<std::pair<CallBase *, int>> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -793,7 +800,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls->push({CB, -1});
+ Calls.push({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@@ -807,7 +814,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
}
- if (Calls->empty())
+ if (Calls.empty())
return PreservedAnalyses::all();
// Capture updatable variable for the current SCC.
@@ -833,15 +840,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
SmallVector<Function *, 4> DeadFunctionsInComdats;
// Loop forward over all of the calls.
- while (!Calls->empty()) {
+ while (!Calls.empty()) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls->front().first->getCaller();
+ Function &F = *Calls.front().first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C) {
- Calls->pop();
+ Calls.pop();
continue;
}
@@ -857,8 +864,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
- auto P = Calls->pop();
+ while (!Calls.empty() && Calls.front().first->getCaller() == &F) {
+ auto P = Calls.pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
Function &Callee = *CB->getCalledFunction();
@@ -876,8 +883,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
- UR.InlinedInternalEdges.count({&N, C})) {
+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +904,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ int CBCostMult =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+ .value_or(1);
+
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
@@ -935,9 +947,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
- if (NewCallee)
- if (!NewCallee->isDeclaration())
- Calls->push({ICB, NewHistoryID});
+ if (NewCallee) {
+ if (!NewCallee->isDeclaration()) {
+ Calls.push({ICB, NewHistoryID});
+ // Continually inlining through an SCC can result in huge compile
+ // times and bloated code since we arbitrarily stop at some point
+ // when the inliner decides it's not profitable to inline anymore.
+ // We attempt to mitigate this by making these calls exponentially
+ // more expensive.
+ // This doesn't apply to calls in the same SCC since if we do
+ // inline through the SCC the function will end up being
+ // self-recursive which the inliner bails out on, and inlining
+ // within an SCC is necessary for performance.
+ if (CalleeSCC != C &&
+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+ Attribute NewCBCostMult = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineCostMultiplierAttributeName,
+ itostr(CBCostMult * IntraSCCCostMultiplier));
+ ICB->addFnAttr(NewCBCostMult);
+ }
+ }
+ }
}
}
@@ -953,7 +984,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
!CG.isLibFunction(Callee)) {
if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
- Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ Calls.erase_if([&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
});
// Clear the body and queue the function itself for deletion when we
@@ -1083,17 +1114,24 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
bool MandatoryFirst,
+ InlineContext IC,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
- : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) {
+ : Params(Params), IC(IC), Mode(Mode),
+ MaxDevirtIterations(MaxDevirtIterations) {
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
- if (MandatoryFirst)
+ if (MandatoryFirst) {
PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
+ }
PM.addPass(InlinerPass());
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
}
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
@@ -1103,7 +1141,8 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
{CGSCCInlineReplayFile,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
- {CGSCCInlineReplayFormat}})) {
+ {CGSCCInlineReplayFormat}},
+ IC)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");