summaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO/Inliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/IPO/Inliner.cpp')
-rw-r--r--lib/Transforms/IPO/Inliner.cpp229
1 files changed, 172 insertions, 57 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 317770d133b3..4449c87ddefa 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -14,29 +14,60 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "inline"
@@ -63,13 +94,16 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
namespace {
+
enum class InlinerFunctionImportStatsOpts {
No = 0,
Basic = 1,
Verbose = 2,
};
-cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
+} // end anonymous namespace
+
+static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"inliner-function-import-stats",
cl::init(InlinerFunctionImportStatsOpts::No),
cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
@@ -77,10 +111,8 @@ cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
-} // namespace
-LegacyInlinerBase::LegacyInlinerBase(char &ID)
- : CallGraphSCCPass(ID), InsertLifetime(true) {}
+LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
@@ -96,7 +128,7 @@ void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
CallGraphSCCPass::getAnalysisUsage(AU);
}
-typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;
+using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
/// Look at all of the allocas that we inlined through this call site. If we
/// have already inlined other allocas through other calls into this function,
@@ -161,7 +193,6 @@ static void mergeInlinedArrayAllocas(
// function. Also, AllocasForType can be empty of course!
bool MergedAwayAlloca = false;
for (AllocaInst *AvailableAlloca : AllocasForType) {
-
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
@@ -267,7 +298,6 @@ static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
function_ref<InlineCost(CallSite CS)> GetInlineCost) {
-
// For now we only handle local or inline functions.
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
return false;
@@ -335,11 +365,14 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
return false;
}
-/// Return true if the inliner should attempt to inline at the given CallSite.
-static bool shouldInline(CallSite CS,
- function_ref<InlineCost(CallSite CS)> GetInlineCost,
- OptimizationRemarkEmitter &ORE) {
+/// Return the cost only if the inliner should attempt to inline at the given
+/// CallSite. If we return the cost, we will emit an optimisation remark later
+/// using that cost, so we won't do so from this function.
+static Optional<InlineCost>
+shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ OptimizationRemarkEmitter &ORE) {
using namespace ore;
+
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
@@ -348,32 +381,33 @@ static bool shouldInline(CallSite CS,
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
- << NV("Callee", Callee)
- << " should always be inlined (cost=always)");
- return true;
+ return IC;
}
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Callee) << " not inlined into "
<< NV("Caller", Caller)
- << " because it should never be inlined (cost=never)");
- return false;
+ << " because it should never be inlined (cost=never)";
+ });
+ return None;
}
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", thres=" << IC.getThreshold()
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Callee) << " not inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
- << NV("Cost", IC.getCost()) << ", threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
- return false;
+ << NV("Cost", IC.getCost())
+ << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
+ });
+ return None;
}
int TotalSecondaryCost = 0;
@@ -381,23 +415,23 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts");
- return false;
+ << " in other contexts";
+ });
+
+ // IC does not bool() to false, so get an InlineCost that will.
+ // This will not be inspected to make an error message.
+ return None;
}
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", thres=" << IC.getThreshold()
<< ", Call: " << *CS.getInstruction() << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call)
- << NV("Callee", Callee) << " can be inlined into "
- << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
- << " (threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
- return true;
+ return IC;
}
/// Return true if the specified inline history ID
@@ -475,11 +509,14 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Function *Callee = CS.getCalledFunction())
if (Callee->isDeclaration()) {
using namespace ore;
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
<< NV("Caller", CS.getCaller())
<< " because its definition is unavailable"
- << setIsVerbose());
+ << setIsVerbose();
+ });
continue;
}
@@ -545,9 +582,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// just become a regular analysis dependency.
OptimizationRemarkEmitter ORE(Caller);
+ Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!shouldInline(CS, GetInlineCost, ORE))
+ if (!OIC)
continue;
// If this call site is dead and it is to a readonly function, we should
@@ -562,26 +600,40 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
++NumCallsDeleted;
} else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = Instr->getDebugLoc();
+ DebugLoc DLoc = CS->getDebugLoc();
BasicBlock *Block = CS.getParent();
// Attempt to inline the function.
using namespace ore;
+
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
+ Block)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", Caller);
+ });
continue;
}
++NumInlined;
- // Report the inline decision.
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block)
- << NV("Callee", Callee) << " inlined into "
- << NV("Caller", Caller));
+ ORE.emit([&]() {
+ bool AlwaysInline = OIC->isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
+ R << NV("Callee", Callee) << " inlined into ";
+ R << NV("Caller", Caller);
+ if (AlwaysInline)
+ R << " with cost=always";
+ else {
+ R << " with cost=" << NV("Cost", OIC->getCost());
+ R << " (threshold=" << NV("Threshold", OIC->getThreshold());
+ R << ")";
+ }
+ return R;
+ });
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -601,7 +653,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
-
// The function may be apparently dead, but if there are indirect
// callgraph references to the node, we cannot delete it yet, this
// could invalidate the CGSCC iterator.
@@ -840,6 +891,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
.getManager();
+
+ // Get the remarks emission analysis for the caller.
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
@@ -852,12 +907,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
- PSI);
+ PSI, &ORE);
};
- // Get the remarks emission analysis for the caller.
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
// Now process as many calls as we have within this caller in the sequnece.
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
@@ -872,8 +924,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
continue;
+ // Check if this inlining may repeat breaking an SCC apart that has
+ // already been split once before. In that case, inlining here may
+ // trigger infinite inlining, much like is prevented within the inliner
+ // itself by the InlineHistory above, but spread across CGSCC iterations
+ // and thus hidden from the full inline history.
+ if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
+ UR.InlinedInternalEdges.count({&N, C})) {
+ DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
+ "previously split out of this SCC by inlining: "
+ << F.getName() << " -> " << Callee.getName() << "\n");
+ continue;
+ }
+
+ Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// Check whether we want to inline this callsite.
- if (!shouldInline(CS, GetInlineCost, ORE))
+ if (!OIC)
continue;
// Setup the data structure used to plumb customization into the
@@ -883,11 +949,39 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- if (!InlineFunction(CS, IFI))
+ // Get DebugLoc to report. CS will be invalid after Inliner.
+ DebugLoc DLoc = CS->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+
+ using namespace ore;
+
+ if (!InlineFunction(CS, IFI)) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << NV("Callee", &Callee) << " will not be inlined into "
+ << NV("Caller", &F);
+ });
continue;
+ }
DidInline = true;
InlinedCallees.insert(&Callee);
+ ORE.emit([&]() {
+ bool AlwaysInline = OIC->isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
+ R << NV("Callee", &Callee) << " inlined into ";
+ R << NV("Caller", &F);
+ if (AlwaysInline)
+ R << " with cost=always";
+ else {
+ R << " with cost=" << NV("Cost", OIC->getCost());
+ R << " (threshold=" << NV("Threshold", OIC->getThreshold());
+ R << ")";
+ }
+ return R;
+ });
+
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
@@ -949,17 +1043,38 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
for (LazyCallGraph::Edge &E : *CalleeN)
RC->insertTrivialRefEdge(N, E.getNode());
}
- InlinedCallees.clear();
// At this point, since we have made changes we have at least removed
// a call instruction. However, in the process we do some incremental
// simplification of the surrounding code. This simplification can
// essentially do all of the same things as a function pass and we can
// re-use the exact same logic for updating the call graph to reflect the
- // change..
+ // change.
+ LazyCallGraph::SCC *OldC = C;
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
+
+ // If this causes an SCC to split apart into multiple smaller SCCs, there
+ // is a subtle risk we need to prepare for. Other transformations may
+ // expose an "infinite inlining" opportunity later, and because of the SCC
+ // mutation, we will revisit this function and potentially re-inline. If we
+ // do, and that re-inlining also has the potentially to mutate the SCC
+ // structure, the infinite inlining problem can manifest through infinite
+ // SCC splits and merges. To avoid this, we capture the originating caller
+ // node and the SCC containing the call edge. This is a slight over
+ // approximation of the possible inlining decisions that must be avoided,
+ // but is relatively efficient to store.
+ // FIXME: This seems like a very heavyweight way of retaining the inline
+ // history, we should look for a more efficient way of tracking it.
+ if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+ return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
+ })) {
+ DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
+ "retaining this to avoid infinite inlining.\n");
+ UR.InlinedInternalEdges.insert({&N, OldC});
+ }
+ InlinedCallees.clear();
}
// Now that we've finished inlining all of the calls across this SCC, delete
@@ -976,8 +1091,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
.getManager();
- FAM.clear(*DeadF);
- AM.clear(DeadC);
+ FAM.clear(*DeadF, DeadF->getName());
+ AM.clear(DeadC, DeadC.getName());
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);