diff options
Diffstat (limited to 'lib/Transforms/IPO/Inliner.cpp')
| -rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 229 |
1 files changed, 172 insertions, 57 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 317770d133b3..4449c87ddefa 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -14,29 +14,60 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/Inliner.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <algorithm> +#include <cassert> +#include <functional> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "inline" @@ -63,13 +94,16 @@ static cl::opt<bool> cl::init(false), cl::Hidden); namespace { + enum class InlinerFunctionImportStatsOpts { No = 0, Basic = 1, Verbose = 2, }; -cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( +} // end anonymous namespace + +static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( "inliner-function-import-stats", cl::init(InlinerFunctionImportStatsOpts::No), cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", @@ -77,10 +111,8 @@ cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions")); -} // namespace -LegacyInlinerBase::LegacyInlinerBase(char &ID) - : CallGraphSCCPass(ID), InsertLifetime(true) {} +LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} @@ -96,7 +128,7 @@ void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { CallGraphSCCPass::getAnalysisUsage(AU); } -typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy; +using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>; /// Look at all of the allocas that we inlined through this call site. If we /// have already inlined other allocas through other calls into this function, @@ -161,7 +193,6 @@ static void mergeInlinedArrayAllocas( // function. Also, AllocasForType can be empty of course! bool MergedAwayAlloca = false; for (AllocaInst *AvailableAlloca : AllocasForType) { - unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); @@ -267,7 +298,6 @@ static bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, int &TotalSecondaryCost, function_ref<InlineCost(CallSite CS)> GetInlineCost) { - // For now we only handle local or inline functions. if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) return false; @@ -335,11 +365,14 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, return false; } -/// Return true if the inliner should attempt to inline at the given CallSite. -static bool shouldInline(CallSite CS, - function_ref<InlineCost(CallSite CS)> GetInlineCost, - OptimizationRemarkEmitter &ORE) { +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. +static Optional<InlineCost> +shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost, + OptimizationRemarkEmitter &ORE) { using namespace ore; + InlineCost IC = GetInlineCost(CS); Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); @@ -348,32 +381,33 @@ static bool shouldInline(CallSite CS, if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) - << NV("Callee", Callee) - << " should always be inlined (cost=always)"); - return true; + return IC; } if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) << NV("Callee", Callee) << " not inlined into " << NV("Caller", Caller) - << " because it should never be inlined (cost=never)"); - return false; + << " because it should never be inlined (cost=never)"; + }); + return None; } if (!IC) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() - << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", thres=" << IC.getThreshold() << ", Call: " << *CS.getInstruction() << "\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) << NV("Callee", Callee) << " not inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" - << NV("Cost", IC.getCost()) << ", threshold=" - << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); - return false; + << NV("Cost", IC.getCost()) + << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")"; + }); + return None; } int TotalSecondaryCost = 0; @@ -381,23 +415,23 @@ static bool shouldInline(CallSite CS, DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", Call) << "Not inlining. Cost of inlining " << NV("Callee", Callee) << " increases the cost of inlining " << NV("Caller", Caller) - << " in other contexts"); - return false; + << " in other contexts"; + }); + + // IC does not bool() to false, so get an InlineCost that will. + // This will not be inspected to make an error message. + return None; } DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() - << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", thres=" << IC.getThreshold() << ", Call: " << *CS.getInstruction() << '\n'); - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call) - << NV("Callee", Callee) << " can be inlined into " - << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) - << " (threshold=" - << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); - return true; + return IC; } /// Return true if the specified inline history ID @@ -475,11 +509,14 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (Function *Callee = CS.getCalledFunction()) if (Callee->isDeclaration()) { using namespace ore; - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) + + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", CS.getCaller()) << " because its definition is unavailable" - << setIsVerbose()); + << setIsVerbose(); + }); continue; } @@ -545,9 +582,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // just become a regular analysis dependency. OptimizationRemarkEmitter ORE(Caller); + Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); // If the policy determines that we should inline this function, // delete the call instead. - if (!shouldInline(CS, GetInlineCost, ORE)) + if (!OIC) continue; // If this call site is dead and it is to a readonly function, we should @@ -562,26 +600,40 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, ++NumCallsDeleted; } else { // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = Instr->getDebugLoc(); + DebugLoc DLoc = CS->getDebugLoc(); BasicBlock *Block = CS.getParent(); // Attempt to inline the function. using namespace ore; + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime, AARGetter, ImportedFunctionsStats)) { - ORE.emit( - OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, + Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller); + }); continue; } ++NumInlined; - // Report the inline decision. - ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block) - << NV("Callee", Callee) << " inlined into " - << NV("Caller", Caller)); + ORE.emit([&]() { + bool AlwaysInline = OIC->isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block); + R << NV("Callee", Callee) << " inlined into "; + R << NV("Caller", Caller); + if (AlwaysInline) + R << " with cost=always"; + else { + R << " with cost=" << NV("Cost", OIC->getCost()); + R << " (threshold=" << NV("Threshold", OIC->getThreshold()); + R << ")"; + } + return R; + }); // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. @@ -601,7 +653,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. !SCCFunctions.count(Callee) && - // The function may be apparently dead, but if there are indirect // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. @@ -840,6 +891,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) .getManager(); + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); @@ -852,12 +907,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, Function &Callee = *CS.getCalledFunction(); auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - PSI); + PSI, &ORE); }; - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. @@ -872,8 +924,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) continue; + // Check if this inlining may repeat breaking an SCC apart that has + // already been split once before. In that case, inlining here may + // trigger infinite inlining, much like is prevented within the inliner + // itself by the InlineHistory above, but spread across CGSCC iterations + // and thus hidden from the full inline history. + if (CG.lookupSCC(*CG.lookup(Callee)) == C && + UR.InlinedInternalEdges.count({&N, C})) { + DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " + "previously split out of this SCC by inlining: " + << F.getName() << " -> " << Callee.getName() << "\n"); + continue; + } + + Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); // Check whether we want to inline this callsite. - if (!shouldInline(CS, GetInlineCost, ORE)) + if (!OIC) continue; // Setup the data structure used to plumb customization into the @@ -883,11 +949,39 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())), &FAM.getResult<BlockFrequencyAnalysis>(Callee)); - if (!InlineFunction(CS, IFI)) + // Get DebugLoc to report. CS will be invalid after Inliner. + DebugLoc DLoc = CS->getDebugLoc(); + BasicBlock *Block = CS.getParent(); + + using namespace ore; + + if (!InlineFunction(CS, IFI)) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", &Callee) << " will not be inlined into " + << NV("Caller", &F); + }); continue; + } DidInline = true; InlinedCallees.insert(&Callee); + ORE.emit([&]() { + bool AlwaysInline = OIC->isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block); + R << NV("Callee", &Callee) << " inlined into "; + R << NV("Caller", &F); + if (AlwaysInline) + R << " with cost=always"; + else { + R << " with cost=" << NV("Cost", OIC->getCost()); + R << " (threshold=" << NV("Threshold", OIC->getThreshold()); + R << ")"; + } + return R; + }); + // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { int NewHistoryID = InlineHistory.size(); @@ -949,17 +1043,38 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, for (LazyCallGraph::Edge &E : *CalleeN) RC->insertTrivialRefEdge(N, E.getNode()); } - InlinedCallees.clear(); // At this point, since we have made changes we have at least removed // a call instruction. However, in the process we do some incremental // simplification of the surrounding code. This simplification can // essentially do all of the same things as a function pass and we can // re-use the exact same logic for updating the call graph to reflect the - // change.. + // change. + LazyCallGraph::SCC *OldC = C; C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR); DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); RC = &C->getOuterRefSCC(); + + // If this causes an SCC to split apart into multiple smaller SCCs, there + // is a subtle risk we need to prepare for. Other transformations may + // expose an "infinite inlining" opportunity later, and because of the SCC + // mutation, we will revisit this function and potentially re-inline. If we + // do, and that re-inlining also has the potentially to mutate the SCC + // structure, the infinite inlining problem can manifest through infinite + // SCC splits and merges. To avoid this, we capture the originating caller + // node and the SCC containing the call edge. This is a slight over + // approximation of the possible inlining decisions that must be avoided, + // but is relatively efficient to store. + // FIXME: This seems like a very heavyweight way of retaining the inline + // history, we should look for a more efficient way of tracking it. + if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) { + return CG.lookupSCC(*CG.lookup(*Callee)) == OldC; + })) { + DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, " + "retaining this to avoid infinite inlining.\n"); + UR.InlinedInternalEdges.insert({&N, OldC}); + } + InlinedCallees.clear(); } // Now that we've finished inlining all of the calls across this SCC, delete @@ -976,8 +1091,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG) .getManager(); - FAM.clear(*DeadF); - AM.clear(DeadC); + FAM.clear(*DeadF, DeadF->getName()); + AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); CG.removeDeadFunction(*DeadF); |
