diff options
Diffstat (limited to 'lib/Passes/PassBuilder.cpp')
| -rw-r--r-- | lib/Passes/PassBuilder.cpp | 169 |
1 files changed, 117 insertions, 52 deletions
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 9e0cf27aa17b..cbae16a04ca6 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" @@ -41,7 +40,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" @@ -63,6 +62,7 @@ #include "llvm/Transforms/GCOVProfiler.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" @@ -83,15 +83,18 @@ #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/PGOInstrumentation.h" #include "llvm/Transforms/SampleProfile.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -123,28 +126,29 @@ #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" -#include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/SimplifyInstructions.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" -#include <type_traits> using namespace llvm; @@ -325,8 +329,8 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging, - bool PrepareForThinLTO) { + ThinLTOPhase Phase, + bool DebugLogging) { assert(Level != O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); @@ -361,6 +365,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(FPM, Level); + // For PGO use pipeline, try to optimize memory intrinsics such as memcpy + // using the size value profile. Don't perform this when optimizing for size. + if (PGOOpt && !PGOOpt->ProfileUseFile.empty() && + !isOptimizingForSize(Level)) + FPM.addPass(PGOMemOPSizeOpt()); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -389,11 +399,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, C(LPM2, Level); LPM2.addPass(LoopDeletionPass()); - // Do not enable unrolling in PrepareForThinLTO phase during sample PGO + // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. - if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty()) - LPM2.addPass(LoopUnrollPass::createFull(Level)); + if (Phase != ThinLTOPhase::PreLink || + !PGOOpt || PGOOpt->SampleProfileFile.empty()) + LPM2.addPass(LoopFullUnrollPass(Level)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); @@ -524,8 +535,8 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { ModulePassManager PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging, - bool PrepareForThinLTO) { + ThinLTOPhase Phase, + bool DebugLogging) { ModulePassManager MPM(DebugLogging); // Do basic inference of function attributes from known properties of system @@ -539,14 +550,46 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); + if (Level == O3) + EarlyFPM.addPass(CallSiteSplittingPass()); + + // In SamplePGO ThinLTO backend, we need instcombine before profile annotation + // to convert bitcast to direct calls so that they can be inlined during the + // profile annotation prepration step. + // More details about SamplePGO design can be found in: + // https://research.google.com/pubs/pub45290.html + // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. + if (PGOOpt && !PGOOpt->SampleProfileFile.empty() && + Phase == ThinLTOPhase::PostLink) + EarlyFPM.addPass(InstCombinePass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + // Annotate sample profile right after early FPM to ensure freshness of + // the debug info. + MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + Phase == ThinLTOPhase::PreLink)); + // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard + // for the profile annotation to be accurate in the ThinLTO backend. + if (Phase != ThinLTOPhase::PreLink) + // We perform early indirect call promotion here, before globalopt. + // This is important for the ThinLTO backend phase because otherwise + // imported available_externally functions look unreferenced and are + // removed. + MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, + true)); + } + // Interprocedural constant propagation now that basic cleanup has occured // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in // years, it should be re-analyzed. MPM.addPass(IPSCCPPass()); + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); + // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); @@ -570,22 +613,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO, if requested. - if (PGOOpt) { - assert(PGOOpt->RunProfileGen || !PGOOpt->SampleProfileFile.empty() || - !PGOOpt->ProfileUseFile.empty()); - if (PGOOpt->SampleProfileFile.empty()) - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); - else - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); - - // Indirect call promotion that promotes intra-module targes only. - // Do not enable it in PrepareForThinLTO phase during sample PGO because - // it changes IR to makes profile annotation in back compile inaccurate. - if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty()) - MPM.addPass(PGOIndirectCallPromotion( - false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + // Add all the requested passes for instrumentation PGO, if requested. + if (PGOOpt && Phase != ThinLTOPhase::PostLink && + (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { + addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); + MPM.addPass(PGOIndirectCallPromotion(false, false)); } // Require the GlobalsAA analysis for the module so we can query it within @@ -610,10 +643,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. - // For PrepareForThinLTO pass, we disable hot-caller heuristic for sample PGO + // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. InlineParams IP = getInlineParamsFromOptLevel(Level); - if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty()) + if (Phase == ThinLTOPhase::PreLink && + PGOOpt && !PGOOpt->SampleProfileFile.empty()) IP.HotCallSiteThreshold = 0; MainCGPipeline.addPass(InlinerPass(IP)); @@ -628,8 +662,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, DebugLogging, - PrepareForThinLTO))); + buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); @@ -641,7 +674,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // in postorder (or bottom-up). MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations, DebugLogging))); + std::move(MainCGPipeline), MaxDevirtIterations))); return MPM; } @@ -653,6 +686,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Optimize globals now that the module is fully simplified. MPM.addPass(GlobalOptPass()); + MPM.addPass(GlobalDCEPass()); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -714,16 +748,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Cleanup after the loop optimization passes. OptimizePM.addPass(InstCombinePass()); - // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). + forwardSwitchCondToPhi(true). + convertSwitchToLookupTable(true). + needCanonicalLoops(false). + sinkCommonInsts(true))); + // Optimize parallel scalar instruction chains into SIMD instructions. OptimizePM.addPass(SLPVectorizerPass()); - // Cleanup after all of the vectorizers. - OptimizePM.addPass(SimplifyCFGPass()); OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel @@ -732,7 +774,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level))); + OptimizePM.addPass(LoopUnrollPass(Level)); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); @@ -750,10 +792,20 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // And finally clean up LCSSA form before generating code. OptimizePM.addPass(InstSimplifierPass()); + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + OptimizePM.addPass(DivRemPairsPass()); + // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. OptimizePM.addPass(SimplifyCFGPass()); + // Optimize PHIs by speculating around them when profitable. Note that this + // pass needs to be run after any PRE or similar pass as it is essentially + // inserting redudnancies into the progrem. This even includes SimplifyCFG. + OptimizePM.addPass(SpeculateAroundPHIsPass()); + // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); @@ -777,9 +829,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); + if (PGOOpt && PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, - /*PrepareForThinLTO=*/false)); + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, + DebugLogging)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -797,11 +852,14 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); + if (PGOOpt && PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, - /*PrepareForThinLTO=*/true)); + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink, + DebugLogging)); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -834,13 +892,15 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, // During the ThinLTO backend phase we perform early indirect call promotion // here, before globalopt. Otherwise imported available_externally functions // look unreferenced and are removed. - MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty() && - !PGOOpt->ProfileUseFile.empty())); + // FIXME: move this into buildModuleSimplificationPipeline to merge the logic + // with SamplePGO. + if (!PGOOpt || PGOOpt->SampleProfileFile.empty()) + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + false /* SamplePGO */)); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, - /*PrepareForThinLTO=*/false)); + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink, + DebugLogging)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -873,17 +933,24 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(InferFunctionAttrsPass()); if (Level > 1) { + FunctionPassManager EarlyFPM(DebugLogging); + EarlyFPM.addPass(CallSiteSplittingPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); - // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. MPM.addPass(IPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. @@ -1277,8 +1344,7 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, if (!parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass, DebugLogging)) return false; - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM), - DebugLogging)); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); return true; } if (Name == "function") { @@ -1388,8 +1454,7 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, DebugLogging)) return false; // Add the nested pass manager with the appropriate adaptor. - CGPM.addPass( - createCGSCCToFunctionPassAdaptor(std::move(FPM), DebugLogging)); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); return true; } if (auto Count = parseRepeatPassName(Name)) { @@ -1405,8 +1470,8 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass, DebugLogging)) return false; - CGPM.addPass(createDevirtSCCRepeatedPass(std::move(NestedCGPM), - *MaxRepetitions, DebugLogging)); + CGPM.addPass( + createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions)); return true; } |
