diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Passes | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/lib/Passes')
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 639 | ||||
-rw-r--r-- | llvm/lib/Passes/PassRegistry.def | 46 | ||||
-rw-r--r-- | llvm/lib/Passes/StandardInstrumentations.cpp | 20 |
3 files changed, 516 insertions, 189 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 53b7db8689c4a..4db7bebcb77ce 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -33,11 +33,15 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/InlineFeaturesAnalysis.h" +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -49,13 +53,11 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/StackLifetime.h" #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PreISelIntrinsicLowering.h" -#include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" @@ -67,6 +69,10 @@ #include "llvm/Support/Regex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/Coroutines/CoroCleanup.h" +#include "llvm/Transforms/Coroutines/CoroEarly.h" +#include "llvm/Transforms/Coroutines/CoroElide.h" +#include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include 
"llvm/Transforms/IPO/Attributor.h" @@ -87,6 +93,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" +#include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" @@ -169,8 +176,10 @@ #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LCSSA.h" @@ -183,6 +192,7 @@ #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; @@ -207,6 +217,16 @@ static cl::opt<bool> EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt<InliningAdvisorMode> UseInlineAdvisor( + "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, + cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"), + cl::values(clEnumValN(InliningAdvisorMode::Default, "default", + "Heuristics-based inliner version."), + clEnumValN(InliningAdvisorMode::Development, "development", + "Use development mode (runtime-loadable model)."), + clEnumValN(InliningAdvisorMode::Release, "release", + "Use release mode (AOT-compiled model)."))); + static cl::opt<bool> EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -229,14 +249,22 @@ static cl::opt<bool> EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); +/// Flag to enable inline deferral during PGO. +static cl::opt<bool> + EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), + cl::Hidden, + cl::desc("Enable inline deferral during PGO")); + PipelineTuningOptions::PipelineTuningOptions() { - LoopInterleaving = EnableLoopInterleaving; - LoopVectorization = EnableLoopVectorization; - SLPVectorization = RunSLPVectorization; + LoopInterleaving = true; + LoopVectorization = true; + SLPVectorization = false; LoopUnrolling = true; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + Coroutines = false; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; + CallGraphProfile = true; } extern cl::opt<bool> EnableHotColdSplit; @@ -244,28 +272,40 @@ extern cl::opt<bool> EnableOrderFileInstrumentation; extern cl::opt<bool> FlattenedProfileUsed; -static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { - switch (Level) { - case PassBuilder::O0: - case PassBuilder::O1: - case PassBuilder::O2: - case PassBuilder::O3: - return false; - - case PassBuilder::Os: - case PassBuilder::Oz: - return true; - } - llvm_unreachable("Invalid optimization level!"); -} +extern cl::opt<AttributorRunOption> AttributorRun; +extern cl::opt<bool> EnableKnowledgeRetention; + +const 
PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = { + /*SpeedLevel*/ 0, + /*SizeLevel*/ 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = { + /*SpeedLevel*/ 1, + /*SizeLevel*/ 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = { + /*SpeedLevel*/ 2, + /*SizeLevel*/ 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = { + /*SpeedLevel*/ 3, + /*SizeLevel*/ 0}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = { + /*SpeedLevel*/ 2, + /*SizeLevel*/ 1}; +const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = { + /*SpeedLevel*/ 2, + /*SizeLevel*/ 2}; namespace { +// The following passes/analyses have custom names, otherwise their name will +// include `(anonymous namespace)`. These are special since they are only for +// testing purposes and don't live in a header file. + /// No-op module pass which does nothing. -struct NoOpModulePass { +struct NoOpModulePass : PassInfoMixin<NoOpModulePass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); } + static StringRef name() { return "NoOpModulePass"; } }; @@ -281,7 +321,7 @@ public: }; /// No-op CGSCC pass which does nothing. -struct NoOpCGSCCPass { +struct NoOpCGSCCPass : PassInfoMixin<NoOpCGSCCPass> { PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &, LazyCallGraph &, CGSCCUpdateResult &UR) { return PreservedAnalyses::all(); @@ -303,7 +343,7 @@ public: }; /// No-op function pass which does nothing. -struct NoOpFunctionPass { +struct NoOpFunctionPass : PassInfoMixin<NoOpFunctionPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &) { return PreservedAnalyses::all(); } @@ -322,7 +362,7 @@ public: }; /// No-op loop pass which does nothing. 
-struct NoOpLoopPass { +struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> { PreservedAnalyses run(Loop &L, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &) { return PreservedAnalyses::all(); @@ -348,7 +388,7 @@ AnalysisKey NoOpCGSCCAnalysis::Key; AnalysisKey NoOpFunctionAnalysis::Key; AnalysisKey NoOpLoopAnalysis::Key; -} // End anonymous namespace. +} // namespace void PassBuilder::invokePeepholeEPCallbacks( FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { @@ -392,11 +432,138 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { C(LAM); } +// TODO: Investigate the cost/benefit of tail call elimination on debugging. +FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline( + OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { + + FunctionPassManager FPM(DebugLogging); + + // Form SSA out of local memory accesses after breaking apart aggregates into + // scalars. + FPM.addPass(SROA()); + + // Catch trivial redundancies + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + + // Hoisting of scalars and load expressions. + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + + FPM.addPass(LibCallsShrinkWrapPass()); + + invokePeepholeEPCallbacks(FPM, Level); + + FPM.addPass(SimplifyCFGPass()); + + // Form canonically associated expression trees, and simplify the trees using + // basic mathematical properties. For example, this will form (nearly) + // minimal multiplication trees. + FPM.addPass(ReassociatePass()); + + // Add the primary loop simplification pipeline. + // FIXME: Currently this is split into two loop pass pipelines because we run + // some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. + // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. 
We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to + // fully replace `SimplifyCFGPass`, and the closest to the other we have is + // `LoopInstSimplify`. + LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging); + + // Simplify the loop body. We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + + LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true)); + // TODO: Investigate promotion cap for O1. + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(SimpleLoopUnswitchPass()); + LPM2.addPass(IndVarSimplifyPass()); + LPM2.addPass(LoopIdiomRecognizePass()); + + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + + LPM2.addPass(LoopDeletionPass()); + // Do not enable unrolling in PreLinkThinLTO phase during sample PGO + // because it changes IR to makes profile annotation in back compile + // inaccurate. The normal unroller doesn't pay attention to forced full unroll + // attributes so we need to make sure and allow the full unroll pass to pay + // attention to it. + if (Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); + + // We provide the opt remark emitter pass for LICM to use. We only need to do + // this once as it is immutable. 
+ FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM1), EnableMSSALoopDependency, DebugLogging)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); + + // Delete small array after loop unroll. + FPM.addPass(SROA()); + + // Specially optimize memory movement as it doesn't look like dataflow in SSA. + FPM.addPass(MemCpyOptPass()); + + // Sparse conditional constant propagation. + // FIXME: It isn't clear why we do this *after* loop passes rather than + // before... + FPM.addPass(SCCPPass()); + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). + FPM.addPass(BDCEPass()); + + // Run instcombine after redundancy and dead bit elimination to exploit + // opportunities opened up by them. + FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); + + if (PTO.Coroutines) + FPM.addPass(CoroElidePass()); + + for (auto &C : ScalarOptimizerLateEPCallbacks) + C(FPM, Level); + + // Finally, do an expensive DCE pass to catch all the dead code exposed by + // the simplifications and basic cleanup after all the simplifications. + // TODO: Investigate if this is too expensive. 
+ FPM.addPass(ADCEPass()); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); + + return FPM; +} + FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations!"); + assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); + + // The O1 pipeline has a separate pipeline creation function to simplify + // construction readability. + if (Level.getSpeedupLevel() == 1) + return buildO1FunctionSimplificationPipeline(Level, Phase, DebugLogging); + FunctionPassManager FPM(DebugLogging); // Form SSA out of local memory accesses after breaking apart aggregates into @@ -405,33 +572,32 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + if (EnableKnowledgeRetention) + FPM.addPass(AssumeSimplifyPass()); // Hoisting of scalars and load expressions. - if (Level > O1) { - if (EnableGVNHoist) - FPM.addPass(GVNHoistPass()); - - // Global value numbering based sinking. - if (EnableGVNSink) { - FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); - } + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); } // Speculative execution if the target has divergent branches; otherwise nop. - if (Level > O1) { - FPM.addPass(SpeculativeExecutionPass()); + FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); + + // Optimize based on known information about branches, and cleanup afterward. + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); - // Optimize based on known information about branches, and cleanup afterward. 
- FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - } FPM.addPass(SimplifyCFGPass()); - if (Level == O3) + if (Level == OptimizationLevel::O3) FPM.addPass(AggressiveInstCombinePass()); FPM.addPass(InstCombinePass()); - if (!isOptimizingForSize(Level)) + if (!Level.isOptimizingForSize()) FPM.addPass(LibCallsShrinkWrapPass()); invokePeepholeEPCallbacks(FPM, Level); @@ -439,12 +605,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && - !isOptimizingForSize(Level) && Level > O1) + !Level.isOptimizingForSize()) FPM.addPass(PGOMemOPSizeOpt()); - // TODO: Investigate the cost/benefit of tail call elimination on debugging. - if (Level > O1) - FPM.addPass(TailCallElimPass()); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); // Form canonically associated expression trees, and simplify the trees using @@ -470,7 +634,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(LoopSimplifyCFGPass()); // Rotate Loop - disable header duplication at -Oz - LPM1.addPass(LoopRotatePass(Level != Oz)); + LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz)); // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); @@ -483,11 +647,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM2.addPass(LoopDeletionPass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile - // inaccurate. 
- if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) && - PTO.LoopUnrolling) - LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false, + // inaccurate. The normal unroller doesn't pay attention to forced full unroll + // attributes so we need to make sure and allow the full unroll pass to pay + // attention to it. + if (Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) @@ -495,7 +661,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. - FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); FPM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM1), EnableMSSALoopDependency, DebugLogging)); FPM.addPass(SimplifyCFGPass()); @@ -510,14 +677,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SROA()); // Eliminate redundancies. - if (Level != O1) { - // These passes add substantial compile time so skip them at O1. - FPM.addPass(MergedLoadStoreMotionPass()); - if (RunNewGVN) - FPM.addPass(NewGVNPass()); - else - FPM.addPass(GVN()); - } + FPM.addPass(MergedLoadStoreMotionPass()); + if (RunNewGVN) + FPM.addPass(NewGVNPass()); + else + FPM.addPass(GVN()); // Specially optimize memory movement as it doesn't look like dataflow in SSA. FPM.addPass(MemCpyOptPass()); @@ -539,14 +703,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. 
- if (Level > O1) { - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - EnableMSSALoopDependency, DebugLogging)); - } + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DSEPass()); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + EnableMSSALoopDependency, DebugLogging)); + + if (PTO.Coroutines) + FPM.addPass(CoroElidePass()); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -559,7 +724,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); - if (EnableCHR && Level == O3 && PGOOpt && + if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); @@ -572,13 +737,13 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { - assert(Level != O0 && "Not expecting O0 here!"); + assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification // at -Os/Oz. We will not do this inline for context sensistive PGO (when // IsCS is true). 
- if (!isOptimizingForSize(Level) && !IsCS) { + if (!Level.isOptimizingForSize() && !IsCS) { InlineParams IP; IP.DefaultThreshold = PreInlineThreshold; @@ -587,10 +752,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, // This should probably be lowered after performance testing. // FIXME: this comment is cargo culted from the old pass manager, revisit). IP.HintThreshold = 325; - - CGSCCPassManager CGPipeline(DebugLogging); - - CGPipeline.addPass(InlinerPass(IP)); + ModuleInlinerWrapperPass MIWP(IP, DebugLogging); + CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; FPM.addPass(SROA()); @@ -601,7 +764,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + MPM.addPass(std::move(MIWP)); // Delete anything that is now dead to make sure that we don't instrument // dead code. Instrumentation can end up keeping dead code around and @@ -663,16 +826,74 @@ void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, static InlineParams getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { - auto O3 = PassBuilder::O3; - unsigned OptLevel = Level > O3 ? 2 : Level; - unsigned SizeLevel = Level > O3 ? 
Level - O3 : 0; - return getInlineParams(OptLevel, SizeLevel); + return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager -PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { +ModuleInlinerWrapperPass +PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase, + bool DebugLogging) { + InlineParams IP = getInlineParamsFromOptLevel(Level); + if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) + IP.HotCallSiteThreshold = 0; + + if (PGOOpt) + IP.EnableDeferral = EnablePGOInlineDeferral; + + ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor, + MaxDevirtIterations); + + // Require the GlobalsAA analysis for the module so we can query it within + // the CGSCC pipeline. + MIWP.addRequiredModuleAnalysis<GlobalsAA>(); + + // Require the ProfileSummaryAnalysis for the module so we can query it within + // the inliner pass. + MIWP.addRequiredModuleAnalysis<ProfileSummaryAnalysis>(); + + // Now begin the main postorder CGSCC pipeline. + // FIXME: The current CGSCC pipeline has its origins in the legacy pass + // manager and trying to emulate its precise behavior. Much of this doesn't + // make a lot of sense and we should revisit the core CGSCC structure. + CGSCCPassManager &MainCGPipeline = MIWP.getPM(); + + // Note: historically, the PruneEH pass was run first to deduce nounwind and + // generally clean up exception handling overhead. It isn't clear this is + // valuable as the inliner doesn't currently care whether it is inlining an + // invoke or a call. + + if (AttributorRun & AttributorRunOption::CGSCC) + MainCGPipeline.addPass(AttributorCGSCCPass()); + + if (PTO.Coroutines) + MainCGPipeline.addPass(CoroSplitPass()); + + // Now deduce any function attributes based in the current code. 
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); + + // When at O3 add argument promotion to the pass pipeline. + // FIXME: It isn't at all clear why this should be limited to O3. + if (Level == OptimizationLevel::O3) + MainCGPipeline.addPass(ArgumentPromotionPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + MainCGPipeline.addPass(OpenMPOptPass()); + + // Lastly, add the core function simplification pipeline nested inside the + // CGSCC walk. + MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); + + return MIWP; +} + +ModulePassManager PassBuilder::buildModuleSimplificationPipeline( + OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { ModulePassManager MPM(DebugLogging); bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); @@ -712,7 +933,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - if (Level == O3) + if (PTO.Coroutines) + EarlyFPM.addPass(CoroEarlyPass()); + if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); // In SamplePGO ThinLTO backend, we need instcombine before profile annotation @@ -745,6 +968,15 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, true /* SamplePGO */)); } + if (AttributorRun & AttributorRunOption::MODULE) + MPM.addPass(AttributorPass()); + + // Lower type metadata and the type.test intrinsic in the ThinLTO + // post link pipeline after ICP. This is to enable usage of the type + // tests in ICP sequences. 
+ if (Phase == ThinLTOPhase::PostLink) + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); + // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -765,7 +997,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // constants. MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); - // Remove any dead arguments exposed by cleanups and constand folding + // Remove any dead arguments exposed by cleanups and constant folding // globals. MPM.addPass(DeadArgumentEliminationPass()); @@ -796,61 +1028,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); - // Require the GlobalsAA analysis for the module so we can query it within - // the CGSCC pipeline. - MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); - - // Require the ProfileSummaryAnalysis for the module so we can query it within - // the inliner pass. - MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); - - // Now begin the main postorder CGSCC pipeline. - // FIXME: The current CGSCC pipeline has its origins in the legacy pass - // manager and trying to emulate its precise behavior. Much of this doesn't - // make a lot of sense and we should revisit the core CGSCC structure. - CGSCCPassManager MainCGPipeline(DebugLogging); - - // Note: historically, the PruneEH pass was run first to deduce nounwind and - // generally clean up exception handling overhead. It isn't clear this is - // valuable as the inliner doesn't currently care whether it is inlining an - // invoke or a call. - - // Run the inliner first. The theory is that we are walking bottom-up and so - // the callees have already been fully optimized, and we want to inline them - // into the callers so that our optimizations can reflect that. 
- // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO - // because it makes profile annotation in the backend inaccurate. - InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) - IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); - - // Now deduce any function attributes based in the current code. - MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); - - // When at O3 add argument promotion to the pass pipeline. - // FIXME: It isn't at all clear why this should be limited to O3. - if (Level == O3) - MainCGPipeline.addPass(ArgumentPromotionPass()); - - // Lastly, add the core function simplification pipeline nested inside the - // CGSCC walk. - MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); - - for (auto &C : CGSCCOptimizerLateEPCallbacks) - C(MainCGPipeline, Level); - - // We wrap the CGSCC pipeline in a devirtualization repeater. This will try - // to detect when we devirtualize indirect calls and iterate the SCC passes - // in that case to try and catch knock-on inlining or function attrs - // opportunities. Then we add it to the module pipeline by walking the SCCs - // in postorder (or bottom-up). - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations))); - + MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging)); return MPM; } @@ -935,6 +1113,10 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // llvm.loop.distribute=true or when -enable-loop-distribute is specified. OptimizePM.addPass(LoopDistributePass()); + // Populates the VFABI attribute with the scalar-to-vector mappings + // from the TargetLibraryInfo. + OptimizePM.addPass(InjectTLIMappings()); + // Now run the core loop vectorizer. 
OptimizePM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); @@ -965,6 +1147,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( if (PTO.SLPVectorization) OptimizePM.addPass(SLPVectorizerPass()); + // Enhance/cleanup vector code. + OptimizePM.addPass(VectorCombinePass()); OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel @@ -975,11 +1159,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { - OptimizePM.addPass(LoopUnrollAndJamPass(Level)); + OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel())); } - OptimizePM.addPass(LoopUnrollPass( - LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll))); + OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions( + Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); @@ -1020,13 +1204,17 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // inserting redundancies into the program. This even includes SimplifyCFG. OptimizePM.addPass(SpeculateAroundPHIsPass()); - for (auto &C : OptimizerLastEPCallbacks) - C(OptimizePM, Level); + if (PTO.Coroutines) + OptimizePM.addPass(CoroCleanupPass()); // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); - MPM.addPass(CGProfilePass()); + for (auto &C : OptimizerLastEPCallbacks) + C(MPM, Level); + + if (PTO.CallGraphProfile) + MPM.addPass(CGProfilePass()); // Now we need to do some global optimization transforms. 
// FIXME: It would seem like these should come first in the optimization @@ -1041,7 +1229,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -1068,7 +1257,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, ModulePassManager PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -1101,6 +1291,12 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, // Reduce the size of the IR as much as possible. MPM.addPass(GlobalOptPass()); + // Module simplification splits coroutines, but does not fully clean up + // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up + // on these, we schedule the cleanup here. + if (PTO.Coroutines) + MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + return MPM; } @@ -1129,7 +1325,7 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); } - if (Level == O0) + if (Level == OptimizationLevel::O0) return MPM; // Force any function attributes we want the rest of the pipeline to observe. 
@@ -1148,10 +1344,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); + assert(Level != OptimizationLevel::O0 && + "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! return buildPerModuleDefaultPipeline(Level, DebugLogging, - /* LTOPreLink */true); + /* LTOPreLink */ true); } ModulePassManager @@ -1159,11 +1356,14 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, ModuleSummaryIndex *ExportSummary) { ModulePassManager MPM(DebugLogging); - if (Level == O0) { + if (Level == OptimizationLevel::O0) { // The WPD and LowerTypeTest passes need to run at -O0 to lower type // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP. + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1188,7 +1388,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); - if (Level > 1) { + if (Level.getSpeedupLevel() > 1) { FunctionPassManager EarlyFPM(DebugLogging); EarlyFPM.addPass(CallSiteSplittingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); @@ -1202,11 +1402,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
- MPM.addPass(IPSCCPPass()); + MPM.addPass(IPSCCPPass()); - // Attach metadata to indirect call sites indicating the set of functions - // they may target at run-time. This should follow IPSCCP. - MPM.addPass(CalledValuePropagationPass()); + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. @@ -1226,10 +1426,14 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); // Stop here at -O1. - if (Level == 1) { + if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO + // pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); return MPM; } @@ -1251,7 +1455,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. FunctionPassManager PeepholeFPM(DebugLogging); - if (Level == O3) + if (Level == OptimizationLevel::O3) PeepholeFPM.addPass(AggressiveInstCombinePass()); PeepholeFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(PeepholeFPM, Level); @@ -1263,8 +1467,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. 
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - InlinerPass(getInlineParamsFromOptLevel(Level)))); + MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level), + DebugLogging)); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); @@ -1357,6 +1561,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). @@ -1635,6 +1842,49 @@ Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) { } return Result; } + +Expected<GVNOptions> parseGVNOptions(StringRef Params) { + GVNOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "pre") { + Result.setPRE(Enable); + } else if (ParamName == "load-pre") { + Result.setLoadPRE(Enable); + } else if (ParamName == "memdep") { + Result.setMemDep(Enable); + } else { + return make_error<StringError>( + formatv("invalid GVN pass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + +Expected<StackLifetime::LivenessType> +parseStackLifetimeOptions(StringRef Params) { + StackLifetime::LivenessType Result = StackLifetime::LivenessType::May; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName == "may") { + Result = StackLifetime::LivenessType::May; + } else if (ParamName == "must") { + Result = 
StackLifetime::LivenessType::Must; + } else { + return make_error<StringError>( + formatv("invalid StackLifetime parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1887,13 +2137,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2]) - .Case("O0", O0) - .Case("O1", O1) - .Case("O2", O2) - .Case("O3", O3) - .Case("Os", Os) - .Case("Oz", Oz); - if (L == O0) { + .Case("O0", OptimizationLevel::O0) + .Case("O1", OptimizationLevel::O1) + .Case("O2", OptimizationLevel::O2) + .Case("O3", OptimizationLevel::O3) + .Case("Os", OptimizationLevel::Os) + .Case("Oz", OptimizationLevel::Oz); + if (L == OptimizationLevel::O0) { // Add instrumentation PGO passes -- at O0 we can still do PGO. if (PGOOpt && Matches[1] != "thinlto" && (PGOOpt->Action == PGOOptions::IRInstr || @@ -1903,6 +2153,20 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); + + // For IR that makes use of coroutines intrinsics, coroutine passes must + // be run, even at -O0. + if (PTO.Coroutines) { + MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); + + CGSCCPassManager CGPM(DebugLogging); + CGPM.addPass(CoroSplitPass()); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass())); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + + MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + } + // Do nothing else at all! return Error::success(); } @@ -1910,8 +2174,10 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, // This is consistent with old pass manager invoked via opt, but // inconsistent with clang. 
Clang doesn't enable loop vectorization // but does enable slp vectorization at Oz. - PTO.LoopVectorization = L > O1 && L < Oz; - PTO.SLPVectorization = L > O1 && L < Oz; + PTO.LoopVectorization = + L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; + PTO.SLPVectorization = + L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); @@ -2408,3 +2674,28 @@ Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { return Error::success(); } + +bool PassBuilder::isAAPassName(StringRef PassName) { +#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#include "PassRegistry.def" + return false; +} + +bool PassBuilder::isAnalysisPassName(StringRef PassName) { +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#include "PassRegistry.def" + return false; +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 355dd6f968122..dfdfc3d05976a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -27,6 +27,7 @@ MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) +MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -57,6 +58,7 @@ MODULE_PASS("hotcoldsplit", HotColdSplittingPass()) MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false)) MODULE_PASS("khwasan", HWAddressSanitizerPass(true, 
true)) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) +MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) @@ -71,7 +73,6 @@ MODULE_PASS("partial-inliner", PartialInlinerPass()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) -MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass()) MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs())) MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs())) MODULE_PASS("print", PrintModulePass(dbgs())) @@ -82,6 +83,8 @@ MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass()) MODULE_PASS("sample-profile", SampleProfileLoaderPass()) +MODULE_PASS("scc-oz-module-inliner", + buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) @@ -108,7 +111,10 @@ CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) CGSCC_PASS("argpromotion", ArgumentPromotionPass()) CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) +CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("inline", InlinerPass()) +CGSCC_PASS("openmpopt", OpenMPOptPass()) +CGSCC_PASS("coro-split", CoroSplitPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS @@ -126,6 +132,8 @@ FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) 
FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) +FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis()) +FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) @@ -160,6 +168,8 @@ FUNCTION_PASS("aa-eval", AAEvaluator()) FUNCTION_PASS("adce", ADCEPass()) FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass()) FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass()) +FUNCTION_PASS("assume-builder", AssumeBuilderPass()) +FUNCTION_PASS("assume-simplify", AssumeSimplifyPass()) FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) @@ -167,6 +177,9 @@ FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) +FUNCTION_PASS("coro-early", CoroEarlyPass()) +FUNCTION_PASS("coro-elide", CoroElidePass()) +FUNCTION_PASS("coro-cleanup", CoroCleanupPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) @@ -182,6 +195,7 @@ FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("instsimplify", InstSimplifyPass()) FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass()) +FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) @@ -193,10 +207,10 @@ FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) 
FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass()) FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) -FUNCTION_PASS("gvn", GVN()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) +FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) @@ -208,7 +222,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) -FUNCTION_PASS("loop-fuse", LoopFusePass()) +FUNCTION_PASS("loop-fusion", LoopFusePass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) @@ -220,23 +234,25 @@ FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs())) FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) +FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs())) FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs())) FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs())) FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs())) FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs())) +FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) 
FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("sccp", SCCPPass()) +FUNCTION_PASS("simplifycfg", SimplifyCFGPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) -FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) -FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass()) +FUNCTION_PASS("vector-combine", VectorCombinePass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass()) FUNCTION_PASS("verify<loops>", LoopVerifierPass()) @@ -257,7 +273,7 @@ FUNCTION_PASS("tsan", ThreadSanitizerPass()) #ifndef FUNCTION_PASS_WITH_PARAMS #define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) #endif -FUNCTION_PASS_WITH_PARAMS("unroll", +FUNCTION_PASS_WITH_PARAMS("loop-unroll", [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); }, @@ -282,6 +298,16 @@ FUNCTION_PASS_WITH_PARAMS("mldst-motion", return MergedLoadStoreMotionPass(Opts); }, parseMergedLoadStoreMotionOptions) +FUNCTION_PASS_WITH_PARAMS("gvn", + [](GVNOptions Opts) { + return GVN(Opts); + }, + parseGVNOptions) +FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>", + [](StackLifetime::LivenessType Type) { + return StackLifetimePrinterPass(dbgs(), Type); + }, + parseStackLifetimeOptions) #undef FUNCTION_PASS_WITH_PARAMS #ifndef LOOP_ANALYSIS @@ -297,6 +323,7 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #ifndef LOOP_PASS #define LOOP_PASS(NAME, CREATE_PASS) #endif +LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass()) LOOP_PASS("licm", LICMPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) @@ -306,16 +333,17 @@ LOOP_PASS("no-op-loop", NoOpLoopPass()) LOOP_PASS("print", PrintLoopPass(dbgs())) 
LOOP_PASS("loop-deletion", LoopDeletionPass()) LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass()) -LOOP_PASS("strength-reduce", LoopStrengthReducePass()) +LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) -LOOP_PASS("irce", IRCEPass()) -LOOP_PASS("unroll-full", LoopFullUnrollPass()) +LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs())) +LOOP_PASS("print<loopnest>", LoopNestPrinterPass(dbgs())) LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("guard-widening", GuardWideningPass()) +LOOP_PASS("simple-loop-unswitch", SimpleLoopUnswitchPass()) #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 5cf0ca8e28f69..1e1a6b98a65a3 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -70,16 +70,24 @@ Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) { llvm_unreachable("Unknown IR unit"); } -void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) { - dbgs() << Banner << Extra << "\n"; - M->print(dbgs(), nullptr, false); -} void printIR(const Function *F, StringRef Banner, StringRef Extra = StringRef()) { if (!llvm::isFunctionInPrintList(F->getName())) return; dbgs() << Banner << Extra << "\n" << static_cast<const Value &>(*F); } + +void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) { + if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) { + dbgs() << Banner << Extra << "\n"; + M->print(dbgs(), nullptr, false); + } else { + for (const auto &F : M->functions()) { + printIR(&F, Banner, Extra); + } + } +} + void printIR(const 
LazyCallGraph::SCC *C, StringRef Banner, StringRef Extra = StringRef()) { bool BannerPrinted = false; @@ -98,7 +106,7 @@ void printIR(const Loop *L, StringRef Banner) { const Function *F = L->getHeader()->getParent(); if (!llvm::isFunctionInPrintList(F->getName())) return; - llvm::printLoop(const_cast<Loop &>(*L), dbgs(), Banner); + llvm::printLoop(const_cast<Loop &>(*L), dbgs(), std::string(Banner)); } /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into @@ -127,7 +135,7 @@ void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) { if (any_isa<const LazyCallGraph::SCC *>(IR)) { const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR); assert(C && "scc should be valid for printing"); - std::string Extra = formatv(" (scc: {0})", C->getName()); + std::string Extra = std::string(formatv(" (scc: {0})", C->getName())); printIR(C, Banner, Extra); return; } |