diff options
Diffstat (limited to 'lib/Passes/PassBuilder.cpp')
-rw-r--r-- | lib/Passes/PassBuilder.cpp | 155 |
1 files changed, 108 insertions, 47 deletions
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index cbae16a04ca6..eb04dcc8b6ef 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" @@ -59,7 +60,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Regex.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/GCOVProfiler.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/Instrumentation/CGProfile.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/CalledValuePropagation.h" @@ -79,13 +81,15 @@ #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" +#include "llvm/Transforms/IPO/SampleProfile.h" #include "llvm/Transforms/IPO/StripDeadPrototypes.h" +#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" -#include "llvm/Transforms/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" -#include "llvm/Transforms/PGOInstrumentation.h" -#include "llvm/Transforms/SampleProfile.h" +#include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" @@ -101,6 +105,8 @@ #include "llvm/Transforms/Scalar/GuardWidening.h" #include 
"llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h" @@ -116,6 +122,7 @@ #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopSink.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LowerAtomic.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" @@ -144,12 +151,10 @@ #include "llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" -#include "llvm/Transforms/Utils/SimplifyInstructions.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" - using namespace llvm; static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations", @@ -176,6 +181,15 @@ static cl::opt<bool> EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt<bool> EnableUnrollAndJam( + "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden, + cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)")); + +static cl::opt<bool> EnableSyntheticCounts( + "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Run synthetic function entry count generation " + "pass")); + static Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); @@ -196,7 +210,7 @@ static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { namespace { 
-/// \brief No-op module pass which does nothing. +/// No-op module pass which does nothing. struct NoOpModulePass { PreservedAnalyses run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); @@ -204,7 +218,7 @@ struct NoOpModulePass { static StringRef name() { return "NoOpModulePass"; } }; -/// \brief No-op module analysis. +/// No-op module analysis. class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> { friend AnalysisInfoMixin<NoOpModuleAnalysis>; static AnalysisKey Key; @@ -215,7 +229,7 @@ public: static StringRef name() { return "NoOpModuleAnalysis"; } }; -/// \brief No-op CGSCC pass which does nothing. +/// No-op CGSCC pass which does nothing. struct NoOpCGSCCPass { PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &, LazyCallGraph &, CGSCCUpdateResult &UR) { @@ -224,7 +238,7 @@ struct NoOpCGSCCPass { static StringRef name() { return "NoOpCGSCCPass"; } }; -/// \brief No-op CGSCC analysis. +/// No-op CGSCC analysis. class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> { friend AnalysisInfoMixin<NoOpCGSCCAnalysis>; static AnalysisKey Key; @@ -237,7 +251,7 @@ public: static StringRef name() { return "NoOpCGSCCAnalysis"; } }; -/// \brief No-op function pass which does nothing. +/// No-op function pass which does nothing. struct NoOpFunctionPass { PreservedAnalyses run(Function &F, FunctionAnalysisManager &) { return PreservedAnalyses::all(); @@ -245,7 +259,7 @@ struct NoOpFunctionPass { static StringRef name() { return "NoOpFunctionPass"; } }; -/// \brief No-op function analysis. +/// No-op function analysis. class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> { friend AnalysisInfoMixin<NoOpFunctionAnalysis>; static AnalysisKey Key; @@ -256,7 +270,7 @@ public: static StringRef name() { return "NoOpFunctionAnalysis"; } }; -/// \brief No-op loop pass which does nothing. +/// No-op loop pass which does nothing. 
struct NoOpLoopPass { PreservedAnalyses run(Loop &L, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &) { @@ -265,7 +279,7 @@ struct NoOpLoopPass { static StringRef name() { return "NoOpLoopPass"; } }; -/// \brief No-op loop analysis. +/// No-op loop analysis. class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> { friend AnalysisInfoMixin<NoOpLoopAnalysis>; static AnalysisKey Key; @@ -358,6 +372,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(SimplifyCFGPass()); + if (Level == O3) + FPM.addPass(AggressiveInstCombinePass()); FPM.addPass(InstCombinePass()); if (!isOptimizingForSize(Level)) @@ -381,13 +397,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Add the primary loop simplification pipeline. // FIXME: Currently this is split into two loop pass pipelines because we run - // some function passes in between them. These can and should be replaced by - // loop pass equivalenst but those aren't ready yet. Specifically, - // `SimplifyCFGPass` and `InstCombinePass` are used. We have - // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to - // the other we have is `LoopInstSimplify`. + // some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. + // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to + // fully replace `SimplifyCFGPass`, and the closest to the other we have is + // `LoopInstSimplify`. LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging); + // Simplify the loop body.
We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); @@ -412,10 +436,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1))); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2))); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); // Eliminate redundancies. if (Level != O1) { @@ -450,7 +474,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -510,7 +534,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(PGOInstrumentationGen()); FunctionPassManager FPM; - FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + FPM.addPass( + createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); // Add the profile lowering pass. 
@@ -580,7 +605,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, true)); } - // Interprocedural constant propagation now that basic cleanup has occured + // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in // years, it should be re-analyzed. @@ -621,6 +646,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion(false, false)); } + // Synthesize function entry counts for non-PGO compilation. + if (EnableSyntheticCounts && !PGOOpt) + MPM.addPass(SyntheticCountsPropagation()); + // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); @@ -730,7 +759,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, C(OptimizePM, Level); // First rotate loops that may have been un-rotated by prior passes. - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -774,10 +804,15 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. 
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll + if (EnableUnrollAndJam) { + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); + } OptimizePM.addPass(LoopUnrollPass(Level)); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. @@ -790,7 +825,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(LoopSinkPass()); // And finally clean up LCSSA form before generating code. - OptimizePM.addPass(InstSimplifierPass()); + OptimizePM.addPass(InstSimplifyPass()); // This hoists/decomposes div/rem ops. It should run after other sink/hoist // passes to avoid re-sinking, but before SimplifyCFG because it can allow @@ -809,6 +844,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); + MPM.addPass(CGProfilePass()); + // Now we need to do some global optimization transforms. // FIXME: It would seem like these should come first in the optimization // pipeline and maybe be the bottom of the canonicalization pipeline? Weird @@ -829,6 +866,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); + // Apply module pipeline start EP callback. 
+ for (auto &C : PipelineStartEPCallbacks) + C(MPM); + if (PGOOpt && PGOOpt->SamplePGOSupport) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); @@ -855,6 +896,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, if (PGOOpt && PGOOpt->SamplePGOSupport) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + // Apply module pipeline start EP callback. + for (auto &C : PipelineStartEPCallbacks) + C(MPM); + // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. @@ -877,15 +922,28 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, return MPM; } -ModulePassManager -PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { - // FIXME: The summary index is not hooked in the new pass manager yet. - // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest - // here. - +ModulePassManager PassBuilder::buildThinLTODefaultPipeline( + OptimizationLevel Level, bool DebugLogging, + const ModuleSummaryIndex *ImportSummary) { ModulePassManager MPM(DebugLogging); + if (ImportSummary) { + // These passes import type identifier resolutions for whole-program + // devirtualization and CFI. They must run early because other passes may + // disturb the specific instruction patterns that these passes look for, + // creating dependencies on resolutions that may not appear in the summary. + // + // For example, GVN may transform the pattern assume(type.test) appearing in + // two basic blocks into assume(phi(type.test, type.test)), which would + // transform a dependency on a WPD resolution into a dependency on a type + // identifier resolution for CFI. + // + // Also, WPD has access to more precise information than ICP and can + // devirtualize more effectively, so it should operate on the IR first. 
+ MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); + MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); + } + // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); @@ -916,8 +974,9 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, return buildPerModuleDefaultPipeline(Level, DebugLogging); } -ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { +ModulePassManager +PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, + ModuleSummaryIndex *ExportSummary) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -967,11 +1026,15 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Run whole program optimization of virtual call when the list of callees // is fixed. - MPM.addPass(WholeProgramDevirtPass()); + MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); // Stop here at -O1. - if (Level == 1) + if (Level == 1) { + // The LowerTypeTestsPass needs to run to lower type metadata and the + // type.test intrinsics. The pass does nothing if CFI is disabled. + MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); return MPM; + } // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); @@ -991,6 +1054,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. 
FunctionPassManager PeepholeFPM(DebugLogging); + if (Level == O3) + PeepholeFPM.addPass(AggressiveInstCombinePass()); PeepholeFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(PeepholeFPM, Level); @@ -1078,12 +1143,7 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. - // Enable once we add support for the summary in the new PM. -#if 0 - MPM.addPass(LowerTypeTestsPass(Summary ? PassSummaryAction::Export : - PassSummaryAction::None, - Summary)); -#endif + MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. @@ -1395,12 +1455,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, } else if (Matches[1] == "thinlto-pre-link") { MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging)); } else if (Matches[1] == "thinlto") { - MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging)); + MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr)); } else if (Matches[1] == "lto-pre-link") { MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging)); } else { assert(Matches[1] == "lto" && "Not one of the matched options!"); - MPM.addPass(buildLTODefaultPipeline(L, DebugLogging)); + MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr)); } return true; } @@ -1533,7 +1593,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, DebugLogging)) return false; // Add the nested pass manager with the appropriate adaptor. - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); return true; } if (auto Count = parseRepeatPassName(Name)) { |