summaryrefslogtreecommitdiff
path: root/llvm/lib/Passes
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
commit cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Passes
parent 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'llvm/lib/Passes')
-rw-r--r-- llvm/lib/Passes/PassBuilder.cpp 639
-rw-r--r-- llvm/lib/Passes/PassRegistry.def 46
-rw-r--r-- llvm/lib/Passes/StandardInstrumentations.cpp 20
3 files changed, 516 insertions, 189 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53b7db8689c4a..4db7bebcb77ce 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -33,11 +33,15 @@
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineFeaturesAnalysis.h"
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
@@ -49,13 +53,11 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
-#include "llvm/CodeGen/UnreachableBlockElim.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
@@ -67,6 +69,10 @@
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
+#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/Attributor.h"
@@ -87,6 +93,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
@@ -169,8 +176,10 @@
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LCSSA.h"
@@ -183,6 +192,7 @@
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
@@ -207,6 +217,16 @@ static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
+ "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
+ cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
+ cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
+ "Heuristics-based inliner version."),
+ clEnumValN(InliningAdvisorMode::Development, "development",
+ "Use development mode (runtime-loadable model)."),
+ clEnumValN(InliningAdvisorMode::Release, "release",
+ "Use release mode (AOT-compiled model).")));
+
static cl::opt<bool> EnableGVNSink(
"enable-npm-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
@@ -229,14 +249,22 @@ static cl::opt<bool>
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
+/// Flag to enable inline deferral during PGO.
+static cl::opt<bool>
+ EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
+ cl::Hidden,
+ cl::desc("Enable inline deferral during PGO"));
+
PipelineTuningOptions::PipelineTuningOptions() {
- LoopInterleaving = EnableLoopInterleaving;
- LoopVectorization = EnableLoopVectorization;
- SLPVectorization = RunSLPVectorization;
+ LoopInterleaving = true;
+ LoopVectorization = true;
+ SLPVectorization = false;
LoopUnrolling = true;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+ Coroutines = false;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+ CallGraphProfile = true;
}
extern cl::opt<bool> EnableHotColdSplit;
@@ -244,28 +272,40 @@ extern cl::opt<bool> EnableOrderFileInstrumentation;
extern cl::opt<bool> FlattenedProfileUsed;
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
- switch (Level) {
- case PassBuilder::O0:
- case PassBuilder::O1:
- case PassBuilder::O2:
- case PassBuilder::O3:
- return false;
-
- case PassBuilder::Os:
- case PassBuilder::Oz:
- return true;
- }
- llvm_unreachable("Invalid optimization level!");
-}
+extern cl::opt<AttributorRunOption> AttributorRun;
+extern cl::opt<bool> EnableKnowledgeRetention;
+
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
+ /*SpeedLevel*/ 0,
+ /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
+ /*SpeedLevel*/ 1,
+ /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
+ /*SpeedLevel*/ 3,
+ /*SizeLevel*/ 0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 2};
namespace {
+// The following passes/analyses have custom names, otherwise their name will
+// include `(anonymous namespace)`. These are special since they are only for
+// testing purposes and don't live in a header file.
+
/// No-op module pass which does nothing.
-struct NoOpModulePass {
+struct NoOpModulePass : PassInfoMixin<NoOpModulePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
+
static StringRef name() { return "NoOpModulePass"; }
};
@@ -281,7 +321,7 @@ public:
};
/// No-op CGSCC pass which does nothing.
-struct NoOpCGSCCPass {
+struct NoOpCGSCCPass : PassInfoMixin<NoOpCGSCCPass> {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
LazyCallGraph &, CGSCCUpdateResult &UR) {
return PreservedAnalyses::all();
@@ -303,7 +343,7 @@ public:
};
/// No-op function pass which does nothing.
-struct NoOpFunctionPass {
+struct NoOpFunctionPass : PassInfoMixin<NoOpFunctionPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
return PreservedAnalyses::all();
}
@@ -322,7 +362,7 @@ public:
};
/// No-op loop pass which does nothing.
-struct NoOpLoopPass {
+struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
LoopStandardAnalysisResults &, LPMUpdater &) {
return PreservedAnalyses::all();
@@ -348,7 +388,7 @@ AnalysisKey NoOpCGSCCAnalysis::Key;
AnalysisKey NoOpFunctionAnalysis::Key;
AnalysisKey NoOpLoopAnalysis::Key;
-} // End anonymous namespace.
+} // namespace
void PassBuilder::invokePeepholeEPCallbacks(
FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
@@ -392,11 +432,138 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
C(LAM);
}
+// TODO: Investigate the cost/benefit of tail call elimination on debugging.
+FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(
+ OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+
+ FunctionPassManager FPM(DebugLogging);
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROA());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+
+ // Hoisting of scalars and load expressions.
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+ // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+ // `LoopInstSimplify`.
+ LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
+ LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true));
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(SimpleLoopUnswitchPass());
+ LPM2.addPass(IndVarSimplifyPass());
+ LPM2.addPass(LoopIdiomRecognizePass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
+ LPM2.addPass(LoopDeletionPass());
+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
+ // because it changes the IR in ways that make profile annotation in the backend
+ // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+ // attributes so we need to make sure and allow the full unroll pass to pay
+ // attention to it.
+ if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging));
+
+ // Delete small array after loop unroll.
+ FPM.addPass(SROA());
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ if (PTO.Coroutines)
+ FPM.addPass(CoroElidePass());
+
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ return FPM;
+}
+
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
ThinLTOPhase Phase,
bool DebugLogging) {
- assert(Level != O0 && "Must request optimizations!");
+ assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
+
+ // The O1 pipeline has a separate pipeline creation function to simplify
+ // construction readability.
+ if (Level.getSpeedupLevel() == 1)
+ return buildO1FunctionSimplificationPipeline(Level, Phase, DebugLogging);
+
FunctionPassManager FPM(DebugLogging);
// Form SSA out of local memory accesses after breaking apart aggregates into
@@ -405,33 +572,32 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+ if (EnableKnowledgeRetention)
+ FPM.addPass(AssumeSimplifyPass());
// Hoisting of scalars and load expressions.
- if (Level > O1) {
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
- }
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
}
// Speculative execution if the target has divergent branches; otherwise nop.
- if (Level > O1) {
- FPM.addPass(SpeculativeExecutionPass());
+ FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
+
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- }
FPM.addPass(SimplifyCFGPass());
- if (Level == O3)
+ if (Level == OptimizationLevel::O3)
FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(InstCombinePass());
- if (!isOptimizingForSize(Level))
+ if (!Level.isOptimizingForSize())
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
@@ -439,12 +605,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !isOptimizingForSize(Level) && Level > O1)
+ !Level.isOptimizingForSize())
FPM.addPass(PGOMemOPSizeOpt());
- // TODO: Investigate the cost/benefit of tail call elimination on debugging.
- if (Level > O1)
- FPM.addPass(TailCallElimPass());
+ FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
@@ -470,7 +634,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopSimplifyCFGPass());
// Rotate Loop - disable header duplication at -Oz
- LPM1.addPass(LoopRotatePass(Level != Oz));
+ LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
@@ -483,11 +647,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopDeletionPass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
// because it changes the IR in ways that make profile annotation in the backend
- // inaccurate.
- if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse) &&
- PTO.LoopUnrolling)
- LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+ // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+ // attributes so we need to make sure and allow the full unroll pass to pay
+ // attention to it.
+ if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -495,7 +661,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
- FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM1), EnableMSSALoopDependency, DebugLogging));
FPM.addPass(SimplifyCFGPass());
@@ -510,14 +677,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(SROA());
// Eliminate redundancies.
- if (Level != O1) {
- // These passes add substantial compile time so skip them at O1.
- FPM.addPass(MergedLoadStoreMotionPass());
- if (RunNewGVN)
- FPM.addPass(NewGVNPass());
- else
- FPM.addPass(GVN());
- }
+ FPM.addPass(MergedLoadStoreMotionPass());
+ if (RunNewGVN)
+ FPM.addPass(NewGVNPass());
+ else
+ FPM.addPass(GVN());
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
FPM.addPass(MemCpyOptPass());
@@ -539,14 +703,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
- if (Level > O1) {
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
- }
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ EnableMSSALoopDependency, DebugLogging));
+
+ if (PTO.Coroutines)
+ FPM.addPass(CoroElidePass());
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@@ -559,7 +724,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
- if (EnableCHR && Level == O3 && PGOOpt &&
+ if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
(PGOOpt->Action == PGOOptions::IRUse ||
PGOOpt->Action == PGOOptions::SampleUse))
FPM.addPass(ControlHeightReductionPass());
@@ -572,13 +737,13 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
- assert(Level != O0 && "Not expecting O0 here!");
+ assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
// at -Os/Oz. We will not do this inline for context sensitive PGO (when
// IsCS is true).
- if (!isOptimizingForSize(Level) && !IsCS) {
+ if (!Level.isOptimizingForSize() && !IsCS) {
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
@@ -587,10 +752,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
// This should probably be lowered after performance testing.
// FIXME: this comment is cargo culted from the old pass manager, revisit.
IP.HintThreshold = 325;
-
- CGSCCPassManager CGPipeline(DebugLogging);
-
- CGPipeline.addPass(InlinerPass(IP));
+ ModuleInlinerWrapperPass MIWP(IP, DebugLogging);
+ CGSCCPassManager &CGPipeline = MIWP.getPM();
FunctionPassManager FPM;
FPM.addPass(SROA());
@@ -601,7 +764,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
+ MPM.addPass(std::move(MIWP));
// Delete anything that is now dead to make sure that we don't instrument
// dead code. Instrumentation can end up keeping dead code around and
@@ -663,16 +826,74 @@ void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
static InlineParams
getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
- auto O3 = PassBuilder::O3;
- unsigned OptLevel = Level > O3 ? 2 : Level;
- unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
- return getInlineParams(OptLevel, SizeLevel);
+ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging) {
+ModuleInlinerWrapperPass
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase,
+ bool DebugLogging) {
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+
+ if (PGOOpt)
+ IP.EnableDeferral = EnablePGOInlineDeferral;
+
+ ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor,
+ MaxDevirtIterations);
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // the CGSCC pipeline.
+ MIWP.addRequiredModuleAnalysis<GlobalsAA>();
+
+ // Require the ProfileSummaryAnalysis for the module so we can query it within
+ // the inliner pass.
+ MIWP.addRequiredModuleAnalysis<ProfileSummaryAnalysis>();
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager &MainCGPipeline = MIWP.getPM();
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ MainCGPipeline.addPass(AttributorCGSCCPass());
+
+ if (PTO.Coroutines)
+ MainCGPipeline.addPass(CoroSplitPass());
+
+ // Now deduce any function attributes based on the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // When at O3 add argument promotion to the pass pipeline.
+ // FIXME: It isn't at all clear why this should be limited to O3.
+ if (Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(ArgumentPromotionPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(OpenMPOptPass());
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
+
+ return MIWP;
+}
+
+ModulePassManager PassBuilder::buildModuleSimplificationPipeline(
+ OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
ModulePassManager MPM(DebugLogging);
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
@@ -712,7 +933,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
- if (Level == O3)
+ if (PTO.Coroutines)
+ EarlyFPM.addPass(CoroEarlyPass());
+ if (Level == OptimizationLevel::O3)
EarlyFPM.addPass(CallSiteSplittingPass());
// In SamplePGO ThinLTO backend, we need instcombine before profile annotation
@@ -745,6 +968,15 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
true /* SamplePGO */));
}
+ if (AttributorRun & AttributorRunOption::MODULE)
+ MPM.addPass(AttributorPass());
+
+ // Lower type metadata and the type.test intrinsic in the ThinLTO
+ // post link pipeline after ICP. This is to enable usage of the type
+ // tests in ICP sequences.
+ if (Phase == ThinLTOPhase::PostLink)
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
@@ -765,7 +997,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// constants.
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
- // Remove any dead arguments exposed by cleanups and constand folding
+ // Remove any dead arguments exposed by cleanups and constant folding
// globals.
MPM.addPass(DeadArgumentEliminationPass());
@@ -796,61 +1028,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
- // Require the GlobalsAA analysis for the module so we can query it within
- // the CGSCC pipeline.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
-
- // Require the ProfileSummaryAnalysis for the module so we can query it within
- // the inliner pass.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-
- // Now begin the main postorder CGSCC pipeline.
- // FIXME: The current CGSCC pipeline has its origins in the legacy pass
- // manager and trying to emulate its precise behavior. Much of this doesn't
- // make a lot of sense and we should revisit the core CGSCC structure.
- CGSCCPassManager MainCGPipeline(DebugLogging);
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
-
- // Run the inliner first. The theory is that we are walking bottom-up and so
- // the callees have already been fully optimized, and we want to inline them
- // into the callers so that our optimizations can reflect that.
- // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
- // because it makes profile annotation in the backend inaccurate.
- InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
- IP.HotCallSiteThreshold = 0;
- MainCGPipeline.addPass(InlinerPass(IP));
-
- // Now deduce any function attributes based in the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
-
- // When at O3 add argument promotion to the pass pipeline.
- // FIXME: It isn't at all clear why this should be limited to O3.
- if (Level == O3)
- MainCGPipeline.addPass(ArgumentPromotionPass());
-
- // Lastly, add the core function simplification pipeline nested inside the
- // CGSCC walk.
- MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
-
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
-
- // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
- // to detect when we devirtualize indirect calls and iterate the SCC passes
- // in that case to try and catch knock-on inlining or function attrs
- // opportunities. Then we add it to the module pipeline by walking the SCCs
- // in postorder (or bottom-up).
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
- std::move(MainCGPipeline), MaxDevirtIterations)));
-
+ MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
return MPM;
}
@@ -935,6 +1113,10 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
OptimizePM.addPass(LoopDistributePass());
+ // Populates the VFABI attribute with the scalar-to-vector mappings
+ // from the TargetLibraryInfo.
+ OptimizePM.addPass(InjectTLIMappings());
+
// Now run the core loop vectorizer.
OptimizePM.addPass(LoopVectorizePass(
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
@@ -965,6 +1147,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
if (PTO.SLPVectorization)
OptimizePM.addPass(SLPVectorizerPass());
+ // Enhance/cleanup vector code.
+ OptimizePM.addPass(VectorCombinePass());
OptimizePM.addPass(InstCombinePass());
// Unroll small loops to hide loop backedge latency and saturate any parallel
@@ -975,11 +1159,11 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- OptimizePM.addPass(LoopUnrollAndJamPass(Level));
+ OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
}
- OptimizePM.addPass(LoopUnrollPass(
- LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
+ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
OptimizePM.addPass(WarnMissedTransformationsPass());
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
@@ -1020,13 +1204,17 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// inserting redundancies into the program. This even includes SimplifyCFG.
OptimizePM.addPass(SpeculateAroundPHIsPass());
- for (auto &C : OptimizerLastEPCallbacks)
- C(OptimizePM, Level);
+ if (PTO.Coroutines)
+ OptimizePM.addPass(CoroCleanupPass());
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
- MPM.addPass(CGProfilePass());
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (PTO.CallGraphProfile)
+ MPM.addPass(CGProfilePass());
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
@@ -1041,7 +1229,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool DebugLogging, bool LTOPreLink) {
- assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
@@ -1068,7 +1257,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) {
- assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
@@ -1101,6 +1291,12 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
+ // Module simplification splits coroutines, but does not fully clean up
+ // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
+ // on these, we schedule the cleanup here.
+ if (PTO.Coroutines)
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
return MPM;
}
@@ -1129,7 +1325,7 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
}
- if (Level == O0)
+ if (Level == OptimizationLevel::O0)
return MPM;
// Force any function attributes we want the rest of the pipeline to observe.
@@ -1148,10 +1344,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) {
- assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
return buildPerModuleDefaultPipeline(Level, DebugLogging,
- /* LTOPreLink */true);
+ /* LTOPreLink */ true);
}
ModulePassManager
@@ -1159,11 +1356,14 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
ModuleSummaryIndex *ExportSummary) {
ModulePassManager MPM(DebugLogging);
- if (Level == O0) {
+ if (Level == OptimizationLevel::O0) {
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
return MPM;
}
@@ -1188,7 +1388,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
- if (Level > 1) {
+ if (Level.getSpeedupLevel() > 1) {
FunctionPassManager EarlyFPM(DebugLogging);
EarlyFPM.addPass(CallSiteSplittingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
@@ -1202,11 +1402,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
- MPM.addPass(IPSCCPPass());
+ MPM.addPass(IPSCCPPass());
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
}
// Now deduce any function attributes based in the current code.
@@ -1226,10 +1426,14 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
// Stop here at -O1.
- if (Level == 1) {
+ if (Level == OptimizationLevel::O1) {
// The LowerTypeTestsPass needs to run to lower type metadata and the
// type.test intrinsics. The pass does nothing if CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO
+ // pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
return MPM;
}
@@ -1251,7 +1455,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM(DebugLogging);
- if (Level == O3)
+ if (Level == OptimizationLevel::O3)
PeepholeFPM.addPass(AggressiveInstCombinePass());
PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1263,8 +1467,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
- InlinerPass(getInlineParamsFromOptLevel(Level))));
+ MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level),
+ DebugLogging));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
@@ -1357,6 +1561,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Enable splitting late in the FullLTO post-link pipeline. This is done in
// the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
@@ -1635,6 +1842,49 @@ Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
}
return Result;
}
+
+/// Parses a ';'-separated list of GVN pass parameters into a GVNOptions.
+///
+/// Recognized names are "pre", "load-pre" and "memdep". A leading "no-" on a
+/// name turns the corresponding option off instead of on. Any other name
+/// produces a StringError.
+Expected<GVNOptions> parseGVNOptions(StringRef Params) {
+  GVNOptions Opts;
+  while (!Params.empty()) {
+    // Peel off the next ';'-delimited token; the remainder stays in Params.
+    StringRef Name;
+    std::tie(Name, Params) = Params.split(';');
+
+    // consume_front() strips the "no-" prefix in place, so Name is the bare
+    // option name from here on (including in the error message below).
+    const bool TurnOn = !Name.consume_front("no-");
+    if (Name == "pre") {
+      Opts.setPRE(TurnOn);
+      continue;
+    }
+    if (Name == "load-pre") {
+      Opts.setLoadPRE(TurnOn);
+      continue;
+    }
+    if (Name == "memdep") {
+      Opts.setMemDep(TurnOn);
+      continue;
+    }
+    return make_error<StringError>(
+        formatv("invalid GVN pass parameter '{0}' ", Name).str(),
+        inconvertibleErrorCode());
+  }
+  return Opts;
+}
+
+/// Parses a ';'-separated list of StackLifetime parameters into a liveness
+/// kind.
+///
+/// Accepts "may" and "must"; if several are given, the last one wins. An empty
+/// list yields LivenessType::May. Any other name produces a StringError.
+Expected<StackLifetime::LivenessType>
+parseStackLifetimeOptions(StringRef Params) {
+  using LivenessType = StackLifetime::LivenessType;
+
+  LivenessType Kind = LivenessType::May;
+  while (!Params.empty()) {
+    // Peel off the next ';'-delimited token; the remainder stays in Params.
+    StringRef Name;
+    std::tie(Name, Params) = Params.split(';');
+
+    if (Name == "may") {
+      Kind = LivenessType::May;
+      continue;
+    }
+    if (Name == "must") {
+      Kind = LivenessType::Must;
+      continue;
+    }
+    return make_error<StringError>(
+        formatv("invalid StackLifetime parameter '{0}' ", Name).str(),
+        inconvertibleErrorCode());
+  }
+  return Kind;
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
@@ -1887,13 +2137,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
- .Case("O0", O0)
- .Case("O1", O1)
- .Case("O2", O2)
- .Case("O3", O3)
- .Case("Os", Os)
- .Case("Oz", Oz);
- if (L == O0) {
+ .Case("O0", OptimizationLevel::O0)
+ .Case("O1", OptimizationLevel::O1)
+ .Case("O2", OptimizationLevel::O2)
+ .Case("O3", OptimizationLevel::O3)
+ .Case("Os", OptimizationLevel::Os)
+ .Case("Oz", OptimizationLevel::Oz);
+ if (L == OptimizationLevel::O0) {
// Add instrumentation PGO passes -- at O0 we can still do PGO.
if (PGOOpt && Matches[1] != "thinlto" &&
(PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1903,6 +2153,20 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
/* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
/* IsCS */ false, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
+
+ // For IR that makes use of coroutines intrinsics, coroutine passes must
+ // be run, even at -O0.
+ if (PTO.Coroutines) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+
+ CGSCCPassManager CGPM(DebugLogging);
+ CGPM.addPass(CoroSplitPass());
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ }
+
// Do nothing else at all!
return Error::success();
}
@@ -1910,8 +2174,10 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
// This is consistent with old pass manager invoked via opt, but
// inconsistent with clang. Clang doesn't enable loop vectorization
// but does enable slp vectorization at Oz.
- PTO.LoopVectorization = L > O1 && L < Oz;
- PTO.SLPVectorization = L > O1 && L < Oz;
+ PTO.LoopVectorization =
+ L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
+ PTO.SLPVectorization =
+ L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
@@ -2408,3 +2674,28 @@ Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
return Error::success();
}
+
+/// Returns true if \p PassName equals the NAME of any FUNCTION_ALIAS_ANALYSIS
+/// entry expanded from PassRegistry.def, and false otherwise.
+bool PassBuilder::isAAPassName(StringRef PassName) {
+// Each FUNCTION_ALIAS_ANALYSIS entry in the registry expands to a comparison
+// against PassName that returns as soon as a match is found.
+// NOTE(review): only FUNCTION_ALIAS_ANALYSIS is overridden here; entries
+// registered through other macros (e.g. MODULE_ALIAS_ANALYSIS) presumably
+// expand to nothing and are not matched -- confirm this is intended.
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
+ if (PassName == NAME) \
+ return true;
+#include "PassRegistry.def"
+ // No registry entry matched.
+ return false;
+}
+
+/// Returns true if \p PassName equals the NAME of any module, function, loop
+/// or CGSCC analysis entry expanded from PassRegistry.def, and false
+/// otherwise.
+bool PassBuilder::isAnalysisPassName(StringRef PassName) {
+// Each analysis entry in the registry expands to a comparison against
+// PassName that returns as soon as a match is found. The macro names must
+// match the ones PassRegistry.def uses exactly (MODULE_ANALYSIS,
+// FUNCTION_ANALYSIS, LOOP_ANALYSIS, CGSCC_ANALYSIS); a misspelled macro is
+// never expanded and its entries would silently be skipped.
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
+  if (PassName == NAME)                                                        \
+    return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
+  if (PassName == NAME)                                                        \
+    return true;
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
+  if (PassName == NAME)                                                        \
+    return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
+  if (PassName == NAME)                                                        \
+    return true;
+#include "PassRegistry.def"
+  // No registry entry matched.
+  return false;
+}
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 355dd6f968122..dfdfc3d05976a 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -27,6 +27,7 @@ MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
+MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -57,6 +58,7 @@ MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
+MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrorderfile", InstrOrderFilePass())
MODULE_PASS("instrprof", InstrProfiling())
@@ -71,7 +73,6 @@ MODULE_PASS("partial-inliner", PartialInlinerPass())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
-MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs()))
MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print", PrintModulePass(dbgs()))
@@ -82,6 +83,8 @@ MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
+MODULE_PASS("scc-oz-module-inliner",
+ buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging))
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
@@ -108,7 +111,10 @@ CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
CGSCC_PASS("inline", InlinerPass())
+CGSCC_PASS("openmpopt", OpenMPOptPass())
+CGSCC_PASS("coro-split", CoroSplitPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
@@ -126,6 +132,8 @@ FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
FUNCTION_ANALYSIS("loops", LoopAnalysis())
FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
FUNCTION_ANALYSIS("da", DependenceAnalysis())
+FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis())
+FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
@@ -160,6 +168,8 @@ FUNCTION_PASS("aa-eval", AAEvaluator())
FUNCTION_PASS("adce", ADCEPass())
FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass())
FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass())
+FUNCTION_PASS("assume-builder", AssumeBuilderPass())
+FUNCTION_PASS("assume-simplify", AssumeSimplifyPass())
FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
FUNCTION_PASS("bdce", BDCEPass())
FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
@@ -167,6 +177,9 @@ FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("chr", ControlHeightReductionPass())
+FUNCTION_PASS("coro-early", CoroEarlyPass())
+FUNCTION_PASS("coro-elide", CoroElidePass())
+FUNCTION_PASS("coro-cleanup", CoroCleanupPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
@@ -182,6 +195,7 @@ FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("instcombine", InstCombinePass())
FUNCTION_PASS("instsimplify", InstSimplifyPass())
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+FUNCTION_PASS("irce", IRCEPass())
FUNCTION_PASS("float2int", Float2IntPass())
FUNCTION_PASS("no-op-function", NoOpFunctionPass())
FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
@@ -193,10 +207,10 @@ FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass())
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
-FUNCTION_PASS("gvn", GVN())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
+FUNCTION_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
@@ -208,7 +222,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
-FUNCTION_PASS("loop-fuse", LoopFusePass())
+FUNCTION_PASS("loop-fusion", LoopFusePass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
@@ -220,23 +234,25 @@ FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
+FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
+FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
FUNCTION_PASS("scalarizer", ScalarizerPass())
FUNCTION_PASS("sccp", SCCPPass())
+FUNCTION_PASS("simplifycfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
-FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
-FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass())
+FUNCTION_PASS("vector-combine", VectorCombinePass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
@@ -257,7 +273,7 @@ FUNCTION_PASS("tsan", ThreadSanitizerPass())
#ifndef FUNCTION_PASS_WITH_PARAMS
#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#endif
-FUNCTION_PASS_WITH_PARAMS("unroll",
+FUNCTION_PASS_WITH_PARAMS("loop-unroll",
[](LoopUnrollOptions Opts) {
return LoopUnrollPass(Opts);
},
@@ -282,6 +298,16 @@ FUNCTION_PASS_WITH_PARAMS("mldst-motion",
return MergedLoadStoreMotionPass(Opts);
},
parseMergedLoadStoreMotionOptions)
+FUNCTION_PASS_WITH_PARAMS("gvn",
+ [](GVNOptions Opts) {
+ return GVN(Opts);
+ },
+ parseGVNOptions)
+FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
+ [](StackLifetime::LivenessType Type) {
+ return StackLifetimePrinterPass(dbgs(), Type);
+ },
+ parseStackLifetimeOptions)
#undef FUNCTION_PASS_WITH_PARAMS
#ifndef LOOP_ANALYSIS
@@ -297,6 +323,7 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#ifndef LOOP_PASS
#define LOOP_PASS(NAME, CREATE_PASS)
#endif
+LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("licm", LICMPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
@@ -306,16 +333,17 @@ LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
-LOOP_PASS("strength-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
-LOOP_PASS("irce", IRCEPass())
-LOOP_PASS("unroll-full", LoopFullUnrollPass())
+LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("print<loopnest>", LoopNestPrinterPass(dbgs()))
LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(dbgs()))
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("guard-widening", GuardWideningPass())
+LOOP_PASS("simple-loop-unswitch", SimpleLoopUnswitchPass())
#undef LOOP_PASS
#ifndef LOOP_PASS_WITH_PARAMS
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 5cf0ca8e28f69..1e1a6b98a65a3 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -70,16 +70,24 @@ Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
llvm_unreachable("Unknown IR unit");
}
-void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) {
- dbgs() << Banner << Extra << "\n";
- M->print(dbgs(), nullptr, false);
-}
/// Prints \p Banner, \p Extra and the IR of \p F to dbgs(), but only when
/// llvm::isFunctionInPrintList() accepts the function's name.
void printIR(const Function *F, StringRef Banner,
             StringRef Extra = StringRef()) {
  if (llvm::isFunctionInPrintList(F->getName())) {
    // Streaming through the Value base selects the Value printing overload.
    const Value &AsValue = static_cast<const Value &>(*F);
    dbgs() << Banner << Extra << "\n" << AsValue;
  }
}
+
+/// Prints module \p M to dbgs().
+///
+/// When llvm::isFunctionInPrintList("*") or llvm::forcePrintModuleIR() holds,
+/// the whole module is printed once under \p Banner and \p Extra. Otherwise
+/// every function in the module is forwarded to the Function overload of
+/// printIR, which applies its own print-list filtering.
+void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) {
+  const bool WholeModule =
+      llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR();
+
+  if (!WholeModule) {
+    for (const Function &Fn : M->functions())
+      printIR(&Fn, Banner, Extra);
+    return;
+  }
+
+  dbgs() << Banner << Extra << "\n";
+  M->print(dbgs(), nullptr, false);
+}
+
void printIR(const LazyCallGraph::SCC *C, StringRef Banner,
StringRef Extra = StringRef()) {
bool BannerPrinted = false;
@@ -98,7 +106,7 @@ void printIR(const Loop *L, StringRef Banner) {
const Function *F = L->getHeader()->getParent();
if (!llvm::isFunctionInPrintList(F->getName()))
return;
- llvm::printLoop(const_cast<Loop &>(*L), dbgs(), Banner);
+ llvm::printLoop(const_cast<Loop &>(*L), dbgs(), std::string(Banner));
}
/// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into
@@ -127,7 +135,7 @@ void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) {
if (any_isa<const LazyCallGraph::SCC *>(IR)) {
const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
assert(C && "scc should be valid for printing");
- std::string Extra = formatv(" (scc: {0})", C->getName());
+ std::string Extra = std::string(formatv(" (scc: {0})", C->getName()));
printIR(C, Banner, Extra);
return;
}