summary refs log tree commit diff
path: root/lib/Passes/PassBuilder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Passes/PassBuilder.cpp')
-rw-r--r-- lib/Passes/PassBuilder.cpp 155
1 files changed, 108 insertions, 47 deletions
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index cbae16a04ca6..eb04dcc8b6ef 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/RegionInfo.h"
@@ -59,7 +60,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/GCOVProfiler.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
@@ -79,13 +81,15 @@
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
-#include "llvm/Transforms/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
-#include "llvm/Transforms/PGOInstrumentation.h"
-#include "llvm/Transforms/SampleProfile.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar/BDCE.h"
@@ -101,6 +105,8 @@
#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
+#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
@@ -116,6 +122,7 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
@@ -144,12 +151,10 @@
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
-#include "llvm/Transforms/Utils/SimplifyInstructions.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
-
using namespace llvm;
static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
@@ -176,6 +181,15 @@ static cl::opt<bool> EnableGVNSink(
"enable-npm-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+static cl::opt<bool> EnableUnrollAndJam(
+ "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden,
+ cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)"));
+
+static cl::opt<bool> EnableSyntheticCounts(
+ "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run synthetic function entry count generation "
+ "pass"));
+
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
@@ -196,7 +210,7 @@ static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
namespace {
-/// \brief No-op module pass which does nothing.
+/// No-op module pass which does nothing.
struct NoOpModulePass {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
@@ -204,7 +218,7 @@ struct NoOpModulePass {
static StringRef name() { return "NoOpModulePass"; }
};
-/// \brief No-op module analysis.
+/// No-op module analysis.
class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
friend AnalysisInfoMixin<NoOpModuleAnalysis>;
static AnalysisKey Key;
@@ -215,7 +229,7 @@ public:
static StringRef name() { return "NoOpModuleAnalysis"; }
};
-/// \brief No-op CGSCC pass which does nothing.
+/// No-op CGSCC pass which does nothing.
struct NoOpCGSCCPass {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
LazyCallGraph &, CGSCCUpdateResult &UR) {
@@ -224,7 +238,7 @@ struct NoOpCGSCCPass {
static StringRef name() { return "NoOpCGSCCPass"; }
};
-/// \brief No-op CGSCC analysis.
+/// No-op CGSCC analysis.
class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
static AnalysisKey Key;
@@ -237,7 +251,7 @@ public:
static StringRef name() { return "NoOpCGSCCAnalysis"; }
};
-/// \brief No-op function pass which does nothing.
+/// No-op function pass which does nothing.
struct NoOpFunctionPass {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
return PreservedAnalyses::all();
@@ -245,7 +259,7 @@ struct NoOpFunctionPass {
static StringRef name() { return "NoOpFunctionPass"; }
};
-/// \brief No-op function analysis.
+/// No-op function analysis.
class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
static AnalysisKey Key;
@@ -256,7 +270,7 @@ public:
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
-/// \brief No-op loop pass which does nothing.
+/// No-op loop pass which does nothing.
struct NoOpLoopPass {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
LoopStandardAnalysisResults &, LPMUpdater &) {
@@ -265,7 +279,7 @@ struct NoOpLoopPass {
static StringRef name() { return "NoOpLoopPass"; }
};
-/// \brief No-op loop analysis.
+/// No-op loop analysis.
class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
friend AnalysisInfoMixin<NoOpLoopAnalysis>;
static AnalysisKey Key;
@@ -358,6 +372,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(SimplifyCFGPass());
+ if (Level == O3)
+ FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(InstCombinePass());
if (!isOptimizingForSize(Level))
@@ -381,13 +397,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be replaced by
- // loop pass equivalenst but those aren't ready yet. Specifically,
- // `SimplifyCFGPass` and `InstCombinePass` are used. We have
- // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
- // the other we have is `LoopInstSimplify`.
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+ // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+ // `LoopInstSimplify`.
LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
LPM1.addPass(LICMPass());
@@ -412,10 +436,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
// Eliminate redundancies.
if (Level != O1) {
@@ -450,7 +474,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+ FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@@ -510,7 +534,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationGen());
FunctionPassManager FPM;
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
@@ -580,7 +605,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
true));
}
- // Interprocedural constant propagation now that basic cleanup has occured
+ // Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
// years, it should be re-analyzed.
@@ -621,6 +646,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
+ // Synthesize function entry counts for non-PGO compilation.
+ if (EnableSyntheticCounts && !PGOOpt)
+ MPM.addPass(SyntheticCountsPropagation());
+
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
@@ -730,7 +759,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -774,10 +804,15 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam) {
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
+ }
OptimizePM.addPass(LoopUnrollPass(Level));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -790,7 +825,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(LoopSinkPass());
// And finally clean up LCSSA form before generating code.
- OptimizePM.addPass(InstSimplifierPass());
+ OptimizePM.addPass(InstSimplifyPass());
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
@@ -809,6 +844,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+ MPM.addPass(CGProfilePass());
+
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
@@ -829,6 +866,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM);
+
if (PGOOpt && PGOOpt->SamplePGOSupport)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
@@ -855,6 +896,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
if (PGOOpt && PGOOpt->SamplePGOSupport)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM);
+
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
@@ -877,15 +922,28 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
return MPM;
}
-ModulePassManager
-PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
- bool DebugLogging) {
- // FIXME: The summary index is not hooked in the new pass manager yet.
- // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest
- // here.
-
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+ OptimizationLevel Level, bool DebugLogging,
+ const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM(DebugLogging);
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+ MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
@@ -916,8 +974,9 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
return buildPerModuleDefaultPipeline(Level, DebugLogging);
}
-ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+ ModuleSummaryIndex *ExportSummary) {
assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
@@ -967,11 +1026,15 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Run whole program optimization of virtual call when the list of callees
// is fixed.
- MPM.addPass(WholeProgramDevirtPass());
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
// Stop here at -O1.
- if (Level == 1)
+ if (Level == 1) {
+ // The LowerTypeTestsPass needs to run to lower type metadata and the
+ // type.test intrinsics. The pass does nothing if CFI is disabled.
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
return MPM;
+ }
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
@@ -991,6 +1054,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM(DebugLogging);
+ if (Level == O3)
+ PeepholeFPM.addPass(AggressiveInstCombinePass());
PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1078,12 +1143,7 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
// to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
- // Enable once we add support for the summary in the new PM.
-#if 0
- MPM.addPass(LowerTypeTestsPass(Summary ? PassSummaryAction::Export :
- PassSummaryAction::None,
- Summary));
-#endif
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
@@ -1395,12 +1455,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
} else if (Matches[1] == "thinlto-pre-link") {
MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "thinlto") {
- MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging));
+ MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
} else if (Matches[1] == "lto-pre-link") {
MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
- MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
+ MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
}
return true;
}
@@ -1533,7 +1593,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
DebugLogging))
return false;
// Add the nested pass manager with the appropriate adaptor.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
return true;
}
if (auto Count = parseRepeatPassName(Name)) {