diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO/PassManagerBuilder.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 166 |
1 files changed, 115 insertions, 51 deletions
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9c992830879a..d73d42c52074 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" @@ -46,6 +47,7 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; @@ -98,8 +100,8 @@ static cl::opt<bool> EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable performing ThinLTO.")); -cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, - cl::desc("Enable hot-cold splitting pass")); +cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), + cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); static cl::opt<bool> UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, @@ -115,7 +117,7 @@ static cl::opt<int> PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, + "enable-gvn-hoist", cl::init(false), cl::ZeroOrMore, cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> @@ -129,7 +131,7 @@ static cl::opt<bool> EnableSimpleLoopUnswitch( "cleanup passes integrated into the loop pass manager pipeline.")); static cl::opt<bool> EnableGVNSink( - "enable-gvn-sink", cl::init(false), cl::Hidden, + "enable-gvn-sink", cl::init(false), cl::ZeroOrMore, cl::desc("Enable the GVN sinking pass (default = off)")); // This option is used in simplifying testing SampleFDO optimizations for @@ -151,15 +153,29 @@ static cl::opt<bool> EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); +cl::opt<AttributorRunOption> AttributorRun( + "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), + cl::desc("Enable the attributor inter-procedural deduction pass."), + cl::values(clEnumValN(AttributorRunOption::ALL, "all", + "enable all attributor runs"), + clEnumValN(AttributorRunOption::MODULE, "module", + "enable module-wide attributor runs"), + clEnumValN(AttributorRunOption::CGSCC, "cgscc", + "enable call graph SCC attributor runs"), + clEnumValN(AttributorRunOption::NONE, "none", + "disable attributor runs"))); + +extern cl::opt<bool> EnableKnowledgeRetention; + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; DisableUnrollLoops = false; - SLPVectorize = RunSLPVectorization; - LoopVectorize = EnableLoopVectorization; - LoopsInterleaved = EnableLoopInterleaving; + SLPVectorize = false; + LoopVectorize = true; + LoopsInterleaved = true; RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; LicmMssaOptCap = SetLicmMssaOptCap; @@ -179,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() { PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; + CallGraphProfile = true; } PassManagerBuilder::~PassManagerBuilder() { @@ -187,8 +204,13 @@ PassManagerBuilder::~PassManagerBuilder() { } /// Set of global extensions, automatically added as part of the standard set. -static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, - PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; +static ManagedStatic< + SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy, + PassManagerBuilder::ExtensionFn, + PassManagerBuilder::GlobalExtensionID>, + 8>> + GlobalExtensions; +static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; /// Check if GlobalExtensions is constructed and not empty. /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger @@ -197,10 +219,29 @@ static bool GlobalExtensionsNotEmpty() { return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); } -void PassManagerBuilder::addGlobalExtension( - PassManagerBuilder::ExtensionPointTy Ty, - PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); +PassManagerBuilder::GlobalExtensionID +PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + auto ExtensionID = GlobalExtensionsCounter++; + GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); + return ExtensionID; +} + +void PassManagerBuilder::removeGlobalExtension( + PassManagerBuilder::GlobalExtensionID ExtensionID) { + // RegisterStandardPasses may try to call this function after GlobalExtensions + // has already been destroyed; doing so should not generate an error. + if (!GlobalExtensions.isConstructed()) + return; + + auto GlobalExtension = + llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { + return std::get<2>(elem) == ExtensionID; + }); + assert(GlobalExtension != GlobalExtensions->end() && + "The extension ID to be removed should always be valid."); + + GlobalExtensions->erase(GlobalExtension); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { @@ -211,8 +252,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { if (GlobalExtensionsNotEmpty()) { for (auto &Ext : *GlobalExtensions) { - if (Ext.first == ETy) - Ext.second(*this, PM); + if (std::get<0>(Ext) == ETy) + std::get<1>(Ext)(*this, PM); } } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) @@ -244,12 +285,6 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( PM.add(createScopedNoAliasAAWrapperPass()); } -void PassManagerBuilder::addInstructionCombiningPass( - legacy::PassManagerBase &PM) const { - bool ExpensiveCombines = OptLevel > 2; - PM.add(createInstructionCombiningPass(ExpensiveCombines)); -} - void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -327,6 +362,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + if (EnableKnowledgeRetention) + MPM.add(createAssumeSimplifyPass()); if (OptLevel > 1) { if (EnableGVNHoist) @@ -348,7 +385,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); @@ -383,7 +420,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. MPM.add(createCFGSimplificationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -414,7 +451,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1) { MPM.add(createJumpThreadingPass()); // Thread jumps @@ -432,7 +469,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (EnableCHR && OptLevel >= 3 && @@ -478,6 +515,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -511,9 +549,11 @@ void PassManagerBuilder::populateModulePassManager( // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) + if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); + } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -526,6 +566,10 @@ void PassManagerBuilder::populateModulePassManager( // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); + // Infer attributes on declarations, call sites, arguments, etc. + if (AttributorRun & AttributorRunOption::MODULE) + MPM.add(createAttributorLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); if (OptLevel > 2) @@ -534,16 +578,13 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); - // Infer attributes on declarations, call sites, arguments, etc. - MPM.add(createAttributorLegacyPass()); - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE @@ -574,6 +615,15 @@ void PassManagerBuilder::populateModulePassManager( RunInliner = true; } + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + if (AttributorRun & AttributorRunOption::CGSCC) + MPM.add(createAttributorCGSCCLegacyPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + MPM.add(createOpenMPOptLegacyPass()); + MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -705,7 +755,7 @@ void PassManagerBuilder::populateModulePassManager( // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. We want to track correllated @@ -715,11 +765,11 @@ void PassManagerBuilder::populateModulePassManager( // dead (or speculatable) control flows or more combining opportunities. MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); } // Cleanup after loop vectorization, etc. Simplification passes like CVP and @@ -736,8 +786,11 @@ void PassManagerBuilder::populateModulePassManager( } } + // Enhance/cleanup vector code. + MPM.add(createVectorCombinePass()); + addExtensionsToPM(EP_Peephole, MPM); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (EnableUnrollAndJam && !DisableUnrollLoops) { // Unroll and Jam. We do this before unroll but need to be in a separate @@ -752,7 +805,7 @@ void PassManagerBuilder::populateModulePassManager( if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the @@ -785,6 +838,10 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); + // Add Module flag "CG Profile" based on Branch Frequency Information. + if (CallGraphProfile) + MPM.add(createCGProfileLegacyPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -852,7 +909,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createCalledValuePropagationPass()); // Infer attributes on declarations, call sites, arguments, etc. - PM.add(createAttributorLegacyPass()); + if (AttributorRun & AttributorRunOption::MODULE) + PM.add(createAttributorLegacyPass()); } // Infer attributes about definitions. The readnone attribute in particular is @@ -890,7 +948,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // calls, etc, so let instcombine do this. if (OptLevel > 2) PM.add(createAggressiveInstCombinerPass()); - addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); // Inline small functions @@ -905,6 +963,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // CSFDO instrumentation and use pass. addPGOInstrPasses(PM, /* IsCS */ true); + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + if (AttributorRun & AttributorRunOption::CGSCC) + PM.add(createAttributorCGSCCLegacyPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + PM.add(createOpenMPOptLegacyPass()); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); @@ -915,7 +982,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. - addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -960,22 +1027,24 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - addInstructionCombiningPass(PM); // Initial cleanup + PM.add(createInstructionCombiningPass()); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants - addInstructionCombiningPass(PM); // Clean up again + PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information if (SLPVectorize) PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + PM.add(createVectorCombinePass()); // Clean up partial vectorization. + // After vectorization, assume intrinsics may tell us more about pointer // alignments. PM.add(createAlignmentFromAssumptionsPass()); // Cleanup and simplify the code after the scalar optimizations. - addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -1013,8 +1082,8 @@ void PassManagerBuilder::populateThinLTOPassManager( PM.add(createVerifierPass()); if (ImportSummary) { - // These passes import type identifier resolutions for whole-program - // devirtualization and CFI. They must run early because other passes may + // This pass imports type identifier resolutions for whole-program + // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // @@ -1062,6 +1131,9 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). + PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); @@ -1072,14 +1144,6 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { PM.add(createVerifierPass()); } -inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { - return reinterpret_cast<PassManagerBuilder*>(P); -} - -inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { - return reinterpret_cast<LLVMPassManagerBuilderRef>(P); -} - LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); |