diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Passes/PassBuilder.cpp | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Notes
Diffstat (limited to 'lib/Passes/PassBuilder.cpp')
-rw-r--r-- | lib/Passes/PassBuilder.cpp | 375 |
1 files changed, 301 insertions, 74 deletions
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 5ec94ea6f40a..e2b2a2b25268 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -1,9 +1,8 @@ //===- Parsing, selection, and construction of pass pipelines -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -57,6 +56,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/SafepointIRVerifier.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" @@ -65,6 +65,7 @@ #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" @@ -89,14 +90,18 @@ #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/CGProfile.h" #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" -#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" @@ -120,6 +125,7 @@ #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopLoadElimination.h" @@ -134,9 +140,11 @@ #include "llvm/Transforms/Scalar/LowerAtomic.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" @@ -181,10 +189,6 @@ static cl::opt<bool> cl::Hidden, cl::ZeroOrMore, cl::desc("Run NewGVN instead of GVN")); -static cl::opt<bool> EnableEarlyCSEMemSSA( - "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden, - cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)")); - static cl::opt<bool> EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -205,11 +209,26 @@ static cl::opt<bool> EnableSyntheticCounts( static Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); +// This option is used in simplifying testing SampleFDO optimizations for +// profile loading. static cl::opt<bool> EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); +PipelineTuningOptions::PipelineTuningOptions() { + LoopInterleaving = EnableLoopInterleaving; + LoopVectorization = EnableLoopVectorization; + SLPVectorization = RunSLPVectorization; + LoopUnrolling = true; + ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; +} + extern cl::opt<bool> EnableHotColdSplit; +extern cl::opt<bool> EnableOrderFileInstrumentation; + +extern cl::opt<bool> FlattenedProfileUsed; static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { @@ -371,7 +390,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SROA()); // Catch trivial redundancies - FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA)); + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. if (EnableGVNHoist) @@ -401,7 +420,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. - if (PGOOpt && !PGOOpt->ProfileUseFile.empty() && + if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && !isOptimizingForSize(Level)) FPM.addPass(PGOMemOPSizeOpt()); @@ -432,7 +451,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); - LPM1.addPass(LICMPass()); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); @@ -444,9 +463,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. - if (Phase != ThinLTOPhase::PreLink || - !PGOOpt || PGOOpt->SampleProfileFile.empty()) - LPM2.addPass(LoopFullUnrollPass(Level)); + if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) && + PTO.LoopUnrolling) + LPM2.addPass( + LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); @@ -492,7 +513,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -505,7 +528,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(FPM, Level); if (EnableCHR && Level == O3 && PGOOpt && - (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty())) + (PGOOpt->Action == PGOOptions::IRUse || + PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); return FPM; @@ -513,15 +537,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, PassBuilder::OptimizationLevel Level, - bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile) { // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification - // at -Os/Oz. - if (!isOptimizingForSize(Level)) { + // at -Os/Oz. We will not do this inline for context sensistive PGO (when + // IsCS is true). + if (!isOptimizingForSize(Level) && !IsCS) { InlineParams IP; // In the old pass manager, this is a cl::opt. Should still this be one? @@ -554,7 +578,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(GlobalDCEPass()); if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen()); + MPM.addPass(PGOInstrumentationGen(IsCS)); FunctionPassManager FPM; FPM.addPass( @@ -563,14 +587,17 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, // Add the profile lowering pass. InstrProfOptions Options; - if (!ProfileGenFile.empty()) - Options.InstrProfileOutput = ProfileGenFile; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; Options.DoCounterPromotion = true; - MPM.addPass(InstrProfiling(Options)); + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); + } else if (!ProfileFile.empty()) { + MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); } - - if (!ProfileUseFile.empty()) - MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile)); } static InlineParams @@ -587,6 +614,32 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, bool DebugLogging) { ModulePassManager MPM(DebugLogging); + bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); + + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + bool LoadSampleProfile = + HasSampleProfile && + !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink); + + // During the ThinLTO backend phase we perform early indirect call promotion + // here, before globalopt. Otherwise imported available_externally functions + // look unreferenced and are removed. If we are going to load the sample + // profile then defer until later. + // TODO: See if we can move later and consolidate with the location where + // we perform ICP when we are loading a sample profile. + // TODO: We pass HasSampleProfile (whether there was a sample profile file + // passed to the compile) to the SamplePGO flag of ICP. This is used to + // determine whether the new direct calls are annotated with prof metadata. + // Ideally this should be determined from whether the IR is annotated with + // sample profile, and not whether the a sample profile was provided on the + // command line. E.g. for flattened profiles where we will not be reloading + // the sample profile in the ThinLTO backend, we ideally shouldn't have to + // provide the sample profile file. + if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile) + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); + // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); @@ -607,17 +660,19 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // More details about SamplePGO design can be found in: // https://research.google.com/pubs/pub45290.html // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. - if (PGOOpt && !PGOOpt->SampleProfileFile.empty() && - Phase == ThinLTOPhase::PostLink) + if (LoadSampleProfile) EarlyFPM.addPass(InstCombinePass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + if (LoadSampleProfile) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase == ThinLTOPhase::PreLink)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) @@ -626,7 +681,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // imported available_externally functions look unreferenced and are // removed. MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, - true)); + true /* SamplePGO */)); } // Interprocedural constant propagation now that basic cleanup has occurred @@ -664,12 +719,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Add all the requested passes for instrumentation PGO, if requested. if (PGOOpt && Phase != ThinLTOPhase::PostLink && - (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile, + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) { + addPGOInstrPasses(MPM, DebugLogging, Level, + /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, + /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); MPM.addPass(PGOIndirectCallPromotion(false, false)); } + if (PGOOpt && Phase != ThinLTOPhase::PostLink && + PGOOpt->CSAction == PGOOptions::CSIRInstr) + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) @@ -700,8 +760,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && - PGOOpt && !PGOOpt->SampleProfileFile.empty()) + if (Phase == ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; MainCGPipeline.addPass(InlinerPass(IP)); @@ -718,11 +778,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase, DebugLogging))); - // We only want to do hot cold splitting once for ThinLTO, during the - // post-link ThinLTO. - if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink) - MPM.addPass(HotColdSplittingPass()); - for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); @@ -738,9 +793,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, return MPM; } -ModulePassManager -PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging) { +ModulePassManager PassBuilder::buildModuleOptimizationPipeline( + OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { ModulePassManager MPM(DebugLogging); // Optimize globals now that the module is fully simplified. @@ -759,14 +813,34 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // available externally globals. Eventually they will be suppressed during // codegen, but eliminating here enables more opportunity for GlobalDCE as it // may make globals referenced by available external functions dead and saves - // running remaining passes on the eliminated functions. - MPM.addPass(EliminateAvailableExternallyPass()); + // running remaining passes on the eliminated functions. These should be + // preserved during prelinking for link-time inlining decisions. + if (!LTOPreLink) + MPM.addPass(EliminateAvailableExternallyPass()); + + if (EnableOrderFileInstrumentation) + MPM.addPass(InstrOrderFilePass()); // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as + // cross-module inline has not been done yet. The context sensitive + // instrumentation is after all the inlines are done. + if (!LTOPreLink && PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal @@ -799,7 +873,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(LoopDistributePass()); // Now run the core loop vectorizer. - OptimizePM.addPass(LoopVectorizePass()); + OptimizePM.addPass(LoopVectorizePass( + LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. @@ -824,7 +899,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. - OptimizePM.addPass(SLPVectorizerPass()); + if (PTO.SLPVectorization) + OptimizePM.addPass(SLPVectorizerPass()); OptimizePM.addPass(InstCombinePass()); @@ -839,16 +915,26 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass( createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); } - OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level))); + if (PTO.LoopUnrolling) + OptimizePM.addPass(LoopUnrollPass( + LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. OptimizePM.addPass(AlignmentFromAssumptionsPass()); + // Split out cold code. Splitting is done late to avoid hiding context from + // other optimizations and inadvertently regressing performance. The tradeoff + // is that this has a higher code size cost than splitting early. + if (EnableHotColdSplit && !LTOPreLink) + MPM.addPass(HotColdSplittingPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -869,7 +955,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Optimize PHIs by speculating around them when profitable. Note that this // pass needs to be run after any PRE or similar pass as it is essentially - // inserting redudnancies into the progrem. This even includes SimplifyCFG. + // inserting redundancies into the program. This even includes SimplifyCFG. OptimizePM.addPass(SpeculateAroundPHIsPass()); for (auto &C : OptimizerLastEPCallbacks) @@ -892,7 +978,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, bool LTOPreLink) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -912,7 +998,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, DebugLogging)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink)); return MPM; } @@ -974,22 +1060,19 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( // // Also, WPD has access to more precise information than ICP and can // devirtualize more effectively, so it should operate on the IR first. + // + // The WPD and LowerTypeTest passes need to run at -O0 to lower type + // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); } + if (Level == O0) + return MPM; + // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); - // During the ThinLTO backend phase we perform early indirect call promotion - // here, before globalopt. Otherwise imported available_externally functions - // look unreferenced and are removed. - // FIXME: move this into buildModuleSimplificationPipeline to merge the logic - // with SamplePGO. - if (!PGOOpt || PGOOpt->SampleProfileFile.empty()) - MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, - false /* SamplePGO */)); - // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink, DebugLogging)); @@ -1005,20 +1088,31 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! - return buildPerModuleDefaultPipeline(Level, DebugLogging); + return buildPerModuleDefaultPipeline(Level, DebugLogging, + /* LTOPreLink */true); } ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, ModuleSummaryIndex *ExportSummary) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + if (Level == O0) { + // The WPD and LowerTypeTest passes need to run at -O0 to lower type + // metadata and intrinsics. + MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); + MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + return MPM; + } + + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, false /* ThinLTOPhase::PreLink */)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); } // Remove unused virtual tables to improve the quality of code generated by @@ -1042,7 +1136,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. @@ -1062,7 +1156,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); - // Use inragne annotations on GEP indices to split globals where beneficial. + // Use in-range annotations on GEP indices to split globals where beneficial. MPM.addPass(GlobalSplitPass()); // Run whole program optimization of virtual call when the list of callees @@ -1124,9 +1218,26 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, FPM.addPass(JumpThreadingPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. + if (PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Break up allocas FPM.addPass(SROA()); + // LTO provides additional opportunities for tailcall elimination due to + // link-time inlining, and visibility of nocapture attribute. + FPM.addPass(TailCallElimPass()); + // Run a few AA driver optimizations here and now to cleanup the code. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); @@ -1138,7 +1249,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // FIXME: once we fix LoopPass Manager, add LICM here. // FIXME: once we provide support for enabling MLSM, add it here. - // FIXME: once we provide support for enabling NewGVN, add it here. if (RunNewGVN) MainFPM.addPass(NewGVNPass()); else @@ -1151,7 +1261,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, MainFPM.addPass(DSEPass()); // FIXME: at this point, we run a bunch of loop passes: - // indVarSimplify, loopDeletion, loopInterchange, loopUnrool, + // indVarSimplify, loopDeletion, loopInterchange, loopUnroll, // loopVectorize. Enable them once the remaining issue with LPM // are sorted out. @@ -1186,6 +1296,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); + // Enable splitting late in the FullLTO post-link pipeline. This is done in + // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). + if (EnableHotColdSplit) + MPM.addPass(HotColdSplittingPass()); + // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); @@ -1196,7 +1311,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, // Now that we have optimized the program, discard unreachable functions. MPM.addPass(GlobalDCEPass()); - // FIXME: Enable MergeFuncs, conditionally, after ported, maybe. + // FIXME: Maybe enable MergeFuncs conditionally after it's ported. return MPM; } @@ -1326,6 +1441,107 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) { return UnrollOpts; } +Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) { + MemorySanitizerOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName == "recover") { + Result.Recover = true; + } else if (ParamName == "kernel") { + Result.Kernel = true; + } else if (ParamName.consume_front("track-origins=")) { + if (ParamName.getAsInteger(0, Result.TrackOrigins)) + return make_error<StringError>( + formatv("invalid argument to MemorySanitizer pass track-origins " + "parameter: '{0}' ", + ParamName) + .str(), + inconvertibleErrorCode()); + } else { + return make_error<StringError>( + formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName) + .str(), + inconvertibleErrorCode()); + } + } + return Result; +} + +/// Parser of parameters for SimplifyCFG pass. +Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) { + SimplifyCFGOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "forward-switch-cond") { + Result.forwardSwitchCondToPhi(Enable); + } else if (ParamName == "switch-to-lookup") { + Result.convertSwitchToLookupTable(Enable); + } else if (ParamName == "keep-loops") { + Result.needCanonicalLoops(Enable); + } else if (ParamName == "sink-common-insts") { + Result.sinkCommonInsts(Enable); + } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) { + APInt BonusInstThreshold; + if (ParamName.getAsInteger(0, BonusInstThreshold)) + return make_error<StringError>( + formatv("invalid argument to SimplifyCFG pass bonus-threshold " + "parameter: '{0}' ", + ParamName).str(), + inconvertibleErrorCode()); + Result.bonusInstThreshold(BonusInstThreshold.getSExtValue()); + } else { + return make_error<StringError>( + formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + +/// Parser of parameters for LoopVectorize pass. +Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) { + LoopVectorizeOptions Opts; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "interleave-forced-only") { + Opts.setInterleaveOnlyWhenForced(Enable); + } else if (ParamName == "vectorize-forced-only") { + Opts.setVectorizeOnlyWhenForced(Enable); + } else { + return make_error<StringError>( + formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Opts; +} + +Expected<bool> parseLoopUnswitchOptions(StringRef Params) { + bool Result = false; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "nontrivial") { + Result = Enable; + } else { + return make_error<StringError>( + formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName) + .str(), + inconvertibleErrorCode()); + } + } + return Result; +} } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1447,6 +1663,9 @@ static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) \ + return true; #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; @@ -1834,6 +2053,14 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, LPM.addPass(CREATE_PASS); \ return Error::success(); \ } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + LPM.addPass(CREATE_PASS(Params.get())); \ + return Error::success(); \ + } #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ LPM.addPass(RequireAnalysisPass< \ |