diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp | 236 | 
1 files changed, 173 insertions, 63 deletions
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp index e2b2a2b25268..53b7db8689c4 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp @@ -27,6 +27,7 @@  #include "llvm/Analysis/CFLSteensAliasAnalysis.h"  #include "llvm/Analysis/CGSCCPassManager.h"  #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DDG.h"  #include "llvm/Analysis/DemandedBits.h"  #include "llvm/Analysis/DependenceAnalysis.h"  #include "llvm/Analysis/DominanceFrontier.h" @@ -35,6 +36,7 @@  #include "llvm/Analysis/LazyCallGraph.h"  #include "llvm/Analysis/LazyValueInfo.h"  #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopCacheAnalysis.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/MemoryDependenceAnalysis.h"  #include "llvm/Analysis/MemorySSA.h" @@ -51,6 +53,7 @@  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/PreISelIntrinsicLowering.h"  #include "llvm/CodeGen/UnreachableBlockElim.h"  #include "llvm/IR/Dominators.h" @@ -58,6 +61,7 @@  #include "llvm/IR/PassManager.h"  #include "llvm/IR/SafepointIRVerifier.h"  #include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/FormatVariadic.h"  #include "llvm/Support/Regex.h" @@ -82,6 +86,7 @@  #include "llvm/Transforms/IPO/Inliner.h"  #include "llvm/Transforms/IPO/Internalize.h"  #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/MergeFunctions.h"  #include "llvm/Transforms/IPO/PartialInlining.h"  #include "llvm/Transforms/IPO/SCCP.h"  #include "llvm/Transforms/IPO/SampleProfile.h" @@ -101,6 +106,7 @@  #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"  #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"  #include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"  #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"  #include "llvm/Transforms/Scalar/ADCE.h"  #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" @@ -138,13 +144,15 @@  #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"  #include "llvm/Transforms/Scalar/LoopUnrollPass.h"  #include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"  #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"  #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"  #include "llvm/Transforms/Scalar/LowerWidenableCondition.h"  #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"  #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" -#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"  #include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"  #include "llvm/Transforms/Scalar/NaryReassociate.h"  #include "llvm/Transforms/Scalar/NewGVN.h"  #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" @@ -164,6 +172,7 @@  #include "llvm/Transforms/Utils/BreakCriticalEdges.h"  #include "llvm/Transforms/Utils/CanonicalizeAliases.h"  #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/InjectTLIMappings.h"  #include "llvm/Transforms/Utils/LCSSA.h"  #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"  #include "llvm/Transforms/Utils/LoopSimplify.h" @@ -184,6 +193,11 @@ static cl::opt<bool>                         cl::Hidden, cl::ZeroOrMore,                         cl::desc("Run Partial inlinining pass")); +static cl::opt<int> PreInlineThreshold( +    "npm-preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, +    cl::desc("Control the amount of inlining in pre-instrumentation inliner " +             "(default = 75)")); +  static cl::opt<bool>      RunNewGVN("enable-npm-newgvn", cl::init(false),                cl::Hidden, cl::ZeroOrMore, @@ -206,7 +220,7 @@ static cl::opt<bool> EnableSyntheticCounts(      cl::desc("Run synthetic function entry count generation "               "pass")); -static Regex DefaultAliasRegex( +static const Regex DefaultAliasRegex(      "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");  // This option is used in simplifying testing SampleFDO optimizations for @@ -393,21 +407,25 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));    // Hoisting of scalars and load expressions. -  if (EnableGVNHoist) -    FPM.addPass(GVNHoistPass()); - -  // Global value numbering based sinking. -  if (EnableGVNSink) { -    FPM.addPass(GVNSinkPass()); -    FPM.addPass(SimplifyCFGPass()); +  if (Level > O1) { +    if (EnableGVNHoist) +      FPM.addPass(GVNHoistPass()); + +    // Global value numbering based sinking. +    if (EnableGVNSink) { +      FPM.addPass(GVNSinkPass()); +      FPM.addPass(SimplifyCFGPass()); +    }    }    // Speculative execution if the target has divergent branches; otherwise nop. -  FPM.addPass(SpeculativeExecutionPass()); +  if (Level > O1) { +    FPM.addPass(SpeculativeExecutionPass()); -  // Optimize based on known information about branches, and cleanup afterward. -  FPM.addPass(JumpThreadingPass()); -  FPM.addPass(CorrelatedValuePropagationPass()); +    // Optimize based on known information about branches, and cleanup afterward. +    FPM.addPass(JumpThreadingPass()); +    FPM.addPass(CorrelatedValuePropagationPass()); +  }    FPM.addPass(SimplifyCFGPass());    if (Level == O3)      FPM.addPass(AggressiveInstCombinePass()); @@ -421,10 +439,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    // For PGO use pipeline, try to optimize memory intrinsics such as memcpy    // using the size value profile. Don't perform this when optimizing for size.    if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && -      !isOptimizingForSize(Level)) +      !isOptimizingForSize(Level) && Level > O1)      FPM.addPass(PGOMemOPSizeOpt()); -  FPM.addPass(TailCallElimPass()); +  // TODO: Investigate the cost/benefit of tail call elimination on debugging. +  if (Level > O1) +    FPM.addPass(TailCallElimPass());    FPM.addPass(SimplifyCFGPass());    // Form canonically associated expression trees, and simplify the trees using @@ -451,6 +471,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    // Rotate Loop - disable header duplication at -Oz    LPM1.addPass(LoopRotatePass(Level != Oz)); +  // TODO: Investigate promotion cap for O1.    LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));    LPM1.addPass(SimpleLoopUnswitchPass());    LPM2.addPass(IndVarSimplifyPass()); @@ -466,8 +487,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||         PGOOpt->Action != PGOOptions::SampleUse) &&        PTO.LoopUnrolling) -    LPM2.addPass( -        LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll)); +    LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false, +                                    PTO.ForgetAllSCEVInLoopUnroll));    for (auto &C : LoopOptimizerEndEPCallbacks)      C(LPM2, Level); @@ -475,10 +496,18 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    // We provide the opt remark emitter pass for LICM to use. We only need to do    // this once as it is immutable.    FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); -  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); +  FPM.addPass(createFunctionToLoopPassAdaptor( +      std::move(LPM1), EnableMSSALoopDependency, DebugLogging));    FPM.addPass(SimplifyCFGPass());    FPM.addPass(InstCombinePass()); -  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); +  // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass, +  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. +  // *All* loop passes must preserve it, in order to be able to use it. +  FPM.addPass(createFunctionToLoopPassAdaptor( +      std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); + +  // Delete small array after loop unroll. +  FPM.addPass(SROA());    // Eliminate redundancies.    if (Level != O1) { @@ -510,18 +539,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    // Re-consider control flow based optimizations after redundancy elimination,    // redo DCE, etc. -  FPM.addPass(JumpThreadingPass()); -  FPM.addPass(CorrelatedValuePropagationPass()); -  FPM.addPass(DSEPass()); -  FPM.addPass(createFunctionToLoopPassAdaptor( -      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), -      DebugLogging)); +  if (Level > O1) { +    FPM.addPass(JumpThreadingPass()); +    FPM.addPass(CorrelatedValuePropagationPass()); +    FPM.addPass(DSEPass()); +    FPM.addPass(createFunctionToLoopPassAdaptor( +        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), +        EnableMSSALoopDependency, DebugLogging)); +  }    for (auto &C : ScalarOptimizerLateEPCallbacks)      C(FPM, Level);    // Finally, do an expensive DCE pass to catch all the dead code exposed by    // the simplifications and basic cleanup after all the simplifications. +  // TODO: Investigate if this is too expensive.    FPM.addPass(ADCEPass());    FPM.addPass(SimplifyCFGPass());    FPM.addPass(InstCombinePass()); @@ -540,6 +572,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,                                      bool RunProfileGen, bool IsCS,                                      std::string ProfileFile,                                      std::string ProfileRemappingFile) { +  assert(Level != O0 && "Not expecting O0 here!");    // Generally running simplification passes and the inliner with an high    // threshold results in smaller executables, but there may be cases where    // the size grows, so let's be conservative here and skip this simplification @@ -548,8 +581,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,    if (!isOptimizingForSize(Level) && !IsCS) {      InlineParams IP; -    // In the old pass manager, this is a cl::opt. Should still this be one? -    IP.DefaultThreshold = 75; +    IP.DefaultThreshold = PreInlineThreshold;      // FIXME: The hint threshold has the same value used by the regular inliner.      // This should probably be lowered after performance testing. @@ -570,34 +602,63 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,      CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + +    // Delete anything that is now dead to make sure that we don't instrument +    // dead code. Instrumentation can end up keeping dead code around and +    // dramatically increase code size. +    MPM.addPass(GlobalDCEPass());    } -  // Delete anything that is now dead to make sure that we don't instrument -  // dead code. Instrumentation can end up keeping dead code around and -  // dramatically increase code size. -  MPM.addPass(GlobalDCEPass()); +  if (!RunProfileGen) { +    assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); +    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); +    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert +    // RequireAnalysisPass for PSI before subsequent non-module passes. +    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); +    return; +  } -  if (RunProfileGen) { -    MPM.addPass(PGOInstrumentationGen(IsCS)); +  // Perform PGO instrumentation. +  MPM.addPass(PGOInstrumentationGen(IsCS)); -    FunctionPassManager FPM; -    FPM.addPass( -        createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); -    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - -    // Add the profile lowering pass. -    InstrProfOptions Options; -    if (!ProfileFile.empty()) -      Options.InstrProfileOutput = ProfileFile; -    Options.DoCounterPromotion = true; -    Options.UseBFIInPromotion = IsCS; -    MPM.addPass(InstrProfiling(Options, IsCS)); -  } else if (!ProfileFile.empty()) { +  FunctionPassManager FPM; +  FPM.addPass(createFunctionToLoopPassAdaptor( +      LoopRotatePass(), EnableMSSALoopDependency, DebugLogging)); +  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + +  // Add the profile lowering pass. +  InstrProfOptions Options; +  if (!ProfileFile.empty()) +    Options.InstrProfileOutput = ProfileFile; +  // Do counter promotion at Level greater than O0. +  Options.DoCounterPromotion = true; +  Options.UseBFIInPromotion = IsCS; +  MPM.addPass(InstrProfiling(Options, IsCS)); +} + +void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, +                                         bool DebugLogging, bool RunProfileGen, +                                         bool IsCS, std::string ProfileFile, +                                         std::string ProfileRemappingFile) { +  if (!RunProfileGen) { +    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");      MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));      // Cache ProfileSummaryAnalysis once to avoid the potential need to insert      // RequireAnalysisPass for PSI before subsequent non-module passes.      MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); +    return;    } + +  // Perform PGO instrumentation. +  MPM.addPass(PGOInstrumentationGen(IsCS)); +  // Add the profile lowering pass. +  InstrProfOptions Options; +  if (!ProfileFile.empty()) +    Options.InstrProfileOutput = ProfileFile; +  // Do not do counter promotion at O0. +  Options.DoCounterPromotion = false; +  Options.UseBFIInPromotion = IsCS; +  MPM.addPass(InstrProfiling(Options, IsCS));  }  static InlineParams @@ -852,6 +913,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(    FunctionPassManager OptimizePM(DebugLogging);    OptimizePM.addPass(Float2IntPass()); +  OptimizePM.addPass(LowerConstantIntrinsicsPass()); +    // FIXME: We need to run some loop optimizations to re-rotate loops after    // simplify-cfg and others undo their rotation. @@ -863,8 +926,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(      C(OptimizePM, Level);    // First rotate loops that may have been un-rotated by prior passes. -  OptimizePM.addPass( -      createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); +  OptimizePM.addPass(createFunctionToLoopPassAdaptor( +      LoopRotatePass(), EnableMSSALoopDependency, DebugLogging));    // Distribute loops to allow partial vectorization.  I.e. isolate dependences    // into separate loop that would otherwise inhibit vectorization.  This is @@ -911,19 +974,18 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(    // combiner for cleanup here so that the unrolling and LICM can be pipelined    // across the loop nests.    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll -  if (EnableUnrollAndJam) { -    OptimizePM.addPass( -        createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); +  if (EnableUnrollAndJam && PTO.LoopUnrolling) { +    OptimizePM.addPass(LoopUnrollAndJamPass(Level));    } -  if (PTO.LoopUnrolling) -    OptimizePM.addPass(LoopUnrollPass( -        LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll))); +  OptimizePM.addPass(LoopUnrollPass( +      LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, +                        PTO.ForgetAllSCEVInLoopUnroll)));    OptimizePM.addPass(WarnMissedTransformationsPass());    OptimizePM.addPass(InstCombinePass());    OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());    OptimizePM.addPass(createFunctionToLoopPassAdaptor(        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), -      DebugLogging)); +      EnableMSSALoopDependency, DebugLogging));    // Now that we've vectorized and unrolled loops, we may have more refined    // alignment information, try to re-derive it here. @@ -1403,7 +1465,7 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,    Expected<ParametersT> Result = Parser(Params);    assert((Result || Result.template errorIsA<StringError>()) &&           "Pass parameter parser can only return StringErrors."); -  return std::move(Result); +  return Result;  }  /// Parser of parameters for LoopUnroll pass. @@ -1422,12 +1484,23 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {        UnrollOpts.setOptLevel(OptLevel);        continue;      } +    if (ParamName.consume_front("full-unroll-max=")) { +      int Count; +      if (ParamName.getAsInteger(0, Count)) +        return make_error<StringError>( +            formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), +            inconvertibleErrorCode()); +      UnrollOpts.setFullUnrollMaxCount(Count); +      continue; +    }      bool Enable = !ParamName.consume_front("no-");      if (ParamName == "partial") {        UnrollOpts.setPartial(Enable);      } else if (ParamName == "peeling") {        UnrollOpts.setPeeling(Enable); +    } else if (ParamName == "profile-peeling") { +      UnrollOpts.setProfileBasedPeeling(Enable);      } else if (ParamName == "runtime") {        UnrollOpts.setRuntime(Enable);      } else if (ParamName == "upperbound") { @@ -1542,6 +1615,26 @@ Expected<bool> parseLoopUnswitchOptions(StringRef Params) {    }    return Result;  } + +Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) { +  bool Result = false; +  while (!Params.empty()) { +    StringRef ParamName; +    std::tie(ParamName, Params) = Params.split(';'); + +    bool Enable = !ParamName.consume_front("no-"); +    if (ParamName == "split-footer-bb") { +      Result = Enable; +    } else { +      return make_error<StringError>( +          formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ", +                  ParamName) +              .str(), +          inconvertibleErrorCode()); +    } +  } +  return Result; +}  } // namespace  /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1629,7 +1722,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {    // Explicitly handle pass manager names.    if (Name == "function")      return true; -  if (Name == "loop") +  if (Name == "loop" || Name == "loop-mssa")      return true;    // Explicitly handle custom-parsed pass names. @@ -1653,7 +1746,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {  template <typename CallbacksT>  static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {    // Explicitly handle pass manager names. -  if (Name == "loop") +  if (Name == "loop" || Name == "loop-mssa")      return true;    // Explicitly handle custom-parsed pass names. @@ -1800,9 +1893,25 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,                                .Case("O3", O3)                                .Case("Os", Os)                                .Case("Oz", Oz); -    if (L == O0) -      // At O0 we do nothing at all! +    if (L == O0) { +      // Add instrumentation PGO passes -- at O0 we can still do PGO. +      if (PGOOpt && Matches[1] != "thinlto" && +          (PGOOpt->Action == PGOOptions::IRInstr || +           PGOOpt->Action == PGOOptions::IRUse)) +        addPGOInstrPassesForO0( +            MPM, DebugLogging, +            /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), +            /* IsCS */ false, PGOOpt->ProfileFile, +            PGOOpt->ProfileRemappingFile); +      // Do nothing else at all!        return Error::success(); +    } + +    // This is consistent with old pass manager invoked via opt, but +    // inconsistent with clang. Clang doesn't enable loop vectorization +    // but does enable slp vectorization at Oz. +    PTO.LoopVectorization = L > O1 && L < Oz; +    PTO.SLPVectorization = L > O1 && L < Oz;      if (Matches[1] == "default") {        MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); @@ -1947,14 +2056,15 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,        FPM.addPass(std::move(NestedFPM));        return Error::success();      } -    if (Name == "loop") { +    if (Name == "loop" || Name == "loop-mssa") {        LoopPassManager LPM(DebugLogging);        if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,                                             DebugLogging))          return Err;        // Add the nested pass manager with the appropriate adaptor. -      FPM.addPass( -          createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); +      bool UseMemorySSA = (Name == "loop-mssa"); +      FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, +                                                  DebugLogging));        return Error::success();      }      if (auto Count = parseRepeatPassName(Name)) {  | 
