diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
16 files changed, 287 insertions, 130 deletions
| diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp index d5411d916c77..cd5314e7a17a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp @@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(      cl::desc("Disables evaluation of GetElementPtr with constant operands"));  namespace { -class InlineCostCallAnalyzer; -  /// This function behaves more like CallBase::hasFnAttr: when it looks for the  /// requested attribute, it check both the call instruction and the called  /// function (if it's available and operand bundles don't prohibit that). @@ -151,7 +149,9 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {    return {};  } +} // namespace +namespace llvm {  Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {    Attribute Attr = getFnAttr(CB, AttrKind);    int AttrValue; @@ -159,6 +159,10 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {      return None;    return AttrValue;  } +} // namespace llvm + +namespace { +class InlineCostCallAnalyzer;  // This struct is used to store information about inline cost of a  // particular instruction @@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {              getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))        Cost = *AttrCost; +    if (Optional<int> AttrCostMult = getStringFnAttrAsInt( +            CandidateCall, +            InlineConstants::FunctionInlineCostMultiplierAttributeName)) +      Cost *= *AttrCostMult; +      if (Optional<int> AttrThreshold =              getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))        Threshold = *AttrThreshold; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e946..bc03776bde19 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -18,12 +18,14 @@  #include "llvm/ADT/DenseSet.h"  #include "llvm/ADT/MapVector.h"  #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/SparseBitVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"  #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    RegClassInfo.runOnMachineFunction(MF); +  // MachineSink currently uses MachineLoopInfo, which only recognizes natural +  // loops. As such, we could sink instructions into irreducible cycles, which +  // would be non-profitable. +  // WARNING: The current implementation of hasStoreBetween() is incorrect for +  // sinking into irreducible cycles (PR53990), this bailout is currently +  // necessary for correctness, not just profitability. 
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); +  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) +    return false; +    bool EverMadeChange = false;    while (true) { diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp index 636c1d238932..a016b7085a00 100644 --- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp @@ -140,36 +140,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {  }  #endif -// Write X as an (unsigned) LEB value at offset Offset in Stream, padded +// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded  // to allow patching. -template <int W> -void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) { +template <typename T, int W> +void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {    uint8_t Buffer[W]; -  unsigned SizeLen = encodeULEB128(X, Buffer, W); +  unsigned SizeLen = encodeULEB128(Value, Buffer, W);    assert(SizeLen == W);    Stream.pwrite((char *)Buffer, SizeLen, Offset);  } -// Write X as an signed LEB value at offset Offset in Stream, padded +// Write Value as an signed LEB value at offset Offset in Stream, padded  // to allow patching. -template <int W> -void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) { +template <typename T, int W> +void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {    uint8_t Buffer[W]; -  unsigned SizeLen = encodeSLEB128(X, Buffer, W); +  unsigned SizeLen = encodeSLEB128(Value, Buffer, W);    assert(SizeLen == W);    Stream.pwrite((char *)Buffer, SizeLen, Offset);  } -// Write X as a plain integer value at offset Offset in Stream. 
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { +static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value, +                              uint64_t Offset) { +  writePatchableULEB<uint32_t, 5>(Stream, Value, Offset); +} + +static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value, +                              uint64_t Offset) { +  writePatchableSLEB<int32_t, 5>(Stream, Value, Offset); +} + +static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value, +                              uint64_t Offset) { +  writePatchableULEB<uint64_t, 10>(Stream, Value, Offset); +} + +static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value, +                              uint64_t Offset) { +  writePatchableSLEB<int64_t, 10>(Stream, Value, Offset); +} + +// Write Value as a plain integer value at offset Offset in Stream. +static void patchI32(raw_pwrite_stream &Stream, uint32_t Value, +                     uint64_t Offset) {    uint8_t Buffer[4]; -  support::endian::write32le(Buffer, X); +  support::endian::write32le(Buffer, Value);    Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);  } -static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) { +static void patchI64(raw_pwrite_stream &Stream, uint64_t Value, +                     uint64_t Offset) {    uint8_t Buffer[8]; -  support::endian::write64le(Buffer, X); +  support::endian::write64le(Buffer, Value);    Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);  } @@ -423,8 +445,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {    // Write the final section size to the payload_len field, which follows    // the section id byte. -  writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size, -                       Section.SizeOffset); +  writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size, +                    Section.SizeOffset);  }  // Emit the Wasm header. 
@@ -755,7 +777,7 @@ void WasmObjectWriter::applyRelocations(                        RelEntry.Offset;      LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); -    auto Value = getProvisionalValue(RelEntry, Layout); +    uint64_t Value = getProvisionalValue(RelEntry, Layout);      switch (RelEntry.Type) {      case wasm::R_WASM_FUNCTION_INDEX_LEB: @@ -764,10 +786,10 @@ void WasmObjectWriter::applyRelocations(      case wasm::R_WASM_MEMORY_ADDR_LEB:      case wasm::R_WASM_TAG_INDEX_LEB:      case wasm::R_WASM_TABLE_NUMBER_LEB: -      writePatchableLEB<5>(Stream, Value, Offset); +      writePatchableU32(Stream, Value, Offset);        break;      case wasm::R_WASM_MEMORY_ADDR_LEB64: -      writePatchableLEB<10>(Stream, Value, Offset); +      writePatchableU64(Stream, Value, Offset);        break;      case wasm::R_WASM_TABLE_INDEX_I32:      case wasm::R_WASM_MEMORY_ADDR_I32: @@ -787,14 +809,14 @@ void WasmObjectWriter::applyRelocations(      case wasm::R_WASM_MEMORY_ADDR_SLEB:      case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:      case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: -      writePatchableSLEB<5>(Stream, Value, Offset); +      writePatchableS32(Stream, Value, Offset);        break;      case wasm::R_WASM_TABLE_INDEX_SLEB64:      case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:      case wasm::R_WASM_MEMORY_ADDR_SLEB64:      case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:      case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: -      writePatchableSLEB<10>(Stream, Value, Offset); +      writePatchableS64(Stream, Value, Offset);        break;      default:        llvm_unreachable("invalid relocation type"); diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp index 015ca1eec4df..dedfc81f11bb 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp @@ -679,6 +679,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {      bool 
Enable = !ParamName.consume_front("no-");      if (ParamName == "forward-switch-cond") {        Result.forwardSwitchCondToPhi(Enable); +    } else if (ParamName == "switch-range-to-icmp") { +      Result.convertSwitchRangeToICmp(Enable);      } else if (ParamName == "switch-to-lookup") {        Result.convertSwitchToLookupTable(Enable);      } else if (ParamName == "keep-loops") { diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp index 93637c890c4f..e838665eb9ce 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -259,14 +259,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));    // Hoisting of scalars and load expressions. -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    FPM.addPass(InstCombinePass());    FPM.addPass(LibCallsShrinkWrapPass());    invokePeepholeEPCallbacks(FPM, Level); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    // Form canonically associated expression trees, and simplify the trees using    // basic mathematical properties. For example, this will form (nearly) @@ -291,14 +293,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,    LPM1.addPass(LoopSimplifyCFGPass());    // Try to remove as much code from the loop header as possible, -  // to reduce amount of IR that will have to be duplicated. +  // to reduce amount of IR that will have to be duplicated. However, +  // do not perform speculative hoisting the first time as LICM +  // will destroy metadata that may not need to be destroyed if run +  // after loop rotation.    // TODO: Investigate promotion cap for O1. 
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); +  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                        /*AllowSpeculation=*/false));    LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,                                isLTOPreLink(Phase)));    // TODO: Investigate promotion cap for O1. -  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); +  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                        /*AllowSpeculation=*/true));    LPM1.addPass(SimpleLoopUnswitchPass());    if (EnableLoopFlatten)      LPM1.addPass(LoopFlattenPass()); @@ -335,7 +342,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),                                                /*UseMemorySSA=*/true,                                                /*UseBlockFrequencyInfo=*/true)); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    FPM.addPass(InstCombinePass());    // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.    // *All* loop passes must preserve it, in order to be able to use it. @@ -373,7 +381,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,    // the simplifications and basic cleanup after all the simplifications.    // TODO: Investigate if this is too expensive.    FPM.addPass(ADCEPass()); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    FPM.addPass(InstCombinePass());    invokePeepholeEPCallbacks(FPM, Level); @@ -408,7 +417,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    // Global value numbering based sinking.    
if (EnableGVNSink) {      FPM.addPass(GVNSinkPass()); -    FPM.addPass(SimplifyCFGPass()); +    FPM.addPass( +        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    }    if (EnableConstraintElimination) @@ -421,7 +431,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(JumpThreadingPass());    FPM.addPass(CorrelatedValuePropagationPass()); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    FPM.addPass(InstCombinePass());    if (Level == OptimizationLevel::O3)      FPM.addPass(AggressiveInstCombinePass()); @@ -438,7 +449,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,      FPM.addPass(PGOMemOPSizeOpt());    FPM.addPass(TailCallElimPass()); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    // Form canonically associated expression trees, and simplify the trees using    // basic mathematical properties. For example, this will form (nearly) @@ -463,15 +475,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    LPM1.addPass(LoopSimplifyCFGPass());    // Try to remove as much code from the loop header as possible, -  // to reduce amount of IR that will have to be duplicated. +  // to reduce amount of IR that will have to be duplicated. However, +  // do not perform speculative hoisting the first time as LICM +  // will destroy metadata that may not need to be destroyed if run +  // after loop rotation.    // TODO: Investigate promotion cap for O1. -  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); +  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                        /*AllowSpeculation=*/false));    // Disable header duplication in loop rotation at -Oz.    
LPM1.addPass(        LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));    // TODO: Investigate promotion cap for O1. -  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); +  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                        /*AllowSpeculation=*/true));    LPM1.addPass(        SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&                               EnableO3NonTrivialUnswitching)); @@ -510,7 +527,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),                                                /*UseMemorySSA=*/true,                                                /*UseBlockFrequencyInfo=*/true)); -  FPM.addPass(SimplifyCFGPass()); +  FPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    FPM.addPass(InstCombinePass());    // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,    // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 
@@ -567,7 +585,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    FPM.addPass(DSEPass());    FPM.addPass(createFunctionToLoopPassAdaptor( -      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), +      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +               /*AllowSpeculation=*/true),        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));    FPM.addPass(CoroElidePass()); @@ -575,8 +594,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,    for (auto &C : ScalarOptimizerLateEPCallbacks)      C(FPM, Level); -  FPM.addPass(SimplifyCFGPass( -      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); +  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() +                                  .convertSwitchRangeToICmp(true) +                                  .hoistCommonInsts(true) +                                  .sinkCommonInsts(true)));    FPM.addPass(InstCombinePass());    invokePeepholeEPCallbacks(FPM, Level); @@ -614,7 +635,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,      FunctionPassManager FPM;      FPM.addPass(SROAPass());      FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies. -    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. +    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( +        true)));                    // Merge & remove basic blocks.      FPM.addPass(InstCombinePass()); // Combine silly sequences.      
invokePeepholeEPCallbacks(FPM, Level); @@ -928,7 +950,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,    GlobalCleanupPM.addPass(InstCombinePass());    invokePeepholeEPCallbacks(GlobalCleanupPM, Level); -  GlobalCleanupPM.addPass(SimplifyCFGPass()); +  GlobalCleanupPM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),                                                  PTO.EagerlyInvalidateAnalyses)); @@ -1007,7 +1030,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,      ExtraPasses.addPass(CorrelatedValuePropagationPass());      ExtraPasses.addPass(InstCombinePass());      LoopPassManager LPM; -    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); +    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                         /*AllowSpeculation=*/true));      LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==                                         OptimizationLevel::O3));      ExtraPasses.addPass( @@ -1015,7 +1039,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,      ExtraPasses.addPass(          createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,                                          /*UseBlockFrequencyInfo=*/true)); -    ExtraPasses.addPass(SimplifyCFGPass()); +    ExtraPasses.addPass( +        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));      ExtraPasses.addPass(InstCombinePass());      FPM.addPass(std::move(ExtraPasses));    } @@ -1031,6 +1056,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,    // before SLP vectorization.    
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()                                    .forwardSwitchCondToPhi(true) +                                  .convertSwitchRangeToICmp(true)                                    .convertSwitchToLookupTable(true)                                    .needCanonicalLoops(false)                                    .hoistCommonInsts(true) @@ -1073,7 +1099,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,      FPM.addPass(          RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());      FPM.addPass(createFunctionToLoopPassAdaptor( -        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), +        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +                 /*AllowSpeculation=*/true),          /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));    } @@ -1202,7 +1229,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,    // LoopSink (and other loop passes since the last simplifyCFG) might have    // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. -  OptimizePM.addPass(SimplifyCFGPass()); +  OptimizePM.addPass( +      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    OptimizePM.addPass(CoroCleanupPass()); @@ -1612,7 +1640,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,    FunctionPassManager MainFPM;    MainFPM.addPass(createFunctionToLoopPassAdaptor( -      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), +      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, +               /*AllowSpeculation=*/true),        /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));    if (RunNewGVN) @@ -1676,8 +1705,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,    // Add late LTO optimization passes.    // Delete basic blocks, which optimization passes may have killed. 
-  MPM.addPass(createModuleToFunctionPassAdaptor( -      SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)))); +  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass( +      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts( +          true))));    // Drop bodies of available eternally objects to improve GlobalDCE.    MPM.addPass(EliminateAvailableExternallyPass()); diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def index 8e0af11b854d..69d8d8c43267 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def +++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def @@ -423,6 +423,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg",                             },                            parseSimplifyCFGOptions,                            "no-forward-switch-cond;forward-switch-cond;" +                          "no-switch-range-to-icmp;switch-range-to-icmp;"                            "no-switch-to-lookup;switch-to-lookup;"                            "no-keep-loops;keep-loops;"                            "no-hoist-common-insts;hoist-common-insts;" diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 4af28fc070dd..6a751da7ad55 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -531,6 +531,7 @@ void AArch64PassConfig::addIRPasses() {    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)      addPass(createCFGSimplificationPass(SimplifyCFGOptions()                                              .forwardSwitchCondToPhi(true) +                                            .convertSwitchRangeToICmp(true)                                              .convertSwitchToLookupTable(true)                                              
.needCanonicalLoops(false)                                              .hoistCommonInsts(true) diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0ba75a544c04..14b4f7c56c57 100755 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -118,9 +118,10 @@ HexagonTargetLowering::initializeHVXLowering() {      setOperationAction(ISD::SPLAT_VECTOR,      MVT::v32f32, Legal);      // Vector shuffle is always promoted to ByteV and a bitcast to f16 is      // generated. -    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); -    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); -    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); +    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); +    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16,  ByteV); +    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32,  ByteW); +    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32,  ByteV);      // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-      // independent) handling of it would convert it to a load, which is @@ -780,7 +781,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,    SDValue N = HalfV0;    SDValue M = HalfV1;    for (unsigned i = 0; i != NumWords/2; ++i) { -      // Rotate by element count since last insertion.      
if (Words[i] != Words[n] || VecHist[n] <= 1) {        Sn = DAG.getConstant(Rn, dl, MVT::i32); diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index c6703bb8a62a..08acf81961a3 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -345,6 +345,7 @@ void HexagonPassConfig::addIRPasses() {      if (EnableInitialCFGCleanup)        addPass(createCFGSimplificationPass(SimplifyCFGOptions()                                                .forwardSwitchCondToPhi(true) +                                              .convertSwitchRangeToICmp(true)                                                .convertSwitchToLookupTable(true)                                                .needCanonicalLoops(false)                                                .hoistCommonInsts(true) diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0c2e129b8f1f..8534a0ad886e 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -4732,18 +4732,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,  Register  MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,                                        const MachineFunction &MF) const { -  // Named registers is expected to be fairly rare. For now, just support $28 -  // since the linux kernel uses it. +  // The Linux kernel uses $28 and sp.    
if (Subtarget.isGP64bit()) {      Register Reg = StringSwitch<Register>(RegName) -                         .Case("$28", Mips::GP_64) -                         .Default(Register()); +                       .Case("$28", Mips::GP_64) +                       .Case("sp", Mips::SP_64) +                       .Default(Register());      if (Reg)        return Reg;    } else {      Register Reg = StringSwitch<Register>(RegName) -                         .Case("$28", Mips::GP) -                         .Default(Register()); +                       .Case("$28", Mips::GP) +                       .Case("sp", Mips::SP) +                       .Default(Register());      if (Reg)        return Reg;    } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp index 49babc24cb82..10abea7ebd32 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp @@ -22,6 +22,7 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/StringRef.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -92,6 +93,18 @@ static cl::opt<bool>      DisableInlinedAllocaMerging("disable-inlined-alloca-merging",                                  cl::init(false), cl::Hidden); +static cl::opt<int> IntraSCCCostMultiplier( +    "intra-scc-cost-multiplier", cl::init(2), cl::Hidden, +    cl::desc( +        "Cost multiplier to multiply onto inlined call sites where the " +        "new call was previously an intra-SCC call (not relevant when the " +        "original call was already intra-SCC). This can accumulate over " +        "multiple inlinings (e.g. 
if a call site already had a cost " +        "multiplier and one of its inlined calls was also subject to " +        "this, the inlined call would have the original multiplier " +        "multiplied by intra-scc-cost-multiplier). This is to prevent tons of " +        "inlining through a child SCC which can cause terrible compile times")); +  /// A flag for test, so we can print the content of the advisor when running it  /// as part of the default (e.g. -O3) pipeline.  static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing", @@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,        // trigger infinite inlining, much like is prevented within the inliner        // itself by the InlineHistory above, but spread across CGSCC iterations        // and thus hidden from the full inline history. -      if (CG.lookupSCC(*CG.lookup(Callee)) == C && -          UR.InlinedInternalEdges.count({&N, C})) { +      LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee)); +      if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {          LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "                               "previously split out of this SCC by inlining: "                            << F.getName() << " -> " << Callee.getName() << "\n"); @@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,          continue;        } +      int CBCostMult = +          getStringFnAttrAsInt( +              *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName) +              .getValueOr(1); +        // Setup the data structure used to plumb customization into the        // `InlineFunction` routine.        
InlineFunctionInfo IFI( @@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,              if (tryPromoteCall(*ICB))                NewCallee = ICB->getCalledFunction();            } -          if (NewCallee) -            if (!NewCallee->isDeclaration()) +          if (NewCallee) { +            if (!NewCallee->isDeclaration()) {                Calls->push({ICB, NewHistoryID}); +              // Continually inlining through an SCC can result in huge compile +              // times and bloated code since we arbitrarily stop at some point +              // when the inliner decides it's not profitable to inline anymore. +              // We attempt to mitigate this by making these calls exponentially +              // more expensive. +              // This doesn't apply to calls in the same SCC since if we do +              // inline through the SCC the function will end up being +              // self-recursive which the inliner bails out on, and inlining +              // within an SCC is necessary for performance. 
+              if (CalleeSCC != C && +                  CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { +                Attribute NewCBCostMult = Attribute::get( +                    M.getContext(), +                    InlineConstants::FunctionInlineCostMultiplierAttributeName, +                    itostr(CBCostMult * IntraSCCCostMultiplier)); +                ICB->addFnAttr(NewCBCostMult); +              } +            } +          }          }        } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 5113c0c67acc..7205ae178d21 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3712,9 +3712,9 @@ struct AAKernelInfoFunction : AAKernelInfo {      //                         __kmpc_get_hardware_num_threads_in_block();      //                       WarpSize = __kmpc_get_warp_size();      //                       BlockSize = BlockHwSize - WarpSize; -    //                       if (InitCB >= BlockSize) return; -    // IsWorkerCheckBB:      bool IsWorker = InitCB >= 0; +    // IsWorkerCheckBB:      bool IsWorker = InitCB != -1;      //                       if (IsWorker) { +    //                         if (InitCB >= BlockSize) return;      // SMBeginBB:               __kmpc_barrier_simple_generic(...);      //                         void *WorkFn;      //                         bool Active = __kmpc_kernel_parallel(&WorkFn); @@ -3771,6 +3771,13 @@ struct AAKernelInfoFunction : AAKernelInfo {      ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);      InitBB->getTerminator()->eraseFromParent(); +    Instruction *IsWorker = +        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, +                         ConstantInt::get(KernelInitCB->getType(), -1), +                         "thread.is_worker", InitBB); +    IsWorker->setDebugLoc(DLoc); +    
BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); +      Module &M = *Kernel->getParent();      auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());      FunctionCallee BlockHwSizeFn = @@ -3780,29 +3787,22 @@ struct AAKernelInfoFunction : AAKernelInfo {          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(              M, OMPRTL___kmpc_get_warp_size);      CallInst *BlockHwSize = -        CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB); +        CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);      OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);      BlockHwSize->setDebugLoc(DLoc); -    CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB); +    CallInst *WarpSize = +        CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);      OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);      WarpSize->setDebugLoc(DLoc); -    Instruction *BlockSize = -        BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB); +    Instruction *BlockSize = BinaryOperator::CreateSub( +        BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);      BlockSize->setDebugLoc(DLoc); -    Instruction *IsMainOrWorker = -        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, -                         BlockSize, "thread.is_main_or_worker", InitBB); +    Instruction *IsMainOrWorker = ICmpInst::Create( +        ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, +        "thread.is_main_or_worker", IsWorkerCheckBB);      IsMainOrWorker->setDebugLoc(DLoc); -    BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker, -                       InitBB); - -    Instruction *IsWorker = -        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, -                         ConstantInt::get(KernelInitCB->getType(), -1), -                         "thread.is_worker", IsWorkerCheckBB); -    
IsWorker->setDebugLoc(DLoc); -    BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, -                       IsWorkerCheckBB); +    BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, +                       IsMainOrWorker, IsWorkerCheckBB);      // Create local storage for the work function pointer.      const DataLayout &DL = M.getDataLayout(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 74f68531b89a..6e5aeb9c41f6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -365,7 +365,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,      MPM.add(createFunctionInliningPass(IP));      MPM.add(createSROAPass());      MPM.add(createEarlyCSEPass());             // Catch trivial redundancies -    MPM.add(createCFGSimplificationPass());    // Merge & remove BBs +    MPM.add(createCFGSimplificationPass( +        SimplifyCFGOptions().convertSwitchRangeToICmp( +            true)));                           // Merge & remove BBs      MPM.add(createInstructionCombiningPass()); // Combine silly seq's      addExtensionsToPM(EP_Peephole, MPM);    } @@ -404,7 +406,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(        MPM.add(createGVNHoistPass());      if (EnableGVNSink) {        MPM.add(createGVNSinkPass()); -      MPM.add(createCFGSimplificationPass()); +      MPM.add(createCFGSimplificationPass( +          SimplifyCFGOptions().convertSwitchRangeToICmp(true)));      }    } @@ -418,7 +421,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(      MPM.add(createJumpThreadingPass());         // Thread jumps.      
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals    } -  MPM.add(createCFGSimplificationPass());     // Merge & remove BBs +  MPM.add( +      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( +          true))); // Merge & remove BBs    // Combine silly seq's    if (OptLevel > 2)      MPM.add(createAggressiveInstCombinerPass()); @@ -434,7 +439,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(    // TODO: Investigate the cost/benefit of tail call elimination on debugging.    if (OptLevel > 1)      MPM.add(createTailCallEliminationPass()); // Eliminate tail calls -  MPM.add(createCFGSimplificationPass());      // Merge & remove BBs +  MPM.add( +      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( +          true)));                            // Merge & remove BBs    MPM.add(createReassociatePass());           // Reassociate expressions    // The matrix extension can introduce large vector operations early, which can @@ -451,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(      MPM.add(createLoopSimplifyCFGPass());    }    // Try to remove as much code from the loop header as possible, -  // to reduce amount of IR that will have to be duplicated. +  // to reduce amount of IR that will have to be duplicated. However, +  // do not perform speculative hoisting the first time as LICM +  // will destroy metadata that may not need to be destroyed if run +  // after loop rotation.    // TODO: Investigate promotion cap for O1. -  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                         /*AllowSpeculation=*/false));    // Rotate Loop - disable header duplication at -Oz    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));    // TODO: Investigate promotion cap for O1. 
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                         /*AllowSpeculation=*/true));    if (EnableSimpleLoopUnswitch)      MPM.add(createSimpleLoopUnswitchLegacyPass());    else @@ -465,7 +477,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(    // FIXME: We break the loop pass pipeline here in order to do full    // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the    // need for this. -  MPM.add(createCFGSimplificationPass()); +  MPM.add(createCFGSimplificationPass( +      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    MPM.add(createInstructionCombiningPass());    // We resume loop passes creating a second loop pipeline here.    if (EnableLoopFlatten) { @@ -521,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(    // TODO: Investigate if this is too expensive at O1.    if (OptLevel > 1) {      MPM.add(createDeadStoreEliminationPass());  // Delete dead stores -    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                           /*AllowSpeculation=*/true));    }    addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -580,9 +594,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,      PM.add(createEarlyCSEPass());      PM.add(createCorrelatedValuePropagationPass());      PM.add(createInstructionCombiningPass()); -    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                          /*AllowSpeculation=*/true));      PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); -    PM.add(createCFGSimplificationPass()); +    PM.add(createCFGSimplificationPass( +        SimplifyCFGOptions().convertSwitchRangeToICmp(true)));      
PM.add(createInstructionCombiningPass());    } @@ -597,6 +613,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,    // before SLP vectorization.    PM.add(createCFGSimplificationPass(SimplifyCFGOptions()                                           .forwardSwitchCondToPhi(true) +                                         .convertSwitchRangeToICmp(true)                                           .convertSwitchToLookupTable(true)                                           .needCanonicalLoops(false)                                           .hoistCommonInsts(true) @@ -641,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,        // unrolled loop is a inner loop, then the prologue will be inside the        // outer loop. LICM pass can help to promote the runtime check out if the        // checked value is loop invariant. -      PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +      PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                            /*AllowSpeculation=*/true));      }      PM.add(createWarnMissedTransformationsPass()); @@ -772,7 +790,9 @@ void PassManagerBuilder::populateModulePassManager(    MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE    addExtensionsToPM(EP_Peephole, MPM); -  MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE +  MPM.add( +      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( +          true))); // Clean up after IPCP & DAE    // For SamplePGO in ThinLTO compile phase, we do not want to do indirect    // call promotion as it will change the CFG too much to make the 2nd @@ -886,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(    // later might get benefit of no-alias assumption in clone loop.    
if (UseLoopVersioningLICM) {      MPM.add(createLoopVersioningLICMPass());    // Do LoopVersioningLICM -    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                           /*AllowSpeculation=*/true));    }    // We add a fresh GlobalsModRef run at this point. This is particularly @@ -972,7 +993,8 @@ void PassManagerBuilder::populateModulePassManager(    // LoopSink (and other loop passes since the last simplifyCFG) might have    // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. -  MPM.add(createCFGSimplificationPass()); +  MPM.add(createCFGSimplificationPass( +      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));    addExtensionsToPM(EP_OptimizerLast, MPM); @@ -1120,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {    // Run a few AA driven optimizations here and now, to cleanup the code.    PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. -  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); +  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                        /*AllowSpeculation=*/true));    PM.add(NewGVN ? createNewGVNPass()                  : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.    PM.add(createMemCpyOptPass());            // Remove dead memcpys. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp index 7fb1a25bdf13..6372ce19f8ee 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp @@ -149,13 +149,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,                   BlockFrequencyInfo *BFI, const Loop *CurLoop,                   ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,                   OptimizationRemarkEmitter *ORE); -static bool isSafeToExecuteUnconditionally(Instruction &Inst, -                                           const DominatorTree *DT, -                                           const TargetLibraryInfo *TLI, -                                           const Loop *CurLoop, -                                           const LoopSafetyInfo *SafetyInfo, -                                           OptimizationRemarkEmitter *ORE, -                                           const Instruction *CtxI = nullptr); +static bool isSafeToExecuteUnconditionally( +    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, +    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, +    OptimizationRemarkEmitter *ORE, const Instruction *CtxI, +    bool AllowSpeculation);  static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,                                       AliasSetTracker *CurAST, Loop *CurLoop,                                       AAResults *AA); @@ -188,21 +186,26 @@ struct LoopInvariantCodeMotion {                   OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);    LoopInvariantCodeMotion(unsigned LicmMssaOptCap, -                          unsigned LicmMssaNoAccForPromotionCap) +                          unsigned LicmMssaNoAccForPromotionCap, +                          bool LicmAllowSpeculation)        : LicmMssaOptCap(LicmMssaOptCap), -        
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} +        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), +        LicmAllowSpeculation(LicmAllowSpeculation) {}  private:    unsigned LicmMssaOptCap;    unsigned LicmMssaNoAccForPromotionCap; +  bool LicmAllowSpeculation;  };  struct LegacyLICMPass : public LoopPass {    static char ID; // Pass identification, replacement for typeid    LegacyLICMPass(        unsigned LicmMssaOptCap = SetLicmMssaOptCap, -      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) -      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { +      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, +      bool LicmAllowSpeculation = true) +      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                           LicmAllowSpeculation) {      initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());    } @@ -265,7 +268,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,    // but ORE cannot be preserved (see comment before the pass definition).    OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); -  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); +  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                               LicmAllowSpeculation);    if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,                        &AR.SE, AR.MSSA, &ORE))      return PreservedAnalyses::all(); @@ -290,7 +294,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,    // but ORE cannot be preserved (see comment before the pass definition).    
OptimizationRemarkEmitter ORE(LN.getParent()); -  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); +  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                               LicmAllowSpeculation);    Loop &OutermostLoop = LN.getOutermostLoop();    bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI, @@ -321,8 +326,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,  Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }  Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, -                           unsigned LicmMssaNoAccForPromotionCap) { -  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); +                           unsigned LicmMssaNoAccForPromotionCap, +                           bool LicmAllowSpeculation) { +  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, +                            LicmAllowSpeculation);  }  llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, @@ -418,7 +425,8 @@ bool LoopInvariantCodeMotion::runOnLoop(    Flags.setIsSink(false);    if (Preheader)      Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, -                           &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); +                           &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, +                           LicmAllowSpeculation);    // Now that all loop invariants have been removed from the loop, promote any    // memory references to scalars that we can. 
@@ -460,8 +468,8 @@ bool LoopInvariantCodeMotion::runOnLoop(          for (const SmallSetVector<Value *, 8> &PointerMustAliases :               collectPromotionCandidates(MSSA, AA, L)) {            LocalPromoted |= promoteLoopAccessesToScalars( -              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, -              LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); +              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, +              DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);          }          Promoted |= LocalPromoted;        } while (LocalPromoted); @@ -825,7 +833,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,                         MemorySSAUpdater *MSSAU, ScalarEvolution *SE,                         ICFLoopSafetyInfo *SafetyInfo,                         SinkAndHoistLICMFlags &Flags, -                       OptimizationRemarkEmitter *ORE, bool LoopNestMode) { +                       OptimizationRemarkEmitter *ORE, bool LoopNestMode, +                       bool AllowSpeculation) {    // Verify inputs.    
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&           CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && @@ -877,7 +886,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,                               true, &Flags, ORE) &&            isSafeToExecuteUnconditionally(                I, DT, TLI, CurLoop, SafetyInfo, ORE, -              CurLoop->getLoopPreheader()->getTerminator())) { +              CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {          hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,                MSSAU, SE, ORE);          HoistedInstructions.push_back(&I); @@ -1774,14 +1783,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,  /// Only sink or hoist an instruction if it is not a trapping instruction,  /// or if the instruction is known not to trap when moved to the preheader.  /// or if it is a trapping instruction and is guaranteed to execute. 
-static bool isSafeToExecuteUnconditionally(Instruction &Inst, -                                           const DominatorTree *DT, -                                           const TargetLibraryInfo *TLI, -                                           const Loop *CurLoop, -                                           const LoopSafetyInfo *SafetyInfo, -                                           OptimizationRemarkEmitter *ORE, -                                           const Instruction *CtxI) { -  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) +static bool isSafeToExecuteUnconditionally( +    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, +    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, +    OptimizationRemarkEmitter *ORE, const Instruction *CtxI, +    bool AllowSpeculation) { +  if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))      return true;    bool GuaranteedToExecute = @@ -1949,7 +1956,7 @@ bool llvm::promoteLoopAccessesToScalars(      SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,      LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,      Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, -    OptimizationRemarkEmitter *ORE) { +    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {    // Verify inputs.    assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&           SafetyInfo != nullptr && @@ -2054,9 +2061,9 @@ bool llvm::promoteLoopAccessesToScalars(          // to execute does as well.  Thus we can increase our guaranteed          // alignment as well.          
if (!DereferenceableInPH || (InstAlignment > Alignment)) -          if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, -                                             SafetyInfo, ORE, -                                             Preheader->getTerminator())) { +          if (isSafeToExecuteUnconditionally( +                  *Load, DT, TLI, CurLoop, SafetyInfo, ORE, +                  Preheader->getTerminator(), AllowSpeculation)) {              DereferenceableInPH = true;              Alignment = std::max(Alignment, InstAlignment);            } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ee17da1875e5..b8972751066d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,6 +59,11 @@ static cl::opt<bool> UserKeepLoops(      "keep-loops", cl::Hidden, cl::init(true),      cl::desc("Preserve canonical loop structure (default = true)")); +static cl::opt<bool> UserSwitchRangeToICmp( +    "switch-range-to-icmp", cl::Hidden, cl::init(false), +    cl::desc( +        "Convert switches into an integer range comparison (default = false)")); +  static cl::opt<bool> UserSwitchToLookup(      "switch-to-lookup", cl::Hidden, cl::init(false),      cl::desc("Convert switches to lookup tables (default = false)")); @@ -311,6 +316,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {      Options.BonusInstThreshold = UserBonusInstThreshold;    if (UserForwardSwitchCond.getNumOccurrences())      Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; +  if (UserSwitchRangeToICmp.getNumOccurrences()) +    Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;    if (UserSwitchToLookup.getNumOccurrences())      Options.ConvertSwitchToLookupTable = UserSwitchToLookup;    if (UserKeepLoops.getNumOccurrences()) @@ -337,6 +344,8 @@ void 
SimplifyCFGPass::printPipeline(    OS << "<";    OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";    OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;"; +  OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-") +     << "switch-range-to-icmp;";    OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")       << "switch-to-lookup;";    OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;"; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 335ac03ccb52..8c4e1b381b4d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6211,7 +6211,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {    }    // Try to transform the switch into an icmp and a branch. -  if (TurnSwitchRangeIntoICmp(SI, Builder)) +  // The conversion from switch to comparison may lose information on +  // impossible switch values, so disable it early in the pipeline. +  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))      return requestResimplify();    // Remove unreachable cases. | 
