aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp4
6 files changed, 150 insertions, 72 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..10abea7ebd32 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -92,6 +93,18 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
+static cl::opt<int> IntraSCCCostMultiplier(
+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+ cl::desc(
+ "Cost multiplier to multiply onto inlined call sites where the "
+ "new call was previously an intra-SCC call (not relevant when the "
+ "original call was already intra-SCC). This can accumulate over "
+ "multiple inlinings (e.g. if a call site already had a cost "
+ "multiplier and one of its inlined calls was also subject to "
+ "this, the inlined call would have the original multiplier "
+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+ "inlining through a child SCC which can cause terrible compile times"));
+
/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
@@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
- UR.InlinedInternalEdges.count({&N, C})) {
+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ int CBCostMult =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+ .getValueOr(1);
+
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
@@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
- if (NewCallee)
- if (!NewCallee->isDeclaration())
+ if (NewCallee) {
+ if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
+ // Continually inlining through an SCC can result in huge compile
+ // times and bloated code since we arbitrarily stop at some point
+ // when the inliner decides it's not profitable to inline anymore.
+ // We attempt to mitigate this by making these calls exponentially
+ // more expensive.
+ // This doesn't apply to calls in the same SCC since if we do
+ // inline through the SCC the function will end up being
+ // self-recursive which the inliner bails out on, and inlining
+ // within an SCC is necessary for performance.
+ if (CalleeSCC != C &&
+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+ Attribute NewCBCostMult = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineCostMultiplierAttributeName,
+ itostr(CBCostMult * IntraSCCCostMultiplier));
+ ICB->addFnAttr(NewCBCostMult);
+ }
+ }
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5113c0c67acc..7205ae178d21 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3712,9 +3712,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
// __kmpc_get_hardware_num_threads_in_block();
// WarpSize = __kmpc_get_warp_size();
// BlockSize = BlockHwSize - WarpSize;
- // if (InitCB >= BlockSize) return;
- // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+ // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
// if (IsWorker) {
+ // if (InitCB >= BlockSize) return;
// SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
@@ -3771,6 +3771,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
InitBB->getTerminator()->eraseFromParent();
+ Instruction *IsWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+ ConstantInt::get(KernelInitCB->getType(), -1),
+ "thread.is_worker", InitBB);
+ IsWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
Module &M = *Kernel->getParent();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
@@ -3780,29 +3787,22 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_warp_size);
CallInst *BlockHwSize =
- CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
BlockHwSize->setDebugLoc(DLoc);
- CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ CallInst *WarpSize =
+ CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
WarpSize->setDebugLoc(DLoc);
- Instruction *BlockSize =
- BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ Instruction *BlockSize = BinaryOperator::CreateSub(
+ BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
BlockSize->setDebugLoc(DLoc);
- Instruction *IsMainOrWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
- BlockSize, "thread.is_main_or_worker", InitBB);
+ Instruction *IsMainOrWorker = ICmpInst::Create(
+ ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+ "thread.is_main_or_worker", IsWorkerCheckBB);
IsMainOrWorker->setDebugLoc(DLoc);
- BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
- InitBB);
-
- Instruction *IsWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
- ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", IsWorkerCheckBB);
- IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
- IsWorkerCheckBB);
+ BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+ IsMainOrWorker, IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..6e5aeb9c41f6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -365,7 +365,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
MPM.add(createFunctionInliningPass(IP));
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
}
@@ -404,7 +406,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
MPM.add(createGVNSinkPass());
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
}
}
@@ -418,7 +421,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createJumpThreadingPass()); // Thread jumps.
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
}
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
// Combine silly seq's
if (OptLevel > 2)
MPM.add(createAggressiveInstCombinerPass());
@@ -434,7 +439,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
if (OptLevel > 1)
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// The matrix extension can introduce large vector operations early, which can
@@ -451,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLoopSimplifyCFGPass());
}
// Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
+ // to reduce amount of IR that will have to be duplicated. However,
+ // do not perform speculative hoisting the first time as LICM
+ // will destroy metadata that may not need to be destroyed if run
+ // after loop rotation.
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/false));
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
@@ -465,7 +477,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// FIXME: We break the loop pass pipeline here in order to do full
// simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
if (EnableLoopFlatten) {
@@ -521,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// TODO: Investigate if this is too expensive at O1.
if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -580,9 +594,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
PM.add(createEarlyCSEPass());
PM.add(createCorrelatedValuePropagationPass());
PM.add(createInstructionCombiningPass());
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
- PM.add(createCFGSimplificationPass());
+ PM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
PM.add(createInstructionCombiningPass());
}
@@ -597,6 +613,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// before SLP vectorization.
PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
@@ -641,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// unrolled loop is a inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
PM.add(createWarnMissedTransformationsPass());
@@ -772,7 +790,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Clean up after IPCP & DAE
// For SamplePGO in ThinLTO compile phase, we do not want to do indirect
// call promotion as it will change the CFG too much to make the 2nd
@@ -886,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
// We add a fresh GlobalsModRef run at this point. This is particularly
@@ -972,7 +993,8 @@ void PassManagerBuilder::populateModulePassManager(
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
addExtensionsToPM(EP_OptimizerLast, MPM);
@@ -1120,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index 7fb1a25bdf13..6372ce19f8ee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -149,13 +149,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
OptimizationRemarkEmitter *ORE);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI = nullptr);
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA);
@@ -188,21 +186,26 @@ struct LoopInvariantCodeMotion {
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap)
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+ LicmAllowSpeculation(LicmAllowSpeculation) {}
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
+ bool LicmAllowSpeculation;
};
struct LegacyLICMPass : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LegacyLICMPass(
unsigned LicmMssaOptCap = SetLicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
- : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation = true)
+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
@@ -265,7 +268,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
&AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
@@ -290,7 +294,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(LN.getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation);
Loop &OutermostLoop = LN.getOutermostLoop();
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -321,8 +326,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap) {
- return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation) {
+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation);
}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
@@ -418,7 +425,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
+ &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
+ LicmAllowSpeculation);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -460,8 +468,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+ DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@@ -825,7 +833,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
+ OptimizationRemarkEmitter *ORE, bool LoopNestMode,
+ bool AllowSpeculation) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
@@ -877,7 +886,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
true, &Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator())) {
+ CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
@@ -1774,14 +1783,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI) {
- if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation) {
+ if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
bool GuaranteedToExecute =
@@ -1949,7 +1956,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2054,9 +2061,9 @@ bool llvm::promoteLoopAccessesToScalars(
// to execute does as well. Thus we can increase our guaranteed
// alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
- if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
- SafetyInfo, ORE,
- Preheader->getTerminator())) {
+ if (isSafeToExecuteUnconditionally(
+ *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
+ Preheader->getTerminator(), AllowSpeculation)) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ee17da1875e5..b8972751066d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -59,6 +59,11 @@ static cl::opt<bool> UserKeepLoops(
"keep-loops", cl::Hidden, cl::init(true),
cl::desc("Preserve canonical loop structure (default = true)"));
+static cl::opt<bool> UserSwitchRangeToICmp(
+ "switch-range-to-icmp", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Convert switches into an integer range comparison (default = false)"));
+
static cl::opt<bool> UserSwitchToLookup(
"switch-to-lookup", cl::Hidden, cl::init(false),
cl::desc("Convert switches to lookup tables (default = false)"));
@@ -311,6 +316,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.BonusInstThreshold = UserBonusInstThreshold;
if (UserForwardSwitchCond.getNumOccurrences())
Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+ if (UserSwitchRangeToICmp.getNumOccurrences())
+ Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;
if (UserSwitchToLookup.getNumOccurrences())
Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
if (UserKeepLoops.getNumOccurrences())
@@ -337,6 +344,8 @@ void SimplifyCFGPass::printPipeline(
OS << "<";
OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
+ << "switch-range-to-icmp;";
OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 335ac03ccb52..8c4e1b381b4d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6211,7 +6211,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
}
// Try to transform the switch into an icmp and a branch.
- if (TurnSwitchRangeIntoICmp(SI, Builder))
+ // The conversion from switch to comparison may lose information on
+ // impossible switch values, so disable it early in the pipeline.
+ if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
return requestResimplify();
// Remove unreachable cases.