aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-12-09 13:28:42 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-12-09 13:28:42 +0000
commitb1c73532ee8997fe5dfbeb7d223027bdf99758a0 (patch)
tree7d6e51c294ab6719475d660217aa0c0ad0526292 /llvm/lib/Transforms/Utils
parent7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/AddDiscriminators.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp35
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp143
-rw-r--r--llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp171
-rw-r--r--llvm/lib/Transforms/Utils/CallPromotionUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp66
-rw-r--r--llvm/lib/Transforms/Utils/CloneModule.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp122
-rw-r--r--llvm/lib/Transforms/Utils/CodeLayout.cpp878
-rw-r--r--llvm/lib/Transforms/Utils/CodeMoverUtils.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/CtorUtils.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/DXILUpgrade.cpp36
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp73
-rw-r--r--llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/EscapeEnumerator.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/FixIrreducible.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/FunctionComparator.cpp94
-rw-r--r--llvm/lib/Transforms/Utils/InjectTLIMappings.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp303
-rw-r--r--llvm/lib/Transforms/Utils/LCSSA.cpp21
-rw-r--r--llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp743
-rw-r--r--llvm/lib/Transforms/Utils/LoopConstrainer.cpp904
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp47
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp229
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp125
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp210
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp13
-rw-r--r--llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp52
-rw-r--r--llvm/lib/Transforms/Utils/MetaRenamer.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/ModuleUtils.cpp17
-rw-r--r--llvm/lib/Transforms/Utils/MoveAutoInit.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp33
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp92
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp78
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdater.cpp37
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileInference.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/SanitizerStats.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp390
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp627
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp187
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp55
-rw-r--r--llvm/lib/Transforms/Utils/StripGCRelocates.cpp20
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp34
-rw-r--r--llvm/lib/Transforms/Utils/UnifyLoopExits.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/Utils.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp46
56 files changed, 4399 insertions, 1632 deletions
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 2195406c144c..6ca737df49b9 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -153,19 +153,17 @@ static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
Value *Length, bool isLast) {
auto Int64Ty = Builder.getInt64Ty();
- auto CharPtrTy = Builder.getInt8PtrTy();
+ auto PtrTy = Builder.getPtrTy();
auto Int32Ty = Builder.getInt32Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
- Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
+ Int64Ty, PtrTy, Int64Ty, Int32Ty);
auto IsLastInt32 = Builder.getInt32(isLast);
return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
}
static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
bool IsLast) {
- Arg = Builder.CreateBitCast(
- Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
auto Length = getStrlenWithNull(Builder, Arg);
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
}
@@ -299,9 +297,9 @@ static Value *callBufferedPrintfStart(
Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
Type *Tys_alloc[1] = {Builder.getInt32Ty()};
- Type *I8Ptr =
- Builder.getInt8PtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
- FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
+ Type *PtrTy =
+ Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
+ FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false);
auto PrintfAllocFn =
M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 7d127400651e..f95d5e23c9c8 100644
--- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -63,13 +63,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <utility>
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 45cf98e65a5a..efa8e874b955 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -587,37 +586,3 @@ PreservedAnalyses AssumeBuilderPass::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-class AssumeBuilderPassLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- AssumeBuilderPassLegacyPass() : FunctionPass(ID) {
- initializeAssumeBuilderPassLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override {
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- for (Instruction &I : instructions(F))
- salvageKnowledge(&I, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
- return true;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
-
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char AssumeBuilderPassLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AssumeBuilderPassLegacyPass, "assume-builder",
- "Assume Builder", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(AssumeBuilderPassLegacyPass, "assume-builder",
- "Assume Builder", false, false)
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index f06ea89cc61d..b700edf8ea6c 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -194,7 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Don't break unwinding instructions or terminators with other side-effects.
Instruction *PTI = PredBB->getTerminator();
- if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects())
+ if (PTI->isSpecialTerminator() || PTI->mayHaveSideEffects())
return false;
// Can't merge if there are multiple distinct successors.
@@ -300,7 +300,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
PredBB->back().eraseFromParent();
// Move terminator instruction.
- PredBB->splice(PredBB->end(), BB);
+ BB->back().moveBeforePreserving(*PredBB, PredBB->end());
// Terminator may be a memory accessing instruction too.
if (MSSAU)
@@ -382,7 +382,39 @@ bool llvm::MergeBlockSuccessorsIntoGivenBlocks(
/// - Check fully overlapping fragments and not only identical fragments.
/// - Support dbg.declare. dbg.label, and possibly other meta instructions being
/// part of the sequence of consecutive instructions.
+static bool DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ SmallVector<DPValue *, 8> ToBeRemoved;
+ SmallDenseSet<DebugVariable> VariableSet;
+ for (auto &I : reverse(*BB)) {
+ for (DPValue &DPV : reverse(I.getDbgValueRange())) {
+ DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
+ DPV.getDebugLoc()->getInlinedAt());
+ auto R = VariableSet.insert(Key);
+ // If the same variable fragment is described more than once it is enough
+ // to keep the last one (i.e. the first found since we for reverse
+ // iteration).
+ // FIXME: add assignment tracking support (see parallel implementation
+ // below).
+ if (!R.second)
+ ToBeRemoved.push_back(&DPV);
+ continue;
+ }
+ // Sequence with consecutive dbg.value instrs ended. Clear the map to
+ // restart identifying redundant instructions if case we find another
+ // dbg.value sequence.
+ VariableSet.clear();
+ }
+
+ for (auto &DPV : ToBeRemoved)
+ DPV->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+ if (BB->IsNewDbgInfoFormat)
+ return DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BB);
+
SmallVector<DbgValueInst *, 8> ToBeRemoved;
SmallDenseSet<DebugVariable> VariableSet;
for (auto &I : reverse(*BB)) {
@@ -440,7 +472,38 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
///
/// Possible improvements:
/// - Keep track of non-overlapping fragments.
+static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ SmallVector<DPValue *, 8> ToBeRemoved;
+ DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+ VariableMap;
+ for (auto &I : *BB) {
+ for (DPValue &DPV : I.getDbgValueRange()) {
+ DebugVariable Key(DPV.getVariable(), std::nullopt,
+ DPV.getDebugLoc()->getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ SmallVector<Value *, 4> Values(DPV.location_ops());
+ if (VMI == VariableMap.end() || VMI->second.first != Values ||
+ VMI->second.second != DPV.getExpression()) {
+ VariableMap[Key] = {Values, DPV.getExpression()};
+ continue;
+ }
+ // Found an identical mapping. Remember the instruction for later removal.
+ ToBeRemoved.push_back(&DPV);
+ }
+ }
+
+ for (auto *DPV : ToBeRemoved)
+ DPV->eraseFromParent();
+
+ return !ToBeRemoved.empty();
+}
+
static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+ if (BB->IsNewDbgInfoFormat)
+ return DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BB);
+
SmallVector<DbgValueInst *, 8> ToBeRemoved;
DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
VariableMap;
@@ -852,9 +915,11 @@ void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
continue;
// Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN = PHINode::Create(
- PN.getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+ PHINode *NewPN = PHINode::Create(PN.getType(), Preds.size(), "split");
+ BasicBlock::iterator InsertPos =
+ SplitBB->isLandingPad() ? SplitBB->begin()
+ : SplitBB->getTerminator()->getIterator();
+ NewPN->insertBefore(InsertPos);
for (BasicBlock *BB : Preds)
NewPN->addIncoming(V, BB);
@@ -877,7 +942,7 @@ llvm::SplitAllCriticalEdges(Function &F,
return NumBroken;
}
-static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, DominatorTree *DT,
LoopInfo *LI, MemorySSAUpdater *MSSAU,
const Twine &BBName, bool Before) {
@@ -887,7 +952,7 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
BBName);
}
- BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) {
++SplitIt;
assert(SplitIt != SplitPt->getParent()->end());
@@ -933,14 +998,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
return New;
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt,
DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU, const Twine &BBName,
bool Before) {
return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
Before);
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU, const Twine &BBName,
bool Before) {
@@ -948,12 +1013,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
Before);
}
-BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
const Twine &BBName) {
- BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
std::string Name = BBName.str();
@@ -1137,14 +1202,11 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
-
- // NOTE! This loop walks backwards for a reason! First off, this minimizes
- // the cost of removal if we end up removing a large number of values, and
- // second off, this ensures that the indices for the incoming values
- // aren't invalidated when we remove one.
- for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
- if (PredSet.count(PN->getIncomingBlock(i)))
- PN->removeIncomingValue(i, false);
+ PN->removeIncomingValueIf(
+ [&](unsigned Idx) {
+ return PredSet.contains(PN->getIncomingBlock(Idx));
+ },
+ /* DeletePHIIfEmpty */ false);
// Add an incoming value to the PHI node in the loop for the preheader
// edge.
@@ -1394,17 +1456,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- return SplitLandingPadPredecessorsImpl(
- OrigBB, Preds, Suffix1, Suffix2, NewBBs,
- /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
-}
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix1, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
DomTreeUpdater *DTU, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
@@ -1472,7 +1523,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
+ BasicBlock::iterator SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
DomTreeUpdater *DTU, LoopInfo *LI,
@@ -1485,7 +1536,7 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
}
Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond,
- Instruction *SplitBefore,
+ BasicBlock::iterator SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
DomTreeUpdater *DTU, LoopInfo *LI,
@@ -1497,7 +1548,7 @@ Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond,
return ElseBlock->getTerminator();
}
-void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
MDNode *BranchWeights,
@@ -1513,7 +1564,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
}
void llvm::SplitBlockAndInsertIfThenElse(
- Value *Cond, Instruction *SplitBefore, BasicBlock **ThenBlock,
+ Value *Cond, BasicBlock::iterator SplitBefore, BasicBlock **ThenBlock,
BasicBlock **ElseBlock, bool UnreachableThen, bool UnreachableElse,
MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI) {
assert((ThenBlock || ElseBlock) &&
@@ -1530,7 +1581,7 @@ void llvm::SplitBlockAndInsertIfThenElse(
}
LLVMContext &C = Head->getContext();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
BasicBlock *TrueBlock = Tail;
BasicBlock *FalseBlock = Tail;
bool ThenToTailEdge = false;
@@ -2077,3 +2128,25 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
+
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
+ for (auto &BB : F) {
+ auto *Term = BB.getTerminator();
+ if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+ isa<BranchInst>(Term)))
+ return false;
+ }
+ return true;
+}
+
+bool llvm::isPresplitCoroSuspendExitEdge(const BasicBlock &Src,
+ const BasicBlock &Dest) {
+ assert(Src.getParent() == Dest.getParent());
+ if (!Src.getParent()->isPresplitCoroutine())
+ return false;
+ if (auto *SW = dyn_cast<SwitchInst>(Src.getTerminator()))
+ if (auto *Intr = dyn_cast<IntrinsicInst>(SW->getCondition()))
+ return Intr->getIntrinsicID() == Intrinsic::coro_suspend &&
+ SW->getDefaultDest() == &Dest;
+ return false;
+}
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index ddb35756030f..5fb796cc3db6 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -387,7 +387,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
if (ShouldUpdateAnalysis) {
// Copy the BFI/BPI from Target to BodyBlock.
BPI->setEdgeProbability(BodyBlock, EdgeProbabilities);
- BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+ BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target));
}
// It's possible Target was its own successor through an indirectbr.
// In this case, the indirectbr now comes from BodyBlock.
@@ -411,10 +411,10 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
BPI->getEdgeProbability(Src, DirectSucc);
}
if (ShouldUpdateAnalysis) {
- BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+ BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc);
BlockFrequency NewBlockFreqForTarget =
BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
- BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+ BFI->setBlockFreq(Target, NewBlockFreqForTarget);
}
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
@@ -449,8 +449,8 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
// Create a PHI in the body block, to merge the direct and indirect
// predecessors.
- PHINode *MergePHI =
- PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ PHINode *MergePHI = PHINode::Create(IndPHI->getType(), 2, "merge");
+ MergePHI->insertBefore(MergeInsert);
MergePHI->addIncoming(NewIndPHI, Target);
MergePHI->addIncoming(DirPHI, DirectSucc);
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 5de8ff84de77..12741dc5af5a 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1425,11 +1425,6 @@ StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI,
//- Emit LibCalls ------------------------------------------------------------//
-Value *llvm::castToCStr(Value *V, IRBuilderBase &B) {
- unsigned AS = V->getType()->getPointerAddressSpace();
- return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
-}
-
static IntegerType *getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI) {
return B.getIntNTy(TLI->getIntSize());
}
@@ -1461,63 +1456,64 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strlen, SizeTTy,
- B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+ return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI);
}
Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(),
- castToCStr(Ptr, B), B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_strdup, CharPtrTy, CharPtrTy, Ptr, B, TLI);
}
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
- return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, IntTy},
- {castToCStr(Ptr, B), ConstantInt::get(IntTy, C)}, B, TLI);
+ return emitLibCall(LibFunc_strchr, CharPtrTy, {CharPtrTy, IntTy},
+ {Ptr, ConstantInt::get(IntTy, C)}, B, TLI);
}
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(
LibFunc_strncmp, IntTy,
- {B.getInt8PtrTy(), B.getInt8PtrTy(), SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = Dst->getType();
- return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = Dst->getType();
+ return emitLibCall(LibFunc_strcpy, CharPtrTy, {CharPtrTy, CharPtrTy},
+ {Dst, Src}, B, TLI);
}
Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_stpcpy, CharPtrTy, {CharPtrTy, CharPtrTy},
+ {Dst, Src}, B, TLI);
}
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+ return emitLibCall(LibFunc_strncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dst, Src, Len}, B, TLI);
}
Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+ return emitLibCall(LibFunc_stpncpy, CharPtrTy, {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dst, Src, Len}, B, TLI);
}
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -1530,13 +1526,11 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AttributeList AS;
AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk,
- AttributeList::get(M->getContext(), AS), I8Ptr,
- I8Ptr, I8Ptr, SizeTTy, SizeTTy);
- Dst = castToCStr(Dst, B);
- Src = castToCStr(Src, B);
+ AttributeList::get(M->getContext(), AS), VoidPtrTy,
+ VoidPtrTy, VoidPtrTy, SizeTTy, SizeTTy);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
if (const Function *F =
dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
@@ -1546,140 +1540,141 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_mempcpy, I8Ptr,
- {I8Ptr, I8Ptr, SizeTTy},
+ return emitLibCall(LibFunc_mempcpy, VoidPtrTy,
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
{Dst, Src, Len}, B, TLI);
}
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memchr, I8Ptr,
- {I8Ptr, IntTy, SizeTTy},
- {castToCStr(Ptr, B), Val, Len}, B, TLI);
+ return emitLibCall(LibFunc_memchr, VoidPtrTy,
+ {VoidPtrTy, IntTy, SizeTTy},
+ {Ptr, Val, Len}, B, TLI);
}
Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memrchr, I8Ptr,
- {I8Ptr, IntTy, SizeTTy},
- {castToCStr(Ptr, B), Val, Len}, B, TLI);
+ return emitLibCall(LibFunc_memrchr, VoidPtrTy,
+ {VoidPtrTy, IntTy, SizeTTy},
+ {Ptr, Val, Len}, B, TLI);
}
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_memcmp, IntTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_bcmp, IntTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+ {VoidPtrTy, VoidPtrTy, SizeTTy},
+ {Ptr1, Ptr2, Len}, B, TLI);
}
Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *VoidPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_memccpy, I8Ptr,
- {I8Ptr, I8Ptr, IntTy, SizeTTy},
+ return emitLibCall(LibFunc_memccpy, VoidPtrTy,
+ {VoidPtrTy, VoidPtrTy, IntTy, SizeTTy},
{Ptr1, Ptr2, Val, Len}, B, TLI);
}
Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+ SmallVector<Value *, 8> Args{Dest, Size, Fmt};
llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_snprintf, IntTy,
- {I8Ptr, SizeTTy, I8Ptr},
+ {CharPtrTy, SizeTTy, CharPtrTy},
Args, B, TLI, /*IsVaArgs=*/true);
}
Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+ SmallVector<Value *, 8> Args{Dest, Fmt};
llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_sprintf, IntTy,
- {I8Ptr, I8Ptr}, Args, B, TLI,
+ {CharPtrTy, CharPtrTy}, Args, B, TLI,
/*IsVaArgs=*/true);
}
Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt8PtrTy()},
- {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+ Type *CharPtrTy = B.getPtrTy();
+ return emitLibCall(LibFunc_strcat, CharPtrTy,
+ {CharPtrTy, CharPtrTy},
+ {Dest, Src}, B, TLI);
}
Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_strlcpy, SizeTTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(LibFunc_strlcat, SizeTTy,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *SizeTTy = getSizeTTy(B, TLI);
- return emitLibCall(LibFunc_strncat, I8Ptr,
- {I8Ptr, I8Ptr, SizeTTy},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+ return emitLibCall(LibFunc_strncat, CharPtrTy,
+ {CharPtrTy, CharPtrTy, SizeTTy},
+ {Dest, Src, Size}, B, TLI);
}
Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
Type *SizeTTy = getSizeTTy(B, TLI);
return emitLibCall(
LibFunc_vsnprintf, IntTy,
- {I8Ptr, SizeTTy, I8Ptr, VAList->getType()},
- {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+ {CharPtrTy, SizeTTy, CharPtrTy, VAList->getType()},
+ {Dest, Size, Fmt, VAList}, B, TLI);
}
Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
IRBuilderBase &B, const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *CharPtrTy = B.getPtrTy();
Type *IntTy = getIntTy(B, TLI);
return emitLibCall(LibFunc_vsprintf, IntTy,
- {I8Ptr, I8Ptr, VAList->getType()},
- {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
+ {CharPtrTy, CharPtrTy, VAList->getType()},
+ {Dest, Fmt, VAList}, B, TLI);
}
/// Append a suffix to the function name according to the type of 'Op'.
@@ -1829,9 +1824,9 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
Type *IntTy = getIntTy(B, TLI);
StringRef PutsName = TLI->getName(LibFunc_puts);
FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, IntTy,
- B.getInt8PtrTy());
+ B.getPtrTy());
inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI);
- CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
+ CallInst *CI = B.CreateCall(PutS, Str, PutsName);
if (const Function *F =
dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1867,10 +1862,10 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
Type *IntTy = getIntTy(B, TLI);
StringRef FPutsName = TLI->getName(LibFunc_fputs);
FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, IntTy,
- B.getInt8PtrTy(), File->getType());
+ B.getPtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
+ CallInst *CI = B.CreateCall(F, {Str, File}, FPutsName);
if (const Function *Fn =
dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
@@ -1887,13 +1882,13 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
Type *SizeTTy = getSizeTTy(B, TLI);
StringRef FWriteName = TLI->getName(LibFunc_fwrite);
FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite,
- SizeTTy, B.getInt8PtrTy(), SizeTTy,
+ SizeTTy, B.getPtrTy(), SizeTTy,
SizeTTy, File->getType());
if (File->getType()->isPointerTy())
inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI);
CallInst *CI =
- B.CreateCall(F, {castToCStr(Ptr, B), Size,
+ B.CreateCall(F, {Ptr, Size,
ConstantInt::get(SizeTTy, 1), File});
if (const Function *Fn =
@@ -1911,7 +1906,7 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
StringRef MallocName = TLI->getName(LibFunc_malloc);
Type *SizeTTy = getSizeTTy(B, TLI);
FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc,
- B.getInt8PtrTy(), SizeTTy);
+ B.getPtrTy(), SizeTTy);
inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI);
CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
@@ -1931,7 +1926,7 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
StringRef CallocName = TLI.getName(LibFunc_calloc);
Type *SizeTTy = getSizeTTy(B, &TLI);
FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc,
- B.getInt8PtrTy(), SizeTTy, SizeTTy);
+ B.getPtrTy(), SizeTTy, SizeTTy);
inferNonMandatoryLibFuncAttrs(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
@@ -1950,7 +1945,7 @@ Value *llvm::emitHotColdNew(Value *Num, IRBuilderBase &B,
return nullptr;
StringRef Name = TLI->getName(NewFunc);
- FunctionCallee Func = M->getOrInsertFunction(Name, B.getInt8PtrTy(),
+ FunctionCallee Func = M->getOrInsertFunction(Name, B.getPtrTy(),
Num->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, B.getInt8(HotCold)}, Name);
@@ -1971,7 +1966,7 @@ Value *llvm::emitHotColdNewNoThrow(Value *Num, Value *NoThrow, IRBuilderBase &B,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func =
- M->getOrInsertFunction(Name, B.getInt8PtrTy(), Num->getType(),
+ M->getOrInsertFunction(Name, B.getPtrTy(), Num->getType(),
NoThrow->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, NoThrow, B.getInt8(HotCold)}, Name);
@@ -1992,7 +1987,7 @@ Value *llvm::emitHotColdNewAligned(Value *Num, Value *Align, IRBuilderBase &B,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func = M->getOrInsertFunction(
- Name, B.getInt8PtrTy(), Num->getType(), Align->getType(), B.getInt8Ty());
+ Name, B.getPtrTy(), Num->getType(), Align->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Func, {Num, Align, B.getInt8(HotCold)}, Name);
@@ -2013,7 +2008,7 @@ Value *llvm::emitHotColdNewAlignedNoThrow(Value *Num, Value *Align,
StringRef Name = TLI->getName(NewFunc);
FunctionCallee Func = M->getOrInsertFunction(
- Name, B.getInt8PtrTy(), Num->getType(), Align->getType(),
+ Name, B.getPtrTy(), Num->getType(), Align->getType(),
NoThrow->getType(), B.getInt8Ty());
inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI =
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index b488e3bb0cbd..e42cdab64446 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -111,7 +111,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
return;
- Builder.SetInsertPoint(&MergeBlock->front());
+ Builder.SetInsertPoint(MergeBlock, MergeBlock->begin());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
for (User *U : UsersToUpdate)
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index a1ee3df907ec..fb4d82885377 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -30,6 +30,7 @@
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index d55208602b71..c0f333364fa5 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,6 +44,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo,
DebugInfoFinder *DIFinder) {
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat;
if (BB->hasName())
NewBB->setName(BB->getName() + NameSuffix);
@@ -58,7 +59,10 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
Instruction *NewInst = I.clone();
if (I.hasName())
NewInst->setName(I.getName() + NameSuffix);
- NewInst->insertInto(NewBB, NewBB->end());
+
+ NewInst->insertBefore(*NewBB, NewBB->end());
+ NewInst->cloneDebugInfoFrom(&I);
+
VMap[&I] = NewInst; // Add instruction map to value.
if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) {
@@ -90,6 +94,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
+ NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -267,9 +272,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
BE = NewFunc->end();
BB != BE; ++BB)
- // Loop over all instructions, fixing each one as we find it...
- for (Instruction &II : *BB)
+ // Loop over all instructions, fixing each one as we find it, and any
+ // attached debug-info records.
+ for (Instruction &II : *BB) {
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+ RemapDPValueRange(II.getModule(), II.getDbgValueRange(), VMap, RemapFlag,
+ TypeMapper, Materializer);
+ }
// Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
// same module, the compile unit will already be listed (or not). When
@@ -327,6 +336,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
// Create the new function...
Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
F->getName(), F->getParent());
+ NewF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -472,6 +482,7 @@ void PruningFunctionCloner::CloneBlock(
BasicBlock *NewBB;
Twine NewName(BB->hasName() ? Twine(BB->getName()) + NameSuffix : "");
BBEntry = NewBB = BasicBlock::Create(BB->getContext(), NewName, NewFunc);
+ NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat;
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
@@ -491,6 +502,22 @@ void PruningFunctionCloner::CloneBlock(
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
bool hasMemProfMetadata = false;
+ // Keep a cursor pointing at the last place we cloned debug-info records from.
+ BasicBlock::const_iterator DbgCursor = StartingInst;
+ auto CloneDbgRecordsToHere =
+ [NewBB, &DbgCursor](Instruction *NewInst, BasicBlock::const_iterator II) {
+ if (!NewBB->IsNewDbgInfoFormat)
+ return;
+
+ // Clone debug-info records onto this instruction. Iterate through any
+ // source-instructions we've cloned and then subsequently optimised
+ // away, so that their debug-info doesn't go missing.
+ for (; DbgCursor != II; ++DbgCursor)
+ NewInst->cloneDebugInfoFrom(&*DbgCursor, std::nullopt, false);
+ NewInst->cloneDebugInfoFrom(&*II);
+ DbgCursor = std::next(II);
+ };
+
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
@@ -540,6 +567,8 @@ void PruningFunctionCloner::CloneBlock(
hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof);
}
+ CloneDbgRecordsToHere(NewInst, II);
+
if (CodeInfo) {
CodeInfo->OrigVMap[&*II] = NewInst;
if (auto *CB = dyn_cast<CallBase>(&*II))
@@ -597,6 +626,9 @@ void PruningFunctionCloner::CloneBlock(
if (OldTI->hasName())
NewInst->setName(OldTI->getName() + NameSuffix);
NewInst->insertInto(NewBB, NewBB->end());
+
+ CloneDbgRecordsToHere(NewInst, OldTI->getIterator());
+
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo) {
@@ -608,6 +640,13 @@ void PruningFunctionCloner::CloneBlock(
// Recursively clone any reachable successor blocks.
append_range(ToClone, successors(BB->getTerminator()));
+ } else {
+ // If we didn't create a new terminator, clone DPValues from the old
+ // terminator onto the new terminator.
+ Instruction *NewInst = NewBB->getTerminator();
+ assert(NewInst);
+
+ CloneDbgRecordsToHere(NewInst, OldTI->getIterator());
}
if (CodeInfo) {
@@ -845,12 +884,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
+ // Do the same for DPValues, touching all the instructions in the cloned
+ // range of blocks.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end())) {
+ for (Instruction &I : BB) {
+ RemapDPValueRange(I.getModule(), I.getDbgValueRange(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ }
+ }
+
// Simplify conditional branches and switches with a constant operand. We try
// to prune these out when cloning, but if the simplification required
// looking through PHI nodes, those are only available after forming the full
// basic block. That may leave some here, and we still want to prune the dead
// code as early as possible.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
ConstantFoldTerminator(&BB);
@@ -939,10 +988,14 @@ void llvm::CloneAndPruneFunctionInto(
void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
- for (auto *BB : Blocks)
- for (auto &Inst : *BB)
+ for (auto *BB : Blocks) {
+ for (auto &Inst : *BB) {
+ RemapDPValueRange(Inst.getModule(), Inst.getDbgValueRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&Inst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
}
/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
@@ -1066,6 +1119,7 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
+ New->cloneDebugInfoFrom(&*BI);
ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp
index 55e051298a9a..00e40fe73d90 100644
--- a/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -34,6 +34,8 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
/// copies of global variables and functions, and making their (initializers and
/// references, respectively) refer to the right globals.
///
+/// Cloning un-materialized modules is not currently supported, so any
+/// modules initialized via lazy loading should be materialized before cloning
std::unique_ptr<Module> llvm::CloneModule(const Module &M) {
// Create the value map that maps things from the old module over to the new
// module.
@@ -49,6 +51,9 @@ std::unique_ptr<Module> llvm::CloneModule(const Module &M,
std::unique_ptr<Module> llvm::CloneModule(
const Module &M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
+
+ assert(M.isMaterialized() && "Module must be materialized before cloning!");
+
// First off, we need to create the new module.
std::unique_ptr<Module> New =
std::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
@@ -56,6 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(
New->setDataLayout(M.getDataLayout());
New->setTargetTriple(M.getTargetTriple());
New->setModuleInlineAsm(M.getModuleInlineAsm());
+ New->IsNewDbgInfoFormat = M.IsNewDbgInfoFormat;
// Loop over all of the global variables, making corresponding globals in the
// new module. Here we add them to the VMap and to the new Module. We
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index c390af351a69..9c1186232e02 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -245,12 +245,13 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
bool AllowVarArgs, bool AllowAlloca,
- BasicBlock *AllocationBlock, std::string Suffix)
+ BasicBlock *AllocationBlock, std::string Suffix,
+ bool ArgsInZeroAddressSpace)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
- Suffix(Suffix) {}
+ Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
@@ -567,7 +568,7 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
for (Instruction *I : LifetimeBitcastUsers) {
Module *M = AIFunc->getParent();
LLVMContext &Ctx = M->getContext();
- auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto *Int8PtrTy = PointerType::getUnqual(Ctx);
CastInst *CastI =
CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
I->replaceUsesOfWith(I->getOperand(1), CastI);
@@ -721,7 +722,8 @@ void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
- PN->getName() + ".ce", &NewBB->front());
+ PN->getName() + ".ce");
+ NewPN->insertBefore(NewBB->begin());
PN->replaceAllUsesWith(NewPN);
NewPN->addIncoming(PN, OldPred);
@@ -766,6 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits(
NewBB = BasicBlock::Create(ExitBB->getContext(),
ExitBB->getName() + ".split",
ExitBB->getParent(), ExitBB);
+ NewBB->IsNewDbgInfoFormat = ExitBB->IsNewDbgInfoFormat;
SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
for (BasicBlock *PredBB : Preds)
if (Blocks.count(PredBB))
@@ -775,9 +778,9 @@ void CodeExtractor::severSplitPHINodesOfExits(
}
// Split this PHI.
- PHINode *NewPN =
- PHINode::Create(PN.getType(), IncomingVals.size(),
- PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce");
+ NewPN->insertBefore(NewBB->getFirstNonPHIIt());
for (unsigned i : IncomingVals)
NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
for (unsigned i : reverse(IncomingVals))
@@ -865,7 +868,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
StructType *StructTy = nullptr;
if (AggregateArgs && !AggParamTy.empty()) {
StructTy = StructType::get(M->getContext(), AggParamTy);
- ParamTy.push_back(PointerType::get(StructTy, DL.getAllocaAddrSpace()));
+ ParamTy.push_back(PointerType::get(
+ StructTy, ArgsInZeroAddressSpace ? 0 : DL.getAllocaAddrSpace()));
}
LLVM_DEBUG({
@@ -886,6 +890,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Function *newFunction = Function::Create(
funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
oldFunction->getName() + "." + SuffixToUse, M);
+ newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
// Inherit all of the target dependent attributes and white-listed
// target independent attributes.
@@ -919,6 +924,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::PresplitCoroutine:
case Attribute::Memory:
case Attribute::NoFPClass:
+ case Attribute::CoroDestroyOnlyWhenComplete:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -940,6 +946,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NoSanitizeBounds:
case Attribute::NoSanitizeCoverage:
case Attribute::NullPointerIsValid:
+ case Attribute::OptimizeForDebugging:
case Attribute::OptForFuzzing:
case Attribute::OptimizeNone:
case Attribute::OptimizeForSize:
@@ -990,6 +997,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::ImmArg:
case Attribute::ByRef:
case Attribute::WriteOnly:
+ case Attribute::Writable:
// These are not really attributes.
case Attribute::None:
case Attribute::EndAttrKinds:
@@ -1185,8 +1193,15 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
: &codeReplacer->getParent()->front().front());
- params.push_back(Struct);
+ if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
+ auto *StructSpaceCast = new AddrSpaceCastInst(
+ Struct, PointerType ::get(Context, 0), "structArg.ascast");
+ StructSpaceCast->insertAfter(Struct);
+ params.push_back(StructSpaceCast);
+ } else {
+ params.push_back(Struct);
+ }
// Store aggregated inputs in the struct.
for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
if (inputs.contains(StructValues[i])) {
@@ -1492,10 +1507,14 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
for (Instruction &I : instructions(F)) {
SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
- findDbgUsers(DbgUsers, &I);
+ SmallVector<DPValue *, 4> DPValues;
+ findDbgUsers(DbgUsers, &I, &DPValues);
for (DbgVariableIntrinsic *DVI : DbgUsers)
if (DVI->getFunction() != &F)
DVI->eraseFromParent();
+ for (DPValue *DPV : DPValues)
+ if (DPV->getFunction() != &F)
+ DPV->eraseFromParent();
}
}
@@ -1531,6 +1550,16 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
/*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
NewFunc.setSubprogram(NewSP);
+ auto IsInvalidLocation = [&NewFunc](Value *Location) {
+ // Location is invalid if it isn't a constant or an instruction, or is an
+ // instruction but isn't in the new function.
+ if (!Location ||
+ (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+ return true;
+ Instruction *LocationInst = dyn_cast<Instruction>(Location);
+ return LocationInst && LocationInst->getFunction() != &NewFunc;
+ };
+
// Debug intrinsics in the new function need to be updated in one of two
// ways:
// 1) They need to be deleted, because they describe a value in the old
@@ -1539,8 +1568,41 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// point to a variable in the wrong scope.
SmallDenseMap<DINode *, DINode *> RemappedMetadata;
SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
+ SmallVector<DPValue *, 4> DPVsToDelete;
DenseMap<const MDNode *, MDNode *> Cache;
+
+ auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar) {
+ DINode *&NewVar = RemappedMetadata[OldVar];
+ if (!NewVar) {
+ DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
+ *OldVar->getScope(), *NewSP, Ctx, Cache);
+ NewVar = DIB.createAutoVariable(
+ NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
+ OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
+ OldVar->getAlignInBits());
+ }
+ return cast<DILocalVariable>(NewVar);
+ };
+
+ auto UpdateDPValuesOnInst = [&](Instruction &I) -> void {
+ for (auto &DPV : I.getDbgValueRange()) {
+ // Apply the two updates that dbg.values get: invalid operands, and
+ // variable metadata fixup.
+ // FIXME: support dbg.assign form of DPValues.
+ if (any_of(DPV.location_ops(), IsInvalidLocation)) {
+ DPVsToDelete.push_back(&DPV);
+ continue;
+ }
+ if (!DPV.getDebugLoc().getInlinedAt())
+ DPV.setVariable(GetUpdatedDIVariable(DPV.getVariable()));
+ DPV.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DPV.getDebugLoc(),
+ *NewSP, Ctx, Cache));
+ }
+ };
+
for (Instruction &I : instructions(NewFunc)) {
+ UpdateDPValuesOnInst(I);
+
auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
if (!DII)
continue;
@@ -1562,41 +1624,28 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
continue;
}
- auto IsInvalidLocation = [&NewFunc](Value *Location) {
- // Location is invalid if it isn't a constant or an instruction, or is an
- // instruction but isn't in the new function.
- if (!Location ||
- (!isa<Constant>(Location) && !isa<Instruction>(Location)))
- return true;
- Instruction *LocationInst = dyn_cast<Instruction>(Location);
- return LocationInst && LocationInst->getFunction() != &NewFunc;
- };
-
auto *DVI = cast<DbgVariableIntrinsic>(DII);
// If any of the used locations are invalid, delete the intrinsic.
if (any_of(DVI->location_ops(), IsInvalidLocation)) {
DebugIntrinsicsToDelete.push_back(DVI);
continue;
}
+ // DbgAssign intrinsics have an extra Value argument:
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI);
+ DAI && IsInvalidLocation(DAI->getAddress())) {
+ DebugIntrinsicsToDelete.push_back(DVI);
+ continue;
+ }
// If the variable was in the scope of the old function, i.e. it was not
// inlined, point the intrinsic to a fresh variable within the new function.
- if (!DVI->getDebugLoc().getInlinedAt()) {
- DILocalVariable *OldVar = DVI->getVariable();
- DINode *&NewVar = RemappedMetadata[OldVar];
- if (!NewVar) {
- DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram(
- *OldVar->getScope(), *NewSP, Ctx, Cache);
- NewVar = DIB.createAutoVariable(
- NewScope, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
- OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
- OldVar->getAlignInBits());
- }
- DVI->setVariable(cast<DILocalVariable>(NewVar));
- }
+ if (!DVI->getDebugLoc().getInlinedAt())
+ DVI->setVariable(GetUpdatedDIVariable(DVI->getVariable()));
}
for (auto *DII : DebugIntrinsicsToDelete)
DII->eraseFromParent();
+ for (auto *DPV : DPVsToDelete)
+ DPV->getMarker()->MarkedInstr->dropOneDbgValue(DPV);
DIB.finalizeSubprogram(NewSP);
// Fix up the scope information attached to the line locations in the new
@@ -1702,11 +1751,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
header);
+ codeReplacer->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
// The new function needs a root node because other nodes can branch to the
// head of the region, but the entry node of a function cannot have preds.
BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
"newFuncRoot");
+ newFuncRoot->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
+
auto *BranchI = BranchInst::Create(header);
// If the original function has debug info, we have to add a debug location
// to the new branch instruction from the artificial entry block.
@@ -1772,11 +1824,11 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
// Update the entry count of the function.
if (BFI) {
- auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ auto Count = BFI->getProfileCountFromFreq(EntryFreq);
if (Count)
newFunction->setEntryCount(
ProfileCount(*Count, Function::PCT_Real)); // FIXME
- BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
+ BFI->setBlockFreq(codeReplacer, EntryFreq);
}
CallInst *TheCall =
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index ac74a1c116cc..95edd27c675d 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -45,8 +45,11 @@
#include "llvm/Support/Debug.h"
#include <cmath>
+#include <set>
using namespace llvm;
+using namespace llvm::codelayout;
+
#define DEBUG_TYPE "code-layout"
namespace llvm {
@@ -61,8 +64,8 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
cl::init(true), cl::Hidden);
} // namespace llvm
-// Algorithm-specific params. The values are tuned for the best performance
-// of large-scale front-end bound binaries.
+// Algorithm-specific params for Ext-TSP. The values are tuned for the best
+// performance of large-scale front-end bound binaries.
static cl::opt<double> ForwardWeightCond(
"ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
cl::desc("The weight of conditional forward jumps for ExtTSP value"));
@@ -96,10 +99,10 @@ static cl::opt<unsigned> BackwardDistance(
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
// The maximum size of a chain created by the algorithm. The size is bounded
-// so that the algorithm can efficiently process extremely large instance.
+// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
- MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
- cl::desc("The maximum size of a chain to create."));
+ MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
+ cl::desc("The maximum size of a chain to create"));
// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worsen run-time.
@@ -107,11 +110,29 @@ static cl::opt<unsigned> ChainSplitThreshold(
"ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
cl::desc("The maximum size of a chain to apply splitting"));
-// The option enables splitting (large) chains along in-coming and out-going
-// jumps. This typically results in a better quality.
-static cl::opt<bool> EnableChainSplitAlongJumps(
- "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
- cl::desc("The maximum size of a chain to apply splitting"));
+// The maximum ratio between densities of two chains for merging.
+static cl::opt<double> MaxMergeDensityRatio(
+ "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100),
+ cl::desc("The maximum ratio between densities of two chains for merging"));
+
+// Algorithm-specific options for CDSort.
+static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden,
+ cl::desc("The size of the cache"));
+
+static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden,
+ cl::desc("The size of a line in the cache"));
+
+static cl::opt<unsigned>
+ CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
+ cl::desc("The maximum size of a chain to create"));
+
+static cl::opt<double> DistancePower(
+ "cdsort-distance-power", cl::ReallyHidden,
+ cl::desc("The power exponent for the distance-based locality"));
+
+static cl::opt<double> FrequencyScale(
+ "cdsort-frequency-scale", cl::ReallyHidden,
+ cl::desc("The scale factor for the frequency-based locality"));
namespace {
@@ -199,11 +220,14 @@ struct NodeT {
NodeT &operator=(const NodeT &) = delete;
NodeT &operator=(NodeT &&) = default;
- explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
- : Index(Index), Size(Size), ExecutionCount(EC) {}
+ explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
+ : Index(Index), Size(Size), ExecutionCount(Count) {}
bool isEntry() const { return Index == 0; }
+ // Check if Other is a successor of the node.
+ bool isSuccessor(const NodeT *Other) const;
+
// The total execution count of outgoing jumps.
uint64_t outCount() const;
@@ -267,7 +291,7 @@ struct ChainT {
size_t numBlocks() const { return Nodes.size(); }
- double density() const { return static_cast<double>(ExecutionCount) / Size; }
+ double density() const { return ExecutionCount / Size; }
bool isEntry() const { return Nodes[0]->Index == 0; }
@@ -280,9 +304,9 @@ struct ChainT {
}
ChainEdge *getEdge(ChainT *Other) const {
- for (auto It : Edges) {
- if (It.first == Other)
- return It.second;
+ for (const auto &[Chain, ChainEdge] : Edges) {
+ if (Chain == Other)
+ return ChainEdge;
}
return nullptr;
}
@@ -302,13 +326,13 @@ struct ChainT {
Edges.push_back(std::make_pair(Other, Edge));
}
- void merge(ChainT *Other, const std::vector<NodeT *> &MergedBlocks) {
- Nodes = MergedBlocks;
- // Update the chain's data
+ void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {
+ Nodes = std::move(MergedBlocks);
+ // Update the chain's data.
ExecutionCount += Other->ExecutionCount;
Size += Other->Size;
Id = Nodes[0]->Index;
- // Update the node's data
+ // Update the node's data.
for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
Nodes[Idx]->CurChain = this;
Nodes[Idx]->CurIndex = Idx;
@@ -328,8 +352,9 @@ struct ChainT {
uint64_t Id;
// Cached ext-tsp score for the chain.
double Score{0};
- // The total execution count of the chain.
- uint64_t ExecutionCount{0};
+ // The total execution count of the chain. Since the execution count of
+ // a basic block is uint64_t, using doubles here to avoid overflow.
+ double ExecutionCount{0};
// The total size of the chain.
uint64_t Size{0};
// Nodes of the chain.
@@ -340,7 +365,7 @@ struct ChainT {
/// An edge in the graph representing jumps between two chains.
/// When nodes are merged into chains, the edges are combined too so that
-/// there is always at most one edge between a pair of chains
+/// there is always at most one edge between a pair of chains.
struct ChainEdge {
ChainEdge(const ChainEdge &) = delete;
ChainEdge(ChainEdge &&) = default;
@@ -424,53 +449,57 @@ private:
bool CacheValidBackward{false};
};
+bool NodeT::isSuccessor(const NodeT *Other) const {
+ for (JumpT *Jump : OutJumps)
+ if (Jump->Target == Other)
+ return true;
+ return false;
+}
+
uint64_t NodeT::outCount() const {
uint64_t Count = 0;
- for (JumpT *Jump : OutJumps) {
+ for (JumpT *Jump : OutJumps)
Count += Jump->ExecutionCount;
- }
return Count;
}
uint64_t NodeT::inCount() const {
uint64_t Count = 0;
- for (JumpT *Jump : InJumps) {
+ for (JumpT *Jump : InJumps)
Count += Jump->ExecutionCount;
- }
return Count;
}
void ChainT::mergeEdges(ChainT *Other) {
- // Update edges adjacent to chain Other
- for (auto EdgeIt : Other->Edges) {
- ChainT *DstChain = EdgeIt.first;
- ChainEdge *DstEdge = EdgeIt.second;
+ // Update edges adjacent to chain Other.
+ for (const auto &[DstChain, DstEdge] : Other->Edges) {
ChainT *TargetChain = DstChain == Other ? this : DstChain;
ChainEdge *CurEdge = getEdge(TargetChain);
if (CurEdge == nullptr) {
DstEdge->changeEndpoint(Other, this);
this->addEdge(TargetChain, DstEdge);
- if (DstChain != this && DstChain != Other) {
+ if (DstChain != this && DstChain != Other)
DstChain->addEdge(this, DstEdge);
- }
} else {
CurEdge->moveJumps(DstEdge);
}
- // Cleanup leftover edge
- if (DstChain != Other) {
+ // Cleanup leftover edge.
+ if (DstChain != Other)
DstChain->removeEdge(Other);
- }
}
}
using NodeIter = std::vector<NodeT *>::const_iterator;
+static std::vector<NodeT *> EmptyList;
-/// A wrapper around three chains of nodes; it is used to avoid extra
-/// instantiation of the vectors.
-struct MergedChain {
- MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
- NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
- NodeIter End3 = NodeIter())
+/// A wrapper around three concatenated vectors (chains) of nodes; it is used
+/// to avoid extra instantiation of the vectors.
+struct MergedNodesT {
+ MergedNodesT(NodeIter Begin1, NodeIter End1,
+ NodeIter Begin2 = EmptyList.begin(),
+ NodeIter End2 = EmptyList.end(),
+ NodeIter Begin3 = EmptyList.begin(),
+ NodeIter End3 = EmptyList.end())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}
@@ -504,15 +533,35 @@ private:
NodeIter End3;
};
+/// A wrapper around two concatenated vectors (chains) of jumps.
+struct MergedJumpsT {
+ MergedJumpsT(const std::vector<JumpT *> *Jumps1,
+ const std::vector<JumpT *> *Jumps2 = nullptr) {
+ assert(!Jumps1->empty() && "cannot merge empty jump list");
+ JumpArray[0] = Jumps1;
+ JumpArray[1] = Jumps2;
+ }
+
+ template <typename F> void forEach(const F &Func) const {
+ for (auto Jumps : JumpArray)
+ if (Jumps != nullptr)
+ for (JumpT *Jump : *Jumps)
+ Func(Jump);
+ }
+
+private:
+ std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
+};
+
/// Merge two chains of nodes respecting a given 'type' and 'offset'.
///
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
-MergedChain mergeNodes(const std::vector<NodeT *> &X,
- const std::vector<NodeT *> &Y, size_t MergeOffset,
- MergeTypeT MergeType) {
- // Split the first chain, X, into X1 and X2
+MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
+ const std::vector<NodeT *> &Y, size_t MergeOffset,
+ MergeTypeT MergeType) {
+ // Split the first chain, X, into X1 and X2.
NodeIter BeginX1 = X.begin();
NodeIter EndX1 = X.begin() + MergeOffset;
NodeIter BeginX2 = X.begin() + MergeOffset;
@@ -520,18 +569,18 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X,
NodeIter BeginY = Y.begin();
NodeIter EndY = Y.end();
- // Construct a new chain from the three existing ones
+ // Construct a new chain from the three existing ones.
switch (MergeType) {
case MergeTypeT::X_Y:
- return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
case MergeTypeT::Y_X:
- return MergedChain(BeginY, EndY, BeginX1, EndX2);
+ return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
case MergeTypeT::X1_Y_X2:
- return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
case MergeTypeT::Y_X2_X1:
- return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
case MergeTypeT::X2_X1_Y:
- return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
}
llvm_unreachable("unexpected chain merge type");
}
@@ -539,15 +588,14 @@ MergedChain mergeNodes(const std::vector<NodeT *> &X,
/// The implementation of the ExtTSP algorithm.
class ExtTSPImpl {
public:
- ExtTSPImpl(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts)
+ ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts)
: NumNodes(NodeSizes.size()) {
initialize(NodeSizes, NodeCounts, EdgeCounts);
}
/// Run the algorithm and return an optimized ordering of nodes.
- void run(std::vector<uint64_t> &Result) {
+ std::vector<uint64_t> run() {
// Pass 1: Merge nodes with their mutually forced successors
mergeForcedPairs();
@@ -558,78 +606,80 @@ public:
mergeColdChains();
// Collect nodes from all chains
- concatChains(Result);
+ return concatChains();
}
private:
/// Initialize the algorithm's data structures.
- void initialize(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Initialize nodes
+ void initialize(const ArrayRef<uint64_t> &NodeSizes,
+ const ArrayRef<uint64_t> &NodeCounts,
+ const ArrayRef<EdgeCount> &EdgeCounts) {
+ // Initialize nodes.
AllNodes.reserve(NumNodes);
for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
uint64_t ExecutionCount = NodeCounts[Idx];
- // The execution count of the entry node is set to at least one
+ // The execution count of the entry node is set to at least one.
if (Idx == 0 && ExecutionCount == 0)
ExecutionCount = 1;
AllNodes.emplace_back(Idx, Size, ExecutionCount);
}
- // Initialize jumps between nodes
+ // Initialize jumps between the nodes.
SuccNodes.resize(NumNodes);
PredNodes.resize(NumNodes);
std::vector<uint64_t> OutDegree(NumNodes, 0);
AllJumps.reserve(EdgeCounts.size());
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- uint64_t Succ = It.first.second;
- OutDegree[Pred]++;
- // Ignore self-edges
- if (Pred == Succ)
+ for (auto Edge : EdgeCounts) {
+ ++OutDegree[Edge.src];
+ // Ignore self-edges.
+ if (Edge.src == Edge.dst)
continue;
- SuccNodes[Pred].push_back(Succ);
- PredNodes[Succ].push_back(Pred);
- uint64_t ExecutionCount = It.second;
- if (ExecutionCount > 0) {
- NodeT &PredNode = AllNodes[Pred];
- NodeT &SuccNode = AllNodes[Succ];
- AllJumps.emplace_back(&PredNode, &SuccNode, ExecutionCount);
+ SuccNodes[Edge.src].push_back(Edge.dst);
+ PredNodes[Edge.dst].push_back(Edge.src);
+ if (Edge.count > 0) {
+ NodeT &PredNode = AllNodes[Edge.src];
+ NodeT &SuccNode = AllNodes[Edge.dst];
+ AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);
SuccNode.InJumps.push_back(&AllJumps.back());
PredNode.OutJumps.push_back(&AllJumps.back());
+ // Adjust execution counts.
+ PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);
+ SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);
}
}
for (JumpT &Jump : AllJumps) {
- assert(OutDegree[Jump.Source->Index] > 0);
+ assert(OutDegree[Jump.Source->Index] > 0 &&
+ "incorrectly computed out-degree of the block");
Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
}
- // Initialize chains
+ // Initialize chains.
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
for (NodeT &Node : AllNodes) {
+ // Create a chain.
AllChains.emplace_back(Node.Index, &Node);
Node.CurChain = &AllChains.back();
- if (Node.ExecutionCount > 0) {
+ if (Node.ExecutionCount > 0)
HotChains.push_back(&AllChains.back());
- }
}
- // Initialize chain edges
+ // Initialize chain edges.
AllEdges.reserve(AllJumps.size());
for (NodeT &PredNode : AllNodes) {
for (JumpT *Jump : PredNode.OutJumps) {
+ assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");
NodeT *SuccNode = Jump->Target;
ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
- // this edge is already present in the graph
+ // This edge is already present in the graph.
if (CurEdge != nullptr) {
assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
- // this is a new edge
+ // This is a new edge.
AllEdges.emplace_back(Jump);
PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
@@ -642,7 +692,7 @@ private:
/// to B are from A. Such nodes should be adjacent in the optimal ordering;
/// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
- // Find fallthroughs based on edge weights
+ // Find forced pairs of blocks.
for (NodeT &Node : AllNodes) {
if (SuccNodes[Node.Index].size() == 1 &&
PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
@@ -669,12 +719,12 @@ private:
}
if (SuccNode == nullptr)
continue;
- // Break the cycle
+ // Break the cycle.
AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
Node.ForcedPred = nullptr;
}
- // Merge nodes with their fallthrough successors
+ // Merge nodes with their fallthrough successors.
for (NodeT &Node : AllNodes) {
if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
const NodeT *CurBlock = &Node;
@@ -689,33 +739,42 @@ private:
/// Merge pairs of chains while improving the ExtTSP objective.
void mergeChainPairs() {
- /// Deterministically compare pairs of chains
+ /// Deterministically compare pairs of chains.
auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
const ChainT *A2, const ChainT *B2) {
- if (A1 != A2)
- return A1->Id < A2->Id;
- return B1->Id < B2->Id;
+ return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
};
while (HotChains.size() > 1) {
ChainT *BestChainPred = nullptr;
ChainT *BestChainSucc = nullptr;
MergeGainT BestGain;
- // Iterate over all pairs of chains
+ // Iterate over all pairs of chains.
for (ChainT *ChainPred : HotChains) {
- // Get candidates for merging with the current chain
- for (auto EdgeIt : ChainPred->Edges) {
- ChainT *ChainSucc = EdgeIt.first;
- ChainEdge *Edge = EdgeIt.second;
- // Ignore loop edges
- if (ChainPred == ChainSucc)
+ // Get candidates for merging with the current chain.
+ for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
+ // Ignore loop edges.
+ if (Edge->isSelfEdge())
continue;
-
- // Stop early if the combined chain violates the maximum allowed size
+ // Skip the merge if the combined chain violates the maximum specified
+ // size.
if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
continue;
+ // Don't merge the chains if they have vastly different densities.
+ // Skip the merge if the ratio between the densities exceeds
+ // MaxMergeDensityRatio. Smaller values of the option result in fewer
+ // merges, and hence, more chains.
+ const double ChainPredDensity = ChainPred->density();
+ const double ChainSuccDensity = ChainSucc->density();
+ assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&
+ "incorrectly computed chain densities");
+ auto [MinDensity, MaxDensity] =
+ std::minmax(ChainPredDensity, ChainSuccDensity);
+ const double Ratio = MaxDensity / MinDensity;
+ if (Ratio > MaxMergeDensityRatio)
+ continue;
- // Compute the gain of merging the two chains
+ // Compute the gain of merging the two chains.
MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
if (CurGain.score() <= EPS)
continue;
@@ -731,11 +790,11 @@ private:
}
}
- // Stop merging when there is no improvement
+ // Stop merging when there is no improvement.
if (BestGain.score() <= EPS)
break;
- // Merge the best pair of chains
+ // Merge the best pair of chains.
mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
BestGain.mergeType());
}
@@ -743,7 +802,7 @@ private:
/// Merge remaining nodes into chains w/o taking jump counts into
/// consideration. This allows to maintain the original node order in the
- /// absence of profile data
+ /// absence of profile data.
void mergeColdChains() {
for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
// Iterating in reverse order to make sure original fallthrough jumps are
@@ -764,24 +823,22 @@ private:
}
/// Compute the Ext-TSP score for a given node order and a list of jumps.
- double extTSPScore(const MergedChain &MergedBlocks,
- const std::vector<JumpT *> &Jumps) const {
- if (Jumps.empty())
- return 0.0;
+ double extTSPScore(const MergedNodesT &Nodes,
+ const MergedJumpsT &Jumps) const {
uint64_t CurAddr = 0;
- MergedBlocks.forEach([&](const NodeT *Node) {
+ Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});
double Score = 0;
- for (JumpT *Jump : Jumps) {
+ Jumps.forEach([&](const JumpT *Jump) {
const NodeT *SrcBlock = Jump->Source;
const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
- }
+ });
return Score;
}
@@ -793,74 +850,76 @@ private:
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
ChainEdge *Edge) const {
- if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
+ if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
- }
- // Precompute jumps between ChainPred and ChainSucc
- auto Jumps = Edge->jumps();
+ assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
+ // Precompute jumps between ChainPred and ChainSucc.
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
- if (EdgePP != nullptr) {
- Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
- }
- assert(!Jumps.empty() && "trying to merge chains w/o jumps");
+ MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);
- // The object holds the best currently chosen gain of merging the two chains
+ // This object holds the best chosen gain of merging two chains.
MergeGainT Gain = MergeGainT();
/// Given a merge offset and a list of merge types, try to merge two chains
- /// and update Gain with a better alternative
+ /// and update Gain with a better alternative.
auto tryChainMerging = [&](size_t Offset,
const std::vector<MergeTypeT> &MergeTypes) {
- // Skip merging corresponding to concatenation w/o splitting
+ // Skip merging corresponding to concatenation w/o splitting.
if (Offset == 0 || Offset == ChainPred->Nodes.size())
return;
- // Skip merging if it breaks Forced successors
+ // Skip merging if it breaks Forced successors.
NodeT *Node = ChainPred->Nodes[Offset - 1];
if (Node->ForcedSucc != nullptr)
return;
// Apply the merge, compute the corresponding gain, and update the best
- // value, if the merge is beneficial
+ // value, if the merge is beneficial.
for (const MergeTypeT &MergeType : MergeTypes) {
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
}
};
- // Try to concatenate two chains w/o splitting
+ // Try to concatenate two chains w/o splitting.
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
- if (EnableChainSplitAlongJumps) {
- // Attach (a part of) ChainPred before the first node of ChainSucc
- for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
- const NodeT *SrcBlock = Jump->Source;
- if (SrcBlock->CurChain != ChainPred)
- continue;
- size_t Offset = SrcBlock->CurIndex + 1;
- tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
- }
+ // Attach (a part of) ChainPred before the first node of ChainSucc.
+ for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
+ const NodeT *SrcBlock = Jump->Source;
+ if (SrcBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = SrcBlock->CurIndex + 1;
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
+ }
- // Attach (a part of) ChainPred after the last node of ChainSucc
- for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
- const NodeT *DstBlock = Jump->Source;
- if (DstBlock->CurChain != ChainPred)
- continue;
- size_t Offset = DstBlock->CurIndex;
- tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
- }
+ // Attach (a part of) ChainPred after the last node of ChainSucc.
+ for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
+ const NodeT *DstBlock = Jump->Target;
+ if (DstBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = DstBlock->CurIndex;
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
}
- // Try to break ChainPred in various ways and concatenate with ChainSucc
+ // Try to break ChainPred in various ways and concatenate with ChainSucc.
if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
- // Try to split the chain in different ways. In practice, applying
- // X2_Y_X1 merging is almost never provides benefits; thus, we exclude
- // it from consideration to reduce the search space
+ // Do not split the chain along a fall-through jump. One of the two
+ // loops above may still "break" such a jump whenever it results in a
+ // new fall-through.
+ const NodeT *BB = ChainPred->Nodes[Offset - 1];
+ const NodeT *BB2 = ChainPred->Nodes[Offset];
+ if (BB->isSuccessor(BB2))
+ continue;
+
+ // In practice, applying X2_Y_X1 merging almost never provides benefits;
+ // thus, we exclude it from consideration to reduce the search space.
tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
MergeTypeT::X2_X1_Y});
}
}
+
Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
return Gain;
}
@@ -870,19 +929,20 @@ private:
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
- const std::vector<JumpT *> &Jumps,
- size_t MergeOffset, MergeTypeT MergeType) const {
- auto MergedBlocks =
+ const MergedJumpsT &Jumps, size_t MergeOffset,
+ MergeTypeT MergeType) const {
+ MergedNodesT MergedNodes =
mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
- // Do not allow a merge that does not preserve the original entry point
+ // Do not allow a merge that does not preserve the original entry point.
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
- !MergedBlocks.getFirstNode()->isEntry())
+ !MergedNodes.getFirstNode()->isEntry())
return MergeGainT();
- // The gain for the new chain
- auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
- return MergeGainT(NewGainScore, MergeOffset, MergeType);
+ // The gain for the new chain.
+ double NewScore = extTSPScore(MergedNodes, Jumps);
+ double CurScore = ChainPred->Score;
+ return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
}
/// Merge chain From into chain Into, update the list of active chains,
@@ -891,39 +951,398 @@ private:
MergeTypeT MergeType) {
assert(Into != From && "a chain cannot be merged with itself");
- // Merge the nodes
- MergedChain MergedNodes =
+ // Merge the nodes.
+ MergedNodesT MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());
- // Merge the edges
+ // Merge the edges.
Into->mergeEdges(From);
From->clear();
- // Update cached ext-tsp score for the new chain
+ // Update cached ext-tsp score for the new chain.
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
- MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
- Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
+ MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
+ MergedJumpsT MergedJumps(&SelfEdge->jumps());
+ Into->Score = extTSPScore(MergedNodes, MergedJumps);
}
- // Remove the chain from the list of active chains
- llvm::erase_value(HotChains, From);
+ // Remove the chain from the list of active chains.
+ llvm::erase(HotChains, From);
- // Invalidate caches
+ // Invalidate caches.
for (auto EdgeIt : Into->Edges)
EdgeIt.second->invalidateCache();
}
/// Concatenate all chains into the final order.
- void concatChains(std::vector<uint64_t> &Order) {
- // Collect chains and calculate density stats for their sorting
+ std::vector<uint64_t> concatChains() {
+ // Collect non-empty chains.
+ std::vector<const ChainT *> SortedChains;
+ for (ChainT &Chain : AllChains) {
+ if (!Chain.Nodes.empty())
+ SortedChains.push_back(&Chain);
+ }
+
+ // Sorting chains by density in the decreasing order.
+ std::sort(SortedChains.begin(), SortedChains.end(),
+ [&](const ChainT *L, const ChainT *R) {
+ // Place the entry point at the beginning of the order.
+ if (L->isEntry() != R->isEntry())
+ return L->isEntry();
+
+ // Compare by density and break ties by chain identifiers.
+ return std::make_tuple(-L->density(), L->Id) <
+ std::make_tuple(-R->density(), R->Id);
+ });
+
+ // Collect the nodes in the order specified by their chains.
+ std::vector<uint64_t> Order;
+ Order.reserve(NumNodes);
+ for (const ChainT *Chain : SortedChains)
+ for (NodeT *Node : Chain->Nodes)
+ Order.push_back(Node->Index);
+ return Order;
+ }
+
+private:
+ /// The number of nodes in the graph.
+ const size_t NumNodes;
+
+ /// Successors of each node.
+ std::vector<std::vector<uint64_t>> SuccNodes;
+
+ /// Predecessors of each node.
+ std::vector<std::vector<uint64_t>> PredNodes;
+
+ /// All nodes (basic blocks) in the graph.
+ std::vector<NodeT> AllNodes;
+
+ /// All jumps between the nodes.
+ std::vector<JumpT> AllJumps;
+
+ /// All chains of nodes.
+ std::vector<ChainT> AllChains;
+
+ /// All edges between the chains.
+ std::vector<ChainEdge> AllEdges;
+
+ /// Active chains. The vector gets updated at runtime when chains are merged.
+ std::vector<ChainT *> HotChains;
+};
+
+/// The implementation of the Cache-Directed Sort (CDSort) algorithm for
+/// ordering functions represented by a call graph.
+class CDSortImpl {
+public:
+ CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts,
+ ArrayRef<uint64_t> EdgeOffsets)
+ : Config(Config), NumNodes(NodeSizes.size()) {
+ initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);
+ }
+
+ /// Run the algorithm and return an ordered set of function clusters.
+ std::vector<uint64_t> run() {
+ // Merge pairs of chains while improving the objective.
+ mergeChainPairs();
+
+ // Collect nodes from all the chains.
+ return concatChains();
+ }
+
+private:
+ /// Initialize the algorithm's data structures.
+ void initialize(const ArrayRef<uint64_t> &NodeSizes,
+ const ArrayRef<uint64_t> &NodeCounts,
+ const ArrayRef<EdgeCount> &EdgeCounts,
+ const ArrayRef<uint64_t> &EdgeOffsets) {
+ // Initialize nodes.
+ AllNodes.reserve(NumNodes);
+ for (uint64_t Node = 0; Node < NumNodes; Node++) {
+ uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
+ uint64_t ExecutionCount = NodeCounts[Node];
+ AllNodes.emplace_back(Node, Size, ExecutionCount);
+ TotalSamples += ExecutionCount;
+ if (ExecutionCount > 0)
+ TotalSize += Size;
+ }
+
+ // Initialize jumps between the nodes.
+ SuccNodes.resize(NumNodes);
+ PredNodes.resize(NumNodes);
+ AllJumps.reserve(EdgeCounts.size());
+ for (size_t I = 0; I < EdgeCounts.size(); I++) {
+ auto [Pred, Succ, Count] = EdgeCounts[I];
+ // Ignore recursive calls.
+ if (Pred == Succ)
+ continue;
+
+ SuccNodes[Pred].push_back(Succ);
+ PredNodes[Succ].push_back(Pred);
+ if (Count > 0) {
+ NodeT &PredNode = AllNodes[Pred];
+ NodeT &SuccNode = AllNodes[Succ];
+ AllJumps.emplace_back(&PredNode, &SuccNode, Count);
+ AllJumps.back().Offset = EdgeOffsets[I];
+ SuccNode.InJumps.push_back(&AllJumps.back());
+ PredNode.OutJumps.push_back(&AllJumps.back());
+ // Adjust execution counts.
+ PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);
+ SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);
+ }
+ }
+
+ // Initialize chains.
+ AllChains.reserve(NumNodes);
+ for (NodeT &Node : AllNodes) {
+ // Adjust execution counts.
+ Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
+ Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
+ // Create chain.
+ AllChains.emplace_back(Node.Index, &Node);
+ Node.CurChain = &AllChains.back();
+ }
+
+ // Initialize chain edges.
+ AllEdges.reserve(AllJumps.size());
+ for (NodeT &PredNode : AllNodes) {
+ for (JumpT *Jump : PredNode.OutJumps) {
+ NodeT *SuccNode = Jump->Target;
+ ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
+ // This edge is already present in the graph.
+ if (CurEdge != nullptr) {
+ assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
+ CurEdge->appendJump(Jump);
+ continue;
+ }
+ // This is a new edge.
+ AllEdges.emplace_back(Jump);
+ PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
+ SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
+ }
+ }
+ }
+
+ /// Merge pairs of chains while there is an improvement in the objective.
+ void mergeChainPairs() {
+ // Create a priority queue containing all edges ordered by the merge gain.
+ auto GainComparator = [](ChainEdge *L, ChainEdge *R) {
+ return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <
+ std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);
+ };
+ std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);
+
+ // Insert the edges into the queue.
+ [[maybe_unused]] size_t NumActiveChains = 0;
+ for (NodeT &Node : AllNodes) {
+ if (Node.ExecutionCount == 0)
+ continue;
+ ++NumActiveChains;
+ for (const auto &[_, Edge] : Node.CurChain->Edges) {
+ // Ignore self-edges.
+ if (Edge->isSelfEdge())
+ continue;
+ // Ignore already processed edges.
+ if (Edge->gain() != -1.0)
+ continue;
+
+ // Compute the gain of merging the two chains.
+ MergeGainT Gain = getBestMergeGain(Edge);
+ Edge->setMergeGain(Gain);
+
+ if (Edge->gain() > EPS)
+ Queue.insert(Edge);
+ }
+ }
+
+ // Merge the chains while the gain of merging is positive.
+ while (!Queue.empty()) {
+ // Extract the best (top) edge for merging.
+ ChainEdge *BestEdge = *Queue.begin();
+ Queue.erase(Queue.begin());
+ ChainT *BestSrcChain = BestEdge->srcChain();
+ ChainT *BestDstChain = BestEdge->dstChain();
+
+ // Remove outdated edges from the queue.
+ for (const auto &[_, ChainEdge] : BestSrcChain->Edges)
+ Queue.erase(ChainEdge);
+ for (const auto &[_, ChainEdge] : BestDstChain->Edges)
+ Queue.erase(ChainEdge);
+
+ // Merge the best pair of chains.
+ MergeGainT BestGain = BestEdge->getMergeGain();
+ mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),
+ BestGain.mergeType());
+ --NumActiveChains;
+
+ // Insert newly created edges into the queue.
+ for (const auto &[_, Edge] : BestSrcChain->Edges) {
+ // Ignore loop edges.
+ if (Edge->isSelfEdge())
+ continue;
+ if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
+ Config.MaxChainSize)
+ continue;
+
+ // Compute the gain of merging the two chains.
+ MergeGainT Gain = getBestMergeGain(Edge);
+ Edge->setMergeGain(Gain);
+
+ if (Edge->gain() > EPS)
+ Queue.insert(Edge);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"
+ << " of chains from " << NumNodes << " to "
+ << NumActiveChains << "\n");
+ }
+
+ /// Compute the gain of merging two chains.
+ ///
+ /// The function considers all possible ways of merging two chains and
+ /// computes the one having the largest increase in ExtTSP objective. The
+ /// result is a pair with the first element being the gain and the second
+ /// element being the corresponding merging type.
+ MergeGainT getBestMergeGain(ChainEdge *Edge) const {
+ assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
+ // Precompute jumps between ChainPred and ChainSucc.
+ MergedJumpsT Jumps(&Edge->jumps());
+ ChainT *SrcChain = Edge->srcChain();
+ ChainT *DstChain = Edge->dstChain();
+
+ // This object holds the best currently chosen gain of merging two chains.
+ MergeGainT Gain = MergeGainT();
+
+ /// Given a list of merge types, try to merge two chains and update Gain
+ /// with a better alternative.
+ auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) {
+ // Apply the merge, compute the corresponding gain, and update the best
+ // value, if the merge is beneficial.
+ for (const MergeTypeT &MergeType : MergeTypes) {
+ MergeGainT NewGain =
+ computeMergeGain(SrcChain, DstChain, Jumps, MergeType);
+
+ // When forward and backward gains are the same, prioritize merging that
+ // preserves the original order of the functions in the binary.
+ if (std::abs(Gain.score() - NewGain.score()) < EPS) {
+ if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||
+ (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {
+ Gain = NewGain;
+ }
+ } else if (NewGain.score() > Gain.score() + EPS) {
+ Gain = NewGain;
+ }
+ }
+ };
+
+ // Try to concatenate two chains w/o splitting.
+ tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});
+
+ return Gain;
+ }
+
+ /// Compute the score gain of merging two chains, respecting a given type.
+ ///
+ /// The two chains are not modified in the method.
+ MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
+ const MergedJumpsT &Jumps,
+ MergeTypeT MergeType) const {
+ // This doesn't depend on the ordering of the nodes
+ double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
+
+ // Merge offset is always 0, as the chains are not split.
+ size_t MergeOffset = 0;
+ auto MergedBlocks =
+ mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
+ double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);
+
+ double GainScore = DistGain + Config.FrequencyScale * FreqGain;
+ // Scale the result to increase the importance of merging short chains.
+ if (GainScore >= 0.0)
+ GainScore /= std::min(ChainPred->Size, ChainSucc->Size);
+
+ return MergeGainT(GainScore, MergeOffset, MergeType);
+ }
+
+ /// Compute the change of the frequency locality after merging the chains.
+ double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {
+ auto missProbability = [&](double ChainDensity) {
+ double PageSamples = ChainDensity * Config.CacheSize;
+ if (PageSamples >= TotalSamples)
+ return 0.0;
+ double P = PageSamples / TotalSamples;
+ return pow(1.0 - P, static_cast<double>(Config.CacheEntries));
+ };
+
+ // Cache misses on the chains before merging.
+ double CurScore =
+ ChainPred->ExecutionCount * missProbability(ChainPred->density()) +
+ ChainSucc->ExecutionCount * missProbability(ChainSucc->density());
+
+ // Cache misses on the merged chain
+ double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
+ double MergedSize = ChainPred->Size + ChainSucc->Size;
+ double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
+ double NewScore = MergedCounts * missProbability(MergedDensity);
+
+ return CurScore - NewScore;
+ }
+
+ /// Compute the distance locality for a jump / call.
+ double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {
+ uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;
+ double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);
+ return static_cast<double>(Count) * std::pow(D, -Config.DistancePower);
+ }
+
+ /// Compute the change of the distance locality after merging the chains.
+ double distBasedLocalityGain(const MergedNodesT &Nodes,
+ const MergedJumpsT &Jumps) const {
+ uint64_t CurAddr = 0;
+ Nodes.forEach([&](const NodeT *Node) {
+ Node->EstimatedAddr = CurAddr;
+ CurAddr += Node->Size;
+ });
+
+ double CurScore = 0;
+ double NewScore = 0;
+ Jumps.forEach([&](const JumpT *Jump) {
+ uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
+ uint64_t DstAddr = Jump->Target->EstimatedAddr;
+ NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
+ CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
+ });
+ return NewScore - CurScore;
+ }
+
+ /// Merge chain From into chain Into, update the list of active chains,
+ /// adjacency information, and the corresponding cached values.
+ void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
+ MergeTypeT MergeType) {
+ assert(Into != From && "a chain cannot be merged with itself");
+
+ // Merge the nodes.
+ MergedNodesT MergedNodes =
+ mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
+ Into->merge(From, MergedNodes.getNodes());
+
+ // Merge the edges.
+ Into->mergeEdges(From);
+ From->clear();
+ }
+
+ /// Concatenate all chains into the final order.
+ std::vector<uint64_t> concatChains() {
+ // Collect chains and calculate density stats for their sorting.
std::vector<const ChainT *> SortedChains;
DenseMap<const ChainT *, double> ChainDensity;
for (ChainT &Chain : AllChains) {
if (!Chain.Nodes.empty()) {
SortedChains.push_back(&Chain);
- // Using doubles to avoid overflow of ExecutionCounts
+ // Using doubles to avoid overflow of ExecutionCounts.
double Size = 0;
double ExecutionCount = 0;
for (NodeT *Node : Chain.Nodes) {
@@ -935,30 +1354,29 @@ private:
}
}
- // Sorting chains by density in the decreasing order
- std::stable_sort(SortedChains.begin(), SortedChains.end(),
- [&](const ChainT *L, const ChainT *R) {
- // Make sure the original entry point is at the
- // beginning of the order
- if (L->isEntry() != R->isEntry())
- return L->isEntry();
-
- const double DL = ChainDensity[L];
- const double DR = ChainDensity[R];
- // Compare by density and break ties by chain identifiers
- return (DL != DR) ? (DL > DR) : (L->Id < R->Id);
- });
+ // Sort chains by density in the decreasing order.
+ std::sort(SortedChains.begin(), SortedChains.end(),
+ [&](const ChainT *L, const ChainT *R) {
+ const double DL = ChainDensity[L];
+ const double DR = ChainDensity[R];
+ // Compare by density and break ties by chain identifiers.
+ return std::make_tuple(-DL, L->Id) <
+ std::make_tuple(-DR, R->Id);
+ });
- // Collect the nodes in the order specified by their chains
+ // Collect the nodes in the order specified by their chains.
+ std::vector<uint64_t> Order;
Order.reserve(NumNodes);
- for (const ChainT *Chain : SortedChains) {
- for (NodeT *Node : Chain->Nodes) {
+ for (const ChainT *Chain : SortedChains)
+ for (NodeT *Node : Chain->Nodes)
Order.push_back(Node->Index);
- }
- }
+ return Order;
}
private:
+ /// Config for the algorithm.
+ const CDSortConfig Config;
+
/// The number of nodes in the graph.
const size_t NumNodes;
@@ -968,10 +1386,10 @@ private:
/// Predecessors of each node.
std::vector<std::vector<uint64_t>> PredNodes;
- /// All nodes (basic blocks) in the graph.
+ /// All nodes (functions) in the graph.
std::vector<NodeT> AllNodes;
- /// All jumps between the nodes.
+ /// All jumps (function calls) between the nodes.
std::vector<JumpT> AllJumps;
/// All chains of nodes.
@@ -980,65 +1398,95 @@ private:
/// All edges between the chains.
std::vector<ChainEdge> AllEdges;
- /// Active chains. The vector gets updated at runtime when chains are merged.
- std::vector<ChainT *> HotChains;
+ /// The total number of samples in the graph.
+ uint64_t TotalSamples{0};
+
+ /// The total size of the nodes in the graph.
+ uint64_t TotalSize{0};
};
} // end of anonymous namespace
std::vector<uint64_t>
-llvm::applyExtTspLayout(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Verify correctness of the input data
+codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
+ // Verify correctness of the input data.
assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
assert(NodeSizes.size() > 2 && "Incorrect input");
- // Apply the reordering algorithm
+ // Apply the reordering algorithm.
ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
- std::vector<uint64_t> Result;
- Alg.run(Result);
+ std::vector<uint64_t> Result = Alg.run();
- // Verify correctness of the output
+ // Verify correctness of the output.
assert(Result.front() == 0 && "Original entry point is not preserved");
assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
return Result;
}
-double llvm::calcExtTspScore(const std::vector<uint64_t> &Order,
- const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
- // Estimate addresses of the blocks in memory
+double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order,
+ ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
+ // Estimate addresses of the blocks in memory.
std::vector<uint64_t> Addr(NodeSizes.size(), 0);
for (size_t Idx = 1; Idx < Order.size(); Idx++) {
Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
}
std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- OutDegree[Pred]++;
- }
+ for (auto Edge : EdgeCounts)
+ ++OutDegree[Edge.src];
- // Increase the score for each jump
+ // Increase the score for each jump.
double Score = 0;
- for (auto It : EdgeCounts) {
- uint64_t Pred = It.first.first;
- uint64_t Succ = It.first.second;
- uint64_t Count = It.second;
- bool IsConditional = OutDegree[Pred] > 1;
- Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count,
- IsConditional);
+ for (auto Edge : EdgeCounts) {
+ bool IsConditional = OutDegree[Edge.src] > 1;
+ Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],
+ Edge.count, IsConditional);
}
return Score;
}
-double llvm::calcExtTspScore(const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<EdgeCountT> &EdgeCounts) {
+double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
+ ArrayRef<uint64_t> NodeCounts,
+ ArrayRef<EdgeCount> EdgeCounts) {
std::vector<uint64_t> Order(NodeSizes.size());
for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
Order[Idx] = Idx;
}
return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
}
+
+std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
+ const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
+ ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
+ ArrayRef<uint64_t> CallOffsets) {
+ // Verify correctness of the input data: there must be exactly one count per function size.
+ assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");
+
+ // Apply the reordering algorithm (CDSortImpl) using the caller-supplied Config.
+ CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);
+ std::vector<uint64_t> Result = Alg.run();
+ assert(Result.size() == FuncSizes.size() && "Incorrect size of layout"); // the layout must have one entry per input function
+ return Result;
+}
+
+std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
+ ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
+ ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) {
+ CDSortConfig Config; // starts from the default-constructed configuration
+ // Populate the config from the command-line options; a field is overridden only when its option has getNumOccurrences() > 0.
+ if (CacheEntries.getNumOccurrences() > 0)
+ Config.CacheEntries = CacheEntries;
+ if (CacheSize.getNumOccurrences() > 0)
+ Config.CacheSize = CacheSize;
+ if (CDMaxChainSize.getNumOccurrences() > 0)
+ Config.MaxChainSize = CDMaxChainSize;
+ if (DistancePower.getNumOccurrences() > 0)
+ Config.DistancePower = DistancePower;
+ if (FrequencyScale.getNumOccurrences() > 0)
+ Config.FrequencyScale = FrequencyScale;
+ return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts, // delegate to the overload that takes an explicit config
+ CallOffsets);
+}
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 4a6719741719..6a2dae5bab68 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -417,7 +417,7 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
- I.moveBefore(MovePos);
+ I.moveBeforePreserving(MovePos);
}
}
@@ -429,7 +429,7 @@ void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
while (FromBB.size() > 1) {
Instruction &I = FromBB.front();
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
- I.moveBefore(MovePos);
+ I.moveBeforePreserving(MovePos);
}
}
diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp
index e07c92df2265..507729bc5ebc 100644
--- a/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -52,12 +52,9 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
+ if (!GCL->use_empty())
+ GCL->replaceAllUsesWith(NGV);
+
GCL->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Utils/DXILUpgrade.cpp b/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
new file mode 100644
index 000000000000..735686ddce38
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/DXILUpgrade.cpp
@@ -0,0 +1,36 @@
+//===- DXILUpgrade.cpp - Upgrade DXIL metadata to LLVM constructs ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/DXILUpgrade.h"
+
+using namespace llvm;
+
+static bool handleValVerMetadata(Module &M) { // removes the dx.valver named metadata if present; returns whether the module was changed
+ NamedMDNode *ValVer = M.getNamedMetadata("dx.valver");
+ if (!ValVer)
+ return false; // no such metadata, module left untouched
+
+ // We don't need the validation version internally, so we drop it.
+ ValVer->dropAllReferences();
+ ValVer->eraseFromParent();
+ return true; // the named metadata node was erased
+}
+
+PreservedAnalyses DXILUpgradePass::run(Module &M, ModuleAnalysisManager &AM) {
+ PreservedAnalyses PA; // returned only when something changed; see the early return below
+ // We never add, remove, or change functions here.
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ PA.preserveSet<AllAnalysesOn<Function>>();
+
+ bool Changed = false;
+ Changed |= handleValVerMetadata(M); // currently the only upgrade step in this pass
+
+ if (!Changed)
+ return PreservedAnalyses::all(); // nothing was modified, so every analysis is reported as preserved
+ return PA;
+}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 93cad0888a56..d0cc603426d2 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -801,7 +801,15 @@ bool checkDebugifyMetadata(Module &M,
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
- return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
+ bool Result = applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+ return Result;
}
DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
@@ -826,7 +834,15 @@ private:
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
- return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+ bool NewDebugMode = F.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ F.convertFromNewDbgValues();
+
+ bool Result = applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
+
+ if (NewDebugMode)
+ F.convertToNewDbgValues();
+ return Result;
}
DebugifyFunctionPass(
@@ -852,13 +868,24 @@ private:
/// legacy module pass manager.
struct CheckDebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
+ bool Result;
if (Mode == DebugifyMode::SyntheticDebugInfo)
- return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ Result = checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
- return checkDebugInfoMetadata(
+ else
+ Result = checkDebugInfoMetadata(
M, M.functions(), *DebugInfoBeforePass,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
+ return Result;
}
CheckDebugifyModulePass(
@@ -891,16 +918,26 @@ private:
/// with the legacy module pass manager.
struct CheckDebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
+ bool NewDebugMode = F.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ F.convertFromNewDbgValues();
+
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
+ bool Result;
if (Mode == DebugifyMode::SyntheticDebugInfo)
- return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ Result = checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
NameOfWrappedPass, "CheckFunctionDebugify",
Strip, StatsMap);
- return checkDebugInfoMetadata(
+ else
+ Result = checkDebugInfoMetadata(
M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass,
"CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ F.convertToNewDbgValues();
+ return Result;
}
CheckDebugifyFunctionPass(
@@ -972,6 +1009,10 @@ createDebugifyFunctionPass(enum DebugifyMode Mode,
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
if (Mode == DebugifyMode::SyntheticDebugInfo)
applyDebugifyMetadata(M, M.functions(),
"ModuleDebugify: ", /*ApplyToMF*/ nullptr);
@@ -979,6 +1020,10 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
"ModuleDebugify (original debuginfo)",
NameOfWrappedPass);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
@@ -1010,6 +1055,10 @@ FunctionPass *createCheckDebugifyFunctionPass(
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
ModuleAnalysisManager &) {
+ bool NewDebugMode = M.IsNewDbgInfoFormat;
+ if (NewDebugMode)
+ M.convertFromNewDbgValues();
+
if (Mode == DebugifyMode::SyntheticDebugInfo)
checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
@@ -1018,6 +1067,10 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
M, M.functions(), *DebugInfoBeforePass,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
+
+ if (NewDebugMode)
+ M.convertToNewDbgValues();
+
return PreservedAnalyses::all();
}
@@ -1035,13 +1088,13 @@ void DebugifyEachInstrumentation::registerCallbacks(
return;
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (const auto **CF = any_cast<const Function *>(&IR)) {
+ if (const auto **CF = llvm::any_cast<const Function *>(&IR)) {
Function &F = *const_cast<Function *>(*CF);
applyDebugify(F, Mode, DebugInfoBeforePass, P);
MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
.getManager()
.invalidate(F, PA);
- } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) {
Module &M = *const_cast<Module *>(*CM);
applyDebugify(M, Mode, DebugInfoBeforePass, P);
MAM.invalidate(M, PA);
@@ -1053,7 +1106,7 @@ void DebugifyEachInstrumentation::registerCallbacks(
return;
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (const auto **CF = any_cast<const Function *>(&IR)) {
+ if (const auto **CF = llvm::any_cast<const Function *>(&IR)) {
auto &F = *const_cast<Function *>(*CF);
Module &M = *F.getParent();
auto It = F.getIterator();
@@ -1069,7 +1122,7 @@ void DebugifyEachInstrumentation::registerCallbacks(
MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
.getManager()
.invalidate(F, PA);
- } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ } else if (const auto **CM = llvm::any_cast<const Module *>(&IR)) {
Module &M = *const_cast<Module *>(*CM);
if (Mode == DebugifyMode::SyntheticDebugInfo)
checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index d424ebbef99d..092f1799755d 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -35,7 +35,7 @@ static void insertCall(Function &CurFn, StringRef Func,
Triple TargetTriple(M.getTargetTriple());
if (TargetTriple.isOSAIX() && Func == "__mcount") {
Type *SizeTy = M.getDataLayout().getIntPtrType(C);
- Type *SizePtrTy = SizeTy->getPointerTo();
+ Type *SizePtrTy = PointerType::getUnqual(C);
GlobalVariable *GV = new GlobalVariable(M, SizeTy, /*isConstant=*/false,
GlobalValue::InternalLinkage,
ConstantInt::get(SizeTy, 0));
@@ -54,7 +54,7 @@ static void insertCall(Function &CurFn, StringRef Func,
}
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
- Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
+ Type *ArgTypes[] = {PointerType::getUnqual(C), PointerType::getUnqual(C)};
FunctionCallee Fn = M.getOrInsertFunction(
Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
@@ -65,9 +65,7 @@ static void insertCall(Function &CurFn, StringRef Func,
InsertionPt);
RetAddr->setDebugLoc(DL);
- Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
- RetAddr};
-
+ Value *Args[] = {&CurFn, RetAddr};
CallInst *Call =
CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
Call->setDebugLoc(DL);
diff --git a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index 88c838685bca..cc00106fcbfe 100644
--- a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -70,7 +70,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Create a cleanup block.
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
+ Type *ExnTy = StructType::get(PointerType::getUnqual(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index dda236167363..11e24d0585be 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
@@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -317,6 +314,8 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
+ assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 8daeb92130ba..79ca99d1566c 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -160,10 +160,23 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
int FunctionComparator::cmpMetadata(const Metadata *L,
const Metadata *R) const {
// TODO: the following routine coerce the metadata contents into constants
- // before comparison.
+ // or MDStrings before comparison.
// It ignores any other cases, so that the metadata nodes are considered
// equal even though this is not correct.
// We should structurally compare the metadata nodes to be perfect here.
+
+ auto *MDStringL = dyn_cast<MDString>(L);
+ auto *MDStringR = dyn_cast<MDString>(R);
+ if (MDStringL && MDStringR) {
+ if (MDStringL == MDStringR)
+ return 0;
+ return MDStringL->getString().compare(MDStringR->getString());
+ }
+ if (MDStringR)
+ return -1;
+ if (MDStringL)
+ return 1;
+
auto *CL = dyn_cast<ConstantAsMetadata>(L);
auto *CR = dyn_cast<ConstantAsMetadata>(R);
if (CL == CR)
@@ -820,6 +833,21 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
if (ConstR)
return -1;
+ const MetadataAsValue *MetadataValueL = dyn_cast<MetadataAsValue>(L);
+ const MetadataAsValue *MetadataValueR = dyn_cast<MetadataAsValue>(R);
+ if (MetadataValueL && MetadataValueR) {
+ if (MetadataValueL == MetadataValueR)
+ return 0;
+
+ return cmpMetadata(MetadataValueL->getMetadata(),
+ MetadataValueR->getMetadata());
+ }
+
+ if (MetadataValueL)
+ return 1;
+ if (MetadataValueR)
+ return -1;
+
const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
@@ -958,67 +986,3 @@ int FunctionComparator::compare() {
}
return 0;
}
-
-namespace {
-
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
- uint64_t Hash;
-
-public:
- // Initialize to random constant, so the state isn't zero.
- HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
-
- void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
-
- // No finishing is required, because the entire hash value is used.
- uint64_t getHash() { return Hash; }
-};
-
-} // end anonymous namespace
-
-// A function hash is calculated by considering only the number of arguments and
-// whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it useful
-// when possibly merging functions which are the same modulo constants and call
-// targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
- HashAccumulator64 H;
- H.add(F.isVarArg());
- H.add(F.arg_size());
-
- SmallVector<const BasicBlock *, 8> BBs;
- SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
-
- // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
- // accumulating the hash of the function "structure." (BB and opcode sequence)
- BBs.push_back(&F.getEntryBlock());
- VisitedBBs.insert(BBs[0]);
- while (!BBs.empty()) {
- const BasicBlock *BB = BBs.pop_back_val();
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
- H.add(45798);
- for (const auto &Inst : *BB) {
- H.add(Inst.getOpcode());
- }
- const Instruction *Term = BB->getTerminator();
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
- continue;
- BBs.push_back(Term->getSuccessor(i));
- }
- }
- return H.getHash();
-}
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index dab0be3a9fde..0990c750af55 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -91,18 +91,16 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
Mappings.end());
auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) {
- const std::string TLIName =
- std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate));
- if (!TLIName.empty()) {
- std::string MangledName = VFABI::mangleTLIVectorName(
- TLIName, ScalarName, CI.arg_size(), VF, Predicate);
+ const VecDesc *VD = TLI.getVectorMappingInfo(ScalarName, VF, Predicate);
+ if (VD && !VD->getVectorFnName().empty()) {
+ std::string MangledName = VD->getVectorFunctionABIVariantString();
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
}
- Function *VariantF = M->getFunction(TLIName);
+ Function *VariantF = M->getFunction(VD->getVectorFnName());
if (!VariantF)
- addVariantDeclaration(CI, VF, Predicate, TLIName);
+ addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName());
}
};
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f7b93fc8fd06..39d5f6e53c1d 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -189,20 +190,21 @@ BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
const unsigned PHICapacity = 2;
// Create corresponding new PHIs for all the PHIs in the outer landing pad.
- Instruction *InsertPoint = &InnerResumeDest->front();
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
BasicBlock::iterator I = OuterResumeDest->begin();
for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
PHINode *OuterPHI = cast<PHINode>(I);
PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
- OuterPHI->getName() + ".lpad-body",
- InsertPoint);
+ OuterPHI->getName() + ".lpad-body");
+ InnerPHI->insertBefore(InsertPoint);
OuterPHI->replaceAllUsesWith(InnerPHI);
InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
}
// Create a PHI for the exception values.
- InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
- "eh.lpad-body", InsertPoint);
+ InnerEHValuesPHI =
+ PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
+ InnerEHValuesPHI->insertBefore(InsertPoint);
CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
@@ -1331,38 +1333,51 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
}
}
-static bool MayContainThrowingOrExitingCall(Instruction *Begin,
- Instruction *End) {
+static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
+ ReturnInst *End) {
assert(Begin->getParent() == End->getParent() &&
"Expected to be in same basic block!");
+ auto BeginIt = Begin->getIterator();
+ assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
return !llvm::isGuaranteedToTransferExecutionToSuccessor(
- Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1);
+ ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}
-static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
+// Only allow these whitelisted attributes to be propagated back to the
+// callee. This is because other attributes may only be valid on the call
+// itself, i.e. attributes such as signext and zeroext.
- AttrBuilder AB(CB.getContext(), CB.getAttributes().getRetAttrs());
- if (!AB.hasAttributes())
- return AB;
+// Attributes that are always okay to propagate, because violating them is
+// immediate UB.
+static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
AttrBuilder Valid(CB.getContext());
- // Only allow these white listed attributes to be propagated back to the
- // callee. This is because other attributes may only be valid on the call
- // itself, i.e. attributes such as signext and zeroext.
- if (auto DerefBytes = AB.getDereferenceableBytes())
+ if (auto DerefBytes = CB.getRetDereferenceableBytes())
Valid.addDereferenceableAttr(DerefBytes);
- if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
+ if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
- if (AB.contains(Attribute::NoAlias))
+ if (CB.hasRetAttr(Attribute::NoAlias))
Valid.addAttribute(Attribute::NoAlias);
- if (AB.contains(Attribute::NonNull))
+ if (CB.hasRetAttr(Attribute::NoUndef))
+ Valid.addAttribute(Attribute::NoUndef);
+ return Valid;
+}
+
+// Attributes that need additional checks as propagating them may change
+// behavior or cause new UB.
+static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
+ AttrBuilder Valid(CB.getContext());
+ if (CB.hasRetAttr(Attribute::NonNull))
Valid.addAttribute(Attribute::NonNull);
+ if (CB.hasRetAttr(Attribute::Alignment))
+ Valid.addAlignmentAttr(CB.getRetAlign());
return Valid;
}
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
- AttrBuilder Valid = IdentifyValidAttributes(CB);
- if (!Valid.hasAttributes())
+ AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
+ AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
+ if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
return;
auto *CalledFunction = CB.getCalledFunction();
auto &Context = CalledFunction->getContext();
@@ -1397,7 +1412,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// limit the check to both RetVal and RI are in the same basic block and
// there are no throwing/exiting instructions between these instructions.
if (RI->getParent() != RetVal->getParent() ||
- MayContainThrowingOrExitingCall(RetVal, RI))
+ MayContainThrowingOrExitingCallAfterCB(RetVal, RI))
continue;
// Add to the existing attributes of NewRetVal, i.e. the cloned call
// instruction.
@@ -1406,7 +1421,62 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// existing attribute value (i.e. attributes such as dereferenceable,
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
AttributeList AL = NewRetVal->getAttributes();
- AttributeList NewAL = AL.addRetAttributes(Context, Valid);
+ if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
+ ValidUB.removeAttribute(Attribute::Dereferenceable);
+ if (ValidUB.getDereferenceableOrNullBytes() <
+ AL.getRetDereferenceableOrNullBytes())
+ ValidUB.removeAttribute(Attribute::DereferenceableOrNull);
+ AttributeList NewAL = AL.addRetAttributes(Context, ValidUB);
+ // Attributes that may generate poison returns are a bit tricky. If we
+ // propagate them, other uses of the callsite might have their behavior
+ // change or cause UB (if they have noundef) because of the new potential
+ // poison.
+ // Take the following three cases:
+ //
+ // 1)
+ // define nonnull ptr @foo() {
+ // %p = call ptr @bar()
+ // call void @use(ptr %p) willreturn nounwind
+ // ret ptr %p
+ // }
+ //
+ // 2)
+ // define noundef nonnull ptr @foo() {
+ // %p = call ptr @bar()
+ // call void @use(ptr %p) willreturn nounwind
+ // ret ptr %p
+ // }
+ //
+ // 3)
+ // define nonnull ptr @foo() {
+ // %p = call noundef ptr @bar()
+ // ret ptr %p
+ // }
+ //
+ // In case 1, we can't propagate nonnull because poison value in @use may
+ // change behavior or trigger UB.
+ // In case 2, we don't need to be concerned about propagating nonnull, as
+ // any new poison at @use will trigger UB anyways.
+ // In case 3, we can never propagate nonnull because it may create UB due to
+ // the noundef on @bar.
+ if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
+ ValidPG.removeAttribute(Attribute::Alignment);
+ if (ValidPG.hasAttributes()) {
+ // Three checks.
+ // If the callsite has `noundef`, then a poison due to violating the
+ // return attribute will create UB anyways so we can always propagate.
+ // Otherwise, if the return value (callee to be inlined) has `noundef`, we
+ // can't propagate as a new poison return will cause UB.
+ // Finally, check if the return value has no uses whose behavior may
+ // change/may cause UB if we potentially return poison. At the moment this
+ // is implemented overly conservatively with a single-use check.
+ // TODO: Update the single-use check to iterate through uses and only bail
+ // if we have a potentially dangerous use.
+
+ if (CB.hasRetAttr(Attribute::NoUndef) ||
+ (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))
+ NewAL = NewAL.addRetAttributes(Context, ValidPG);
+ }
NewRetVal->setAttributes(NewAL);
}
}
@@ -1515,10 +1585,10 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
if (ByValAlignment)
Alignment = std::max(Alignment, *ByValAlignment);
- Value *NewAlloca =
- new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
- Arg->getName(), &*Caller->begin()->begin());
- IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
+ AllocaInst *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(),
+ nullptr, Alignment, Arg->getName());
+ NewAlloca->insertBefore(Caller->begin()->begin());
+ IFI.StaticAllocas.push_back(NewAlloca);
// Uses of the argument in the function should use our new alloca
// instead.
@@ -1538,8 +1608,8 @@ static bool isUsedByLifetimeMarker(Value *V) {
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
Type *Ty = AI->getType();
- Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
- Ty->getPointerAddressSpace());
+ Type *Int8PtrTy =
+ PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace());
if (Ty == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
@@ -1596,48 +1666,71 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// the call site location instead.
bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
- for (; FI != Fn->end(); ++FI) {
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- // Loop metadata needs to be updated so that the start and end locs
- // reference inlined-at locations.
- auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
- &IANodes](Metadata *MD) -> Metadata * {
- if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
- return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
- return MD;
- };
- updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
+ // Helper-util for updating the metadata attached to an instruction.
+ auto UpdateInst = [&](Instruction &I) {
+ // Loop metadata needs to be updated so that the start and end locs
+ // reference inlined-at locations.
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
+ &IANodes](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
- if (!NoInlineLineTables)
- if (DebugLoc DL = BI->getDebugLoc()) {
- DebugLoc IDL =
- inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
- BI->setDebugLoc(IDL);
- continue;
- }
+ if (!NoInlineLineTables)
+ if (DebugLoc DL = I.getDebugLoc()) {
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes);
+ I.setDebugLoc(IDL);
+ return;
+ }
- if (CalleeHasDebugInfo && !NoInlineLineTables)
- continue;
+ if (CalleeHasDebugInfo && !NoInlineLineTables)
+ return;
- // If the inlined instruction has no line number, or if inline info
- // is not being generated, make it look as if it originates from the call
- // location. This is important for ((__always_inline, __nodebug__))
- // functions which must use caller location for all instructions in their
- // function body.
+ // If the inlined instruction has no line number, or if inline info
+ // is not being generated, make it look as if it originates from the call
+ // location. This is important for ((__always_inline, __nodebug__))
+ // functions which must use caller location for all instructions in their
+ // function body.
- // Don't update static allocas, as they may get moved later.
- if (auto *AI = dyn_cast<AllocaInst>(BI))
- if (allocaWouldBeStaticInEntry(AI))
- continue;
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(&I))
+ if (allocaWouldBeStaticInEntry(AI))
+ return;
- // Do not force a debug loc for pseudo probes, since they do not need to
- // be debuggable, and also they are expected to have a zero/null dwarf
- // discriminator at this point which could be violated otherwise.
- if (isa<PseudoProbeInst>(BI))
- continue;
+ // Do not force a debug loc for pseudo probes, since they do not need to
+ // be debuggable, and also they are expected to have a zero/null dwarf
+ // discriminator at this point which could be violated otherwise.
+ if (isa<PseudoProbeInst>(I))
+ return;
- BI->setDebugLoc(TheCallDL);
+ I.setDebugLoc(TheCallDL);
+ };
+
+ // Helper-util for updating debug-info records attached to instructions.
+ auto UpdateDPV = [&](DPValue *DPV) {
+ assert(DPV->getDebugLoc() && "Debug Value must have debug loc");
+ if (NoInlineLineTables) {
+ DPV->setDebugLoc(TheCallDL);
+ return;
+ }
+ DebugLoc DL = DPV->getDebugLoc();
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode,
+ DPV->getMarker()->getParent()->getContext(), IANodes);
+ DPV->setDebugLoc(IDL);
+ };
+
+ // Iterate over all instructions, updating metadata and debug-info records.
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ UpdateInst(*BI);
+ for (DPValue &DPV : BI->getDbgValueRange()) {
+ UpdateDPV(&DPV);
+ }
}
// Remove debug info intrinsics if we're not keeping inline info.
@@ -1647,11 +1740,12 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (isa<DbgInfoIntrinsic>(BI)) {
BI = BI->eraseFromParent();
continue;
+ } else {
+ BI->dropDbgValues();
}
++BI;
}
}
-
}
}
@@ -1760,12 +1854,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
continue;
auto *OrigBB = cast<BasicBlock>(Entry.first);
auto *ClonedBB = cast<BasicBlock>(Entry.second);
- uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB);
if (!ClonedBBs.insert(ClonedBB).second) {
// Multiple blocks in the callee might get mapped to one cloned block in
// the caller since we prune the callee as we clone it. When that happens,
// we want to use the maximum among the original blocks' frequencies.
- uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB);
if (NewFreq > Freq)
Freq = NewFreq;
}
@@ -1773,8 +1867,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
}
BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
CallerBFI->setBlockFreqAndScale(
- EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
- ClonedBBs);
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs);
}
/// Update the branch metadata for cloned call instructions.
@@ -1882,8 +1975,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
Builder.SetInsertPoint(II);
Function *IFn =
Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
- Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
+ Builder.CreateCall(IFn, RetOpnd, "");
}
II->eraseFromParent();
InsertRetainCall = false;
@@ -1918,8 +2010,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
// to objc_retain.
Builder.SetInsertPoint(RI);
Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
- Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
+ Builder.CreateCall(IFn, RetOpnd, "");
}
}
}
@@ -1953,9 +2044,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// The inliner does not know how to inline through calls with operand bundles
// in general ...
+ Value *ConvergenceControlToken = nullptr;
if (CB.hasOperandBundles()) {
for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
- uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
+ auto OBUse = CB.getOperandBundleAt(i);
+ uint32_t Tag = OBUse.getTagID();
// ... but it knows how to inline through "deopt" operand bundles ...
if (Tag == LLVMContext::OB_deopt)
continue;
@@ -1966,11 +2059,37 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
continue;
if (Tag == LLVMContext::OB_kcfi)
continue;
+ if (Tag == LLVMContext::OB_convergencectrl) {
+ ConvergenceControlToken = OBUse.Inputs[0].get();
+ continue;
+ }
return InlineResult::failure("unsupported operand bundle");
}
}
+ // FIXME: The check below is redundant and incomplete. According to spec, if a
+ // convergent call is missing a token, then the caller is using uncontrolled
+ // convergence. If the callee has an entry intrinsic, then the callee is using
+ // controlled convergence, and the call cannot be inlined. A proper
+ // implementation of this check requires a whole new analysis that identifies
+ // convergence in every function. For now, we skip that and just do this one
+ // cursory check. The underlying assumption is that in a compiler flow that
+ // fully implements convergence control tokens, there is no mixing of
+ // controlled and uncontrolled convergent operations in the whole program.
+ if (CB.isConvergent()) {
+ auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();
+ if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
+ if (IntrinsicCall->getIntrinsicID() ==
+ Intrinsic::experimental_convergence_entry) {
+ if (!ConvergenceControlToken) {
+ return InlineResult::failure(
+ "convergent call needs convergencectrl operand");
+ }
+ }
+ }
+ }
+
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CB.doesNotThrow();
@@ -2260,6 +2379,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
IFI.GetAssumptionCache(*Caller).registerAssumption(II);
}
+ if (ConvergenceControlToken) {
+ auto *I = FirstNewBlock->getFirstNonPHI();
+ if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
+ if (IntrinsicCall->getIntrinsicID() ==
+ Intrinsic::experimental_convergence_entry) {
+ IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
+ IntrinsicCall->eraseFromParent();
+ }
+ }
+ }
+
// If there are any alloca instructions in the block that used to be the entry
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before. We insert the
@@ -2296,6 +2426,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Transfer all of the allocas over in a block. Using splice means
// that the instructions aren't removed from the symbol table, then
// reinserted.
+ I.setTailBit(true);
Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
AI->getIterator(), I);
}
@@ -2400,7 +2531,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
!IFI.StaticAllocas.empty()) {
- IRBuilder<> builder(&FirstNewBlock->front());
+ IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
// Don't mark swifterror allocas. They can't have bitcast uses.
@@ -2454,14 +2585,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// If the inlined code contained dynamic alloca instructions, wrap the inlined
// code with llvm.stacksave/llvm.stackrestore intrinsics.
if (InlinedFunctionInfo.ContainsDynamicAllocas) {
- Module *M = Caller->getParent();
- // Get the two intrinsics we care about.
- Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
- Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
-
// Insert the llvm.stacksave.
CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
- .CreateCall(StackSave, {}, "savedstack");
+ .CreateStackSave("savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
@@ -2472,7 +2598,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
continue;
if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
continue;
- IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
+ IRBuilder<>(RI).CreateStackRestore(SavedPtr);
}
}
@@ -2574,6 +2700,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
Builder.CreateRetVoid();
else
Builder.CreateRet(NewDeoptCall);
+ // Since the ret type is changed, remove the incompatible attributes.
+ NewDeoptCall->removeRetAttrs(
+ AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));
}
// Leave behind the normal returns so we can merge control flow.
@@ -2704,8 +2833,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
if (IFI.CallerBFI) {
// Copy original BB's block frequency to AfterCallBB
- IFI.CallerBFI->setBlockFreq(
- AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ IFI.CallerBFI->setBlockFreq(AfterCallBB,
+ IFI.CallerBFI->getBlockFreq(OrigBB));
}
// Change the branch that used to go to AfterCallBB to branch to the first
@@ -2731,8 +2860,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
if (!CB.use_empty()) {
- PHI = PHINode::Create(RTy, Returns.size(), CB.getName(),
- &AfterCallBB->front());
+ PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
+ PHI->insertBefore(AfterCallBB->begin());
// Anything that used the result of the function call should now use the
// PHI node as their operand.
CB.replaceAllUsesWith(PHI);
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index c36b0533580b..5e0c312fe149 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -160,7 +160,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa", &ExitBB->front());
+ I->getName() + ".lcssa");
+ PN->insertBefore(ExitBB->begin());
if (InsertedPHIs)
InsertedPHIs->push_back(PN);
// Get the debug location from the original instruction.
@@ -241,7 +242,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
}
SmallVector<DbgValueInst *, 4> DbgValues;
- llvm::findDbgValues(DbgValues, I);
+ SmallVector<DPValue *, 4> DPValues;
+ llvm::findDbgValues(DbgValues, I, &DPValues);
// Update pre-existing debug value uses that reside outside the loop.
for (auto *DVI : DbgValues) {
@@ -257,6 +259,21 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
DVI->replaceVariableLocationOp(I, V);
}
+ // RemoveDIs: copy-paste of block above, using non-instruction debug-info
+ // records.
+ for (DPValue *DPV : DPValues) {
+ BasicBlock *UserBB = DPV->getMarker()->getParent();
+ if (InstBB == UserBB || L->contains(UserBB))
+ continue;
+ // We currently only handle debug values residing in blocks that were
+ // traversed while rewriting the uses. If we inserted just a single PHI,
+ // we will handle all relevant debug values.
+ Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+ : SSAUpdate.FindValueForBlock(UserBB);
+ if (V)
+ DPV->replaceVariableLocationOp(I, V);
+ }
+
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
// to post-process them to keep LCSSA form.
for (PHINode *InsertedPN : LocalInsertedPHIs) {
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index cdcfb5050bff..6220f8509309 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -101,7 +101,7 @@ private:
float Val) {
Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
if (!Arg->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Arg->getType());
+ V = ConstantFoldCastInstruction(Instruction::FPExt, V, Arg->getType());
if (BBBuilder.GetInsertBlock()->getParent()->hasFnAttribute(Attribute::StrictFP))
BBBuilder.setIsFPConstrained(true);
return BBBuilder.CreateFCmp(Cmp, Arg, V);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index f153ace5d3fc..51f39e0ba0cc 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -86,6 +87,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+extern cl::opt<bool> UseNewDbgInfoFormat;
+
#define DEBUG_TYPE "local"
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
@@ -227,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Remove weight for this case.
std::swap(Weights[Idx + 1], Weights.back());
Weights.pop_back();
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(Weights));
+ setBranchWeights(*SI, Weights);
}
// Remove this entry.
BasicBlock *ParentBB = SI->getParent();
@@ -414,7 +415,7 @@ bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths(
return wouldInstructionBeTriviallyDead(I, TLI);
}
-bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
const TargetLibraryInfo *TLI) {
if (I->isTerminator())
return false;
@@ -428,7 +429,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (isa<DbgVariableIntrinsic>(I))
return false;
- if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
+ if (const DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
if (DLI->getLabel())
return false;
return true;
@@ -443,9 +444,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (!II)
return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_guard: {
+ // Guards on true are operationally no-ops. In the future we can
+ // consider more sophisticated tradeoffs for guards considering potential
+ // for check widening, but for now we keep things simple.
+ auto *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0));
+ return Cond && Cond->isOne();
+ }
// TODO: These intrinsics are not safe to remove, because this may remove
// a well-defined trap.
- switch (II->getIntrinsicID()) {
case Intrinsic::wasm_trunc_signed:
case Intrinsic::wasm_trunc_unsigned:
case Intrinsic::ptrauth_auth:
@@ -461,7 +469,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// Safe to delete llvm.stacksave and launder.invariant.group if dead.
if (II->getIntrinsicID() == Intrinsic::stacksave ||
II->getIntrinsicID() == Intrinsic::launder_invariant_group)
@@ -484,13 +492,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return false;
}
- // Assumptions are dead if their condition is trivially true. Guards on
- // true are operationally no-ops. In the future we can consider more
- // sophisticated tradeoffs for guards considering potential for check
- // widening, but for now we keep things simple.
- if ((II->getIntrinsicID() == Intrinsic::assume &&
- isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) ||
- II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ // Assumptions are dead if their condition is trivially true.
+ if (II->getIntrinsicID() == Intrinsic::assume &&
+ isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
return !Cond->isZero();
@@ -605,10 +609,13 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, I);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, I, &DPUsers);
for (auto *DII : DbgUsers)
DII->setKillLocation();
- return !DbgUsers.empty();
+ for (auto *DPV : DPUsers)
+ DPV->setKillLocation();
+ return !DbgUsers.empty() || !DPUsers.empty();
}
/// areAllUsesEqual - Check whether the uses of a value are all the same.
@@ -847,17 +854,17 @@ static bool CanMergeValues(Value *First, Value *Second) {
/// branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+static bool
+CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ,
+ const SmallPtrSetImpl<BasicBlock *> &BBPreds) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
<< Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+ if (Succ->getSinglePredecessor())
+ return true;
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
@@ -997,6 +1004,35 @@ static void replaceUndefValuesInPhi(PHINode *PN,
}
}
+// Only when they share a single common predecessor, return true.
+// Only handles cases when BB can't be merged while its predecessors can be
+// redirected.
+static bool
+CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
+ const SmallPtrSetImpl<BasicBlock *> &BBPreds,
+ const SmallPtrSetImpl<BasicBlock *> &SuccPreds,
+ BasicBlock *&CommonPred) {
+
+ // There must be phis in BB, otherwise BB will be merged into Succ directly
+ if (BB->phis().empty() || Succ->phis().empty())
+ return false;
+
+ // BB must have predecessors not shared that can be redirected to Succ
+ if (!BB->hasNPredecessorsOrMore(2))
+ return false;
+
+ // Get single common predecessors of both BB and Succ
+ for (BasicBlock *SuccPred : SuccPreds) {
+ if (BBPreds.count(SuccPred)) {
+ if (CommonPred)
+ return false;
+ CommonPred = SuccPred;
+ }
+ }
+
+ return true;
+}
+
/// Replace a value flowing from a block to a phi with
/// potentially multiple instances of that value flowing from the
/// block's predecessors to the phi.
@@ -1004,9 +1040,11 @@ static void replaceUndefValuesInPhi(PHINode *PN,
/// \param BB The block with the value flowing into the phi.
/// \param BBPreds The predecessors of BB.
/// \param PN The phi that we are updating.
+/// \param CommonPred The common predecessor of BB and PN's BasicBlock
static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
const PredBlockVector &BBPreds,
- PHINode *PN) {
+ PHINode *PN,
+ BasicBlock *CommonPred) {
Value *OldVal = PN->removeIncomingValue(BB, false);
assert(OldVal && "No entry in PHI for Pred BB!");
@@ -1034,26 +1072,39 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
// will trigger asserts if we try to clean it up now, without also
// simplifying the corresponding conditional branch).
BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+
+ if (PredBB == CommonPred)
+ continue;
+
Value *PredVal = OldValPN->getIncomingValue(i);
- Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
- IncomingValues);
+ Value *Selected =
+ selectIncomingValueForBlock(PredVal, PredBB, IncomingValues);
// And add a new incoming value for this predecessor for the
// newly retargeted branch.
PN->addIncoming(Selected, PredBB);
}
+ if (CommonPred)
+ PN->addIncoming(OldValPN->getIncomingValueForBlock(CommonPred), BB);
+
} else {
for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
// Update existing incoming values in PN for this
// predecessor of BB.
BasicBlock *PredBB = BBPreds[i];
- Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
- IncomingValues);
+
+ if (PredBB == CommonPred)
+ continue;
+
+ Value *Selected =
+ selectIncomingValueForBlock(OldVal, PredBB, IncomingValues);
// And add a new incoming value for this predecessor for the
// newly retargeted branch.
PN->addIncoming(Selected, PredBB);
}
+ if (CommonPred)
+ PN->addIncoming(OldVal, BB);
}
replaceUndefValuesInPhi(PN, IncomingValues);
@@ -1064,13 +1115,30 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
- // We can't eliminate infinite loops.
+ // We can't simplify infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
- if (BB == Succ) return false;
+ if (BB == Succ)
+ return false;
+
+ SmallPtrSet<BasicBlock *, 16> BBPreds(pred_begin(BB), pred_end(BB));
+ SmallPtrSet<BasicBlock *, 16> SuccPreds(pred_begin(Succ), pred_end(Succ));
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+ // The single common predecessor of BB and Succ when BB cannot be killed
+ BasicBlock *CommonPred = nullptr;
+
+ bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds);
+
+ // Even if we cannot fold BB into Succ, we may be able to redirect the
+ // predecessors of BB to Succ.
+ bool BBPhisMergeable =
+ BBKillable ||
+ CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred);
+
+ if (!BBKillable && !BBPhisMergeable)
+ return false;
+
+ // Check to see if merging these blocks/phis would cause conflicts for any of
+ // the phi nodes in BB or Succ. If not, we can safely merge.
// Check for cases where Succ has multiple predecessors and a PHI node in BB
// has uses which will not disappear when the PHI nodes are merged. It is
@@ -1099,6 +1167,11 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
}
}
+ if (BBPhisMergeable && CommonPred)
+ LLVM_DEBUG(dbgs() << "Found Common Predecessor between: " << BB->getName()
+ << " and " << Succ->getName() << " : "
+ << CommonPred->getName() << "\n");
+
// 'BB' and 'BB->Pred' are loop latches, bail out to presrve inner loop
// metadata.
//
@@ -1171,25 +1244,37 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
if (PredTI->hasMetadata(LLVMContext::MD_loop))
return false;
- LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+ if (BBKillable)
+ LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+ else if (BBPhisMergeable)
+ LLVM_DEBUG(dbgs() << "Merge Phis in Trivial BB: \n" << *BB);
SmallVector<DominatorTree::UpdateType, 32> Updates;
+
if (DTU) {
// To avoid processing the same predecessor more than once.
SmallPtrSet<BasicBlock *, 8> SeenPreds;
- // All predecessors of BB will be moved to Succ.
- SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
+ // All predecessors of BB (except the common predecessor) will be moved to
+ // Succ.
Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1);
- for (auto *PredOfBB : predecessors(BB))
- // This predecessor of BB may already have Succ as a successor.
- if (!PredsOfSucc.contains(PredOfBB))
+
+ for (auto *PredOfBB : predecessors(BB)) {
+ // Do not modify those common predecessors of BB and Succ
+ if (!SuccPreds.contains(PredOfBB))
if (SeenPreds.insert(PredOfBB).second)
Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ }
+
SeenPreds.clear();
+
for (auto *PredOfBB : predecessors(BB))
- if (SeenPreds.insert(PredOfBB).second)
+ // When BB cannot be killed, do not remove the edge between BB and
+ // CommonPred.
+ if (SeenPreds.insert(PredOfBB).second && PredOfBB != CommonPred)
Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+
+ if (BBKillable)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
if (isa<PHINode>(Succ->begin())) {
@@ -1201,21 +1286,19 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// Loop over all of the PHI nodes in the successor of BB.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
-
- redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN, CommonPred);
}
}
if (Succ->getSinglePredecessor()) {
// BB is the only predecessor of Succ, so Succ will end up with exactly
// the same predecessors BB had.
-
// Copy over any phi, debug or lifetime instruction.
BB->getTerminator()->eraseFromParent();
- Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB);
+ Succ->splice(Succ->getFirstNonPHIIt(), BB);
} else {
while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ // We explicitly check for such uses for merging phis.
assert(PN->use_empty() && "There shouldn't be any uses here!");
PN->eraseFromParent();
}
@@ -1228,26 +1311,42 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
for (BasicBlock *Pred : predecessors(BB))
Pred->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopMD);
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
+ if (BBKillable) {
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
- // Clear the successor list of BB to match updates applying to DTU later.
- if (BB->getTerminator())
- BB->back().eraseFromParent();
- new UnreachableInst(BB->getContext(), BB);
- assert(succ_empty(BB) && "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
+ if (!Succ->hasName())
+ Succ->takeName(BB);
+
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->back().eraseFromParent();
+
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ } else if (BBPhisMergeable) {
+ // Everything except CommonPred that jumped to BB now goes to Succ.
+ BB->replaceUsesWithIf(Succ, [BBPreds, CommonPred](Use &U) -> bool {
+ if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser()))
+ return UseInst->getParent() != CommonPred &&
+ BBPreds.contains(UseInst->getParent());
+ return false;
+ });
+ }
if (DTU)
DTU->applyUpdates(Updates);
- DeleteDeadBlock(BB, DTU);
+ if (BBKillable)
+ DeleteDeadBlock(BB, DTU);
return true;
}
-static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
+static bool
+EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB,
+ SmallPtrSetImpl<PHINode *> &ToRemove) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
@@ -1263,12 +1362,14 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
// Note that we only look in the upper square's triangle,
// we already checked that the lower triangle PHI's aren't identical.
for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) {
+ if (ToRemove.contains(DuplicatePN))
+ continue;
if (!DuplicatePN->isIdenticalToWhenDefined(PN))
continue;
// A duplicate. Replace this PHI with the base PHI.
++NumPHICSEs;
DuplicatePN->replaceAllUsesWith(PN);
- DuplicatePN->eraseFromParent();
+ ToRemove.insert(DuplicatePN);
Changed = true;
// The RAUW can change PHIs that we already visited.
@@ -1279,7 +1380,9 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
return Changed;
}
-static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
+static bool
+EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB,
+ SmallPtrSetImpl<PHINode *> &ToRemove) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
@@ -1343,12 +1446,14 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
// Examine each PHI.
bool Changed = false;
for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) {
+ if (ToRemove.contains(PN))
+ continue;
auto Inserted = PHISet.insert(PN);
if (!Inserted.second) {
// A duplicate. Replace this PHI with its duplicate.
++NumPHICSEs;
PN->replaceAllUsesWith(*Inserted.first);
- PN->eraseFromParent();
+ ToRemove.insert(PN);
Changed = true;
// The RAUW can change PHIs that we already visited. Start over from the
@@ -1361,25 +1466,27 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
return Changed;
}
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB,
+ SmallPtrSetImpl<PHINode *> &ToRemove) {
if (
#ifndef NDEBUG
!PHICSEDebugHash &&
#endif
hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize))
- return EliminateDuplicatePHINodesNaiveImpl(BB);
- return EliminateDuplicatePHINodesSetBasedImpl(BB);
+ return EliminateDuplicatePHINodesNaiveImpl(BB, ToRemove);
+ return EliminateDuplicatePHINodesSetBasedImpl(BB, ToRemove);
}
-/// If the specified pointer points to an object that we control, try to modify
-/// the object's alignment to PrefAlign. Returns a minimum known alignment of
-/// the value after the operation, which may be lower than PrefAlign.
-///
-/// Increating value alignment isn't often possible though. If alignment is
-/// important, a more reliable approach is to simply align all global variables
-/// and allocation instructions to their preferred alignment from the beginning.
-static Align tryEnforceAlignment(Value *V, Align PrefAlign,
- const DataLayout &DL) {
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+  // Convenience overload: run the deferred-deletion variant, which only
+  // queues duplicate PHIs, and then erase everything it queued.
+  SmallPtrSet<PHINode *, 8> DeadPHIs;
+  const bool AnyMerged = EliminateDuplicatePHINodes(BB, DeadPHIs);
+  for (PHINode *Dead : DeadPHIs)
+    Dead->eraseFromParent();
+  return AnyMerged;
+}
+
+Align llvm::tryEnforceAlignment(Value *V, Align PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
@@ -1463,12 +1570,18 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
SmallVector<DbgValueInst *, 1> DbgValues;
- findDbgValues(DbgValues, APN);
+ SmallVector<DPValue *, 1> DPValues;
+ findDbgValues(DbgValues, APN, &DPValues);
for (auto *DVI : DbgValues) {
assert(is_contained(DVI->getValues(), APN));
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
+ for (auto *DPV : DPValues) {
+ assert(is_contained(DPV->location_ops(), APN));
+ if ((DPV->getVariable() == DIVar) && (DPV->getExpression() == DIExpr))
+ return true;
+ }
return false;
}
@@ -1504,6 +1617,67 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
// Could not determine size of variable. Conservatively return false.
return false;
}
+// RemoveDIs: DPValue counterpart of the DbgVariableIntrinsic overload above
+// (DPValues being the replacement for dbg.values). Returns true only when a
+// value of type ValTy is known to be at least as large as the variable
+// fragment described by DPV.
+static bool valueCoversEntireFragment(Type *ValTy, DPValue *DPV) {
+  const DataLayout &DL = DPV->getModule()->getDataLayout();
+  TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+
+  // Use the fragment size reported by the record itself when it has one.
+  if (std::optional<uint64_t> FragmentSize = DPV->getFragmentSizeInBits())
+    return TypeSize::isKnownGE(ValueSize, TypeSize::getFixed(*FragmentSize));
+
+  // We can't always calculate the size of the DI variable (e.g. if it is a
+  // VLA). Try to use the size of the alloca that the record describes
+  // instead; that is only attempted when the record holds an address.
+  if (!DPV->isAddressOfVariable())
+    return false;
+
+  // DPV should have exactly 1 location when it is an address.
+  assert(DPV->getNumVariableLocationOps() == 1 &&
+         "address of variable must have exactly 1 location operand.");
+  auto *AI = dyn_cast_or_null<AllocaInst>(DPV->getVariableLocationOp(0));
+  if (!AI)
+    return false;
+
+  // If the alloca size is unknown we could not determine the size of the
+  // variable: conservatively return false.
+  std::optional<TypeSize> AllocaBits = AI->getAllocationSizeInBits(DL);
+  return AllocaBits && TypeSize::isKnownGE(ValueSize, *AllocaBits);
+}
+
+/// Emit a debug-value record describing \p DV (for variable \p DIVar with
+/// expression \p DIExpr at location \p NewLoc) immediately before \p Instr.
+///
+/// When UseNewDbgInfoFormat is false this is a dbg.value intrinsic created
+/// through \p Builder; otherwise a DPValue is allocated and attached to the
+/// block that contains \p Instr.
+static void insertDbgValueOrDPValue(DIBuilder &Builder, Value *DV,
+                                    DILocalVariable *DIVar,
+                                    DIExpression *DIExpr,
+                                    const DebugLoc &NewLoc,
+                                    BasicBlock::iterator Instr) {
+  if (!UseNewDbgInfoFormat) {
+    // The intrinsic is created with a null insertion point and then
+    // positioned explicitly through the iterator.
+    auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+                                                   (Instruction *)nullptr);
+    DbgVal->insertBefore(Instr);
+  } else {
+    // RemoveDIs: if we're using the new debug-info format, allocate a
+    // DPValue directly instead of a dbg.value intrinsic.
+    // The record gets its own name so it no longer shadows the DV parameter.
+    ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+    DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+    Instr->getParent()->insertDPValueBefore(NewDPV, Instr);
+  }
+}
+
+/// Emit a debug-value record describing \p DV (for variable \p DIVar with
+/// expression \p DIExpr at location \p NewLoc) immediately after \p Instr.
+///
+/// When UseNewDbgInfoFormat is false this is a dbg.value intrinsic created
+/// through \p Builder; otherwise a DPValue is allocated and attached to the
+/// block that contains \p Instr.
+static void insertDbgValueOrDPValueAfter(DIBuilder &Builder, Value *DV,
+                                         DILocalVariable *DIVar,
+                                         DIExpression *DIExpr,
+                                         const DebugLoc &NewLoc,
+                                         BasicBlock::iterator Instr) {
+  if (!UseNewDbgInfoFormat) {
+    // The intrinsic is created with a null insertion point and then
+    // positioned explicitly after the instruction.
+    auto *DbgVal = Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc,
+                                                   (Instruction *)nullptr);
+    DbgVal->insertAfter(&*Instr);
+  } else {
+    // RemoveDIs: if we're using the new debug-info format, allocate a
+    // DPValue directly instead of a dbg.value intrinsic.
+    // The record gets its own name so it no longer shadows the DV parameter.
+    ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+    DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+    Instr->getParent()->insertDPValueAfter(NewDPV, &*Instr);
+  }
+}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare intrinsic.
@@ -1533,7 +1707,8 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
DIExpr->isDeref() || (!DIExpr->startsWithDeref() &&
valueCoversEntireFragment(DV->getType(), DII));
if (CanConvert) {
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc,
+ SI->getIterator());
return;
}
@@ -1545,7 +1720,19 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// know which part) we insert an dbg.value intrinsic to indicate that we
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
+ insertDbgValueOrDPValue(Builder, DV, DIVar, DIExpr, NewLoc,
+ SI->getIterator());
+}
+
+// RemoveDIs: DPValue flavour of getDebugValueLoc; it reads the location from
+// a DPValue instead of from a debug intrinsic.
+static DebugLoc getDebugValueLocDPV(DPValue *DPV) {
+  // Original dbg.declare must have a location.
+  const DebugLoc &DeclareLoc = DPV->getDebugLoc();
+  // Keep the scope / inlinedAt fields of that location, but use line 0 and
+  // column 0 so that the result is an unknown source position.
+  return DILocation::get(DPV->getContext(), /*Line=*/0, /*Column=*/0,
+                         DeclareLoc.getScope(), DeclareLoc.getInlinedAt());
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1571,9 +1758,40 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// future if multi-location support is added to the IR, it might be
// preferable to keep tracking both the loaded value and the original
// address in case the alloca can not be elided.
- Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
- DbgValue->insertAfter(LI);
+ insertDbgValueOrDPValueAfter(Builder, LI, DIVar, DIExpr, NewLoc,
+ LI->getIterator());
+}
+
+/// DPValue variant: inserts a DPValue before a store to an alloca'd value
+/// whose address is described by \p DPV.
+///
+/// The stored value is used as the new location when the conversion is safe;
+/// otherwise an undef location is recorded so that the variable is not
+/// reported with a stale value. \p Builder is not used by this overload.
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, StoreInst *SI,
+                                           DIBuilder &Builder) {
+  assert(DPV->isAddressOfVariable());
+  auto *DIVar = DPV->getVariable();
+  assert(DIVar && "Missing variable");
+  auto *DIExpr = DPV->getExpression();
+  Value *DV = SI->getValueOperand();
+
+  DebugLoc NewLoc = getDebugValueLocDPV(DPV);
+
+  // Use the same criterion as the dbg.declare overload of this function: a
+  // plain deref expression lets us use the stored value as is; an expression
+  // that does not start with a deref is convertible when the stored value
+  // covers the entire fragment; any other dereference is not converted.
+  bool CanConvert =
+      DIExpr->isDeref() || (!DIExpr->startsWithDeref() &&
+                            valueCoversEntireFragment(DV->getType(), DPV));
+  if (CanConvert) {
+    assert(UseNewDbgInfoFormat);
+  } else {
+    // FIXME: If storing to a part of the variable described by the
+    // dbg.declare, then we want to insert a DPValue for the corresponding
+    // fragment.
+    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV
+                      << '\n');
+    // For now, when there is a store to parts of the variable (but we do not
+    // know which part) we insert a DPValue record to indicate that we know
+    // nothing about the variable's content.
+    DV = UndefValue::get(DV->getType());
+  }
+
+  // Create a DPValue directly and insert it before the store.
+  ValueAsMetadata *DVAM = ValueAsMetadata::get(DV);
+  DPValue *NewDPV = new DPValue(DVAM, DIVar, DIExpr, NewLoc.get());
+  SI->getParent()->insertDPValueBefore(NewDPV, SI->getIterator());
}
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
@@ -1604,8 +1822,38 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// The block may be a catchswitch block, which does not have a valid
// insertion point.
// FIXME: Insert dbg.value markers in the successors when appropriate.
- if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
+ if (InsertionPt != BB->end()) {
+ insertDbgValueOrDPValue(Builder, APN, DIVar, DIExpr, NewLoc, InsertionPt);
+ }
+}
+
+/// DPValue variant: inserts a DPValue after a load of an alloca'd value that
+/// is described by \p DPV. Nothing is inserted when the loaded value does not
+/// cover the whole variable fragment. \p Builder is not used by this overload.
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, LoadInst *LI,
+                                           DIBuilder &Builder) {
+  DILocalVariable *Var = DPV->getVariable();
+  DIExpression *Expr = DPV->getExpression();
+  assert(Var && "Missing variable");
+
+  const bool CoversFragment = valueCoversEntireFragment(LI->getType(), DPV);
+  if (!CoversFragment) {
+    // FIXME: If only referring to a part of the variable described by the
+    // dbg.declare, then we want to insert a DPValue for the corresponding
+    // fragment.
+    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV
+                      << '\n');
+    return;
+  }
+
+  DebugLoc ValueLoc = getDebugValueLocDPV(DPV);
+
+  // From here on the loaded value is tracked instead of the address. If
+  // multi-location support is ever added to the IR, it might be preferable
+  // to keep tracking both the loaded value and the original address in case
+  // the alloca can not be elided.
+  assert(UseNewDbgInfoFormat);
+
+  // Create a DPValue directly and insert it after the load.
+  ValueAsMetadata *LoadedMD = ValueAsMetadata::get(LI);
+  DPValue *LoadRecord = new DPValue(LoadedMD, Var, Expr, ValueLoc.get());
+  LI->getParent()->insertDPValueAfter(LoadRecord, LI);
}
/// Determine whether this alloca is either a VLA or an array.
@@ -1618,6 +1866,36 @@ static bool isArray(AllocaInst *AI) {
static bool isStructure(AllocaInst *AI) {
return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy();
}
+/// DPValue variant: inserts a debug-value record for the phi \p APN at the
+/// first insertion point of its block, using the variable and expression of
+/// \p DPV. Does nothing if the phi already has such a record or if the phi's
+/// type does not cover the whole variable fragment.
+void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, PHINode *APN,
+                                           DIBuilder &Builder) {
+  DILocalVariable *Var = DPV->getVariable();
+  DIExpression *Expr = DPV->getExpression();
+  assert(Var && "Missing variable");
+
+  // Do not describe the same phi twice.
+  const bool AlreadyDescribed = PhiHasDebugValue(Var, Expr, APN);
+  if (AlreadyDescribed)
+    return;
+
+  const bool CoversFragment = valueCoversEntireFragment(APN->getType(), DPV);
+  if (!CoversFragment) {
+    // FIXME: If only referring to a part of the variable described by the
+    // dbg.declare, then we want to insert a DPValue for the corresponding
+    // fragment.
+    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to DPValue: " << *DPV
+                      << '\n');
+    return;
+  }
+
+  BasicBlock *PhiBlock = APN->getParent();
+  auto InsertAt = PhiBlock->getFirstInsertionPt();
+
+  DebugLoc ValueLoc = getDebugValueLocDPV(DPV);
+
+  // The block may be a catchswitch block, which does not have a valid
+  // insertion point.
+  // FIXME: Insert DPValue markers in the successors when appropriate.
+  if (InsertAt == PhiBlock->end())
+    return;
+  insertDbgValueOrDPValue(Builder, APN, Var, Expr, ValueLoc, InsertAt);
+}
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
@@ -1674,8 +1952,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
DebugLoc NewLoc = getDebugValueLoc(DDI);
auto *DerefExpr =
DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
- NewLoc, CI);
+ insertDbgValueOrDPValue(DIB, AI, DDI->getVariable(), DerefExpr,
+ NewLoc, CI->getIterator());
}
} else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
if (BI->getType()->isPointerTy())
@@ -1694,6 +1972,69 @@ bool llvm::LowerDbgDeclare(Function &F) {
return Changed;
}
+// RemoveDIs: counterpart of insertDebugValuesForPHIs which pulls the
+// debug-info out of the block's DPValues rather than dbg.value intrinsics.
+static void insertDPValuesForPHIs(BasicBlock *BB,
+                                  SmallVectorImpl<PHINode *> &InsertedPHIs) {
+  assert(BB && "No BasicBlock to clone DPValue(s) from.");
+  if (InsertedPHIs.empty())
+    return;
+
+  // Map existing PHI nodes to a DPValue in BB that uses them as a location
+  // operand (the first such record encountered is the one kept).
+  DenseMap<Value *, DPValue *> PHIToRecord;
+  for (Instruction &Inst : *BB)
+    for (DPValue &Record : Inst.getDbgValueRange())
+      for (Value *Op : Record.location_ops())
+        if (auto *PhiOp = dyn_cast_or_null<PHINode>(Op))
+          PHIToRecord.insert({PhiOp, &Record});
+  if (PHIToRecord.empty())
+    return;
+
+  // Map a pair of the destination BB and old DPValue to the new DPValue,
+  // so that if a DPValue is being rewritten to use more than one of the
+  // inserted PHIs in the same destination BB, we can update the same DPValue
+  // with all the new PHIs instead of creating one copy for each.
+  MapVector<std::pair<BasicBlock *, DPValue *>, DPValue *> ClonedRecords;
+
+  // Walk the new PHIs and check whether they use one of the previously
+  // mapped PHIs. If so, clone the DPValue so that the info is propagated
+  // through the new PHI; several new PHIs in one destination block that
+  // share an old DPValue are merged into a single clone.
+  for (PHINode *NewPHI : InsertedPHIs) {
+    BasicBlock *Dest = NewPHI->getParent();
+    // Avoid inserting a debug-info record into an EH block.
+    if (Dest->getFirstNonPHI()->isEHPad())
+      continue;
+
+    for (Value *Incoming : NewPHI->operand_values()) {
+      auto Found = PHIToRecord.find(Incoming);
+      if (Found == PHIToRecord.end())
+        continue;
+
+      DPValue *OldRecord = cast<DPValue>(Found->second);
+      auto CloneIt = ClonedRecords.find({Dest, OldRecord});
+      if (CloneIt == ClonedRecords.end()) {
+        DPValue *FreshClone = OldRecord->clone();
+        CloneIt = ClonedRecords.insert({{Dest, OldRecord}, FreshClone}).first;
+      }
+      DPValue *Clone = CloneIt->second;
+
+      // If the PHI contains this value as an operand more than once, we may
+      // already have replaced it in the clone; confirm that it is still
+      // present before rewriting.
+      if (is_contained(Clone->location_ops(), Incoming))
+        Clone->replaceVariableLocationOp(Incoming, NewPHI);
+    }
+  }
+
+  // Insert the new DPValues into their destination blocks.
+  for (const auto &Entry : ClonedRecords) {
+    BasicBlock *Dest = Entry.first.first;
+    DPValue *Clone = Entry.second;
+    auto InsertAt = Dest->getFirstInsertionPt();
+    assert(InsertAt != Dest->end() && "Ill-formed basic block");
+
+    InsertAt->DbgMarker->insertDPValue(Clone, true);
+  }
+}
+
/// Propagate dbg.value intrinsics through the newly inserted PHIs.
void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
SmallVectorImpl<PHINode *> &InsertedPHIs) {
@@ -1701,6 +2042,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
if (InsertedPHIs.size() == 0)
return;
+ insertDPValuesForPHIs(BB, InsertedPHIs);
+
// Map existing PHI nodes to their dbg.values.
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
@@ -1775,44 +2118,60 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
return !DbgDeclares.empty();
}
-static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
- DIBuilder &Builder, int Offset) {
- const DebugLoc &Loc = DVI->getDebugLoc();
- auto *DIVar = DVI->getVariable();
- auto *DIExpr = DVI->getExpression();
+static void updateOneDbgValueForAlloca(const DebugLoc &Loc,
+ DILocalVariable *DIVar,
+ DIExpression *DIExpr, Value *NewAddress,
+ DbgValueInst *DVI, DPValue *DPV,
+ DIBuilder &Builder, int Offset) {
assert(DIVar && "Missing variable");
- // This is an alloca-based llvm.dbg.value. The first thing it should do with
- // the alloca pointer is dereference it. Otherwise we don't know how to handle
- // it and give up.
+ // This is an alloca-based dbg.value/DPValue. The first thing it should do
+ // with the alloca pointer is dereference it. Otherwise we don't know how to
+ // handle it and give up.
if (!DIExpr || DIExpr->getNumElements() < 1 ||
DIExpr->getElement(0) != dwarf::DW_OP_deref)
return;
// Insert the offset before the first deref.
- // We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset)
DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
- Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
- DVI->eraseFromParent();
+ if (DVI) {
+ DVI->setExpression(DIExpr);
+ DVI->replaceVariableLocationOp(0u, NewAddress);
+ } else {
+ assert(DPV);
+ DPV->setExpression(DIExpr);
+ DPV->replaceVariableLocationOp(0u, NewAddress);
+ }
}
void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset) {
- if (auto *L = LocalAsMetadata::getIfExists(AI))
- if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
- for (Use &U : llvm::make_early_inc_range(MDV->uses()))
- if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
- replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
+ SmallVector<DbgValueInst *, 1> DbgUsers;
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgValues(DbgUsers, AI, &DPUsers);
+
+ // Attempt to replace dbg.values that use this alloca.
+ for (auto *DVI : DbgUsers)
+ updateOneDbgValueForAlloca(DVI->getDebugLoc(), DVI->getVariable(),
+ DVI->getExpression(), NewAllocaAddress, DVI,
+ nullptr, Builder, Offset);
+
+ // Replace any DPValues that use this alloca.
+ for (DPValue *DPV : DPUsers)
+ updateOneDbgValueForAlloca(DPV->getDebugLoc(), DPV->getVariable(),
+ DPV->getExpression(), NewAllocaAddress, nullptr,
+ DPV, Builder, Offset);
}
/// Where possible to salvage debug information for \p I do so.
/// If not possible mark undef.
void llvm::salvageDebugInfo(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
- salvageDebugInfoForDbgValues(I, DbgUsers);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, &I, &DPUsers);
+ salvageDebugInfoForDbgValues(I, DbgUsers, DPUsers);
}
/// Salvage the address component of \p DAI.
@@ -1850,7 +2209,8 @@ static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) {
}
void llvm::salvageDebugInfoForDbgValues(
- Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
+ Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers,
+ ArrayRef<DPValue *> DPUsers) {
// These are arbitrary chosen limits on the maximum number of values and the
// maximum size of a debug expression we can salvage up to, used for
// performance reasons.
@@ -1916,12 +2276,70 @@ void llvm::salvageDebugInfoForDbgValues(
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
Salvaged = true;
}
+ // Duplicate of above block for DPValues.
+ for (auto *DPV : DPUsers) {
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+ // are implicitly pointing out the value as a DWARF memory location
+ // description.
+ bool StackValue = DPV->getType() == DPValue::LocationType::Value;
+ auto DPVLocation = DPV->location_ops();
+ assert(
+ is_contained(DPVLocation, &I) &&
+ "DbgVariableIntrinsic must use salvaged instruction as its location");
+ SmallVector<Value *, 4> AdditionalValues;
+ // 'I' may appear more than once in DPV's location ops, and each use of 'I'
+ // must be updated in the DIExpression and potentially have additional
+ // values added; thus we call salvageDebugInfoImpl for each 'I' instance in
+ // DPVLocation.
+ Value *Op0 = nullptr;
+ DIExpression *SalvagedExpr = DPV->getExpression();
+ auto LocItr = find(DPVLocation, &I);
+ while (SalvagedExpr && LocItr != DPVLocation.end()) {
+ SmallVector<uint64_t, 16> Ops;
+ unsigned LocNo = std::distance(DPVLocation.begin(), LocItr);
+ uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
+ Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
+ if (!Op0)
+ break;
+ SalvagedExpr =
+ DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
+ LocItr = std::find(++LocItr, DPVLocation.end(), &I);
+ }
+ // salvageDebugInfoImpl should fail on examining the first element of
+ // DbgUsers, or none of them.
+ if (!Op0)
+ break;
+
+ DPV->replaceVariableLocationOp(&I, Op0);
+ bool IsValidSalvageExpr =
+ SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (AdditionalValues.empty() && IsValidSalvageExpr) {
+ DPV->setExpression(SalvagedExpr);
+ } else if (DPV->getType() == DPValue::LocationType::Value &&
+ IsValidSalvageExpr &&
+ DPV->getNumVariableLocationOps() + AdditionalValues.size() <=
+ MaxDebugArgs) {
+ DPV->addVariableLocationOps(AdditionalValues, SalvagedExpr);
+ } else {
+ // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is
+ // currently only valid for stack value expressions.
+ // Also do not salvage if the resulting DIArgList would contain an
+ // unreasonably large number of values.
+ Value *Undef = UndefValue::get(I.getOperand(0)->getType());
+ DPV->replaceVariableLocationOp(I.getOperand(0), Undef);
+ }
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << DPV << '\n');
+ Salvaged = true;
+ }
if (Salvaged)
return;
for (auto *DII : DbgUsers)
DII->setKillLocation();
+
+ for (auto *DPV : DPUsers)
+ DPV->setKillLocation();
}
Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
@@ -2136,16 +2554,20 @@ using DbgValReplacement = std::optional<DIExpression *>;
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr,
+ function_ref<DbgValReplacement(DPValue &DPV)> RewriteDPVExpr) {
// Find debug users of From.
SmallVector<DbgVariableIntrinsic *, 1> Users;
- findDbgUsers(Users, &From);
- if (Users.empty())
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(Users, &From, &DPUsers);
+ if (Users.empty() && DPUsers.empty())
return false;
// Prevent use-before-def of To.
bool Changed = false;
+
SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage;
+ SmallPtrSet<DPValue *, 1> UndefOrSalvageDPV;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -2163,6 +2585,25 @@ static bool rewriteDebugUsers(
UndefOrSalvage.insert(DII);
}
}
+
+ // DPValue implementation of the above.
+ for (auto *DPV : DPUsers) {
+ Instruction *MarkedInstr = DPV->getMarker()->MarkedInstr;
+ Instruction *NextNonDebug = MarkedInstr;
+ // The next instruction might still be a dbg.declare, skip over it.
+ if (isa<DbgVariableIntrinsic>(NextNonDebug))
+ NextNonDebug = NextNonDebug->getNextNonDebugInstruction();
+
+ if (DomPointAfterFrom && NextNonDebug == &DomPoint) {
+ LLVM_DEBUG(dbgs() << "MOVE: " << *DPV << '\n');
+ DPV->removeFromParent();
+ // Ensure there's a marker.
+ DomPoint.getParent()->insertDPValueAfter(DPV, &DomPoint);
+ Changed = true;
+ } else if (!DT.dominates(&DomPoint, MarkedInstr)) {
+ UndefOrSalvageDPV.insert(DPV);
+ }
+ }
}
// Update debug users without use-before-def risk.
@@ -2179,8 +2620,21 @@ static bool rewriteDebugUsers(
LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
Changed = true;
}
+ for (auto *DPV : DPUsers) {
+ if (UndefOrSalvageDPV.count(DPV))
+ continue;
- if (!UndefOrSalvage.empty()) {
+ DbgValReplacement DVR = RewriteDPVExpr(*DPV);
+ if (!DVR)
+ continue;
+
+ DPV->replaceVariableLocationOp(&From, &To);
+ DPV->setExpression(*DVR);
+ LLVM_DEBUG(dbgs() << "REWRITE: " << DPV << '\n');
+ Changed = true;
+ }
+
+ if (!UndefOrSalvage.empty() || !UndefOrSalvageDPV.empty()) {
// Try to salvage the remaining debug users.
salvageDebugInfo(From);
Changed = true;
@@ -2228,12 +2682,15 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
return DII.getExpression();
};
+ auto IdentityDPV = [&](DPValue &DPV) -> DbgValReplacement {
+ return DPV.getExpression();
+ };
// Handle no-op conversions.
Module &M = *From.getModule();
const DataLayout &DL = M.getDataLayout();
if (isBitCastSemanticsPreserving(DL, FromTy, ToTy))
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV);
// Handle integer-to-integer widening and narrowing.
// FIXME: Use DW_OP_convert when it's available everywhere.
@@ -2245,7 +2702,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
// When the width of the result grows, assume that a debugger will only
// access the low `FromBits` bits when inspecting the source variable.
if (FromBits < ToBits)
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
+ return rewriteDebugUsers(From, To, DomPoint, DT, Identity, IdentityDPV);
// The width of the result has shrunk. Use sign/zero extension to describe
// the source variable's high bits.
@@ -2261,7 +2718,22 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits,
Signed);
};
- return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
+ // RemoveDIs: duplicate implementation working on DPValues rather than on
+ // dbg.value intrinsics.
+ auto SignOrZeroExtDPV = [&](DPValue &DPV) -> DbgValReplacement {
+ DILocalVariable *Var = DPV.getVariable();
+
+ // Without knowing signedness, sign/zero extension isn't possible.
+ auto Signedness = Var->getSignedness();
+ if (!Signedness)
+ return std::nullopt;
+
+ bool Signed = *Signedness == DIBasicType::Signedness::Signed;
+ return DIExpression::appendExt(DPV.getExpression(), ToBits, FromBits,
+ Signed);
+ };
+ return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt,
+ SignOrZeroExtDPV);
}
// TODO: Floating-point conversions, vectors.
@@ -2275,12 +2747,17 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ // RemoveDIs: erasing debug-info must be done manually.
+ EndInst->dropDbgValues();
while (EndInst != &BB->front()) {
// Delete the next to last instruction.
Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
+ // EHPads can't have DPValues attached to them, but it might be possible
+ // for things with token type.
+ Inst->dropDbgValues();
EndInst = Inst;
continue;
}
@@ -2288,6 +2765,8 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
++NumDeadDbgInst;
else
++NumDeadInst;
+ // RemoveDIs: erasing debug-info must be done manually.
+ Inst->dropDbgValues();
Inst->eraseFromParent();
}
return {NumDeadInst, NumDeadDbgInst};
@@ -2329,6 +2808,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
DTU->applyUpdates(Updates);
}
+ BB->flushTerminatorDbgValues();
return NumInstrsRemoved;
}
@@ -2482,9 +2962,9 @@ static bool markAliveBlocks(Function &F,
// If we found a call to a no-return function, insert an unreachable
// instruction after it. Make sure there isn't *already* one there
// though.
- if (!isa<UnreachableInst>(CI->getNextNode())) {
+ if (!isa<UnreachableInst>(CI->getNextNonDebugInstruction())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, DTU);
+ changeToUnreachable(CI->getNextNonDebugInstruction(), false, DTU);
Changed = true;
}
break;
@@ -2896,9 +3376,10 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
for (Use &U : llvm::make_early_inc_range(From->uses())) {
if (!Dominates(Root, U))
continue;
+ LLVM_DEBUG(dbgs() << "Replace dominated use of '";
+ From->printAsOperand(dbgs());
+ dbgs() << "' with " << *To << " in " << *U.getUser() << "\n");
U.set(To);
- LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
- << "' as " << *To << " in " << *U << "\n");
++Count;
}
return Count;
@@ -3017,9 +3498,12 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
void llvm::dropDebugUsers(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
+ SmallVector<DPValue *, 1> DPUsers;
+ findDbgUsers(DbgUsers, &I, &DPUsers);
for (auto *DII : DbgUsers)
DII->eraseFromParent();
+ for (auto *DPV : DPUsers)
+ DPV->eraseFromParent();
}
void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
@@ -3051,6 +3535,8 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
I->dropUBImplyingAttrsAndMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
+ // RemoveDIs: drop debug-info too as the following code does.
+ I->dropDbgValues();
if (I->isDebugOrPseudoInst()) {
// Remove DbgInfo and pseudo probe Intrinsics.
II = I->eraseFromParent();
@@ -3063,6 +3549,41 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
BB->getTerminator()->getIterator());
}
+/// Build a constant-value DIExpression for the constant \p C of type \p Ty.
+///
+/// Handled cases: ConstantInt (when the value fits a signed 64-bit integer),
+/// ConstantFP of float or double type, the null pointer constant, and an
+/// inttoptr constant expression whose operand is a ConstantInt.
+///
+/// \returns the expression created through \p DIB, or nullptr when the
+/// constant cannot be described.
+DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
+                                             Type &Ty) {
+  // Create integer constant expression.
+  auto createIntegerExpression = [&DIB](const Constant &CV) -> DIExpression * {
+    const APInt &API = cast<ConstantInt>(&CV)->getValue();
+    std::optional<int64_t> InitIntOpt = API.trySExtValue();
+    return InitIntOpt ? DIB.createConstantValueExpression(
+                            static_cast<uint64_t>(*InitIntOpt))
+                      : nullptr;
+  };
+
+  if (isa<ConstantInt>(C))
+    return createIntegerExpression(C);
+
+  if (Ty.isFloatTy() || Ty.isDoubleTy()) {
+    // A float/double-typed constant is not necessarily a ConstantFP (it can
+    // be e.g. undef or a constant expression), so check the kind instead of
+    // using an unconditional cast<>, and give up on anything else.
+    const auto *CFP = dyn_cast<ConstantFP>(&C);
+    if (!CFP)
+      return nullptr;
+    const APFloat &APF = CFP->getValueAPF();
+    return DIB.createConstantValueExpression(
+        APF.bitcastToAPInt().getZExtValue());
+  }
+
+  if (!Ty.isPointerTy())
+    return nullptr;
+
+  if (isa<ConstantPointerNull>(C))
+    return DIB.createConstantValueExpression(0);
+
+  // A pointer materialised from an integer constant is described by that
+  // integer.
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(&C))
+    if (CE->getOpcode() == Instruction::IntToPtr) {
+      const Value *V = CE->getOperand(0);
+      if (const auto *CI = dyn_cast_or_null<ConstantInt>(V))
+        return createIntegerExpression(*CI);
+    }
+  return nullptr;
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
new file mode 100644
index 000000000000..ea6d952cfa7d
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
@@ -0,0 +1,904 @@
+#include "llvm/Transforms/Utils/LoopConstrainer.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+using namespace llvm;
+
+static const char *ClonedLoopTag = "loop_constrainer.loop.clone";
+
+#define DEBUG_TYPE "loop-constrainer"
+
+/// For a loop whose induction variable decreases, decide whether the bounds of
+/// a new loop can be computed safely with the latch predicate \p Pred.
+static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
+                                  const SCEV *Step, ICmpInst::Predicate Pred,
+                                  unsigned LatchBrExitIdx, Loop *L,
+                                  ScalarEvolution &SE) {
+  // Only the four strict relational predicates are supported.
+  switch (Pred) {
+  case ICmpInst::ICMP_SLT:
+  case ICmpInst::ICMP_SGT:
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_UGT:
+    break;
+  default:
+    return false;
+  }
+
+  if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
+    return false;
+
+  assert(SE.isKnownNegative(Step) && "expecting negative step");
+
+  LLVM_DEBUG({
+    dbgs() << "isSafeDecreasingBound with:\n";
+    dbgs() << "Start: " << *Start << "\n";
+    dbgs() << "Step: " << *Step << "\n";
+    dbgs() << "BoundSCEV: " << *BoundSCEV << "\n";
+    dbgs() << "Pred: " << Pred << "\n";
+    dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n";
+  });
+
+  const bool Signed = ICmpInst::isSigned(Pred);
+  // Predicate used for the loop-entry guard queries below; its signedness
+  // follows that of Pred.
+  const ICmpInst::Predicate GuardPred =
+      Signed ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;
+
+  if (LatchBrExitIdx == 1)
+    return SE.isLoopEntryGuardedByCond(L, GuardPred, Start, BoundSCEV);
+
+  assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1");
+
+  // Lowest = MIN - (Step + 1), where MIN is the smallest value of the bound's
+  // type under the signedness of Pred.
+  const SCEV *StepAndOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
+  const unsigned Bits = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
+  const APInt TypeMin =
+      Signed ? APInt::getSignedMinValue(Bits) : APInt::getMinValue(Bits);
+  const SCEV *Lowest = SE.getMinusSCEV(SE.getConstant(TypeMin), StepAndOne);
+
+  const SCEV *BoundLessOne =
+      SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));
+
+  // Both guards must hold: Start against (Bound - 1), and Bound against Lowest.
+  if (!SE.isLoopEntryGuardedByCond(L, GuardPred, Start, BoundLessOne))
+    return false;
+  return SE.isLoopEntryGuardedByCond(L, GuardPred, BoundSCEV, Lowest);
+}
+
+/// For a loop whose induction variable increases, decide whether the bounds of
+/// a new loop can be computed safely with the latch predicate \p Pred.
+static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
+                                  const SCEV *Step, ICmpInst::Predicate Pred,
+                                  unsigned LatchBrExitIdx, Loop *L,
+                                  ScalarEvolution &SE) {
+  // Only the four strict relational predicates are supported.
+  switch (Pred) {
+  case ICmpInst::ICMP_SLT:
+  case ICmpInst::ICMP_SGT:
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_UGT:
+    break;
+  default:
+    return false;
+  }
+
+  if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
+    return false;
+
+  LLVM_DEBUG({
+    dbgs() << "isSafeIncreasingBound with:\n";
+    dbgs() << "Start: " << *Start << "\n";
+    dbgs() << "Step: " << *Step << "\n";
+    dbgs() << "BoundSCEV: " << *BoundSCEV << "\n";
+    dbgs() << "Pred: " << Pred << "\n";
+    dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n";
+  });
+
+  const bool Signed = ICmpInst::isSigned(Pred);
+  // Predicate used for the loop-entry guard queries below; its signedness
+  // follows that of Pred.
+  const ICmpInst::Predicate GuardPred =
+      Signed ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+
+  if (LatchBrExitIdx == 1)
+    return SE.isLoopEntryGuardedByCond(L, GuardPred, Start, BoundSCEV);
+
+  assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");
+
+  // Highest = MAX - (Step - 1), where MAX is the largest value of the bound's
+  // type under the signedness of Pred.
+  const SCEV *StepLessOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
+  const unsigned Bits = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
+  const APInt TypeMax =
+      Signed ? APInt::getSignedMaxValue(Bits) : APInt::getMaxValue(Bits);
+  const SCEV *Highest = SE.getMinusSCEV(SE.getConstant(TypeMax), StepLessOne);
+
+  // Both guards must hold: Start against (Bound + Step), and Bound against
+  // Highest.
+  const SCEV *BoundPlusStep = SE.getAddExpr(BoundSCEV, Step);
+  if (!SE.isLoopEntryGuardedByCond(L, GuardPred, Start, BoundPlusStep))
+    return false;
+  return SE.isLoopEntryGuardedByCond(L, GuardPred, BoundSCEV, Highest);
+}
+
+/// Estimate the maximum number of times the latch of \p L is taken, using the
+/// narrowest type available. The symbolic-max exit count of the latch block is
+/// preferred; when it cannot be computed, the symbolic-max backedge-taken
+/// count of the whole loop is returned instead (possibly of a wider type than
+/// the latch check, but still better than no estimate).
+static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE,
+                                                          const Loop &L) {
+  const SCEV *LatchCount =
+      SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum);
+  return isa<SCEVCouldNotCompute>(LatchCount)
+             ? SE.getSymbolicMaxBackedgeTakenCount(&L)
+             : LatchCount;
+}
+/// Describe the latch exit of \p L as a LoopStructure; on failure return std::nullopt and set \p FailureReason.
+std::optional<LoopStructure>
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
+ bool AllowUnsignedLatchCond,
+ const char *&FailureReason) {
+ if (!L.isLoopSimplifyForm()) {
+ FailureReason = "loop not in LoopSimplify form";
+ return std::nullopt;
+ }
+
+ BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Simplified loops only have one latch!");
+
+ if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
+ FailureReason = "loop has already been cloned";
+ return std::nullopt;
+ }
+
+ if (!L.isLoopExiting(Latch)) {
+ FailureReason = "no loop latch";
+ return std::nullopt;
+ }
+
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader) {
+ FailureReason = "no preheader";
+ return std::nullopt;
+ }
+
+ BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBr || LatchBr->isUnconditional()) {
+ FailureReason = "latch terminator not conditional branch";
+ return std::nullopt;
+ }
+ // Index of the latch branch successor that leaves the loop (the other successor is the header).
+ unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
+ if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
+ FailureReason = "latch terminator branch not conditional on integral icmp";
+ return std::nullopt;
+ }
+
+ const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L);
+ if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) {
+ FailureReason = "could not compute latch count";
+ return std::nullopt;
+ }
+ assert(SE.getLoopDisposition(MaxBETakenCount, &L) ==
+ ScalarEvolution::LoopInvariant &&
+ "loop variant exit count doesn't make sense!");
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LeftValue = ICI->getOperand(0);
+ const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
+ IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
+
+ Value *RightValue = ICI->getOperand(1);
+ const SCEV *RightSCEV = SE.getSCEV(RightValue);
+
+ // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ std::swap(LeftValue, RightValue);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else {
+ FailureReason = "no add recurrences in the icmp";
+ return std::nullopt;
+ }
+ }
+
+ auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return true;
+
+ IntegerType *Ty = cast<IntegerType>(AR->getType());
+ IntegerType *WideTy =
+ IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+
+ const SCEVAddRecExpr *ExtendAfterOp =
+ dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+ if (ExtendAfterOp) {
+ const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+ const SCEV *ExtendedStep =
+ SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+
+ bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+ ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+ if (NoSignedWrap)
+ return true;
+ }
+
+ // We may have proved this when computing the sign extension above.
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
+ };
+
+ // `ICI` is interpreted as taking the backedge if the *next* value of the
+ // induction variable satisfies some constraint.
+
+ const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
+ if (IndVarBase->getLoop() != &L) {
+ FailureReason = "LHS in cmp is not an AddRec for this loop";
+ return std::nullopt;
+ }
+ if (!IndVarBase->isAffine()) {
+ FailureReason = "LHS in icmp not induction variable";
+ return std::nullopt;
+ }
+ const SCEV *StepRec = IndVarBase->getStepRecurrence(SE);
+ if (!isa<SCEVConstant>(StepRec)) {
+ FailureReason = "LHS in icmp not induction variable";
+ return std::nullopt;
+ }
+ ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue();
+
+ if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) {
+ FailureReason = "LHS in icmp needs nsw for equality predicates";
+ return std::nullopt;
+ }
+
+ assert(!StepCI->isZero() && "Zero step?");
+ bool IsIncreasing = !StepCI->isNegative();
+ bool IsSignedPredicate;
+ const SCEV *StartNext = IndVarBase->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); // StartNext - Step
+ const SCEV *Step = SE.getSCEV(StepCI);
+
+ const SCEV *FixedRightSCEV = nullptr;
+
+ // If RightValue is an instruction that resides within the loop (while still
+ // being loop invariant), re-expand it in the preheader.
+ if (auto *I = dyn_cast<Instruction>(RightValue))
+ if (L.contains(I->getParent()))
+ FixedRightSCEV = RightSCEV;
+
+ if (IsIncreasing) {
+ bool DecreasedRightValueByOne = false;
+ if (StepCI->isOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (++i != len) { while (++i < len) {
+ // ... ---> ...
+ // } }
+ // If both parts are known non-negative, it is profitable to use
+ // unsigned comparison in increasing loop. This allows us to make the
+ // comparison check against "RightSCEV + 1" more optimistic.
+ if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) &&
+ isKnownNonNegativeInLoop(RightSCEV, &L, SE))
+ Pred = ICmpInst::ICMP_ULT;
+ else
+ Pred = ICmpInst::ICMP_SLT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
+ // while (true) { while (true) {
+ // if (++i == len) ---> if (++i > len - 1)
+ // break; break;
+ // ... ...
+ // } }
+ if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
+ cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ false)) {
+ Pred = ICmpInst::ICMP_UGT;
+ RightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ true)) {
+ Pred = ICmpInst::ICMP_SGT;
+ RightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ }
+ }
+ }
+
+ bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
+ bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
+ bool FoundExpectedPred =
+ (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp slt semantically, found something else";
+ return std::nullopt;
+ }
+
+ IsSignedPredicate = ICmpInst::isSigned(Pred);
+ if (!IsSignedPredicate && !AllowUnsignedLatchCond) {
+ FailureReason = "unsigned latch conditions are explicitly prohibited";
+ return std::nullopt;
+ }
+
+ if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
+ LatchBrExitIdx, &L, SE)) {
+ FailureReason = "Unsafe loop bounds";
+ return std::nullopt;
+ }
+ if (LatchBrExitIdx == 0) {
+ // We need to increase the right value unless we have already decreased
+ // it virtually when we replaced EQ with SGT or UGT above.
+ if (!DecreasedRightValueByOne)
+ FixedRightSCEV =
+ SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ } else {
+ assert(!DecreasedRightValueByOne &&
+ "Right value can be decreased only for LatchBrExitIdx == 0!");
+ }
+ } else {
+ bool IncreasedRightValueByOne = false;
+ if (StepCI->isMinusOne()) {
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (--i != len) { while (--i > len) {
+ // ... ---> ...
+ // } }
+ // We intentionally don't turn the predicate into UGT even if we know
+ // that both operands are non-negative, because it will only pessimize
+ // our check against "RightSCEV - 1".
+ Pred = ICmpInst::ICMP_SGT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
+ // while (true) { while (true) {
+ // if (--i == len) ---> if (--i < len + 1)
+ // break; break;
+ // ... ...
+ // } }
+ if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
+ cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
+ Pred = ICmpInst::ICMP_ULT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
+ Pred = ICmpInst::ICMP_SLT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ }
+ }
+ }
+
+ bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
+ bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
+
+ bool FoundExpectedPred =
+ (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp sgt semantically, found something else";
+ return std::nullopt;
+ }
+
+ IsSignedPredicate =
+ Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;
+
+ if (!IsSignedPredicate && !AllowUnsignedLatchCond) {
+ FailureReason = "unsigned latch conditions are explicitly prohibited";
+ return std::nullopt;
+ }
+
+ if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred,
+ LatchBrExitIdx, &L, SE)) {
+ FailureReason = "Unsafe bounds";
+ return std::nullopt;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ // We need to decrease the right value unless we have already increased
+ // it virtually when we replaced EQ with SLT or ULT above.
+ if (!IncreasedRightValueByOne)
+ FixedRightSCEV =
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ } else {
+ assert(!IncreasedRightValueByOne &&
+ "Right value can be increased only for LatchBrExitIdx == 0!");
+ }
+ }
+ BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
+
+ assert(!L.contains(LatchExit) && "expected an exit block!");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ SCEVExpander Expander(SE, DL, "loop-constrainer");
+ Instruction *Ins = Preheader->getTerminator();
+ // Expand the adjusted bound (if any) and the induction variable start before the preheader terminator.
+ if (FixedRightSCEV)
+ RightValue =
+ Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins);
+
+ Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins);
+ IndVarStartV->setName("indvar.start");
+
+ LoopStructure Result;
+
+ Result.Tag = "main";
+ Result.Header = Header;
+ Result.Latch = Latch;
+ Result.LatchBr = LatchBr;
+ Result.LatchExit = LatchExit;
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarStart = IndVarStartV;
+ Result.IndVarStep = StepCI;
+ Result.IndVarBase = LeftValue;
+ Result.IndVarIncreasing = IsIncreasing;
+ Result.LoopExitAt = RightValue;
+ Result.IsSignedPredicate = IsSignedPredicate;
+ Result.ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType());
+
+ FailureReason = nullptr; // success: no failure to report
+
+ return Result;
+}
+
+// Replace the loop ID of L with one that disables unrolling, vectorization,
+// LICM versioning and distribution. Callers must have established that
+// optimizing L is not beneficial.
+static void DisableAllLoopOptsOnLoop(Loop &L) {
+  // Whatever loop ID metadata L already carries is ignored and overwritten.
+  LLVMContext &Ctx = L.getHeader()->getContext();
+
+  // Hint made of a single string operand.
+  auto StringHint = [&Ctx](const char *Name) -> MDNode * {
+    return MDNode::get(Ctx, {MDString::get(Ctx, Name)});
+  };
+
+  // Placeholder for operand 0, which is patched to reference the loop ID
+  // itself once the node exists.
+  MDNode *SelfSlot = MDNode::get(Ctx, {});
+  MDNode *NoUnroll = StringHint("llvm.loop.unroll.disable");
+
+  Metadata *I1False =
+      ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Ctx), 0));
+  // Hint made of an "...enable" string paired with i1 false.
+  auto EnableFalseHint = [&Ctx, I1False](const char *Name) -> MDNode * {
+    return MDNode::get(Ctx, {MDString::get(Ctx, Name), I1False});
+  };
+
+  MDNode *NoVectorize = EnableFalseHint("llvm.loop.vectorize.enable");
+  MDNode *NoLICMVersioning = StringHint("llvm.loop.licm_versioning.disable");
+  MDNode *NoDistribute = EnableFalseHint("llvm.loop.distribute.enable");
+
+  MDNode *LoopID = MDNode::get(Ctx, {SelfSlot, NoUnroll, NoVectorize,
+                                     NoLICMVersioning, NoDistribute});
+  LoopID->replaceOperandWith(0, LoopID);
+  L.setLoopID(LoopID);
+}
+
+/// Record the loop to constrain, the analyses to update, the callback used to
+/// announce new loops, the parsed main loop structure \p LS, the range type
+/// \p T and the sub-ranges \p SR. The function and context are taken from the
+/// header of \p L. Nothing is transformed here.
+LoopConstrainer::LoopConstrainer(Loop &L, LoopInfo &LI,
+                                 function_ref<void(Loop *, bool)> LPMAddNewLoop,
+                                 const LoopStructure &LS, ScalarEvolution &SE,
+                                 DominatorTree &DT, Type *T, SubRanges SR)
+    : F(*L.getHeader()->getParent()),
+      Ctx(L.getHeader()->getContext()),
+      SE(SE),
+      DT(DT),
+      LI(LI),
+      LPMAddNewLoop(LPMAddNewLoop),
+      OriginalLoop(L),
+      RangeTy(T),
+      MainLoopStructure(LS),
+      SR(SR) {
+}
+/// Clone every block of OriginalLoop into F (name suffix "." + Tag) and fill \p Result with blocks, map and structure.
+void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
+ const char *Tag) const {
+ for (BasicBlock *BB : OriginalLoop.getBlocks()) {
+ BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
+ Result.Blocks.push_back(Clone);
+ Result.Map[BB] = Clone;
+ }
+
+ auto GetClonedValue = [&Result](Value *V) { // values without a clone map to themselves
+ assert(V && "null values not in domain!");
+ auto It = Result.Map.find(V);
+ if (It == Result.Map.end())
+ return V;
+ return static_cast<Value *>(It->second);
+ };
+ // Tag the cloned latch; parseLoopStructure refuses loops whose latch terminator carries this tag.
+ auto *ClonedLatch =
+ cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
+ ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
+ MDNode::get(Ctx, {}));
+
+ Result.Structure = MainLoopStructure.map(GetClonedValue);
+ Result.Structure.Tag = Tag;
+
+ for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
+ BasicBlock *ClonedBB = Result.Blocks[i];
+ BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
+
+ assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
+ // Point the cloned instructions at cloned operands; operands without a mapping are left as they are.
+ for (Instruction &I : *ClonedBB)
+ RemapInstruction(&I, Result.Map,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Exit blocks will now have one more predecessor and their PHI nodes need
+ // to be edited to reflect that. No phi nodes need to be introduced because
+ // the loop is in LCSSA.
+
+ for (auto *SBB : successors(OriginalBB)) {
+ if (OriginalLoop.contains(SBB))
+ continue; // not an exit block
+
+ for (PHINode &PN : SBB->phis()) {
+ Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
+ PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+ SE.forgetValue(&PN); // the PHI gained an incoming value, so drop what SE cached for it
+ }
+ }
+ }
+}
+/// Make \p LS stop iterating at \p ExitSubloopAt and route the leftover iterations to \p ContinuationBlock.
+LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
+ const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
+ BasicBlock *ContinuationBlock) const {
+ // We start with a loop with a single latch:
+ //
+ // +--------------------+
+ // | |
+ // | preheader |
+ // | |
+ // +--------+-----------+
+ // | ----------------\
+ // | / |
+ // +--------v----v------+ |
+ // | | |
+ // | header | |
+ // | | |
+ // +--------------------+ |
+ // |
+ // ..... |
+ // |
+ // +--------------------+ |
+ // | | |
+ // | latch >----------/
+ // | |
+ // +-------v------------+
+ // |
+ // |
+ // | +--------------------+
+ // | | |
+ // +---> original exit |
+ // | |
+ // +--------------------+
+ //
+ // We change the control flow to look like
+ //
+ //
+ // +--------------------+
+ // | |
+ // | preheader >-------------------------+
+ // | | |
+ // +--------v-----------+ |
+ // | /-------------+ |
+ // | / | |
+ // +--------v--v--------+ | |
+ // | | | |
+ // | header | | +--------+ |
+ // | | | | | |
+ // +--------------------+ | | +-----v-----v-----------+
+ // | | | |
+ // | | | .pseudo.exit |
+ // | | | |
+ // | | +-----------v-----------+
+ // | | |
+ // ..... | | |
+ // | | +--------v-------------+
+ // +--------------------+ | | | |
+ // | | | | | ContinuationBlock |
+ // | latch >------+ | | |
+ // | | | +----------------------+
+ // +---------v----------+ |
+ // | |
+ // | |
+ // | +---------------^-----+
+ // | | |
+ // +-----> .exit.selector |
+ // | |
+ // +----------v----------+
+ // |
+ // +--------------------+ |
+ // | | |
+ // | original exit <----+
+ // | |
+ // +--------------------+
+
+ RewrittenRangeInfo RRI;
+ // Both new blocks are inserted right after the latch in the function's block list.
+ BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
+ RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
+ &F, BBInsertLocation);
+ RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
+ BBInsertLocation);
+
+ BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
+ bool Increasing = LS.IndVarIncreasing;
+ bool IsSignedPredicate = LS.IsSignedPredicate;
+
+ IRBuilder<> B(PreheaderJump);
+ auto NoopOrExt = [&](Value *V) { // extend V to RangeTy (signedness of the latch predicate) if needed
+ if (V->getType() == RangeTy)
+ return V;
+ return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+ : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+ };
+
+ // EnterLoopCond - is it okay to start executing this `LS'?
+ Value *EnterLoopCond = nullptr;
+ auto Pred =
+ Increasing
+ ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
+ : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ Value *IndVarStart = NoopOrExt(LS.IndVarStart);
+ EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
+
+ B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
+ PreheaderJump->eraseFromParent();
+ // The latch now exits to the exit selector and takes the backedge only while Pred(IV, ExitSubloopAt) holds.
+ LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+ B.SetInsertPoint(LS.LatchBr);
+ Value *IndVarBase = NoopOrExt(LS.IndVarBase);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
+
+ Value *CondForBranch = LS.LatchBrExitIdx == 1
+ ? TakeBackedgeLoopCond
+ : B.CreateNot(TakeBackedgeLoopCond);
+
+ LS.LatchBr->setCondition(CondForBranch);
+
+ B.SetInsertPoint(RRI.ExitSelector);
+
+ // IterationsLeft - are there any more iterations left, given the original
+ // upper bound on the induction variable? If not, we branch to the "real"
+ // exit.
+ Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
+ B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
+
+ BranchInst *BranchToContinuation =
+ BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
+
+ // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
+ // each of the PHI nodes in the loop header. This feeds into the initial
+ // value of the same PHI nodes if/when we continue execution.
+ for (PHINode &PN : LS.Header->phis()) {
+ PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
+ BranchToContinuation);
+
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
+ RRI.ExitSelector);
+ RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
+ }
+ // IndVarEnd: the IV start when the loop was skipped, otherwise the IV value seen by the exit selector.
+ RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
+ BranchToContinuation);
+ RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
+
+ // The latch exit now has a branch from `RRI.ExitSelector' instead of
+ // `LS.Latch'. The PHI nodes need to be updated to reflect that.
+ LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
+
+ return RRI;
+}
+
+/// Feed the header PHIs of \p LS from the values recorded in \p RRI: the
+/// incoming value for \p ContinuationBlock of the i-th header PHI becomes the
+/// i-th entry of RRI.PHIValuesAtPseudoExit, and the start of the induction
+/// variable of \p LS becomes RRI.IndVarEnd.
+void LoopConstrainer::rewriteIncomingValuesForPHIs(
+    LoopStructure &LS, BasicBlock *ContinuationBlock,
+    const LoopConstrainer::RewrittenRangeInfo &RRI) const {
+  unsigned Slot = 0;
+  for (PHINode &HeaderPHI : LS.Header->phis()) {
+    auto *ValueAtPseudoExit = RRI.PHIValuesAtPseudoExit[Slot];
+    HeaderPHI.setIncomingValueForBlock(ContinuationBlock, ValueAtPseudoExit);
+    ++Slot;
+  }
+
+  LS.IndVarStart = RRI.IndVarEnd;
+}
+
+/// Create a block named \p Tag immediately before the header of \p LS that
+/// branches unconditionally to that header, and make the header PHIs refer to
+/// it instead of \p OldPreheader.
+/// \returns the newly created block.
+BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
+                                             BasicBlock *OldPreheader,
+                                             const char *Tag) const {
+  BasicBlock *Header = LS.Header;
+
+  BasicBlock *NewPreheader = BasicBlock::Create(Ctx, Tag, &F, Header);
+  BranchInst::Create(Header, NewPreheader);
+
+  // Only PHI uses are rewritten here; the terminator of OldPreheader is left
+  // untouched.
+  Header->replacePhiUsesWith(OldPreheader, NewPreheader);
+  return NewPreheader;
+}
+
+/// Register each block of \p BBs with the parent loop of OriginalLoop. Does
+/// nothing when OriginalLoop has no parent loop.
+void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
+  if (Loop *Enclosing = OriginalLoop.getParentLoop()) {
+    for (BasicBlock *NewBB : BBs)
+      Enclosing->addBasicBlockToLoop(NewBB, LI);
+  }
+}
+
+/// Recreate in LoopInfo the loop nest rooted at \p Original for its clone.
+/// The new loop becomes a child of \p Parent, or a top-level loop when
+/// \p Parent is null; \p VM maps original blocks to their clones.
+/// \p IsSubloop is forwarded to the LPMAddNewLoop callback.
+/// \returns the new loop.
+Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
+                                                 ValueToValueMapTy &VM,
+                                                 bool IsSubloop) {
+  Loop *Clone = LI.AllocateLoop();
+  if (!Parent)
+    LI.addTopLevelLoop(Clone);
+  else
+    Parent->addChildLoop(Clone);
+  LPMAddNewLoop(Clone, IsSubloop);
+
+  // Only blocks whose innermost loop is Original are added at this level;
+  // blocks of nested loops are handled by the recursive calls below.
+  for (BasicBlock *OrigBB : Original->blocks()) {
+    if (LI.getLoopFor(OrigBB) != Original)
+      continue;
+    Clone->addBasicBlockToLoop(cast<BasicBlock>(VM[OrigBB]), LI);
+  }
+
+  // Mirror each subloop underneath the new loop.
+  for (Loop *Inner : *Original)
+    createClonedLoopStructure(Inner, Clone, VM, /* IsSubloop */ true);
+
+  return Clone;
+}
+/// Split OriginalLoop into optional pre/main/post loops per SR; returns false if an exit limit cannot be computed or expanded.
+bool LoopConstrainer::run() {
+ BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
+ assert(Preheader != nullptr && "precondition!");
+
+ OriginalPreheader = Preheader;
+ MainLoopPreheader = Preheader;
+ bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+ IntegerType *IVTy = cast<IntegerType>(RangeTy);
+
+ SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "loop-constrainer");
+ Instruction *InsertPt = OriginalPreheader->getTerminator();
+
+ // It would have been better to make `PreLoop' and `PostLoop'
+ // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
+ // constructor.
+ ClonedLoop PreLoop, PostLoop;
+ bool NeedsPreLoop =
+ Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
+ bool NeedsPostLoop =
+ Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
+
+ Value *ExitPreLoopAt = nullptr;
+ Value *ExitMainLoopAt = nullptr;
+ const SCEVConstant *MinusOneS =
+ cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
+
+ if (NeedsPreLoop) {
+ const SCEV *ExitPreLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitPreLoopAtSCEV = *SR.LowLimit;
+ else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); // HighLimit - 1
+ else {
+ LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = "
+ << *(*SR.HighLimit) << "\n");
+ return false;
+ }
+
+ if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
+ LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the"
+ << " preloop exit limit " << *ExitPreLoopAtSCEV
+ << " at block " << InsertPt->getParent()->getName()
+ << "\n");
+ return false;
+ }
+
+ ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
+ ExitPreLoopAt->setName("exit.preloop.at");
+ }
+
+ if (NeedsPostLoop) {
+ const SCEV *ExitMainLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitMainLoopAtSCEV = *SR.HighLimit;
+ else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); // LowLimit - 1
+ else {
+ LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = "
+ << *(*SR.LowLimit) << "\n");
+ return false;
+ }
+
+ if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
+ LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the"
+ << " main loop exit limit " << *ExitMainLoopAtSCEV
+ << " at block " << InsertPt->getParent()->getName()
+ << "\n");
+ return false;
+ }
+
+ ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
+ ExitMainLoopAt->setName("exit.mainloop.at");
+ }
+
+ // We clone these ahead of time so that we don't have to deal with changing
+ // and temporarily invalid IR as we transform the loops.
+ if (NeedsPreLoop)
+ cloneLoop(PreLoop, "preloop");
+ if (NeedsPostLoop)
+ cloneLoop(PostLoop, "postloop");
+ // The original preheader now enters the pre loop; the main loop gets a new preheader as continuation.
+ RewrittenRangeInfo PreLoopRRI;
+
+ if (NeedsPreLoop) {
+ Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
+ PreLoop.Structure.Header);
+
+ MainLoopPreheader =
+ createPreheader(MainLoopStructure, Preheader, "mainloop");
+ PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+ ExitPreLoopAt, MainLoopPreheader);
+ rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
+ PreLoopRRI);
+ }
+
+ BasicBlock *PostLoopPreheader = nullptr;
+ RewrittenRangeInfo PostLoopRRI;
+
+ if (NeedsPostLoop) {
+ PostLoopPreheader =
+ createPreheader(PostLoop.Structure, Preheader, "postloop");
+ PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
+ ExitMainLoopAt, PostLoopPreheader);
+ rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
+ PostLoopRRI);
+ }
+
+ BasicBlock *NewMainLoopPreheader =
+ MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
+ BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
+ PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
+ PostLoopRRI.ExitSelector, NewMainLoopPreheader};
+
+ // Some of the above may be nullptr, filter them out before passing to
+ // addToParentLoopIfNeeded.
+ auto NewBlocksEnd =
+ std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
+
+ addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd));
+
+ DT.recalculate(F); // the CFG changed above, so rebuild the dominator tree for F
+
+ // We need to first add all the pre and post loop blocks into the loop
+ // structures (as part of createClonedLoopStructure), and then update the
+ // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
+ // LI when LoopSimplifyForm is generated.
+ Loop *PreL = nullptr, *PostL = nullptr;
+ if (!PreLoop.Blocks.empty()) {
+ PreL = createClonedLoopStructure(&OriginalLoop,
+ OriginalLoop.getParentLoop(), PreLoop.Map,
+ /* IsSubLoop */ false);
+ }
+
+ if (!PostLoop.Blocks.empty()) {
+ PostL =
+ createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
+ PostLoop.Map, /* IsSubLoop */ false);
+ }
+
+ // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
+ auto CanonicalizeLoop = [&](Loop *L, bool IsOriginalLoop) {
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
+ // Pre/post loops are slow paths, we do not need to perform any loop
+ // optimizations on them.
+ if (!IsOriginalLoop)
+ DisableAllLoopOptsOnLoop(*L);
+ };
+ if (PreL)
+ CanonicalizeLoop(PreL, false);
+ if (PostL)
+ CanonicalizeLoop(PostL, false);
+ CanonicalizeLoop(&OriginalLoop, true);
+
+ /// At this point:
+ /// - We've broken a "main loop" out of the loop in a way that the "main loop"
+ /// runs with the induction variable in a subset of [Begin, End).
+ /// - There is no overflow when computing "main loop" exit limit.
+ /// - Max latch taken count of the loop is limited.
+ /// It guarantees that induction variable will not overflow iterating in the
+ /// "main loop".
+ if (isa<OverflowingBinaryOperator>(MainLoopStructure.IndVarBase))
+ if (IsSignedPredicate)
+ cast<BinaryOperator>(MainLoopStructure.IndVarBase)
+ ->setHasNoSignedWrap(true);
+ /// TODO: support unsigned predicate.
+ /// To add NUW flag we need to prove that both operands of BO are
+ /// non-negative. E.g:
+ /// ...
+ /// %iv.next = add nsw i32 %iv, -1
+ /// %cmp = icmp ult i32 %iv.next, %n
+ /// br i1 %cmp, label %loopexit, label %loop
+ ///
+ /// -1 is MAX_UINT in terms of unsigned int. Adding anything but zero will
+ /// overflow, therefore NUW flag is not legal here.
+
+ return true;
+}
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index d701cf110154..f76fa3bb6c61 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -351,11 +351,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
MaxPeelCount =
std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
- auto ComputePeelCount = [&](Value *Condition) -> void {
- if (!Condition->getType()->isIntegerTy())
+ const unsigned MaxDepth = 4;
+ std::function<void(Value *, unsigned)> ComputePeelCount =
+ [&](Value *Condition, unsigned Depth) -> void {
+ if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
return;
Value *LeftVal, *RightVal;
+ if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
+ match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
+ ComputePeelCount(LeftVal, Depth + 1);
+ ComputePeelCount(RightVal, Depth + 1);
+ return;
+ }
+
CmpInst::Predicate Pred;
if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
return;
@@ -443,7 +452,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
for (BasicBlock *BB : L.blocks()) {
for (Instruction &I : *BB) {
if (SelectInst *SI = dyn_cast<SelectInst>(&I))
- ComputePeelCount(SI->getCondition());
+ ComputePeelCount(SI->getCondition(), 0);
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
@@ -454,7 +463,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
if (L.getLoopLatch() == BB)
continue;
- ComputePeelCount(BI->getCondition());
+ ComputePeelCount(BI->getCondition(), 0);
}
return DesiredPeelCount;
@@ -624,21 +633,24 @@ struct WeightInfo {
/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
/// go to exit.
/// Then, Estimated ExitCount = F / E.
-/// For I-th (counting from 0) peeled off iteration we set the the weights for
+/// For I-th (counting from 0) peeled off iteration we set the weights for
/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
/// The probability to go to exit 1/(EC-I) increases. At the same time
/// the estimated exit count in the remainder loop reduces by I.
/// To avoid dealing with division rounding we can just multiple both part
/// of weights to E and use weight as (F - I * E, E).
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
- MDBuilder MDB(Term->getContext());
- Term->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Info.Weights));
+ setBranchWeights(*Term, Info.Weights);
for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
if (SubWeight != 0)
- Info.Weights[Idx] = Info.Weights[Idx] > SubWeight
- ? Info.Weights[Idx] - SubWeight
- : 1;
+ // Don't set the probability of taking the edge from latch to loop header
+ // to less than 1:1 ratio (meaning Weight should not be lower than
+ // SubWeight), as this could significantly reduce the loop's hotness,
+ // which would be incorrect in the case of underestimating the trip count.
+ Info.Weights[Idx] =
+ Info.Weights[Idx] > SubWeight
+ ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
+ : SubWeight;
}
/// Initialize the weights for all exiting blocks.
@@ -685,14 +697,6 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
}
}
-/// Update the weights of original exiting block after peeling off all
-/// iterations.
-static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) {
- MDBuilder MDB(Term->getContext());
- Term->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Info.Weights));
-}
-
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
@@ -1028,8 +1032,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- for (const auto &[Term, Info] : Weights)
- fixupBranchWeights(Term, Info);
+ for (const auto &[Term, Info] : Weights) {
+ setBranchWeights(*Term, Info.Weights);
+ }
// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index d81db5647c60..76280ed492b3 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -25,6 +25,8 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +52,9 @@ static cl::opt<bool>
cl::desc("Allow loop rotation multiple times in order to reach "
"a better latch exit"));
+// Probability that a rotated loop has zero trip count / is never entered.
+static constexpr uint32_t ZeroTripCountWeights[] = {1, 127};
+
namespace {
/// A simple loop rotation transformation.
class LoopRotate {
@@ -154,7 +159,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
// intrinsics.
SmallVector<DbgValueInst *, 1> DbgValues;
- llvm::findDbgValues(DbgValues, OrigHeaderVal);
+ SmallVector<DPValue *, 1> DPValues;
+ llvm::findDbgValues(DbgValues, OrigHeaderVal, &DPValues);
for (auto &DbgValue : DbgValues) {
// The original users in the OrigHeader are already using the original
// definitions.
@@ -175,6 +181,29 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
NewVal = UndefValue::get(OrigHeaderVal->getType());
DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
}
+
+ // RemoveDIs: duplicate implementation for non-instruction debug-info
+ // storage in DPValues.
+ for (DPValue *DPV : DPValues) {
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = DPV->getMarker()->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped and anything else can be handled by
+ // the SSAUpdater. To avoid adding PHINodes, check if the value is
+ // available in UserBB, if not substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ DPV->replaceVariableLocationOp(OrigHeaderVal, NewVal);
+ }
}
}
@@ -244,6 +273,123 @@ static bool canRotateDeoptimizingLatchExit(Loop *L) {
return false;
}
+static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
+ bool HasConditionalPreHeader,
+ bool SuccsSwapped) {
+ MDNode *WeightMD = getBranchWeightMDNode(PreHeaderBI);
+ if (WeightMD == nullptr)
+ return;
+
+ // LoopBI should currently be a clone of PreHeaderBI with the same
+ // metadata. But we double check to make sure we don't have a degenerate case
+ // where instsimplify changed the instructions.
+ if (WeightMD != getBranchWeightMDNode(LoopBI))
+ return;
+
+ SmallVector<uint32_t, 2> Weights;
+ extractFromBranchWeightMD(WeightMD, Weights);
+ if (Weights.size() != 2)
+ return;
+ uint32_t OrigLoopExitWeight = Weights[0];
+ uint32_t OrigLoopBackedgeWeight = Weights[1];
+
+ if (SuccsSwapped)
+ std::swap(OrigLoopExitWeight, OrigLoopBackedgeWeight);
+
+ // Update branch weights. Consider the following edge-counts:
+ //
+ // | |-------- |
+ // V V | V
+ // Br i1 ... | Br i1 ...
+ // | | | | |
+ // x| y| | becomes: | y0| |-----
+ // V V | | V V |
+ // Exit Loop | | Loop |
+ // | | | Br i1 ... |
+ // ----- | | | |
+ // x0| x1| y1 | |
+ // V V ----
+ // Exit
+ //
+ // The following must hold:
+ // - x == x0 + x1 # counts to "exit" must stay the same.
+ // - y0 == x - x0 == x1 # how often loop was entered at all.
+ // - y1 == y - y0 # How often loop was repeated (after first iter.).
+ //
+ // We cannot generally deduce how often we had a zero-trip count loop so we
+ // have to make a guess for how to distribute x among the new x0 and x1.
+
+ uint32_t ExitWeight0; // aka x0
+ uint32_t ExitWeight1; // aka x1
+ uint32_t EnterWeight; // aka y0
+ uint32_t LoopBackWeight; // aka y1
+ if (OrigLoopExitWeight > 0 && OrigLoopBackedgeWeight > 0) {
+ ExitWeight0 = 0;
+ if (HasConditionalPreHeader) {
+ // Here we cannot know how many 0-trip count loops we have, so we guess:
+ if (OrigLoopBackedgeWeight >= OrigLoopExitWeight) {
+ // If the loop count is bigger than the exit count then we set
+ // probabilities as if 0-trip count nearly never happens.
+ ExitWeight0 = ZeroTripCountWeights[0];
+ // Scale up counts if necessary so we can match `ZeroTripCountWeights`
+ // for the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1`) ratio.
+ while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) {
+ // ... but don't overflow.
+ uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1);
+ if ((OrigLoopBackedgeWeight & HighBit) != 0 ||
+ (OrigLoopExitWeight & HighBit) != 0)
+ break;
+ OrigLoopBackedgeWeight <<= 1;
+ OrigLoopExitWeight <<= 1;
+ }
+ } else {
+ // If there's a higher exit-count than backedge-count then we set
+ // probabilities as if there are only 0-trip and 1-trip cases.
+ ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
+ }
+ }
+ ExitWeight1 = OrigLoopExitWeight - ExitWeight0;
+ EnterWeight = ExitWeight1;
+ LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight;
+ } else if (OrigLoopExitWeight == 0) {
+ if (OrigLoopBackedgeWeight == 0) {
+ // degenerate case... keep everything zero...
+ ExitWeight0 = 0;
+ ExitWeight1 = 0;
+ EnterWeight = 0;
+ LoopBackWeight = 0;
+ } else {
+ // Special case "LoopExitWeight == 0" weights which behave like an
+ // endless loop where we don't want loop-entry (y0) to be the same as
+ // loop-exit (x1).
+ ExitWeight0 = 0;
+ ExitWeight1 = 0;
+ EnterWeight = 1;
+ LoopBackWeight = OrigLoopBackedgeWeight;
+ }
+ } else {
+ // loop is never entered.
+ assert(OrigLoopBackedgeWeight == 0 && "remaining case is backedge zero");
+ ExitWeight0 = 1;
+ ExitWeight1 = 1;
+ EnterWeight = 0;
+ LoopBackWeight = 0;
+ }
+
+ const uint32_t LoopBIWeights[] = {
+ SuccsSwapped ? LoopBackWeight : ExitWeight1,
+ SuccsSwapped ? ExitWeight1 : LoopBackWeight,
+ };
+ setBranchWeights(LoopBI, LoopBIWeights);
+ if (HasConditionalPreHeader) {
+ const uint32_t PreHeaderBIWeights[] = {
+ SuccsSwapped ? EnterWeight : ExitWeight0,
+ SuccsSwapped ? ExitWeight0 : EnterWeight,
+ };
+ setBranchWeights(PreHeaderBI, PreHeaderBIWeights);
+ }
+}
+
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -363,7 +509,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// loop. Otherwise loop is not suitable for rotation.
BasicBlock *Exit = BI->getSuccessor(0);
BasicBlock *NewHeader = BI->getSuccessor(1);
- if (L->contains(Exit))
+ bool BISuccsSwapped = L->contains(Exit);
+ if (BISuccsSwapped)
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
assert(L->contains(NewHeader) && !L->contains(Exit) &&
@@ -394,20 +541,32 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// duplication.
using DbgIntrinsicHash =
std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
+ auto makeHash = [](auto *D) -> DbgIntrinsicHash {
auto VarLocOps = D->location_ops();
return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
D->getVariable()},
D->getExpression()};
};
+
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
DbgIntrinsics.insert(makeHash(DII));
- else
+ // Until RemoveDIs supports dbg.declares in DPValue format, we'll need
+ // to collect DPValues attached to any other debug intrinsics.
+ for (const DPValue &DPV : DII->getDbgValueRange())
+ DbgIntrinsics.insert(makeHash(&DPV));
+ } else {
break;
+ }
}
+ // Build DPValue hashes for DPValues attached to the terminator, which isn't
+ // considered in the loop above.
+ for (const DPValue &DPV :
+ OrigPreheader->getTerminator()->getDbgValueRange())
+ DbgIntrinsics.insert(makeHash(&DPV));
+
// Remember the local noalias scope declarations in the header. After the
// rotation, they must be duplicated and the scope must be cloned. This
// avoids unwanted interaction across iterations.
@@ -416,6 +575,29 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
NoAliasDeclInstructions.push_back(Decl);
+ Module *M = OrigHeader->getModule();
+
+ // Track the next DPValue to clone. If we have a sequence where an
+ // instruction is hoisted instead of being cloned:
+ // DPValue blah
+ // %foo = add i32 0, 0
+ // DPValue xyzzy
+ // %bar = call i32 @foobar()
+ // where %foo is hoisted, then the DPValue "blah" will be seen twice, once
+ // attached to %foo, then when %foo is hoisted it will "fall down" onto the
+ // function call:
+ // DPValue blah
+ // DPValue xyzzy
+ // %bar = call i32 @foobar()
+ // causing it to appear attached to the call too.
+ //
+ // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from
+ // here" position to account for this behaviour. We point it at any DPValues
+ // on the next instruction, here labelled xyzzy, before we hoist %foo.
+ // Later, we only clone DPValues from that position (xyzzy) onwards,
+ // which avoids cloning DPValue "blah" multiple times.
+ std::optional<DPValue::self_iterator> NextDbgInst = std::nullopt;
+
while (I != E) {
Instruction *Inst = &*I++;
@@ -428,7 +610,21 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
!Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+
+ if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) {
+ auto DbgValueRange =
+ LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInst);
+ RemapDPValueRange(M, DbgValueRange, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ // Erase anything we've seen before.
+ for (DPValue &DPV : make_early_inc_range(DbgValueRange))
+ if (DbgIntrinsics.count(makeHash(&DPV)))
+ DPV.eraseFromParent();
+ }
+
+ NextDbgInst = I->getDbgValueRange().begin();
Inst->moveBefore(LoopEntryBranch);
+
++NumInstrsHoisted;
continue;
}
@@ -439,6 +635,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
++NumInstrsDuplicated;
+ if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat) {
+ auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInst);
+ RemapDPValueRange(M, Range, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NextDbgInst = std::nullopt;
+ // Erase anything we've seen before.
+ for (DPValue &DPV : make_early_inc_range(Range))
+ if (DbgIntrinsics.count(makeHash(&DPV)))
+ DPV.eraseFromParent();
+ }
+
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
@@ -553,6 +760,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// OrigPreHeader's old terminator (the original branch into the loop), and
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+ OrigPreheader->flushTerminatorDbgValues();
// Update MemorySSA before the rewrite call below changes the 1:1
// instruction:cloned_instruction_or_value mapping.
@@ -605,9 +813,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// to split as many edges.
BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
assert(PHBI->isConditional() && "Should be clone of BI condbr!");
- if (!isa<ConstantInt>(PHBI->getCondition()) ||
- PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
- NewHeader) {
+ const Value *Cond = PHBI->getCondition();
+ const bool HasConditionalPreHeader =
+ !isa<ConstantInt>(Cond) ||
+ PHBI->getSuccessor(cast<ConstantInt>(Cond)->isZero()) != NewHeader;
+
+ updateBranchWeights(*PHBI, *BI, HasConditionalPreHeader, BISuccsSwapped);
+
+ if (HasConditionalPreHeader) {
// The conditional branch can't be folded, handle the general case.
// Split edges as necessary to preserve LoopSimplify form.
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 3e604fdf2e11..07e622b1577f 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -429,8 +429,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
}
// Nuke all entries except the zero'th.
- for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
- PN->removeIncomingValue(e-i, false);
+ PN->removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
+ /* DeletePHIIfEmpty */ false);
// Finally, add the newly constructed PHI node as the entry for the BEBlock.
PN->addIncoming(NewPN, BEBlock);
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 511dd61308f9..ee6f7b35750a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -838,7 +837,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
DTUToUse ? nullptr : DT)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- llvm::erase_value(UnrolledLoopBlocks, Dest);
+ llvm::erase(UnrolledLoopBlocks, Dest);
}
}
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 31b8cd34eb24..3c06a6e47a30 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 1e22eca30d2d..612f69970881 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -56,6 +56,17 @@ static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
"unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
cl::desc("Assume the non latch exit block to be predictable"));
+// Probability that the loop trip count is so small that after the prolog
+// we do not enter the unrolled loop at all.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
+// Probability that the loop trip count is so small that we skip the unrolled
+// loop completely and immediately enter the epilogue loop.
+// It is unlikely that the loop trip count is smaller than the unroll factor;
+// other than that, the choice of constant is not tuned yet.
+static const uint32_t EpilogHeaderWeights[] = {1, 127};
+
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
@@ -105,8 +116,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// PrologLatch. When supporting multiple-exiting block loops, we can have
// two or more blocks that have the LatchExit as the target in the
// original loop.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- PrologExit->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
+ NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(&PN)) {
@@ -169,7 +180,14 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
nullptr, PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume loop is nearly always entered.
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights);
+ }
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
+ BranchWeights);
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
@@ -194,8 +212,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *Exit, BasicBlock *PreHeader,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA,
- ScalarEvolution &SE) {
+ LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
+ unsigned Count) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -269,8 +287,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
for (PHINode &PN : Succ->phis()) {
// Add new PHI nodes to the loop exit block and update epilog
// PHIs with the new PHI values.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- NewExit->getFirstNonPHI());
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
+ NewPN->insertBefore(NewExit->getFirstNonPHIIt());
// Adding a value to the new PHI node from the unrolling loop preheader.
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
// Adding a value to the new PHI node from the unrolling loop latch.
@@ -292,7 +310,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
- B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume equal distribution in interval [0, Count).
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(1, Count - 1);
+ }
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -316,8 +340,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
const bool UnrollRemainder,
BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
- ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
+ DominatorTree *DT, LoopInfo *LI, unsigned Count) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -363,14 +388,34 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
+ PHINode *NewIdx =
+ PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
+ NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
auto *One = ConstantInt::get(NewIdx->getType(), 1);
- Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ Value *IdxNext =
+ Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*LatchBR)) {
+ uint32_t ExitWeight;
+ uint32_t BackEdgeWeight;
+ if (Count >= 3) {
+ // Note: We do not enter this loop for zero-remainders. The check
+ // is at the end of the loop. We assume equal distribution between
+ // possible remainders in [1, Count).
+ ExitWeight = 1;
+ BackEdgeWeight = (Count - 2) / 2;
+ } else {
+ // Unnecessary backedge, should never be taken. The conditional
+ // jump should be optimized away later.
+ ExitWeight = 1;
+ BackEdgeWeight = 0;
+ }
+ MDBuilder MDB(Builder.getContext());
+ BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ }
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
NewIdx->addIncoming(Zero, InsertTop);
NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
@@ -464,32 +509,6 @@ static bool canProfitablyUnrollMultiExitLoop(
// know of kinds of multiexit loops that would benefit from unrolling.
}
-// Assign the maximum possible trip count as the back edge weight for the
-// remainder loop if the original loop comes with a branch weight.
-static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
- Loop *RemainderLoop,
- uint64_t UnrollFactor) {
- uint64_t TrueWeight, FalseWeight;
- BranchInst *LatchBR =
- cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
- if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight))
- return;
- uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
- ? FalseWeight
- : TrueWeight;
- assert(UnrollFactor > 1);
- uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
- BasicBlock *Header = RemainderLoop->getHeader();
- BasicBlock *Latch = RemainderLoop->getLoopLatch();
- auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
- unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(RemainderLatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-}
-
/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
/// accounting for the possibility of unsigned overflow in the 2s complement
/// domain. Preconditions:
@@ -775,7 +794,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
- B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
+ MDNode *BranchWeights = nullptr;
+ if (hasBranchWeightMD(*Latch->getTerminator())) {
+ // Assume loop is nearly always entered.
+ MDBuilder MDB(B.getContext());
+ BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
+ }
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
@@ -804,12 +829,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
-
- // Assign the maximum possible trip count as the back edge weight for the
- // remainder loop if the original loop comes with a branch weight.
- if (remainderLoop && !UnrollRemainder)
- updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
// Insert the cloned blocks into the function.
F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -893,9 +913,12 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
for (BasicBlock *BB : NewBlocks) {
+ Module *M = BB->getModule();
for (Instruction &I : *BB) {
RemapInstruction(&I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ RemapDPValueRange(M, I.getDbgValueRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
}
@@ -903,7 +926,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
@@ -912,8 +935,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
IRBuilder<> B2(NewPreHeader->getTerminator());
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
- Header->getFirstNonPHI());
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
+ NewIdx->insertBefore(Header->getFirstNonPHIIt());
B2.SetInsertPoint(LatchBR);
auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
auto *One = ConstantInt::get(NewIdx->getType(), 1);
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 7d6662c44f07..59485126b280 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -296,7 +296,7 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
StringRef AttrName = cast<MDString>(NameMD)->getString();
// Do not inherit excluded attributes.
- return !AttrName.startswith(InheritOptionsExceptPrefix);
+ return !AttrName.starts_with(InheritOptionsExceptPrefix);
};
if (InheritThisAttribute(Op))
@@ -556,12 +556,8 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// Removes all incoming values from all other exiting blocks (including
// duplicate values from an exiting block).
// Nuke all entries except the zero'th entry which is the preheader entry.
- // NOTE! We need to remove Incoming Values in the reverse order as done
- // below, to keep the indices valid for deletion (removeIncomingValues
- // updates getNumIncomingValues and shifts all values down into the
- // operand being deleted).
- for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
- P.removeIncomingValue(e - i, false);
+ P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
+ /* DeletePHIIfEmpty */ false);
assert((P.getNumIncomingValues() == 1 &&
P.getIncomingBlock(PredIndex) == Preheader) &&
@@ -608,6 +604,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// Use a map to unique and a vector to guarantee deterministic ordering.
llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+ llvm::SmallVector<DPValue *, 4> DeadDPValues;
if (ExitBlock) {
// Given LCSSA form is satisfied, we should not have users of instructions
@@ -632,6 +629,24 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
"Unexpected user in reachable block");
U.set(Poison);
}
+
+ // RemoveDIs: do the same as below for DPValues.
+ if (Block->IsNewDbgInfoFormat) {
+ for (DPValue &DPV :
+ llvm::make_early_inc_range(I.getDbgValueRange())) {
+ DebugVariable Key(DPV.getVariable(), DPV.getExpression(),
+ DPV.getDebugLoc().get());
+ if (!DeadDebugSet.insert(Key).second)
+ continue;
+ // Unlinks the DPV from its container, for later insertion.
+ DPV.removeFromParent();
+ DeadDPValues.push_back(&DPV);
+ }
+ }
+
+ // For one of each variable encountered, preserve a debug intrinsic (set
+ // to Poison) and transfer it to the loop exit. This terminates any
+ // variable locations that were set during the loop.
auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
if (!DVI)
continue;
@@ -646,12 +661,22 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// be be replaced with undef. Loop invariant values will still be available.
// Move dbg.values out the loop so that earlier location ranges are still
// terminated and loop invariant assignments are preserved.
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
+ DIBuilder DIB(*ExitBlock->getModule());
+ BasicBlock::iterator InsertDbgValueBefore =
+ ExitBlock->getFirstInsertionPt();
+ assert(InsertDbgValueBefore != ExitBlock->end() &&
"There should be a non-PHI instruction in exit block, else these "
"instructions will have no parent.");
+
for (auto *DVI : DeadDebugInst)
- DVI->moveBefore(InsertDbgValueBefore);
+ DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);
+
+ // Due to the "head" bit in BasicBlock::iterator, we're going to insert
+ // each DPValue right at the start of the block, whereas dbg.values would be
+ // repeatedly inserted before the first instruction. To replicate this
+ // behaviour, do it backwards.
+ for (DPValue *DPV : llvm::reverse(DeadDPValues))
+ ExitBlock->insertDPValueBefore(DPV, InsertDbgValueBefore);
}
// Remove the block from the reference counting scheme, so that we can
@@ -937,8 +962,8 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
}
}
-Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
- RecurKind RK, Value *Left, Value *Right) {
+Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
Value *Cmp =
@@ -1028,14 +1053,12 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
-Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
- const TargetTransformInfo *TTI,
- Value *Src,
- const RecurrenceDescriptor &Desc,
- PHINode *OrigPhi) {
- assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
- Desc.getRecurrenceKind()) &&
- "Unexpected reduction kind");
+Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi) {
+ assert(
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&
+ "Unexpected reduction kind");
Value *InitVal = Desc.getRecurrenceStartValue();
Value *NewVal = nullptr;
@@ -1068,9 +1091,8 @@ Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
}
-Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
- const TargetTransformInfo *TTI,
- Value *Src, RecurKind RdxKind) {
+Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
+ RecurKind RdxKind) {
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
case RecurKind::Add:
@@ -1111,7 +1133,6 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
}
Value *llvm::createTargetReduction(IRBuilderBase &B,
- const TargetTransformInfo *TTI,
const RecurrenceDescriptor &Desc, Value *Src,
PHINode *OrigPhi) {
// TODO: Support in-order reductions based on the recurrence descriptor.
@@ -1121,10 +1142,10 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
B.setFastMathFlags(Desc.getFastMathFlags());
RecurKind RK = Desc.getRecurrenceKind();
- if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
- return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
+ return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);
- return createSimpleTargetReduction(B, TTI, Src, RK);
+ return createSimpleTargetReduction(B, Src, RK);
}
Value *llvm::createOrderedReduction(IRBuilderBase &B,
@@ -1453,7 +1474,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
// Note that we must not perform expansions until after
// we query *all* the costs, because if we perform temporary expansion
// inbetween, one that we might not intend to keep, said expansion
- // *may* affect cost calculation of the the next SCEV's we'll query,
+ // *may* affect cost calculation of the next SCEV's we'll query,
// and next SCEV may errneously get smaller cost.
// Collect all the candidate PHINodes to be rewritten.
@@ -1632,42 +1653,92 @@ Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
struct PointerBounds {
TrackingVH<Value> Start;
TrackingVH<Value> End;
+ Value *StrideToCheck;
};
/// Expand code for the lower and upper bound of the pointer group \p CG
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
Loop *TheLoop, Instruction *Loc,
- SCEVExpander &Exp) {
+ SCEVExpander &Exp, bool HoistRuntimeChecks) {
LLVMContext &Ctx = Loc->getContext();
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+ Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
Value *Start = nullptr, *End = nullptr;
LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
+
+ // If the Low and High values are themselves loop-variant, then we may want
+ // to expand the range to include those covered by the outer loop as well.
+ // There is a trade-off here with the advantage being that creating checks
+ // using the expanded range permits the runtime memory checks to be hoisted
+ // out of the outer loop. This reduces the cost of entering the inner loop,
+ // which can be significant for low trip counts. The disadvantage is that
+ // there is a chance we may now never enter the vectorized inner loop,
+ // whereas using a restricted range check could have allowed us to enter at
+ // least once. This is why the behaviour is not currently the default and is
+ // controlled by the parameter 'HoistRuntimeChecks'.
+ if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
+ isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
+ auto *HighAR = cast<SCEVAddRecExpr>(High);
+ auto *LowAR = cast<SCEVAddRecExpr>(Low);
+ const Loop *OuterLoop = TheLoop->getParentLoop();
+ const SCEV *Recur = LowAR->getStepRecurrence(*Exp.getSE());
+ if (Recur == HighAR->getStepRecurrence(*Exp.getSE()) &&
+ HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ const SCEV *OuterExitCount =
+ Exp.getSE()->getExitCount(OuterLoop, OuterLoopLatch);
+ if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
+ OuterExitCount->getType()->isIntegerTy()) {
+ const SCEV *NewHigh = cast<SCEVAddRecExpr>(High)->evaluateAtIteration(
+ OuterExitCount, *Exp.getSE());
+ if (!isa<SCEVCouldNotCompute>(NewHigh)) {
+ LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
+ "outer loop in order to permit hoisting\n");
+ High = NewHigh;
+ Low = cast<SCEVAddRecExpr>(Low)->getStart();
+ // If there is a possibility that the stride is negative then we have
+ // to generate extra checks to ensure the stride is non-negative.
+ if (!Exp.getSE()->isKnownNonNegative(Recur)) {
+ Stride = Recur;
+ LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
+ "positive: "
+ << *Stride << '\n');
+ }
+ }
+ }
+ }
+ }
+
+ Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(High, PtrArithTy, Loc);
if (CG->NeedsFreeze) {
IRBuilder<> Builder(Loc);
Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
End = Builder.CreateFreeze(End, End->getName() + ".fr");
}
- LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
- return {Start, End};
+ Value *StrideVal =
+ Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
+ LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
+ return {Start, End, StrideVal};
}
/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
- Instruction *Loc, SCEVExpander &Exp) {
+ Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
transform(PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerCheck &Check) {
- PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
- Second = expandBounds(Check.second, L, Loc, Exp);
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
+ HoistRuntimeChecks),
+ Second = expandBounds(Check.second, L, Loc, Exp,
+ HoistRuntimeChecks);
return std::make_pair(First, Second);
});
@@ -1677,10 +1748,11 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
Value *llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
- SCEVExpander &Exp) {
+ SCEVExpander &Exp, bool HoistRuntimeChecks) {
// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
- auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
LLVMContext &Ctx = Loc->getContext();
IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
@@ -1693,21 +1765,13 @@ Value *llvm::addRuntimeChecks(
const PointerBounds &A = Check.first, &B = Check.second;
// Check if two pointers (A and B) conflict where conflict is computed as:
// start(A) <= end(B) && start(B) <= end(A)
- unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
- unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
- assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
- (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ assert((A.Start->getType()->getPointerAddressSpace() ==
+ B.End->getType()->getPointerAddressSpace()) &&
+ (B.Start->getType()->getPointerAddressSpace() ==
+ A.End->getType()->getPointerAddressSpace()) &&
"Trying to bounds check pointers with different address spaces");
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
-
// [A|B].Start points to the first accessed byte under base [A|B].
// [A|B].End points to the last accessed byte, plus one.
// There is no conflict when the intervals are disjoint:
@@ -1716,9 +1780,21 @@ Value *llvm::addRuntimeChecks(
// bound0 = (B.Start < A.End)
// bound1 = (A.Start < B.End)
// IsConflict = bound0 & bound1
- Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
- Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
+ Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ if (A.StrideToCheck) {
+ Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
+ A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
+ "stride.check");
+ IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
+ }
+ if (B.StrideToCheck) {
+ Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
+ B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
+ "stride.check");
+ IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
+ }
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
@@ -1740,23 +1816,31 @@ Value *llvm::addDiffRuntimeChecks(
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
+ auto &SE = *Expander.getSE();
+ // Map to keep track of created compares. The key is the pair of operands for
+ // the compare, to allow detecting and re-using redundant compares.
+ DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &C : Checks) {
Type *Ty = C.SinkStart->getType();
// Compute VF * IC * AccessSize.
auto *VFTimesUFTimesSize =
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
ConstantInt::get(Ty, IC * C.AccessSize));
- Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
- Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
- if (C.NeedsFreeze) {
- IRBuilder<> Builder(Loc);
- Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
- Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
- }
- Value *Diff = ChkBuilder.CreateSub(Sink, Src);
- Value *IsConflict =
- ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+ Value *Diff = Expander.expandCodeFor(
+ SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
+
+ // Check if the same compare has already been created earlier. In that case,
+ // there is no need to check it again.
+ Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
+ if (IsConflict)
+ continue;
+ IsConflict =
+ ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+ SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
+ if (C.NeedsFreeze)
+ IsConflict =
+ ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 78ebe75c121b..548b0f3c55f0 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -145,8 +145,8 @@ void LoopVersioning::addPHINodes(
}
// If not create it.
if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- &PHIBlock->front());
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver");
+ PN->insertBefore(PHIBlock->begin());
SmallVector<User*, 8> UsersToUpdate;
for (User *U : Inst->users())
if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
diff --git a/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
index 195c274ff18e..4908535cba54 100644
--- a/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
+++ b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
@@ -128,7 +128,7 @@ static bool runImpl(Module &M) {
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
LLVMContext &C = M.getContext();
- PointerType *VoidStar = Type::getInt8PtrTy(C);
+ PointerType *VoidStar = PointerType::getUnqual(C);
Type *AtExitFuncArgs[] = {VoidStar};
FunctionType *AtExitFuncTy =
FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
@@ -140,6 +140,17 @@ static bool runImpl(Module &M) {
{PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
/*isVarArg=*/false));
+ // If __cxa_atexit is defined (e.g. in the case of LTO) and arg0 is not
+ // actually used (i.e. it's a dummy/stub function as used in emscripten when
+ // the program never exits) we can simply return early and clear out
+ // @llvm.global_dtors.
+ if (auto F = dyn_cast<Function>(AtExit.getCallee())) {
+ if (F && F->hasExactDefinition() && F->getArg(0)->getNumUses() == 0) {
+ GV->eraseFromParent();
+ return true;
+ }
+ }
+
// Declare __dso_local.
Type *DsoHandleTy = Type::getInt8Ty(C);
Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] {
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 906eb71fc2d9..c75de8687879 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -64,17 +64,6 @@ void llvm::createMemCpyLoopKnownSize(
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
- // Cast the Src and Dst pointers to pointers to the loop operand type (if
- // needed).
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
@@ -137,13 +126,9 @@ void llvm::createMemCpyLoopKnownSize(
uint64_t GepIndex = BytesCopied / OperandSize;
assert(GepIndex * OperandSize == BytesCopied &&
"Division should have no Remainder!");
- // Cast source to operand type and load
- PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
- Value *CastedSrc = SrcAddr->getType() == SrcPtrType
- ? SrcAddr
- : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
LoadInst *Load =
RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
if (!CanOverlap) {
@@ -151,13 +136,8 @@ void llvm::createMemCpyLoopKnownSize(
Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
}
- // Cast destination to operand type and store.
- PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
- Value *CastedDst = DstAddr->getType() == DstPtrType
- ? DstAddr
- : RBuilder.CreateBitCast(DstAddr, DstPtrType);
Value *DstGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
DstIsVolatile);
if (!CanOverlap) {
@@ -206,15 +186,6 @@ void llvm::createMemCpyLoopUnknownSize(
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
// Calculate the loop trip count, and remaining bytes to copy after the loop.
Type *CopyLenType = CopyLen->getType();
IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
@@ -305,13 +276,9 @@ void llvm::createMemCpyLoopUnknownSize(
ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
ResidualIndex->addIncoming(Zero, ResHeaderBB);
- Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
- SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
- Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
- DstAddr, PointerType::get(ResLoopOpType, DstAS));
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
- Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
- ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+ Value *SrcGEP =
+ ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
PartSrcAlign, SrcIsVolatile);
if (!CanOverlap) {
@@ -319,8 +286,8 @@ void llvm::createMemCpyLoopUnknownSize(
Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
}
- Value *DstGEP = ResBuilder.CreateInBoundsGEP(
- ResLoopOpType, DstAsResLoopOpType, FullOffset);
+ Value *DstGEP =
+ ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
DstIsVolatile);
if (!CanOverlap) {
@@ -479,11 +446,6 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
IRBuilder<> Builder(OrigBB->getTerminator());
- // Cast pointer to the type of value getting stored
- unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- DstAddr = Builder.CreateBitCast(DstAddr,
- PointerType::get(SetValue->getType(), dstAS));
-
Builder.CreateCondBr(
Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
LoopBB);
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 44ac65f265f0..fd0112ae529c 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -151,7 +151,7 @@ void MetaRename(Module &M,
auto IsNameExcluded = [](StringRef &Name,
SmallVectorImpl<StringRef> &ExcludedPrefixes) {
return any_of(ExcludedPrefixes,
- [&Name](auto &Prefix) { return Name.startswith(Prefix); });
+ [&Name](auto &Prefix) { return Name.starts_with(Prefix); });
};
// Leave library functions alone because their presence or absence could
@@ -159,7 +159,7 @@ void MetaRename(Module &M,
auto ExcludeLibFuncs = [&](Function &F) {
LibFunc Tmp;
StringRef Name = F.getName();
- return Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ return Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
GetTLI(F).getLibFunc(F, Tmp) ||
IsNameExcluded(Name, ExcludedFuncPrefixes);
};
@@ -177,7 +177,7 @@ void MetaRename(Module &M,
// Rename all aliases
for (GlobalAlias &GA : M.aliases()) {
StringRef Name = GA.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
IsNameExcluded(Name, ExcludedAliasesPrefixes))
continue;
@@ -187,7 +187,7 @@ void MetaRename(Module &M,
// Rename all global variables
for (GlobalVariable &GV : M.globals()) {
StringRef Name = GV.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ if (Name.starts_with("llvm.") || (!Name.empty() && Name[0] == 1) ||
IsNameExcluded(Name, ExcludedGlobalsPrefixes))
continue;
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 1e243ef74df7..7de0959ca57e 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -44,17 +44,17 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
}
GVCtor->eraseFromParent();
} else {
- EltTy = StructType::get(
- IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
- IRB.getInt8PtrTy());
+ EltTy = StructType::get(IRB.getInt32Ty(),
+ PointerType::get(FnTy, F->getAddressSpace()),
+ IRB.getPtrTy());
}
// Build a 3 field global_ctor entry. We don't take a comdat key.
Constant *CSVals[3];
CSVals[0] = IRB.getInt32(Priority);
CSVals[1] = F;
- CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
- : Constant::getNullValue(IRB.getInt8PtrTy());
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
+ : Constant::getNullValue(IRB.getPtrTy());
Constant *RuntimeCtorInit =
ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
@@ -96,7 +96,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
if (GV)
GV->eraseFromParent();
- Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
+ Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
for (auto *V : Values)
Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
@@ -301,7 +301,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
MD5 Md5;
bool ExportsSymbols = false;
auto AddGlobal = [&](GlobalValue &GV) {
- if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
!GV.hasExternalLinkage() || GV.hasComdat())
return;
ExportsSymbols = true;
@@ -346,7 +346,8 @@ void VFABI::setVectorVariantNames(CallInst *CI,
#ifndef NDEBUG
for (const std::string &VariantMapping : VariantMappings) {
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
- std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
+ std::optional<VFInfo> VI =
+ VFABI::tryDemangleForVFABI(VariantMapping, CI->getFunctionType());
assert(VI && "Cannot add an invalid VFABI name.");
assert(M->getNamedValue(VI->VectorName) &&
"Cannot add variant to attribute: "
diff --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
index b0ca0b15c08e..a977ad87b79f 100644
--- a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
+++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
@@ -14,7 +14,6 @@
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -50,7 +49,7 @@ static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
else if (auto *SI = dyn_cast<StoreInst>(&I))
ML = MemoryLocation::get(SI);
else
- assert(false && "memory location set");
+ return std::nullopt;
if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
return ML;
@@ -202,7 +201,7 @@ static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
// if two instructions are moved from the same BB to the same BB, we insert
// the second one in the front, then the first on top of it.
for (auto &Job : reverse(JobList)) {
- Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
+ Job.first->moveBefore(*Job.second, Job.second->getFirstInsertionPt());
MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
MemorySSA::InsertionPlace::Beginning);
}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 1f16ba78bdb0..902977b08d15 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -23,7 +23,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
@@ -33,12 +32,6 @@
using namespace llvm;
using namespace PatternMatch;
-INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
static cl::opt<bool> VerifyPredicateInfo(
"verify-predicateinfo", cl::init(false), cl::Hidden,
cl::desc("Verify PredicateInfo in legacy printer pass."));
@@ -835,20 +828,6 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
void PredicateInfo::verifyPredicateInfo() const {}
-char PredicateInfoPrinterLegacyPass::ID = 0;
-
-PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
- : FunctionPass(ID) {
- initializePredicateInfoPrinterLegacyPassPass(
- *PassRegistry::getPassRegistry());
-}
-
-void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
-}
-
// Replace ssa_copy calls created by PredicateInfo with their operand.
static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
@@ -862,18 +841,6 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
}
}
-bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
- PredInfo->print(dbgs());
- if (VerifyPredicateInfo)
- PredInfo->verifyPredicateInfo();
-
- replaceCreatedSSACopys(*PredInfo, F);
- return false;
-}
-
PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 2e5f40d39912..717b6d301c8c 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -172,6 +173,7 @@ public:
struct AllocaInfo {
using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
+ using DPUserVec = SmallVector<DPValue *, 1>;
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
@@ -182,6 +184,7 @@ struct AllocaInfo {
/// Debug users of the alloca - does not include dbg.assign intrinsics.
DbgUserVec DbgUsers;
+ DPUserVec DPUsers;
/// Helper to update assignment tracking debug info.
AssignmentTrackingInfo AssignmentTracking;
@@ -192,6 +195,7 @@ struct AllocaInfo {
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
DbgUsers.clear();
+ DPUsers.clear();
AssignmentTracking.clear();
}
@@ -225,7 +229,7 @@ struct AllocaInfo {
}
}
DbgUserVec AllDbgUsers;
- findDbgUsers(AllDbgUsers, AI);
+ findDbgUsers(AllDbgUsers, AI, &DPUsers);
std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(),
std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) {
return !isa<DbgAssignIntrinsic>(DII);
@@ -329,6 +333,7 @@ struct PromoteMem2Reg {
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
+ SmallVector<AllocaInfo::DPUserVec, 8> AllocaDPUsers;
/// For each alloca, keep an instance of a helper class that gives us an easy
/// way to update assignment tracking debug info if the alloca is promoted.
@@ -525,14 +530,18 @@ static bool rewriteSingleStoreAlloca(
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
- if (DII->isAddressOfVariable()) {
- ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
- DII->eraseFromParent();
- } else if (DII->getExpression()->startsWithDeref()) {
- DII->eraseFromParent();
+ auto ConvertDebugInfoForStore = [&](auto &Container) {
+ for (auto *DbgItem : Container) {
+ if (DbgItem->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DbgItem, Info.OnlyStore, DIB);
+ DbgItem->eraseFromParent();
+ } else if (DbgItem->getExpression()->startsWithDeref()) {
+ DbgItem->eraseFromParent();
+ }
}
- }
+ };
+ ConvertDebugInfoForStore(Info.DbgUsers);
+ ConvertDebugInfoForStore(Info.DPUsers);
// Remove dbg.assigns linked to the alloca as these are now redundant.
at::deleteAssignmentMarkers(AI);
@@ -629,12 +638,18 @@ static bool promoteSingleBlockAlloca(
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Update assignment tracking info for the store we're going to delete.
Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete);
+
// Record debuginfo for the store before removing it.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
- if (DII->isAddressOfVariable()) {
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ auto DbgUpdateForStore = [&](auto &Container) {
+ for (auto *DbgItem : Container) {
+ if (DbgItem->isAddressOfVariable()) {
+ ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB);
+ }
}
- }
+ };
+ DbgUpdateForStore(Info.DbgUsers);
+ DbgUpdateForStore(Info.DPUsers);
+
SI->eraseFromParent();
LBI.deleteValue(SI);
}
@@ -644,9 +659,14 @@ static bool promoteSingleBlockAlloca(
AI->eraseFromParent();
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgUsers)
- if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
- DII->eraseFromParent();
+ auto DbgUpdateForAlloca = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ };
+ DbgUpdateForAlloca(Info.DbgUsers);
+ DbgUpdateForAlloca(Info.DPUsers);
++NumLocalPromoted;
return true;
@@ -657,6 +677,7 @@ void PromoteMem2Reg::run() {
AllocaDbgUsers.resize(Allocas.size());
AllocaATInfo.resize(Allocas.size());
+ AllocaDPUsers.resize(Allocas.size());
AllocaInfo Info;
LargeBlockInfo LBI;
@@ -720,6 +741,8 @@ void PromoteMem2Reg::run() {
AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
if (!Info.AssignmentTracking.empty())
AllocaATInfo[AllocaNum] = Info.AssignmentTracking;
+ if (!Info.DPUsers.empty())
+ AllocaDPUsers[AllocaNum] = Info.DPUsers;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -795,11 +818,16 @@ void PromoteMem2Reg::run() {
}
// Remove alloca's dbg.declare intrinsics from the function.
- for (auto &DbgUsers : AllocaDbgUsers) {
- for (auto *DII : DbgUsers)
- if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
- DII->eraseFromParent();
- }
+ auto RemoveDbgDeclares = [&](auto &Container) {
+ for (auto &DbgUsers : Container) {
+ for (auto *DbgItem : DbgUsers)
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ }
+ };
+ RemoveDbgDeclares(AllocaDbgUsers);
+ RemoveDbgDeclares(AllocaDPUsers);
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
@@ -981,8 +1009,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- &BB->front());
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++));
+ PN->insertBefore(BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
return true;
@@ -1041,9 +1069,13 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
- for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
- if (DII->isAddressOfVariable())
- ConvertDebugDeclareToDebugValue(DII, APN, DIB);
+ auto ConvertDbgDeclares = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DbgItem, APN, DIB);
+ };
+ ConvertDbgDeclares(AllocaDbgUsers[AllocaNo]);
+ ConvertDbgDeclares(AllocaDPUsers[AllocaNo]);
// Get the next phi node.
++PNI;
@@ -1098,9 +1130,13 @@ NextIteration:
IncomingLocs[AllocaNo] = SI->getDebugLoc();
AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB,
&DbgAssignsToDelete);
- for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
- if (DII->isAddressOfVariable())
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ auto ConvertDbgDeclares = [&](auto &Container) {
+ for (auto *DbgItem : Container)
+ if (DbgItem->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DbgItem, SI, DIB);
+ };
+ ConvertDbgDeclares(AllocaDbgUsers[ai->second]);
+ ConvertDbgDeclares(AllocaDPUsers[ai->second]);
SI->eraseFromParent();
}
}
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index c9ff94dc9744..ea628d7c3d7d 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -153,17 +153,12 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
Builder.SetInsertPoint(Load);
Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
&M, Intrinsic::load_relative, {Index->getType()});
- Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
// Create a call to load.relative intrinsic that computes the target address
// by adding base address (lookup table address) and relative offset.
- Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset},
+ Value *Result = Builder.CreateCall(LoadRelIntrinsic, {RelLookupTable, Offset},
"reltable.intrinsic");
- // Create a bitcast instruction if necessary.
- if (Load->getType() != Builder.getInt8PtrTy())
- Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast");
-
// Replace load instruction with the new generated instruction sequence.
Load->replaceAllUsesWith(Result);
// Remove Load and GEP instructions.
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index de3626a24212..ab95698abc43 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -107,9 +107,7 @@ bool SCCPSolver::tryToReplaceWithConstant(Value *V) {
static bool refineInstruction(SCCPSolver &Solver,
const SmallPtrSetImpl<Value *> &InsertedValues,
Instruction &Inst) {
- if (!isa<OverflowingBinaryOperator>(Inst))
- return false;
-
+ bool Changed = false;
auto GetRange = [&Solver, &InsertedValues](Value *Op) {
if (auto *Const = dyn_cast<ConstantInt>(Op))
return ConstantRange(Const->getValue());
@@ -120,23 +118,32 @@ static bool refineInstruction(SCCPSolver &Solver,
return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(),
/*UndefAllowed=*/false);
};
- auto RangeA = GetRange(Inst.getOperand(0));
- auto RangeB = GetRange(Inst.getOperand(1));
- bool Changed = false;
- if (!Inst.hasNoUnsignedWrap()) {
- auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::BinaryOps(Inst.getOpcode()), RangeB,
- OverflowingBinaryOperator::NoUnsignedWrap);
- if (NUWRange.contains(RangeA)) {
- Inst.setHasNoUnsignedWrap();
- Changed = true;
+
+ if (isa<OverflowingBinaryOperator>(Inst)) {
+ auto RangeA = GetRange(Inst.getOperand(0));
+ auto RangeB = GetRange(Inst.getOperand(1));
+ if (!Inst.hasNoUnsignedWrap()) {
+ auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB,
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ if (NUWRange.contains(RangeA)) {
+ Inst.setHasNoUnsignedWrap();
+ Changed = true;
+ }
}
- }
- if (!Inst.hasNoSignedWrap()) {
- auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- Instruction::BinaryOps(Inst.getOpcode()), RangeB, OverflowingBinaryOperator::NoSignedWrap);
- if (NSWRange.contains(RangeA)) {
- Inst.setHasNoSignedWrap();
+ if (!Inst.hasNoSignedWrap()) {
+ auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB,
+ OverflowingBinaryOperator::NoSignedWrap);
+ if (NSWRange.contains(RangeA)) {
+ Inst.setHasNoSignedWrap();
+ Changed = true;
+ }
+ }
+ } else if (isa<ZExtInst>(Inst) && !Inst.hasNonNeg()) {
+ auto Range = GetRange(Inst.getOperand(0));
+ if (Range.isAllNonNegative()) {
+ Inst.setNonNeg();
Changed = true;
}
}
@@ -171,6 +178,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
if (InsertedValues.count(Op0) || !isNonNegative(Op0))
return false;
NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst);
+ NewInst->setNonNeg();
break;
}
case Instruction::AShr: {
@@ -179,6 +187,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
if (InsertedValues.count(Op0) || !isNonNegative(Op0))
return false;
NewInst = BinaryOperator::CreateLShr(Op0, Inst.getOperand(1), "", &Inst);
+ NewInst->setIsExact(Inst.isExact());
break;
}
case Instruction::SDiv:
@@ -191,6 +200,8 @@ static bool replaceSignedInst(SCCPSolver &Solver,
auto NewOpcode = Inst.getOpcode() == Instruction::SDiv ? Instruction::UDiv
: Instruction::URem;
NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
+ if (Inst.getOpcode() == Instruction::SDiv)
+ NewInst->setIsExact(Inst.isExact());
break;
}
default:
@@ -1029,8 +1040,9 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- // Unwinding instructions successors are always executable.
- if (TI.isExceptionalTerminator()) {
+ // We cannot analyze special terminators, so consider all successors
+ // executable.
+ if (TI.isSpecialTerminator()) {
Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -1098,13 +1110,6 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- // In case of callbr, we pessimistically assume that all successors are
- // feasible.
- if (isa<CallBrInst>(&TI)) {
- Succs.assign(TI.getNumSuccessors(), true);
- return;
- }
-
LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -1231,10 +1236,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (Constant *OpC = getConstant(OpSt, I.getOperand(0)->getType())) {
// Fold the constant as we build.
- Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
- markConstant(&I, C);
- } else if (I.getDestTy()->isIntegerTy() &&
- I.getSrcTy()->isIntOrIntVectorTy()) {
+ if (Constant *C =
+ ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL))
+ return (void)markConstant(&I, C);
+ }
+
+ if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy()) {
auto &LV = getValueState(&I);
ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy());
@@ -1539,11 +1546,8 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
return (void)markOverdefined(&I);
}
- Constant *Ptr = Operands[0];
- auto Indices = ArrayRef(Operands.begin() + 1, Operands.end());
- Constant *C =
- ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
- markConstant(&I, C);
+ if (Constant *C = ConstantFoldInstOperands(&I, Operands, DL))
+ markConstant(&I, C);
}
void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index ebe9cb27f5ab..fc21fb552137 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -156,8 +156,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
- ProtoName, &BB->front());
+ PHINode *InsertedPHI =
+ PHINode::Create(ProtoType, PredValues.size(), ProtoName);
+ InsertedPHI->insertBefore(BB->begin());
// Fill in all the predecessors of the PHI.
for (const auto &PredValue : PredValues)
@@ -198,12 +199,18 @@ void SSAUpdater::RewriteUse(Use &U) {
void SSAUpdater::UpdateDebugValues(Instruction *I) {
SmallVector<DbgValueInst *, 4> DbgValues;
- llvm::findDbgValues(DbgValues, I);
+ SmallVector<DPValue *, 4> DPValues;
+ llvm::findDbgValues(DbgValues, I, &DPValues);
for (auto &DbgValue : DbgValues) {
if (DbgValue->getParent() == I->getParent())
continue;
UpdateDebugValue(I, DbgValue);
}
+ for (auto &DPV : DPValues) {
+ if (DPV->getParent() == I->getParent())
+ continue;
+ UpdateDebugValue(I, DPV);
+ }
}
void SSAUpdater::UpdateDebugValues(Instruction *I,
@@ -213,16 +220,31 @@ void SSAUpdater::UpdateDebugValues(Instruction *I,
}
}
+void SSAUpdater::UpdateDebugValues(Instruction *I,
+ SmallVectorImpl<DPValue *> &DPValues) {
+ for (auto &DPV : DPValues) {
+ UpdateDebugValue(I, DPV);
+ }
+}
+
void SSAUpdater::UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue) {
BasicBlock *UserBB = DbgValue->getParent();
if (HasValueForBlock(UserBB)) {
Value *NewVal = GetValueAtEndOfBlock(UserBB);
DbgValue->replaceVariableLocationOp(I, NewVal);
- }
- else
+ } else
DbgValue->setKillLocation();
}
+void SSAUpdater::UpdateDebugValue(Instruction *I, DPValue *DPV) {
+ BasicBlock *UserBB = DPV->getParent();
+ if (HasValueForBlock(UserBB)) {
+ Value *NewVal = GetValueAtEndOfBlock(UserBB);
+ DPV->replaceVariableLocationOp(I, NewVal);
+ } else
+ DPV->setKillLocation();
+}
+
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -295,8 +317,9 @@ public:
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
- Updater->ProtoName, &BB->front());
+ PHINode *PHI =
+ PHINode::Create(Updater->ProtoType, NumPreds, Updater->ProtoName);
+ PHI->insertBefore(BB->begin());
return PHI;
}
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 31d62fbf0618..101b70d8def4 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -159,7 +159,7 @@ public:
/// Get the total flow from a given source node.
/// Returns a list of pairs (target node, amount of flow to the target).
- const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
+ std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
std::vector<std::pair<uint64_t, int64_t>> Flow;
for (const auto &Edge : Edges[Src]) {
if (Edge.Flow > 0)
diff --git a/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/llvm/lib/Transforms/Utils/SanitizerStats.cpp
index fd21ee4cc408..b80c5a6f9d68 100644
--- a/llvm/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/llvm/lib/Transforms/Utils/SanitizerStats.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
- StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
+ StatTy = ArrayType::get(PointerType::getUnqual(M->getContext()), 2);
EmptyModuleStatsTy = makeModuleStatsTy();
ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
@@ -33,28 +33,28 @@ ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
}
StructType *SanitizerStatReport::makeModuleStatsTy() {
- return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
- Type::getInt32Ty(M->getContext()),
- makeModuleStatsArrayTy()});
+ return StructType::get(M->getContext(),
+ {PointerType::getUnqual(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
+ makeModuleStatsArrayTy()});
}
void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
Function *F = B.GetInsertBlock()->getParent();
Module *M = F->getParent();
- PointerType *Int8PtrTy = B.getInt8PtrTy();
+ PointerType *PtrTy = B.getPtrTy();
IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
- ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
+ ArrayType *StatTy = ArrayType::get(PtrTy, 2);
Inits.push_back(ConstantArray::get(
StatTy,
- {Constant::getNullValue(Int8PtrTy),
+ {Constant::getNullValue(PtrTy),
ConstantExpr::getIntToPtr(
ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
kSanitizerStatKindBits)),
- Int8PtrTy)}));
+ PtrTy)}));
- FunctionType *StatReportTy =
- FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
+ FunctionType *StatReportTy = FunctionType::get(B.getVoidTy(), PtrTy, false);
FunctionCallee StatReport =
M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
@@ -64,7 +64,7 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
ConstantInt::get(IntPtrTy, Inits.size() - 1),
});
- B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
+ B.CreateCall(StatReport, InitAddr);
}
void SanitizerStatReport::finish() {
@@ -73,7 +73,7 @@ void SanitizerStatReport::finish() {
return;
}
- PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ PointerType *Int8PtrTy = PointerType::getUnqual(M->getContext());
IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
Type *VoidTy = Type::getVoidTy(M->getContext());
@@ -85,8 +85,7 @@ void SanitizerStatReport::finish() {
{Constant::getNullValue(Int8PtrTy),
ConstantInt::get(Int32Ty, Inits.size()),
ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
- ModuleStatsGV->replaceAllUsesWith(
- ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
+ ModuleStatsGV->replaceAllUsesWith(NewModuleStatsGV);
ModuleStatsGV->eraseFromParent();
// Create a global constructor to register NewModuleStatsGV.
@@ -99,7 +98,7 @@ void SanitizerStatReport::finish() {
FunctionCallee StatInit =
M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
- B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
+ B.CreateCall(StatInit, NewModuleStatsGV);
B.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 20844271b943..cd3ac317cd23 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -170,11 +170,10 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
if (Op == Instruction::IntToPtr) {
auto *PtrTy = cast<PointerType>(Ty);
if (DL.isNonIntegralPointerType(PtrTy)) {
- auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
"alloc size of i8 must by 1 byte for the GEP to be correct");
return Builder.CreateGEP(
- Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "scevgep");
+ Builder.getInt8Ty(), Constant::getNullValue(PtrTy), V, "scevgep");
}
}
// Short-circuit unnecessary bitcasts.
@@ -313,11 +312,11 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
-Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) {
+Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
assert(!isa<Instruction>(V) ||
SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
- Value *Idx = expandCodeForImpl(Offset, Ty);
+ Value *Idx = expand(Offset);
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
@@ -339,7 +338,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) {
if (IP->getOpcode() == Instruction::GetElementPtr &&
IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Type::getInt8Ty(Ty->getContext()))
+ Builder.getInt8Ty())
return &*IP;
if (IP == BlockBegin) break;
}
@@ -457,8 +456,6 @@ public:
}
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
-
// Collect all the add operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal, and
// so that pointer operands are inserted first, which the code below relies on
@@ -498,20 +495,19 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
X = SE.getSCEV(U->getValue());
NewOps.push_back(X);
}
- Sum = expandAddToGEP(SE.getAddExpr(NewOps), Ty, Sum);
+ Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
- Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
- Sum = InsertNoopCastOfTo(Sum, Ty);
+ Value *W = expand(SE.getNegativeSCEV(Op));
Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
- Value *W = expandCodeForImpl(Op, Ty);
- Sum = InsertNoopCastOfTo(Sum, Ty);
+ Value *W = expand(Op);
// Canonicalize a constant to the RHS.
- if (isa<Constant>(Sum)) std::swap(Sum, W);
+ if (isa<Constant>(Sum))
+ std::swap(Sum, W);
Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
/*IsSafeToHoist*/ true);
++I;
@@ -522,7 +518,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Type *Ty = S->getType();
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
@@ -541,7 +537,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Expand the calculation of X pow N in the following manner:
// Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
// X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
- const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
auto E = I;
// Calculate how many times the same operand from the same loop is included
// into this power.
@@ -559,7 +555,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
// that are needed into the result.
- Value *P = expandCodeForImpl(I->second, Ty);
+ Value *P = expand(I->second);
Value *Result = nullptr;
if (Exponent & 1)
Result = P;
@@ -584,14 +580,12 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
Prod = ExpandOpBinPowN();
} else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
- Prod = InsertNoopCastOfTo(Prod, Ty);
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
++I;
} else {
// A simple mul.
Value *W = ExpandOpBinPowN();
- Prod = InsertNoopCastOfTo(Prod, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Prod)) std::swap(Prod, W);
const APInt *RHS;
@@ -616,18 +610,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
-
- Value *LHS = expandCodeForImpl(S->getLHS(), Ty);
+ Value *LHS = expand(S->getLHS());
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
- ConstantInt::get(Ty, RHS.logBase2()),
+ ConstantInt::get(SC->getType(), RHS.logBase2()),
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
- Value *RHS = expandCodeForImpl(S->getRHS(), Ty);
+ Value *RHS = expand(S->getRHS());
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
@@ -803,12 +795,11 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
- Type *ExpandTy, Type *IntTy,
bool useSubtract) {
Value *IncV;
// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (ExpandTy->isPointerTy()) {
- IncV = expandAddToGEP(SE.getSCEV(StepV), IntTy, PN);
+ if (PN->getType()->isPointerTy()) {
+ IncV = expandAddToGEP(SE.getSCEV(StepV), PN);
} else {
IncV = useSubtract ?
Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
@@ -824,12 +815,11 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Requested,
bool &InvertStep) {
// We can't transform to match a pointer PHI.
- if (Phi->getType()->isPointerTy())
+ Type *PhiTy = Phi->getType();
+ Type *RequestedTy = Requested->getType();
+ if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
return false;
- Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
- Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
-
if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
return false;
@@ -886,12 +876,10 @@ static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
- const Loop *L,
- Type *ExpandTy,
- Type *IntTy,
- Type *&TruncTy,
+ const Loop *L, Type *&TruncTy,
bool &InvertStep) {
- assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+ assert((!IVIncInsertLoop || IVIncInsertPos) &&
+ "Uninitialized insert position");
// Reuse a previously-inserted PHI, if present.
BasicBlock *LatchBlock = L->getLoopLatch();
@@ -962,7 +950,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// later.
AddRecPhiMatch = &PN;
IncV = TempIncV;
- TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+ TruncTy = Normalized->getType();
}
}
@@ -996,8 +984,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
assert(L->getLoopPreheader() &&
"Can't expand add recurrences without a loop preheader!");
Value *StartV =
- expandCodeForImpl(Normalized->getStart(), ExpandTy,
- L->getLoopPreheader()->getTerminator());
+ expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
// StartV must have been be inserted into L's preheader to dominate the new
// phi.
@@ -1008,6 +995,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand code for the step value. Do this before creating the PHI so that PHI
// reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
+ Type *ExpandTy = Normalized->getType();
// If the stride is negative, insert a sub instead of an add for the increment
// (unless it's a constant, because subtracts of constants are canonicalized
// to adds).
@@ -1015,8 +1003,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeForImpl(
- Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+ Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1047,7 +1034,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
if (isa<OverflowingBinaryOperator>(IncV)) {
if (IncrementIsNUW)
@@ -1070,8 +1057,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
- Type *STy = S->getType();
- Type *IntTy = SE.getEffectiveSCEVType(STy);
const Loop *L = S->getLoop();
// Determine a normalized form of this expression, which is the expression
@@ -1084,51 +1069,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
}
- // Strip off any non-loop-dominating component from the addrec start.
- const SCEV *Start = Normalized->getStart();
- const SCEV *PostLoopOffset = nullptr;
- if (!SE.properlyDominates(Start, L->getHeader())) {
- PostLoopOffset = Start;
- Start = SE.getConstant(Normalized->getType(), 0);
- Normalized = cast<SCEVAddRecExpr>(
- SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
- Normalized->getLoop(),
- Normalized->getNoWrapFlags(SCEV::FlagNW)));
- }
-
- // Strip off any non-loop-dominating component from the addrec step.
+ [[maybe_unused]] const SCEV *Start = Normalized->getStart();
const SCEV *Step = Normalized->getStepRecurrence(SE);
- const SCEV *PostLoopScale = nullptr;
- if (!SE.dominates(Step, L->getHeader())) {
- PostLoopScale = Step;
- Step = SE.getConstant(Normalized->getType(), 1);
- if (!Start->isZero()) {
- // The normalization below assumes that Start is constant zero, so if
- // it isn't re-associate Start to PostLoopOffset.
- assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
- PostLoopOffset = Start;
- Start = SE.getConstant(Normalized->getType(), 0);
- }
- Normalized =
- cast<SCEVAddRecExpr>(SE.getAddRecExpr(
- Start, Step, Normalized->getLoop(),
- Normalized->getNoWrapFlags(SCEV::FlagNW)));
- }
-
- // Expand the core addrec. If we need post-loop scaling, force it to
- // expand to an integer type to avoid the need for additional casting.
- Type *ExpandTy = PostLoopScale ? IntTy : STy;
- // We can't use a pointer type for the addrec if the pointer type is
- // non-integral.
- Type *AddRecPHIExpandTy =
- DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+ assert(SE.properlyDominates(Start, L->getHeader()) &&
+ "Start does not properly dominate loop header");
+ assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
// In some cases, we decide to reuse an existing phi node but need to truncate
// it and/or invert the step.
Type *TruncTy = nullptr;
bool InvertStep = false;
- PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
- IntTy, TruncTy, InvertStep);
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
// Accommodate post-inc mode, if necessary.
Value *Result;
@@ -1167,59 +1118,29 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// inserting an extra IV increment. StepV might fold into PostLoopOffset,
// but hopefully expandCodeFor handles that.
bool useSubtract =
- !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ !S->getType()->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
Value *StepV;
{
// Expand the step somewhere that dominates the loop header.
SCEVInsertPointGuard Guard(Builder, this);
- StepV = expandCodeForImpl(
- Step, IntTy, &*L->getHeader()->getFirstInsertionPt());
+ StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
}
- Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ Result = expandIVInc(PN, StepV, L, useSubtract);
}
}
// We have decided to reuse an induction variable of a dominating loop. Apply
// truncation and/or inversion of the step.
if (TruncTy) {
- Type *ResTy = Result->getType();
- // Normalize the result type.
- if (ResTy != SE.getEffectiveSCEVType(ResTy))
- Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
// Truncate the result.
if (TruncTy != Result->getType())
Result = Builder.CreateTrunc(Result, TruncTy);
// Invert the result.
if (InvertStep)
- Result = Builder.CreateSub(
- expandCodeForImpl(Normalized->getStart(), TruncTy), Result);
- }
-
- // Re-apply any non-loop-dominating scale.
- if (PostLoopScale) {
- assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
- Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateMul(Result,
- expandCodeForImpl(PostLoopScale, IntTy));
- }
-
- // Re-apply any non-loop-dominating offset.
- if (PostLoopOffset) {
- if (isa<PointerType>(ExpandTy)) {
- if (Result->getType()->isIntegerTy()) {
- Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
- Result = expandAddToGEP(SE.getUnknown(Result), IntTy, Base);
- } else {
- Result = expandAddToGEP(PostLoopOffset, IntTy, Result);
- }
- } else {
- Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateAdd(
- Result, expandCodeForImpl(PostLoopOffset, IntTy));
- }
+ Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
}
return Result;
@@ -1260,8 +1181,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
- V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- &*NewInsertPt);
+ V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
return V;
}
@@ -1269,7 +1189,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (!S->getStart()->isZero()) {
if (isa<PointerType>(S->getType())) {
Value *StartV = expand(SE.getPointerBase(S));
- return expandAddToGEP(SE.removePointerBase(S), Ty, StartV);
+ return expandAddToGEP(SE.removePointerBase(S), StartV);
}
SmallVector<const SCEV *, 4> NewOps(S->operands());
@@ -1292,8 +1212,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// specified loop.
BasicBlock *Header = L->getHeader();
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
- CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
- &Header->front());
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
+ CanonicalIV->insertBefore(Header->begin());
rememberInstruction(CanonicalIV);
SmallSet<BasicBlock *, 4> PredSeen;
@@ -1361,34 +1281,25 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
- Value *V =
- expandCodeForImpl(S->getOperand(), S->getOperand()->getType());
+ Value *V = expand(S->getOperand());
return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
GetOptimalInsertionPointForCastOf(V));
}
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateTrunc(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateTrunc(V, S->getType());
}
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateZExt(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateZExt(V, S->getType(), "",
+ SE.isKnownNonNegative(S->getOperand()));
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
- Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeForImpl(
- S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())
- );
- return Builder.CreateSExt(V, Ty);
+ Value *V = expand(S->getOperand());
+ return Builder.CreateSExt(V, S->getType());
}
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
@@ -1399,7 +1310,7 @@ Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
if (IsSequential)
LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
- Value *RHS = expandCodeForImpl(S->getOperand(i), Ty);
+ Value *RHS = expand(S->getOperand(i));
if (IsSequential && i != 0)
RHS = Builder.CreateFreeze(RHS);
Value *Sel;
@@ -1440,14 +1351,14 @@ Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
}
-Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
- Instruction *IP) {
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
+ BasicBlock::iterator IP) {
setInsertPoint(IP);
- Value *V = expandCodeForImpl(SH, Ty);
+ Value *V = expandCodeFor(SH, Ty);
return V;
}
-Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
// Expand the code for this SCEV.
Value *V = expand(SH);
@@ -1459,8 +1370,64 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty) {
return V;
}
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
+static bool
+canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I,
+ SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
+ // If the instruction cannot be poison, it's always safe to reuse.
+ if (programUndefinedIfPoison(I))
+ return true;
+
+ // Otherwise, it is possible that I is more poisonous than S. Collect the
+ // poison-contributors of S, and then check whether I has any additional
+ // poison-contributors. Poison that is contributed through poison-generating
+ // flags is handled by dropping those flags instead.
+ SmallPtrSet<const Value *, 8> PoisonVals;
+ SE.getPoisonGeneratingValues(PoisonVals, S);
+
+ SmallVector<Value *> Worklist;
+ SmallPtrSet<Value *, 8> Visited;
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ // Avoid walking large instruction graphs.
+ if (Visited.size() > 16)
+ return false;
+
+ // Either the value can't be poison, or the S would also be poison if it
+ // is.
+ if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
+ continue;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // FIXME: Ignore vscale, even though it technically could be poison. Do this
+ // because SCEV currently assumes it can't be poison. Remove this special
+ // case once we properly model when vscale can be poison.
+ if (auto *II = dyn_cast<IntrinsicInst>(I);
+ II && II->getIntrinsicID() == Intrinsic::vscale)
+ continue;
+
+ if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
+ return false;
+
+ // If the instruction can't create poison, we can recurse to its operands.
+ if (I->hasPoisonGeneratingFlagsOrMetadata())
+ DropPoisonGeneratingInsts.push_back(I);
+
+ for (Value *Op : I->operands())
+ Worklist.push_back(Op);
+ }
+ return true;
+}
+
+Value *SCEVExpander::FindValueInExprValueMap(
+ const SCEV *S, const Instruction *InsertPt,
+ SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (!CanonicalMode && SE.containsAddRecurrence(S))
@@ -1470,20 +1437,24 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (isa<SCEVConstant>(S))
return nullptr;
- // Choose a Value from the set which dominates the InsertPt.
- // InsertPt should be inside the Value's parent loop so as not to break
- // the LCSSA form.
for (Value *V : SE.getSCEVValues(S)) {
Instruction *EntInst = dyn_cast<Instruction>(V);
if (!EntInst)
continue;
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
assert(EntInst->getFunction() == InsertPt->getFunction());
- if (S->getType() == V->getType() &&
- SE.DT.dominates(EntInst, InsertPt) &&
- (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
+ !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ continue;
+
+ // Make sure reusing the instruction is poison-safe.
+ if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts))
return V;
+ DropPoisonGeneratingInsts.clear();
}
return nullptr;
}
@@ -1497,7 +1468,7 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
- Instruction *InsertPt = &*Builder.GetInsertPoint();
+ BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
// We can move insertion point only if there is no div or rem operations
// otherwise we are risky to move it over the check for zero denominator.
@@ -1521,24 +1492,25 @@ Value *SCEVExpander::expand(const SCEV *S) {
L = L->getParentLoop()) {
if (SE.isLoopInvariant(S, L)) {
if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
- InsertPt = Preheader->getTerminator();
- else
+ if (BasicBlock *Preheader = L->getLoopPreheader()) {
+ InsertPt = Preheader->getTerminator()->getIterator();
+ } else {
// LSR sets the insertion point for AddRec start/step values to the
// block start to simplify value reuse, even though it's an invalid
// position. SCEVExpander must correct for this in all cases.
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ InsertPt = L->getHeader()->getFirstInsertionPt();
- while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
- (isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = &*std::next(InsertPt->getIterator());
+ while (InsertPt != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(&*InsertPt) ||
+ isa<DbgInfoIntrinsic>(&*InsertPt))) {
+ InsertPt = std::next(InsertPt);
}
break;
}
@@ -1546,26 +1518,40 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
// Check to see if we already expanded this here.
- auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
if (I != InsertedExpressions.end())
return I->second;
SCEVInsertPointGuard Guard(Builder, this);
- Builder.SetInsertPoint(InsertPt);
+ Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
- Value *V = FindValueInExprValueMap(S, InsertPt);
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
if (!V) {
V = visit(S);
V = fixupLCSSAFormFor(V);
} else {
- // If we're reusing an existing instruction, we are effectively CSEing two
- // copies of the instruction (with potentially different flags). As such,
- // we need to drop any poison generating flags unless we can prove that
- // said flags must be valid for all new users.
- if (auto *I = dyn_cast<Instruction>(V))
- if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
- I->dropPoisonGeneratingFlags();
+ for (Instruction *I : DropPoisonGeneratingInsts) {
+ I->dropPoisonGeneratingFlagsAndMetadata();
+ // See if we can re-infer from first principles any of the flags we just
+ // dropped.
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
+ if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
+ auto *BO = cast<BinaryOperator>(I);
+ BO->setHasNoUnsignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(
+ ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
+ }
+ if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
+ auto *Src = NNI->getOperand(0);
+ if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+ Constant::getNullValue(Src->getType()), I,
+ DL).value_or(false))
+ NNI->setNonNeg(true);
+ }
+ }
}
// Remember the expanded value for this SCEV at this location.
//
@@ -1573,7 +1559,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// the expression at this insertion point. If the mapped value happened to be
// a postinc expansion, it could be reused by a non-postinc user, but only if
// its insertion point was already at the head of the loop.
- InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
return V;
}
@@ -1710,13 +1696,13 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
- Instruction *IP = nullptr;
+ BasicBlock::iterator IP;
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
- IP = &*PN->getParent()->getFirstInsertionPt();
+ IP = PN->getParent()->getFirstInsertionPt();
else
- IP = OrigInc->getNextNode();
+ IP = OrigInc->getNextNonDebugInstruction()->getIterator();
- IRBuilder<> Builder(IP);
+ IRBuilder<> Builder(IP->getParent(), IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.CreateTruncOrBitCast(
OrigInc, IsomorphicInc->getType(), IVName);
@@ -1734,7 +1720,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
- IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+ IRBuilder<> Builder(L->getHeader(),
+ L->getHeader()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
}
@@ -1744,9 +1731,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
- const Instruction *At,
- Loop *L) {
+bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -1763,17 +1750,18 @@ Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return LHS;
+ return true;
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return RHS;
+ return true;
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap. Note that we don't currently model the cost of
// needing to drop poison generating flags on the instruction if we
// want to reuse it. We effectively assume that has zero cost.
- return FindValueInExprValueMap(S, At);
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
}
template<typename T> static InstructionCost costAndCollectOperands(
@@ -1951,7 +1939,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// If we can find an existing value for this scev available at the point "At"
// then consider the expression cheap.
- if (getRelatedExistingExpansion(S, &At, L))
+ if (hasRelatedExistingExpansion(S, &At, L))
return false; // Consider the expression to be free.
TargetTransformInfo::TargetCostKind CostKind =
@@ -1993,7 +1981,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// At the beginning of this function we already tried to find existing
// value for plain 'S'. Now try to lookup 'S + 1' since it is common
// pattern involving division. This is just a simple search heuristic.
- if (getRelatedExistingExpansion(
+ if (hasRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
return false; // Consider it to be free.
@@ -2045,10 +2033,8 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
Instruction *IP) {
- Value *Expr0 =
- expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP);
- Value *Expr1 =
- expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+ Value *Expr0 = expand(Pred->getLHS(), IP);
+ Value *Expr1 = expand(Pred->getRHS(), IP);
Builder.SetInsertPoint(IP);
auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
@@ -2080,17 +2066,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Step >= 0, Start + |Step| * Backedge > Start
// and |Step| * Backedge doesn't unsigned overflow.
- IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
Builder.SetInsertPoint(Loc);
- Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc);
+ Value *TripCountVal = expand(ExitCount, Loc);
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
- Value *StepValue = expandCodeForImpl(Step, Ty, Loc);
- Value *NegStepValue =
- expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeForImpl(Start, ARTy, Loc);
+ Value *StepValue = expand(Step, Loc);
+ Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
+ Value *StartValue = expand(Start, Loc);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
@@ -2136,9 +2120,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
bool NeedPosCheck = !SE.isKnownNegative(Step);
bool NeedNegCheck = !SE.isKnownPositive(Step);
- if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
- StartValue = InsertNoopCastOfTo(
- StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ if (isa<PointerType>(ARTy)) {
Value *NegMulV = Builder.CreateNeg(MulV);
if (NeedPosCheck)
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
@@ -2171,7 +2153,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are
// dropping bits, then we have overflow (unless the step is zero).
- if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
+ if (SrcBits > DstBits) {
auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
auto *BackedgeCheck =
Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
@@ -2244,7 +2226,7 @@ Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
// instruction.
Type *ToTy;
if (DefI->getType()->isIntegerTy())
- ToTy = DefI->getType()->getPointerTo();
+ ToTy = PointerType::get(DefI->getContext(), 0);
else
ToTy = Type::getInt32Ty(DefI->getContext());
Instruction *User =
@@ -2306,12 +2288,6 @@ struct SCEVFindUnsafe {
}
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEV *Step = AR->getStepRecurrence(SE);
- if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
- IsUnsafe = true;
- return false;
- }
-
// For non-affine addrecs or in non-canonical mode we need a preheader
// to insert into.
if (!AR->getLoop()->getLoopPreheader() &&
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d3a9a41aef15..c09cf9c2325c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -271,7 +271,10 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
- bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly);
+ bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+ bool hoistSuccIdenticalTerminatorToSwitchOrIf(
+ Instruction *TI, Instruction *I1,
+ SmallVectorImpl<Instruction *> &OtherSuccTIs);
bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
@@ -499,7 +502,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
return CI;
else
return cast<ConstantInt>(
- ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
}
return nullptr;
}
@@ -819,7 +822,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
static void
EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- llvm::erase_value(Cases, BB);
+ llvm::erase(Cases, BB);
}
/// Return true if there are any keys in C1 that exist in C2 as well.
@@ -1098,12 +1101,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Note that there may be multiple predecessor blocks, so we cannot move
// bonus instructions to a predecessor block.
for (Instruction &BonusInst : *BB) {
- if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
+ if (BonusInst.isTerminator())
continue;
Instruction *NewBonusInst = BonusInst.clone();
- if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
+ if (!isa<DbgInfoIntrinsic>(BonusInst) &&
+ PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
// Unless the instruction has the same !dbg location as the original
// branch, drop it. When we fold the bonus instructions we want to make
// sure we reset their debug locations in order to avoid stepping on
@@ -1113,7 +1117,6 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
RemapInstruction(NewBonusInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- VMap[&BonusInst] = NewBonusInst;
// If we speculated an instruction, we need to drop any metadata that may
// result in undefined behavior, as the metadata might have been valid
@@ -1123,8 +1126,16 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
NewBonusInst->dropUBImplyingAttrsAndMetadata();
NewBonusInst->insertInto(PredBlock, PTI->getIterator());
+ auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
+ RemapDPValueRange(NewBonusInst->getModule(), Range, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ if (isa<DbgInfoIntrinsic>(BonusInst))
+ continue;
+
NewBonusInst->takeName(&BonusInst);
BonusInst.setName(NewBonusInst->getName() + ".old");
+ VMap[&BonusInst] = NewBonusInst;
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
@@ -1303,7 +1314,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
}
for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
NewSuccessors) {
- for (auto I : seq(0, NewSuccessor.second)) {
+ for (auto I : seq(NewSuccessor.second)) {
(void)I;
AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
}
@@ -1408,8 +1419,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
}
// If we would need to insert a select that uses the value of this invoke
-// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
-// can't hoist the invoke, as there is nowhere to put the select in this case.
+// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
+// need to do this), we can't hoist the invoke, as there is nowhere to put the
+// select in this case.
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
Instruction *I1, Instruction *I2) {
for (BasicBlock *Succ : successors(BB1)) {
@@ -1424,9 +1436,9 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
return true;
}
-// Get interesting characteristics of instructions that `HoistThenElseCodeToIf`
-// didn't hoist. They restrict what kind of instructions can be reordered
-// across.
+// Get interesting characteristics of instructions that
+// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
+// instructions can be reordered across.
enum SkipFlags {
SkipReadMem = 1,
SkipSideEffect = 2,
@@ -1484,7 +1496,7 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
-/// Helper function for HoistThenElseCodeToIf. Return true if identical
+/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
const TargetTransformInfo &TTI) {
@@ -1515,62 +1527,51 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
return true;
}
-/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
-/// in the two blocks up into the branch block. The caller of this function
-/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
-/// only perform hoisting in case both blocks only contain a terminator. In that
-/// case, only the original BI will be replaced and selects for PHIs are added.
-bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) {
+/// Hoist any common code in the successor blocks up into the block. This
+/// function guarantees that BB dominates all successors. If EqTermsOnly is
+/// given, only perform hoisting in case all successor blocks only contain a
+/// terminator. In that case, only the original terminator of BB will be
+/// replaced and selects for PHIs are added.
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+ bool EqTermsOnly) {
// This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. In particular, we don't want to get into
- // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
// such, we currently just scan for obviously identical instructions in an
// identical order, possibly separated by the same number of non-identical
// instructions.
- BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
- BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+ unsigned int SuccSize = succ_size(BB);
+ if (SuccSize < 2)
+ return false;
// If either of the blocks has it's address taken, then we can't do this fold,
// because the code we'd hoist would no longer run when we jump into the block
// by it's address.
- if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
- return false;
+ for (auto *Succ : successors(BB))
+ if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
+ return false;
- BasicBlock::iterator BB1_Itr = BB1->begin();
- BasicBlock::iterator BB2_Itr = BB2->begin();
+ auto *TI = BB->getTerminator();
- Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
+ // The second of pair is a SkipFlags bitmask.
+ using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
+ SmallVector<SuccIterPair, 8> SuccIterPairs;
+ for (auto *Succ : successors(BB)) {
+ BasicBlock::iterator SuccItr = Succ->begin();
+ if (isa<PHINode>(*SuccItr))
+ return false;
+ SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
}
- if (isa<PHINode>(I1))
- return false;
-
- BasicBlock *BIParent = BI->getParent();
-
- bool Changed = false;
-
- auto _ = make_scope_exit([&]() {
- if (Changed)
- ++NumHoistCommonCode;
- });
// Check if only hoisting terminators is allowed. This does not add new
// instructions to the hoist location.
if (EqTermsOnly) {
// Skip any debug intrinsics, as they are free to hoist.
- auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
- auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
- if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
- return false;
- if (!I1NonDbg->isTerminator())
- return false;
+ for (auto &SuccIter : make_first_range(SuccIterPairs)) {
+ auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
+ if (!INonDbg->isTerminator())
+ return false;
+ }
// Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
}
@@ -1579,153 +1580,235 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) {
// many instructions we skip, serving as a compilation time control as well as
// preventing excessive increase of life ranges.
unsigned NumSkipped = 0;
+ // If we find an unreachable instruction at the beginning of a basic block, we
+ // can still hoist instructions from the rest of the basic blocks.
+ if (SuccIterPairs.size() > 2) {
+ erase_if(SuccIterPairs,
+ [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
+ if (SuccIterPairs.size() < 2)
+ return false;
+ }
- // Record any skipped instuctions that may read memory, write memory or have
- // side effects, or have implicit control flow.
- unsigned SkipFlagsBB1 = 0;
- unsigned SkipFlagsBB2 = 0;
+ bool Changed = false;
for (;;) {
+ auto *SuccIterPairBegin = SuccIterPairs.begin();
+ auto &BB1ItrPair = *SuccIterPairBegin++;
+ auto OtherSuccIterPairRange =
+ iterator_range(SuccIterPairBegin, SuccIterPairs.end());
+ auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
+
+ Instruction *I1 = &*BB1ItrPair.first;
+ auto *BB1 = I1->getParent();
+
+ // Skip debug info if it is not identical.
+ bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
+ Instruction *I2 = &*Iter;
+ return I1->isIdenticalToWhenDefined(I2);
+ });
+ if (!AllDbgInstsAreIdentical) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*++BB1ItrPair.first;
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*++SuccIter;
+ }
+ }
+
+ bool AllInstsAreIdentical = true;
+ bool HasTerminator = I1->isTerminator();
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter;
+ HasTerminator |= I2->isTerminator();
+ if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
+ AllInstsAreIdentical = false;
+ }
+
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (I1->isTerminator() || I2->isTerminator()) {
+ if (HasTerminator) {
+ // Even if BB, which contains only one unreachable instruction, is ignored
+ // at the beginning of the loop, we can hoist the terminator instruction.
// If any instructions remain in the block, we cannot hoist terminators.
- if (NumSkipped || !I1->isIdenticalToWhenDefined(I2))
+ if (NumSkipped || !AllInstsAreIdentical)
return Changed;
- goto HoistTerminator;
+ SmallVector<Instruction *, 8> Insts;
+ for (auto &SuccIter : OtherSuccIterRange)
+ Insts.push_back(&*SuccIter);
+ return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
}
- if (I1->isIdenticalToWhenDefined(I2) &&
- // Even if the instructions are identical, it may not be safe to hoist
- // them if we have skipped over instructions with side effects or their
- // operands weren't hoisted.
- isSafeToHoistInstr(I1, SkipFlagsBB1) &&
- isSafeToHoistInstr(I2, SkipFlagsBB2) &&
- shouldHoistCommonInstructions(I1, I2, TTI)) {
- if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
- assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
+ if (AllInstsAreIdentical) {
+ unsigned SkipFlagsBB1 = BB1ItrPair.second;
+ AllInstsAreIdentical =
+ isSafeToHoistInstr(I1, SkipFlagsBB1) &&
+ all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
+ Instruction *I2 = &*Pair.first;
+ unsigned SkipFlagsBB2 = Pair.second;
+ // Even if the instructions are identical, it may not
+ // be safe to hoist them if we have skipped over
+ // instructions with side effects or their operands
+ // weren't hoisted.
+ return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
+ shouldHoistCommonInstructions(I1, I2, TTI);
+ });
+ }
+
+ if (AllInstsAreIdentical) {
+ BB1ItrPair.first++;
+ if (isa<DbgInfoIntrinsic>(I1)) {
// The debug location is an integral part of a debug info intrinsic
// and can't be separated from it or replaced. Instead of attempting
// to merge locations, simply hoist both copies of the intrinsic.
- BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
- BIParent->splice(BI->getIterator(), BB2, I2->getIterator());
+ I1->moveBeforePreserving(TI);
+ for (auto &SuccIter : OtherSuccIterRange) {
+ auto *I2 = &*SuccIter++;
+ assert(isa<DbgInfoIntrinsic>(I2));
+ I2->moveBeforePreserving(TI);
+ }
} else {
// For a normal instruction, we just move one to right before the
// branch, then replace all uses of the other with the first. Finally,
// we remove the now redundant second instruction.
- BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->andIRFlags(I2);
- combineMetadataForCSE(I1, I2, true);
-
- // I1 and I2 are being combined into a single instruction. Its debug
- // location is the merged locations of the original instructions.
- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
- I2->eraseFromParent();
+ I1->moveBeforePreserving(TI);
+ BB->splice(TI->getIterator(), BB1, I1->getIterator());
+ for (auto &SuccIter : OtherSuccIterRange) {
+ Instruction *I2 = &*SuccIter++;
+ assert(I2 != I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->andIRFlags(I2);
+ combineMetadataForCSE(I1, I2, true);
+ // I1 and I2 are being combined into a single instruction. Its debug
+ // location is the merged locations of the original instructions.
+ I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+ I2->eraseFromParent();
+ }
}
+ if (!Changed)
+ NumHoistCommonCode += SuccIterPairs.size();
Changed = true;
- ++NumHoistCommonInstrs;
+ NumHoistCommonInstrs += SuccIterPairs.size();
} else {
if (NumSkipped >= HoistCommonSkipLimit)
return Changed;
// We are about to skip over a pair of non-identical instructions. Record
// if any have characteristics that would prevent reordering instructions
// across them.
- SkipFlagsBB1 |= skippedInstrFlags(I1);
- SkipFlagsBB2 |= skippedInstrFlags(I2);
+ for (auto &SuccIterPair : SuccIterPairs) {
+ Instruction *I = &*SuccIterPair.first++;
+ SuccIterPair.second |= skippedInstrFlags(I);
+ }
++NumSkipped;
}
-
- I1 = &*BB1_Itr++;
- I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
- }
}
+}
- return Changed;
+bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
+ Instruction *TI, Instruction *I1,
+ SmallVectorImpl<Instruction *> &OtherSuccTIs) {
-HoistTerminator:
- // It may not be possible to hoist an invoke.
+ auto *BI = dyn_cast<BranchInst>(TI);
+
+ bool Changed = false;
+ BasicBlock *TIParent = TI->getParent();
+ BasicBlock *BB1 = I1->getParent();
+
+ // Use only for an if statement.
+ auto *I2 = *OtherSuccTIs.begin();
+ auto *BB2 = I2->getParent();
+ if (BI) {
+ assert(OtherSuccTIs.size() == 1);
+ assert(BI->getSuccessor(0) == I1->getParent());
+ assert(BI->getSuccessor(1) == I2->getParent());
+ }
+
+ // In the case of an if statement, we try to hoist an invoke.
// FIXME: Can we define a safety predicate for CallBr?
- if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return Changed;
+ // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
+ // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
+ if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
// TODO: callbr hoisting currently disabled pending further study.
if (isa<CallBrInst>(I1))
- return Changed;
+ return false;
for (BasicBlock *Succ : successors(BB1)) {
for (PHINode &PN : Succ->phis()) {
Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
+ for (Instruction *OtherSuccTI : OtherSuccTIs) {
+ Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
+ if (BB1V == BB2V)
+ continue;
- // Check for passingValueIsAlwaysUndefined here because we would rather
- // eliminate undefined control flow then converting it to a select.
- if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
- passingValueIsAlwaysUndefined(BB2V, &PN))
- return Changed;
+ // In the case of an if statement, check for
+ // passingValueIsAlwaysUndefined here because we would rather eliminate
+ // undefined control flow than convert it to a select.
+ if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
+ passingValueIsAlwaysUndefined(BB2V, &PN))
+ return false;
+ }
}
}
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
- NT->insertInto(BIParent, BI->getIterator());
+ NT->insertInto(TIParent, TI->getIterator());
if (!NT->getType()->isVoidTy()) {
I1->replaceAllUsesWith(NT);
- I2->replaceAllUsesWith(NT);
+ for (Instruction *OtherSuccTI : OtherSuccTIs)
+ OtherSuccTI->replaceAllUsesWith(NT);
NT->takeName(I1);
}
Changed = true;
- ++NumHoistCommonInstrs;
+ NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
// Ensure terminator gets a debug location, even an unknown one, in case
// it involves inlinable calls.
- NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+ SmallVector<DILocation *, 4> Locs;
+ Locs.push_back(I1->getDebugLoc());
+ for (auto *OtherSuccTI : OtherSuccTIs)
+ Locs.push_back(OtherSuccTI->getDebugLoc());
+ NT->setDebugLoc(DILocation::getMergedLocations(Locs));
// PHIs created below will adopt NT's merged DebugLoc.
IRBuilder<NoFolder> Builder(NT);
- // Hoisting one of the terminators from our successor is a great thing.
- // Unfortunately, the successors of the if/else blocks may have PHI nodes in
- // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
- // nodes, so we insert select instruction to compute the final result.
- std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
- for (BasicBlock *Succ : successors(BB1)) {
- for (PHINode &PN : Succ->phis()) {
- Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
+ // In the case of an if statement, hoisting one of the terminators from our
+ // successor is a great thing. Unfortunately, the successors of the if/else
+ // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
+ // must agree for all PHI nodes, so we insert select instruction to compute
+ // the final result.
+ if (BI) {
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
+ for (BasicBlock *Succ : successors(BB1)) {
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
- // These values do not agree. Insert a select instruction before NT
- // that determines the right value.
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (!SI) {
- // Propagate fast-math-flags from phi node to its replacement select.
- IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
- if (isa<FPMathOperator>(PN))
- Builder.setFastMathFlags(PN.getFastMathFlags());
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (!SI) {
+ // Propagate fast-math-flags from phi node to its replacement select.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ if (isa<FPMathOperator>(PN))
+ Builder.setFastMathFlags(PN.getFastMathFlags());
- SI = cast<SelectInst>(
- Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
- BB1V->getName() + "." + BB2V->getName(), BI));
- }
+ SI = cast<SelectInst>(Builder.CreateSelect(
+ BI->getCondition(), BB1V, BB2V,
+ BB1V->getName() + "." + BB2V->getName(), BI));
+ }
- // Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
- PN.setIncomingValue(i, SI);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
+ PN.setIncomingValue(i, SI);
+ }
}
}
@@ -1733,16 +1816,16 @@ HoistTerminator:
// Update any PHI nodes in our new successors.
for (BasicBlock *Succ : successors(BB1)) {
- AddPredecessorToBlock(Succ, BIParent, BB1);
+ AddPredecessorToBlock(Succ, TIParent, BB1);
if (DTU)
- Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ Updates.push_back({DominatorTree::Insert, TIParent, Succ});
}
if (DTU)
- for (BasicBlock *Succ : successors(BI))
- Updates.push_back({DominatorTree::Delete, BIParent, Succ});
+ for (BasicBlock *Succ : successors(TI))
+ Updates.push_back({DominatorTree::Delete, TIParent, Succ});
- EraseTerminatorAndDCECond(BI);
+ EraseTerminatorAndDCECond(TI);
if (DTU)
DTU->applyUpdates(Updates);
return Changed;
@@ -1808,10 +1891,19 @@ static bool canSinkInstructions(
}
const Instruction *I0 = Insts.front();
- for (auto *I : Insts)
+ for (auto *I : Insts) {
if (!I->isSameOperationAs(I0))
return false;
+ // swifterror pointers can only be used by a load or store; sinking a load
+ // or store would require introducing a select for the pointer operand,
+ // which isn't allowed for swifterror pointers.
+ if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
+ return false;
+ if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
+ return false;
+ }
+
// All instructions in Insts are known to be the same opcode. If they have a
// use, check that the only user is a PHI or in the same block as the
// instruction, because if a user is in the same block as an instruction we're
@@ -1952,8 +2044,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// Create a new PHI in the successor block and populate it.
auto *Op = I0->getOperand(O);
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
- auto *PN = PHINode::Create(Op->getType(), Insts.size(),
- Op->getName() + ".sink", &BBEnd->front());
+ auto *PN =
+ PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
+ PN->insertBefore(BBEnd->begin());
for (auto *I : Insts)
PN->addIncoming(I->getOperand(O), I->getParent());
NewOperands.push_back(PN);
@@ -1963,7 +2056,8 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// and move it to the start of the successor block.
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
I0->getOperandUse(O).set(NewOperands[O]);
- I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
// Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
@@ -2765,8 +2859,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
Value *OrigV = PN.getIncomingValueForBlock(BB);
Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
- // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
- // Skip PHIs which are trivial.
+ // FIXME: Try to remove some of the duplication with
+ // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
if (ThenV == OrigV)
continue;
@@ -3009,7 +3103,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
// store %merge, %x.dest, !DIAssignID !2
// dbg.assign %merge, "x", ..., !2
for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) {
- if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; }))
+ if (llvm::is_contained(DAI->location_ops(), OrigV))
DAI->replaceVariableLocationOp(OrigV, S);
}
}
@@ -3036,6 +3130,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
}
// Hoist the instructions.
+ // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these
+ // instructions, in the same way that dbg.value intrinsics are dropped at the
+ // end of this block.
+ for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
+ It.dropDbgValues();
BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
std::prev(ThenBB->end()));
@@ -3207,6 +3306,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
TranslateMap[Cond] = CB;
+
+ // RemoveDIs: track instructions that we optimise away while folding, so
+ // that we can copy DPValues from them later.
+ BasicBlock::iterator SrcDbgCursor = BB->begin();
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
@@ -3241,6 +3344,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
TranslateMap[&*BBI] = N;
}
if (N) {
+ // Copy all debug-info attached to instructions from the last we
+ // successfully clone, up to this instruction (they might have been
+ // folded away).
+ for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
+ N->cloneDebugInfoFrom(&*SrcDbgCursor);
+ SrcDbgCursor = std::next(BBI);
+ // Clone debug-info on this instruction too.
+ N->cloneDebugInfoFrom(&*BBI);
+
// Register the new instruction with the assumption cache if necessary.
if (auto *Assume = dyn_cast<AssumeInst>(N))
if (AC)
@@ -3248,6 +3360,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
}
}
+ for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
+ InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
+ InsertPt->cloneDebugInfoFrom(BI);
+
BB->removePredecessor(EdgeBB);
BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
EdgeBI->setSuccessor(0, RealDest);
@@ -3652,22 +3768,22 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ Module *M = BB->getModule();
+
+ if (PredBlock->IsNewDbgInfoFormat) {
+ PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
+ for (DPValue &DPV : PredBlock->getTerminator()->getDbgValueRange()) {
+ RemapDPValue(M, &DPV, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
+
// Now that the Cond was cloned into the predecessor basic block,
// or/and the two conditions together.
Value *BICond = VMap[BI->getCondition()];
PBI->setCondition(
createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
- // Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB) {
- if (isa<DbgInfoIntrinsic>(I)) {
- Instruction *NewI = I.clone();
- RemapInstruction(NewI, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- NewI->insertBefore(PBI);
- }
- }
-
++NumFoldBranchToCommonDest;
return true;
}
@@ -3867,7 +3983,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
(!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
return V;
- PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
+ PHI->insertBefore(Succ->begin());
PHI->addIncoming(V, BB);
for (BasicBlock *PredBB : predecessors(Succ))
if (PredBB != BB)
@@ -3991,7 +4108,9 @@ static bool mergeConditionalStoreToAddress(
Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
QStore->getParent(), PPHI);
- IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+ BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
+ IRBuilder<> QB(PostBB, PostBBFirst);
+ QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
@@ -4002,9 +4121,11 @@ static bool mergeConditionalStoreToAddress(
QPred = QB.CreateNot(QPred);
Value *CombinedPred = QB.CreateOr(PPred, QPred);
- auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
+ BasicBlock::iterator InsertPt = QB.GetInsertPoint();
+ auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
/*Unreachable=*/false,
/*BranchWeights=*/nullptr, DTU);
+
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
@@ -4140,10 +4261,10 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// 2) We can sink side effecting instructions into BI's fallthrough
// successor provided they doesn't contribute to computation of
// BI's condition.
- Value *CondWB, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
- IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+ BasicBlock *IfTrueBB = PBI->getSuccessor(0);
+ BasicBlock *IfFalseBB = PBI->getSuccessor(1);
+ if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
+ !BI->getParent()->getSinglePredecessor())
return false;
if (!IfFalseBB->phis().empty())
return false; // TODO
@@ -4256,6 +4377,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (PBI->getSuccessor(PBIOp) == BB)
return false;
+ // If predecessor's branch probability to BB is too low don't merge branches.
+ SmallVector<uint32_t, 2> PredWeights;
+ if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
+ extractBranchWeights(*PBI, PredWeights) &&
+ (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
+
+ BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
+ PredWeights[PBIOp],
+ static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
+
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ if (CommonDestProb >= Likely)
+ return false;
+ }
+
// Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
@@ -5088,6 +5224,15 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
bool Changed = false;
+ // Ensure that any debug-info records that used to occur after the Unreachable
+ // are moved to in front of it -- otherwise they'll "dangle" at the end of
+ // the block.
+ BB->flushTerminatorDbgValues();
+
+ // Debug-info records on the unreachable inst itself should be deleted, as
+ // below we delete everything past the final executable instruction.
+ UI->dropDbgValues();
+
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
while (UI->getIterator() != BB->begin()) {
@@ -5104,6 +5249,10 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
// block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
// and we can therefore guarantee this block will be erased.
+ // If we're deleting this, we're deleting any subsequent dbg.values, so
+ // delete DPValue records of variable information.
+ BBI->dropDbgValues();
+
// Delete this instruction (any uses are guaranteed to be dead)
BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
BBI->eraseFromParent();
@@ -5667,7 +5816,7 @@ getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
- if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
+ if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
return false;
Pred = CaseDest;
CaseDest = I.getSuccessor(0);
@@ -5890,8 +6039,8 @@ static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
// Remove the switch.
- while (PHI->getBasicBlockIndex(SelectBB) >= 0)
- PHI->removeIncomingValue(SelectBB);
+ PHI->removeIncomingValueIf(
+ [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
PHI->addIncoming(SelectValue, SelectBB);
SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
@@ -6051,8 +6200,9 @@ SwitchLookupTable::SwitchLookupTable(
bool LinearMappingPossible = true;
APInt PrevVal;
APInt DistToPrev;
- // When linear map is monotonic, we can attach nsw.
- bool Wrapped = false;
+ // When linear map is monotonic and signed overflow doesn't happen on
+ // maximum index, we can attach nsw on Add and Mul.
+ bool NonMonotonic = false;
assert(TableSize >= 2 && "Should be a SingleValue table.");
// Check if there is the same distance between two consecutive values.
for (uint64_t I = 0; I < TableSize; ++I) {
@@ -6072,7 +6222,7 @@ SwitchLookupTable::SwitchLookupTable(
LinearMappingPossible = false;
break;
}
- Wrapped |=
+ NonMonotonic |=
Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
}
PrevVal = Val;
@@ -6080,7 +6230,10 @@ SwitchLookupTable::SwitchLookupTable(
if (LinearMappingPossible) {
LinearOffset = cast<ConstantInt>(TableContents[0]);
LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
- LinearMapValWrapped = Wrapped;
+ bool MayWrap = false;
+ APInt M = LinearMultiplier->getValue();
+ (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
+ LinearMapValWrapped = NonMonotonic || MayWrap;
Kind = LinearMapKind;
++NumLinearMaps;
return;
@@ -6503,9 +6656,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If the default destination is unreachable, or if the lookup table covers
// all values of the conditional variable, branch directly to the lookup table
// BB. Otherwise, check that the condition is within the case range.
- const bool DefaultIsReachable =
+ bool DefaultIsReachable =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
@@ -6536,6 +6688,28 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
BranchInst *RangeCheckBranch = nullptr;
+ // Grow the table to cover all possible index values to avoid the range check.
+ // It will use the default result to fill in the table hole later, so make
+ // sure it exists.
+ if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
+ ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
+ // Growing the table shouldn't have any size impact; this is ensured by
+ // checking WouldFitInRegister.
+ // TODO: Consider growing the table also when it doesn't fit in a register
+ // if no optsize is specified.
+ const uint64_t UpperBound = CR.getUpper().getLimitedValue();
+ if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, UpperBound, KV.second /* ResultType */);
+ })) {
+ // The default branch is unreachable after we enlarge the lookup table.
+ // Adjust DefaultIsReachable to reuse code path.
+ TableSize = UpperBound;
+ DefaultIsReachable = false;
+ }
+ }
+
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
if (DTU)
@@ -6697,9 +6871,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// This transform can be done speculatively because it is so cheap - it
// results in a single rotate operation being inserted.
- // FIXME: It's possible that optimizing a switch on powers of two might also
- // be beneficial - flag values are often powers of two and we could use a CLZ
- // as the key function.
// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
@@ -6744,6 +6915,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+/// Tries to rewrite a switch over powers of two as a switch over the
+/// trailing-zero count of its condition, which reduces the switch range.
+/// For example, a switch like:
+/// switch (C) { case 1: case 2: case 64: case 128: }
+/// will be transformed to:
+/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
+/// This transformation allows better lowering and could allow transforming into
+/// a lookup table. Returns true if the switch was rewritten, false otherwise.
+static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ Value *Condition = SI->getCondition();
+ LLVMContext &Context = SI->getContext();
+ auto *CondTy = cast<IntegerType>(Condition->getType());
+
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+
+ const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
+ {Condition, ConstantInt::getTrue(Context)}),
+ TTI::TCK_SizeAndLatency);
+
+ if (CttzIntrinsicCost > TTI::TCC_Basic)
+ // Inserting the cttz intrinsic is too expensive on this target.
+ return false;
+
+ // Only bother with this optimization if there are more than 3 switch cases.
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // We perform this optimization only for switches with an
+ // unreachable default case.
+ // This assumption saves us from checking if `Condition` is a power of two.
+ if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
+ return false;
+
+ // Check that all switch case values are powers of two; bail out otherwise.
+ SmallVector<uint64_t, 4> Values;
+ for (const auto &Case : SI->cases()) {
+ uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
+ if (llvm::has_single_bit(CaseValue))
+ Values.push_back(CaseValue);
+ else
+ return false;
+ }
+
+ // isSwitchDense requires case values to be sorted.
+ llvm::sort(Values);
+ if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
+ llvm::countr_zero(Values.front()) + 1))
+ // The transform would be unable to generate a dense switch.
+ return false;
+
+ Builder.SetInsertPoint(SI);
+
+ // Replace each case value with its number of trailing zeros.
+ for (auto &Case : SI->cases()) {
+ auto *OrigValue = Case.getCaseValue();
+ Case.setValue(ConstantInt::get(OrigValue->getType(),
+ OrigValue->getValue().countr_zero()));
+ }
+
+ // Replace the condition with its number of trailing zeros.
+ auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
+ Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
+
+ SI->setCondition(ConditionTrailingZeros);
+
+ return true;
+}
+
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
@@ -6791,9 +7036,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ return requestResimplify();
+
if (ReduceSwitchRange(SI, Builder, DL, TTI))
return requestResimplify();
+ if (HoistCommon &&
+ hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+ return requestResimplify();
+
return false;
}
@@ -6978,7 +7230,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (Options.SpeculateBlocks &&
+ FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
return false;
@@ -7048,7 +7301,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (Options.SpeculateBlocks &&
+ FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
@@ -7058,7 +7312,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts))
+ if (HoistCommon && hoistCommonCodeFromSuccessors(
+ BI->getParent(), !Options.HoistCommonInsts))
return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index a28916bc9baf..722ed03db3de 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -539,7 +539,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
for (auto *ICI : ICmpUsers) {
bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
- Instruction *Ext = nullptr;
+ IRBuilder<> Builder(ICI);
+ Value *Ext = nullptr;
// For signed/unsigned predicate, replace the old comparison with comparison
// of immediate IV against sext/zext of the invariant argument. If we can
// use either sext or zext (i.e. we are dealing with equality predicate),
@@ -550,18 +551,18 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
if (CanUseZExt(ICI)) {
assert(DoesZExtCollapse && "Unprofitable zext?");
- Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
+ Ext = Builder.CreateZExt(Op1, IVTy, "zext");
Pred = ICmpInst::getUnsignedPredicate(Pred);
} else {
assert(DoesSExtCollapse && "Unprofitable sext?");
- Ext = new SExtInst(Op1, IVTy, "sext", ICI);
+ Ext = Builder.CreateSExt(Op1, IVTy, "sext");
assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
}
bool Changed;
L->makeLoopInvariant(Ext, Changed);
(void)Changed;
- ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
- ICI->replaceAllUsesWith(NewICI);
+ auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
+ ICI->replaceAllUsesWith(NewCmp);
DeadInsts.emplace_back(ICI);
}
@@ -659,12 +660,12 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
// Get the symbolic expression for this instruction.
const SCEV *IV = SE->getSCEV(IVOperand);
- unsigned MaskBits;
+ int MaskBits;
if (UseInst->getOpcode() == CastInst::SIToFP)
- MaskBits = SE->getSignedRange(IV).getMinSignedBits();
+ MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
else
- MaskBits = SE->getUnsignedRange(IV).getActiveBits();
- unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+ MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
+ int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
if (MaskBits <= DestNumSigBits) {
for (User *U : UseInst->users()) {
// Match for fptosi/fptoui of sitofp and with same type.
@@ -908,8 +909,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
if (replaceIVUserWithLoopInvariant(UseInst))
continue;
- // Go further for the bitcast ''prtoint ptr to i64'
- if (isa<PtrToIntInst>(UseInst))
+ // Go further for the bitcast 'ptrtoint ptr to i64' or if the cast is done
+ // by truncation
+ if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
for (Use &U : UseInst->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (replaceIVUserWithLoopInvariant(User))
@@ -1373,16 +1375,32 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
- if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap())
- ExtendOperExpr = SE->getSignExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
- ExtendOperExpr = SE->getZeroExtendExpr(
- SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
+ !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
+ ExtKind = ExtendKind::Unknown;
+
+ // For a non-negative NarrowDef, we can choose either type of
+ // extension. We want to use the current extend kind if legal
+ // (see above), and we only hit this code if we need to check
+ // the opposite case.
+ if (DU.NeverNegative) {
+ if (OBO->hasNoSignedWrap()) {
+ ExtKind = ExtendKind::Sign;
+ } else if (OBO->hasNoUnsignedWrap()) {
+ ExtKind = ExtendKind::Zero;
+ }
+ }
+ }
+
+ const SCEV *ExtendOperExpr =
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
+ if (ExtKind == ExtendKind::Sign)
+ ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
+ else if (ExtKind == ExtendKind::Zero)
+ ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
else
return {nullptr, ExtendKind::Unknown};
@@ -1493,10 +1511,6 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
- if (!InsertPt)
- return false;
- IRBuilder<> Builder(InsertPt);
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
@@ -1673,7 +1687,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
"Not a LCSSA Phi?");
WidePN->addIncoming(WideBO, LoopExitingBlock);
- Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt());
+ Builder.SetInsertPoint(User->getParent(),
+ User->getParent()->getFirstInsertionPt());
auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
User->replaceAllUsesWith(TruncPN);
DeadInsts.emplace_back(User);
@@ -1726,7 +1741,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+ BasicBlock *WidePhiBB = WidePhi->getParent();
+ IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
@@ -1786,65 +1802,70 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
return nullptr;
}
- // Does this user itself evaluate to a recurrence after widening?
- WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
- if (!WideAddRec.first)
- WideAddRec = getWideRecurrence(DU);
-
- assert((WideAddRec.first == nullptr) ==
- (WideAddRec.second == ExtendKind::Unknown));
- if (!WideAddRec.first) {
- // If use is a loop condition, try to promote the condition instead of
- // truncating the IV first.
- if (widenLoopCompare(DU))
+ auto tryAddRecExpansion = [&]() -> Instruction* {
+ // Does this user itself evaluate to a recurrence after widening?
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
+ assert((WideAddRec.first == nullptr) ==
+ (WideAddRec.second == ExtendKind::Unknown));
+ if (!WideAddRec.first)
return nullptr;
- // We are here about to generate a truncate instruction that may hurt
- // performance because the scalar evolution expression computed earlier
- // in WideAddRec.first does not indicate a polynomial induction expression.
- // In that case, look at the operands of the use instruction to determine
- // if we can still widen the use instead of truncating its operand.
- if (widenWithVariantUse(DU))
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = cloneIVUser(DU, WideAddRec.first);
+ if (!WideUse)
+ return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+ LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+ << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+ << "\n");
+ DeadInsts.emplace_back(WideUse);
return nullptr;
+ };
- // This user does not evaluate to a recurrence after widening, so don't
- // follow it. Instead insert a Trunc to kill off the original use,
- // eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT, LI);
- return nullptr;
- }
+ // if we reached this point then we are going to replace
+ // DU.NarrowUse with WideUse. Reattach DbgValue then.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
- // Reuse the IV increment that SCEVExpander created as long as it dominates
- // NarrowUse.
- Instruction *WideUse = nullptr;
- if (WideAddRec.first == WideIncExpr &&
- Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
- WideUse = WideInc;
- else {
- WideUse = cloneIVUser(DU, WideAddRec.first);
- if (!WideUse)
- return nullptr;
- }
- // Evaluation of WideAddRec ensured that the narrow expression could be
- // extended outside the loop without overflow. This suggests that the wide use
- // evaluates to the same expression as the extended narrow use, but doesn't
- // absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simply throw away the newly created wide use.
- if (WideAddRec.first != SE->getSCEV(WideUse)) {
- LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
- << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
- << "\n");
- DeadInsts.emplace_back(WideUse);
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+ };
+
+ if (auto *I = tryAddRecExpansion())
+ return I;
+
+ // If use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (widenLoopCompare(DU))
return nullptr;
- }
- // if we reached this point then we are going to replace
- // DU.NarrowUse with WideUse. Reattach DbgValue then.
- replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+ // We are here about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantUse(DU))
+ return nullptr;
- ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
- // Returning WideUse pushes it on the worklist.
- return WideUse;
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ truncateIVUse(DU, DT, LI);
+ return nullptr;
}
/// Add eligible users of NarrowDef to NarrowIVUsers.
@@ -1944,13 +1965,15 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
WideInc =
- cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
- WideIncExpr = SE->getSCEV(WideInc);
- // Propagate the debug location associated with the original loop increment
- // to the new (widened) increment.
- auto *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ if (WideInc) {
+ WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop
+ // increment to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ }
}
LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5b0951252c07..760a626c8b6f 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
return ConstantInt::get(RetTy, Result);
}
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
const DataLayout &DL) {
- if (!isOnlyUsedInZeroComparison(CI))
+ if (!isOnlyUsedInComparisonWithZero(CI))
return false;
if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
@@ -1136,7 +1148,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+ return CI->getArgOperand(0);
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
@@ -1164,7 +1176,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+ return CI->getArgOperand(0);
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
@@ -1174,16 +1186,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = castToCStr(CI->getArgOperand(0), B);
- Result =
- B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
- return B.CreateBitCast(Result, CI->getType());
+ return B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), CI->getArgOperand(0),
+ Offset, "strstr");
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ return emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
}
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
@@ -1380,7 +1389,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
if (isOnlyUsedInEqualityComparison(CI, SrcStr))
// S is dereferenceable so it's safe to load from it and fold
// memchr(S, C, N) == S to N && *S == C for any C and N.
- // TODO: This is safe even even for nonconstant S.
+ // TODO: This is safe even for nonconstant S.
return memChrToCharCompare(CI, Size, B, DL);
// From now on we need a constant length and constant array.
@@ -1522,12 +1531,10 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
// memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
if (Len == 1) {
- Value *LHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
+ Value *LHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), LHS, "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), RHS, "rhsc"),
+ CI->getType(), "rhsv");
return B.CreateSub(LHSV, RHSV, "chardiff");
}
@@ -1833,7 +1840,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
StringRef CallerName = CI->getFunction()->getName();
if (!CallerName.empty() && CallerName.back() == 'f' &&
CallerName.size() == (CalleeName.size() + 1) &&
- CallerName.startswith(CalleeName))
+ CallerName.starts_with(CalleeName))
return nullptr;
}
@@ -2368,8 +2375,8 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
- Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
- : Intrinsic::maxnum;
+ Intrinsic::ID IID = Callee->getName().starts_with("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
return copyFlags(
*CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)}));
@@ -3066,7 +3073,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(Dest, B);
+ Value *Ptr = Dest;
B.CreateStore(V, Ptr);
Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
@@ -3093,9 +3100,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
return ConstantInt::get(CI->getType(), SrcLen - 1);
} else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
// sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
- // Handle mismatched pointer types (goes away with typeless pointers?).
- V = B.CreatePointerCast(V, B.getInt8PtrTy());
- Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
return B.CreateIntCast(PtrDiff, CI->getType(), false);
}
@@ -3261,7 +3265,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
if (!CI->getArgOperand(3)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(DstArg, B);
+ Value *Ptr = DstArg;
B.CreateStore(V, Ptr);
Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
@@ -3397,8 +3401,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
// If this is writing one byte, turn it into fputc.
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(B.getInt8Ty(),
- castToCStr(CI->getArgOperand(0), B), "char");
+ Value *Char = B.CreateLoad(B.getInt8Ty(), CI->getArgOperand(0), "char");
Type *IntTy = B.getIntNTy(TLI->getIntSize());
Value *Cast = B.CreateIntCast(Char, IntTy, /*isSigned*/ true, "chari");
Value *NewCI = emitFPutC(Cast, CI->getArgOperand(3), B, TLI);
diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 0ff88e8b4612..6094f36a77f4 100644
--- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -18,8 +18,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
using namespace llvm;
@@ -66,21 +64,3 @@ PreservedAnalyses StripGCRelocates::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-struct StripGCRelocatesLegacy : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- StripGCRelocatesLegacy() : FunctionPass(ID) {
- initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {}
-
- bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
-};
-char StripGCRelocatesLegacy::ID = 0;
-} // namespace
-
-INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
- "Strip gc.relocates inserted through RewriteStatepointsForGC",
- true, false)
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index c3ae43e567b0..8b4f34209e85 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -68,8 +68,6 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 2b706858cbed..d5468909dd4e 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -16,33 +16,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
-char UnifyFunctionExitNodesLegacyPass::ID = 0;
-
-UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass()
- : FunctionPass(ID) {
- initializeUnifyFunctionExitNodesLegacyPassPass(
- *PassRegistry::getPassRegistry());
-}
-
-INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn",
- "Unify function exit nodes", false, false)
-
-Pass *llvm::createUnifyFunctionExitNodesPass() {
- return new UnifyFunctionExitNodesLegacyPass();
-}
-
-void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
- AnalysisUsage &AU) const {
- // We preserve the non-critical-edgeness property
- AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
-}
-
namespace {
bool unifyUnreachableBlocks(Function &F) {
@@ -110,16 +86,6 @@ bool unifyReturnBlocks(Function &F) {
}
} // namespace
-// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
-// all returns to unconditional branches to this new basic block. Also, unify
-// all unreachable blocks.
-bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) {
- bool Changed = false;
- Changed |= unifyUnreachableBlocks(F);
- Changed |= unifyReturnBlocks(F);
- return Changed;
-}
-
PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
FunctionAnalysisManager &AM) {
bool Changed = false;
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 8c781f59ff5a..2f37f7f972cb 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() {
INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
@@ -234,6 +231,8 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+
return runImpl(LI, DT);
}
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 91c743f17764..51e1e824dd26 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
- initializeAssumeBuilderPassLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeCanonicalizeFreezeInLoopsPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
@@ -30,9 +29,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
- initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
- initializeStripGCRelocatesLegacyPass(Registry);
- initializePredicateInfoPrinterLegacyPassPass(Registry);
initializeFixIrreduciblePass(Registry);
initializeUnifyLoopExitsLegacyPassPass(Registry);
}
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 3446e31cc2ef..71d0f09e4771 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -145,6 +146,7 @@ public:
Value *mapValue(const Value *V);
void remapInstruction(Instruction *I);
void remapFunction(Function &F);
+ void remapDPValue(DPValue &DPV);
Constant *mapConstant(const Constant *C) {
return cast_or_null<Constant>(mapValue(C));
@@ -535,6 +537,39 @@ Value *Mapper::mapValue(const Value *V) {
return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
+void Mapper::remapDPValue(DPValue &V) {
+ // Remap variables and DILocations.
+ auto *MappedVar = mapMetadata(V.getVariable());
+ auto *MappedDILoc = mapMetadata(V.getDebugLoc());
+ V.setVariable(cast<DILocalVariable>(MappedVar));
+ V.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc)));
+
+ // Find Value operands and remap those.
+ SmallVector<Value *, 4> Vals, NewVals;
+ for (Value *Val : V.location_ops())
+ Vals.push_back(Val);
+ for (Value *Val : Vals)
+ NewVals.push_back(mapValue(Val));
+
+ // If there are no changes to the Value operands, finished.
+ if (Vals == NewVals)
+ return;
+
+ bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals;
+
+ // Otherwise, do some replacement.
+ if (!IgnoreMissingLocals &&
+ llvm::any_of(NewVals, [&](Value *V) { return V == nullptr; })) {
+ V.setKillLocation();
+ } else {
+ // Either we have all non-empty NewVals, or we're permitted to ignore
+ // missing locals.
+ for (unsigned int I = 0; I < Vals.size(); ++I)
+ if (NewVals[I])
+ V.replaceVariableLocationOp(I, NewVals[I]);
+ }
+}
+
Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
Function *F = cast<Function>(mapValue(BA.getFunction()));
@@ -1179,6 +1214,17 @@ void ValueMapper::remapInstruction(Instruction &I) {
FlushingMapper(pImpl)->remapInstruction(&I);
}
+void ValueMapper::remapDPValue(Module *M, DPValue &V) {
+ FlushingMapper(pImpl)->remapDPValue(V);
+}
+
+void ValueMapper::remapDPValueRange(
+ Module *M, iterator_range<DPValue::self_iterator> Range) {
+ for (DPValue &DPV : Range) {
+ remapDPValue(M, DPV);
+ }
+}
+
void ValueMapper::remapFunction(Function &F) {
FlushingMapper(pImpl)->remapFunction(F);
}