Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 320
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 477
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp | 142
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp | 103
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp | 202
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp | 261
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp | 82
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp | 734
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp (renamed from contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp) | 89
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp | 108
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 129
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp | 485
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp | 150
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 403
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp | 104
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp | 235
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp | 178
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp | 225
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 118
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 753
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1874
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 1130
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 105
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 113
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp | 28
55 files changed, 6045 insertions, 3153 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 84a66e1e96d2..ccdcf7cbce38 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -17,9 +17,6 @@
#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-
-#include <iostream>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 7ff73fcdada7..3daff3b4430b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -52,6 +52,7 @@ namespace {
bool isUsefullToPreserve(Attribute::AttrKind Kind) {
switch (Kind) {
case Attribute::NonNull:
+ case Attribute::NoUndef:
case Attribute::Alignment:
case Attribute::Dereferenceable:
case Attribute::DereferenceableOrNull:
@@ -69,7 +70,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
default:
return RK;
case Attribute::NonNull:
- RK.WasOn = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ RK.WasOn = getUnderlyingObject(RK.WasOn);
return RK;
case Attribute::Alignment: {
Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
@@ -145,7 +146,7 @@ struct AssumeBuilderState {
if (!RK.WasOn)
return true;
if (RK.WasOn->getType()->isPointerTy()) {
- Value *UnderlyingPtr = GetUnderlyingObject(RK.WasOn, M->getDataLayout());
+ Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn);
if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr))
return false;
}
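
Note: the two hunks above track an upstream ValueTracking rename: GetUnderlyingObject(V, DL) became getUnderlyingObject(V), dropping the DataLayout argument. A minimal sketch of the new call pattern; the helper name is hypothetical, the real call sites are in the hunks above:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Hypothetical helper mirroring the AssumeBuilderState check above:
    // strip casts/GEPs and ask whether the base object is locally known.
    static bool basedOnKnownObject(Value *Ptr) {
      Value *Base = getUnderlyingObject(Ptr); // no DataLayout needed anymore
      return isa<AllocaInst>(Base) || isa<GlobalValue>(Base);
    }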
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 085d91031cf9..6bcd42c4c6d8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -105,7 +105,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
for (BasicBlock *BB : BBs)
if (DTU)
@@ -136,9 +136,10 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return !DeadBlocks.empty();
}
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
+bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceResults *MemDep) {
- if (!isa<PHINode>(BB->begin())) return;
+ if (!isa<PHINode>(BB->begin()))
+ return false;
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
@@ -151,6 +152,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
PN->eraseFromParent();
}
+ return true;
}
bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
@@ -228,19 +230,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
- Updates.reserve(1 + (2 * succ_size(BB)));
+ SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
+ succ_end(BB));
+ Updates.reserve(1 + (2 * UniqueSuccessors.size()));
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
// respectively, it is beneficial to have all insert updates first. Deleting
// edges first may lead to unreachable blocks, followed by inserting edges
// making the blocks reachable again. Such DT updates lead to high compile
// times. We add inserts before deletes here to reduce compile time.
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
// This successor of BB may already have PredBB as a predecessor.
- if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
- Updates.push_back({DominatorTree::Insert, PredBB, *I});
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- Updates.push_back({DominatorTree::Delete, BB, *I});
+ if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
+ Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
@@ -285,11 +289,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Add unreachable to now empty BB.
new UnreachableInst(BB->getContext(), BB);
- // Eliminate duplicate/redundant dbg.values. This seems to be a good place to
- // do that since we might end up with redundant dbg.values describing the
- // entry PHI node post-splice.
- RemoveRedundantDbgInstrs(PredBB);
-
// Inherit predecessors name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
@@ -306,7 +305,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
isa<UnreachableInst>(BB->getTerminator()) &&
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
@@ -498,14 +497,16 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
}
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
Instruction *LatchTerm = BB->getTerminator();
if (SplitCriticalEdge(
LatchTerm, SuccNum,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
+ BBName))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -515,14 +516,15 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName,
+ /*Before=*/true);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
}
unsigned
@@ -540,9 +542,16 @@ llvm::SplitAllCriticalEdges(Function &F,
return NumBroken;
}
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU, const Twine &BBName) {
+static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ const Twine &BBName, bool Before) {
+ if (Before) {
+ DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ return splitBlockBefore(Old, SplitPt,
+ DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU,
+ BBName);
+ }
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
@@ -556,7 +565,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, *LI);
- if (DT)
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
+ succ_end(New));
+ Updates.push_back({DominatorTree::Insert, Old, New});
+ Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
+ for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
+ Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
+ Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
+ }
+
+ DTU->applyUpdates(Updates);
+ } else if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -574,14 +596,94 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
return New;
}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName,
+ Before);
+}
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, const Twine &BBName,
+ bool Before) {
+ return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName,
+ Before);
+}
+
+BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ const Twine &BBName) {
+
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+ ++SplitIt;
+ std::string Name = BBName.str();
+ BasicBlock *New = Old->splitBasicBlock(
+ SplitIt, Name.empty() ? Old->getName() + ".split" : Name,
+ /* Before=*/true);
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LI)
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+ // New dominates Old. The predecessor nodes of the Old node dominate
+ // New node.
+ SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
+ pred_end(New));
+ DTUpdates.push_back({DominatorTree::Insert, New, Old});
+ DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
+ for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
+ DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
+ DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
+ }
+
+ DTU->applyUpdates(DTUpdates);
+
+ // Move MemoryAccesses still tracked in Old, but part of New now.
+ // Update accesses in successor blocks accordingly.
+ if (MSSAU) {
+ MSSAU->applyUpdates(DTUpdates, DTU->getDomTree());
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+ }
+ return New;
+}
+
/// Update DominatorTree, LoopInfo, and LCCSA analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
- if (DT) {
+ if (DTU) {
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*NewBB->getParent());
+ } else {
+ // Split block expects NewBB to have a non-empty set of predecessors.
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+ Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
+ Updates.reserve(Updates.size() + 2 * UniquePreds.size());
+ for (auto *UniquePred : UniquePreds) {
+ Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
+ Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
+ }
+ DTU->applyUpdates(Updates);
+ }
+ } else if (DT) {
if (OldBB == DT->getRootNode()->getBlock()) {
assert(NewBB == &NewBB->getParent()->getEntryBlock());
DT->setNewRoot(NewBB);
@@ -599,6 +701,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
if (!LI)
return;
+ if (DTU && DTU->hasDomTree())
+ DT = &DTU->getDomTree();
assert(DT && "DT should be available to update LoopInfo!");
Loop *L = LI->getLoopFor(OldBB);
@@ -732,11 +836,17 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
+
+static BasicBlock *
+SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DomTreeUpdater *DTU,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
// Do not attempt to split that which cannot be split.
if (!BB->canSplitPredecessors())
return nullptr;
@@ -747,8 +857,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
SmallVector<BasicBlock*, 2> NewBBs;
std::string NewName = std::string(Suffix) + ".split-lp";
- SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, MSSAU, PreserveLCSSA);
+ SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs,
+ DTU, DT, LI, MSSAU, PreserveLCSSA);
return NewBBs[0];
}
@@ -758,12 +868,22 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ Loop *L = nullptr;
+ BasicBlock *OldLatch = nullptr;
// Splitting the predecessors of a loop header creates a preheader block.
- if (LI && LI->isLoopHeader(BB))
+ if (LI && LI->isLoopHeader(BB)) {
+ L = LI->getLoopFor(BB);
// Using the loop start line number prevents debuggers stepping into the
// loop body for this instruction.
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
- else
+ BI->setDebugLoc(L->getStartLoc());
+
+ // If BB is the header of the Loop, it is possible that the loop is
+ // modified, such that the current latch does not remain the latch of the
+ // loop. If that is the case, the loop metadata from the current latch needs
+ // to be applied to the new latch.
+ OldLatch = L->getLoopLatch();
+ } else
BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
@@ -790,7 +910,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
+ UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
if (!Preds.empty()) {
@@ -798,16 +918,41 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
}
+ if (OldLatch) {
+ BasicBlock *NewLatch = L->getLoopLatch();
+ if (NewLatch != OldLatch) {
+ MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
+ NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
+ OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
+ }
+ }
+
return NewBB;
}
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix1, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI,
+ MSSAU, PreserveLCSSA);
+}
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU,
+ /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA);
+}
+
+static void SplitLandingPadPredecessorsImpl(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -832,8 +977,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
- HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU,
+ PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
@@ -868,7 +1013,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -905,6 +1050,29 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(
+ OrigBB, Preds, Suffix1, Suffix2, NewBBs,
+ /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA);
+}
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
+ return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2,
+ NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU,
+ PreserveLCSSA);
+}
+
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred,
DomTreeUpdater *DTU) {
@@ -964,14 +1132,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI,
- BasicBlock *ThenBlock) {
+static Instruction *
+SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DomTreeUpdater *DTU, DominatorTree *DT,
+ LoopInfo *LI, BasicBlock *ThenBlock) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ if (DTU) {
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
+ succ_end(Tail));
+ Updates.push_back({DominatorTree::Insert, Head, Tail});
+ Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
+ for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
+ Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
+ Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
+ }
+ }
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
Instruction *CheckTerm;
@@ -980,17 +1158,24 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
- else
+ else {
CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
+ }
CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
} else
CheckTerm = ThenBlock->getTerminator();
BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
+ BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
- if (DT) {
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ else if (DT) {
if (DomTreeNode *OldNode = DT->getNode(Head)) {
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
@@ -1016,6 +1201,27 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
return CheckTerm;
}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights,
+ /*DTU=*/nullptr, DT, LI, ThenBlock);
+}
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DomTreeUpdater *DTU, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
+ return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
+ BranchWeights, DTU, /*DT=*/nullptr, LI,
+ ThenBlock);
+}
+
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
@@ -1326,11 +1532,11 @@ BasicBlock *llvm::CreateControlFlowHub(
SmallVector<DominatorTree::UpdateType, 16> Updates;
if (DTU) {
for (auto In : Incoming) {
+ Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
for (auto Succ : successors(In)) {
if (Outgoing.count(Succ))
Updates.push_back({DominatorTree::Delete, In, Succ});
}
- Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock});
}
}
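
Note: the dominant change in BasicBlockUtils.cpp is that the splitting utilities (SplitBlock, splitBlockBefore, SplitBlockPredecessors, SplitLandingPadPredecessors, SplitBlockAndInsertIfThen) gained DomTreeUpdater-based overloads alongside the existing DominatorTree ones, with successors deduplicated before DT edge updates are queued. A hedged sketch of how a caller might use the new DTU overload of SplitBlock; the helper and its names are illustrative, not part of the patch:

    #include "llvm/Analysis/DomTreeUpdater.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Split at instruction I, funnelling CFG updates through a lazy
    // DomTreeUpdater instead of patching the DominatorTree by hand.
    static BasicBlock *splitAt(Instruction *I, DominatorTree &DT, LoopInfo *LI) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      BasicBlock *Tail = SplitBlock(I->getParent(), I, &DTU, LI,
                                    /*MSSAU=*/nullptr, "tail",
                                    /*Before=*/false);
      DTU.flush(); // apply the queued insert/delete edge updates
      return Tail;
    }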
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 39fb504cf7b7..939a1a3a868d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -134,9 +134,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
}
-BasicBlock *
-llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
- const CriticalEdgeSplittingOptions &Options) {
+BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
@@ -158,22 +158,21 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<BasicBlock *, 4> LoopPreds;
// Check if extra modifications will be required to preserve loop-simplify
// form after splitting. If it would require splitting blocks with IndirectBr
- // terminators, bail out if preserving loop-simplify form is requested.
+ // or CallBr terminators, bail out if preserving loop-simplify form is
+ // requested.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
- // The only that we can break LoopSimplify form by splitting a critical
- // edge is if after the split there exists some edge from TIL to DestBB
- // *and* the only edge into DestBB from outside of TIL is that of
+ // The only way that we can break LoopSimplify form by splitting a
+ // critical edge is if after the split there exists some edge from TIL to
+ // DestBB *and* the only edge into DestBB from outside of TIL is that of
// NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
// is the new exit block and it has no non-loop predecessors. If the
// second isn't true, then DestBB was not in LoopSimplify form prior to
// the split as it had a non-loop predecessor. In both of these cases,
// the predecessor must be directly in TIL, not in a subloop, or again
// LoopSimplify doesn't hold.
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
- ++I) {
- BasicBlock *P = *I;
+ for (BasicBlock *P : predecessors(DestBB)) {
if (P == TIBB)
continue; // The new block is known.
if (LI->getLoopFor(P) != TIL) {
@@ -186,7 +185,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// Loop-simplify form can be preserved, if we can split all in-loop
// predecessors.
if (any_of(LoopPreds, [](BasicBlock *Pred) {
- return isa<IndirectBrInst>(Pred->getTerminator());
+ const Instruction *T = Pred->getTerminator();
+ if (const auto *CBR = dyn_cast<CallBrInst>(T))
+ return CBR->getDefaultDest() != Pred;
+ return isa<IndirectBrInst>(T);
})) {
if (Options.PreserveLoopSimplify)
return nullptr;
@@ -196,8 +198,13 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
}
// Create a new basic block, linking it into the CFG.
- BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
- TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ BasicBlock *NewBB = nullptr;
+ if (BBName.str() != "")
+ NewBB = BasicBlock::Create(TI->getContext(), BBName);
+ else
+ NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." +
+ DestBB->getName() +
+ "_crit_edge");
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
@@ -270,7 +277,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<DominatorTree::UpdateType, 3> Updates;
Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
- if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
+ if (!llvm::is_contained(successors(TIBB), DestBB))
Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
if (DT)
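
Note: besides the predecessors()/is_contained() cleanups, this change threads an optional BBName through SplitCriticalEdge (falling back to the "<pred>.<succ>_crit_edge" scheme when empty) and extends the IndirectBr bail-out for loop-simplify preservation to CallBr terminators. A small sketch of a hypothetical caller using the new name parameter:

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Split the SuccNum-th outgoing edge of BB's terminator, choosing the
    // new block's name instead of the default "_crit_edge" one.
    static BasicBlock *splitNamed(BasicBlock *BB, unsigned SuccNum,
                                  DominatorTree *DT, LoopInfo *LI) {
      return SplitCriticalEdge(BB->getTerminator(), SuccNum,
                               CriticalEdgeSplittingOptions(DT, LI),
                               "guard.split");
    }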
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c64ad147fdfe..f4afa3ad4623 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -31,14 +31,22 @@ using namespace llvm;
//- Infer Attributes ---------------------------------------------------------//
STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumInaccessibleMemOnly,
+ "Number of functions inferred as inaccessiblememonly");
STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumInaccessibleMemOrArgMemOnly,
+ "Number of functions inferred as inaccessiblemem_or_argmemonly");
STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
+STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
+STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -48,6 +56,14 @@ static bool setDoesNotAccessMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesInaccessibleMemory(Function &F) {
+ if (F.onlyAccessesInaccessibleMemory())
+ return false;
+ F.setOnlyAccessesInaccessibleMemory();
+ ++NumInaccessibleMemOnly;
+ return true;
+}
+
static bool setOnlyReadsMemory(Function &F) {
if (F.onlyReadsMemory())
return false;
@@ -64,6 +80,14 @@ static bool setOnlyAccessesArgMemory(Function &F) {
return true;
}
+static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) {
+ if (F.onlyAccessesInaccessibleMemOrArgMem())
+ return false;
+ F.setOnlyAccessesInaccessibleMemOrArgMem();
+ ++NumInaccessibleMemOrArgMemOnly;
+ return true;
+}
+
static bool setDoesNotThrow(Function &F) {
if (F.doesNotThrow())
return false;
@@ -104,6 +128,48 @@ static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
return true;
}
+static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::WriteOnly);
+ ++NumWriteOnlyArg;
+ return true;
+}
+
+static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::SExt))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::SExt);
+ ++NumSExtArg;
+ return true;
+}
+
+static bool setRetNoUndef(Function &F) {
+ if (!F.getReturnType()->isVoidTy() &&
+ !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
+ F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ ++NumNoUndef;
+ return true;
+ }
+ return false;
+}
+
+static bool setArgsNoUndef(Function &F) {
+ bool Changed = false;
+ for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) {
+ if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) {
+ F.addParamAttr(ArgNo, Attribute::NoUndef);
+ ++NumNoUndef;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool setRetAndArgsNoUndef(Function &F) {
+ return setRetNoUndef(F) | setArgsNoUndef(F);
+}
+
static bool setRetNonNull(Function &F) {
assert(F.getReturnType()->isPointerTy() &&
"nonnull applies only to pointers");
@@ -136,6 +202,14 @@ static bool setDoesNotFreeMemory(Function &F) {
return true;
}
+static bool setWillReturn(Function &F) {
+ if (F.hasFnAttribute(Attribute::WillReturn))
+ return false;
+ F.addFnAttr(Attribute::WillReturn);
+ ++NumWillReturn;
+ return true;
+}
+
bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
const TargetLibraryInfo &TLI) {
Function *F = M->getFunction(Name);
@@ -163,12 +237,15 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strchr:
case LibFunc_strrchr:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_strtol:
case LibFunc_strtod:
@@ -178,26 +255,31 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_strtold:
case LibFunc_strtoull:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_strcpy:
case LibFunc_strncpy:
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotAlias(F, 1);
- LLVM_FALLTHROUGH;
case LibFunc_strcat:
case LibFunc_strncat:
+ Changed |= setWillReturn(F);
Changed |= setReturnedArg(F, 0);
LLVM_FALLTHROUGH;
case LibFunc_stpcpy:
case LibFunc_stpncpy:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
return Changed;
case LibFunc_strxfrm:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -206,51 +288,70 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_strspn: // 0,1
case LibFunc_strncmp: // 0,1
case LibFunc_strcspn: // 0,1
- case LibFunc_strcoll: // 0,1
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_strcoll:
case LibFunc_strcasecmp: // 0,1
case LibFunc_strncasecmp: //
+ // Those functions may depend on the locale, which may be accessed through
+ // global memory.
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_strstr:
case LibFunc_strpbrk:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_strtok:
case LibFunc_strtok_r:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_setbuf:
case LibFunc_setvbuf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strdup:
case LibFunc_strndup:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_stat:
case LibFunc_statvfs:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -258,70 +359,95 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_sprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_snprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
case LibFunc_setitimer:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_system:
// May throw; "system" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_malloc:
+ case LibFunc_vec_malloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_memcmp:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memchr:
case LibFunc_memrchr:
- Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_modf:
case LibFunc_modff:
case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotAlias(F, 1);
Changed |= setReturnedArg(F, 0);
- Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_memmove:
- Changed |= setReturnedArg(F, 0);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setReturnedArg(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotAlias(F, 1);
- Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
@@ -329,38 +455,57 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_memalign:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
+ Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_mkdir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_mktime:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_realloc:
+ case LibFunc_vec_realloc:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
+ case LibFunc_reallocf:
+ Changed |= setRetNoUndef(F);
+ Changed |= setWillReturn(F);
+ return Changed;
case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_rewind:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_rmdir:
case LibFunc_remove:
case LibFunc_realpath:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_rename:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -368,6 +513,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_readlink:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -375,35 +521,52 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_write:
// May throw; "write" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_aligned_alloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyWritesMemory(F, 1);
+ Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_bcmp:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_bzero:
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
return Changed;
case LibFunc_calloc:
+ case LibFunc_vec_calloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_chmod:
case LibFunc_chown:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
@@ -411,6 +574,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_ctermid:
case LibFunc_clearerr:
case LibFunc_closedir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
@@ -420,14 +584,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_atoll:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_access:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_fopen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -436,13 +603,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fdopen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_feof:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_free:
+ case LibFunc_vec_free:
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_fseek:
case LibFunc_ftell:
case LibFunc_fgetc:
@@ -456,10 +635,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_flockfile:
case LibFunc_funlockfile:
case LibFunc_ftrylockfile:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_ferror:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F);
@@ -467,26 +648,38 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_fputc:
case LibFunc_fputc_unlocked:
case LibFunc_fstat:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
case LibFunc_frexp:
case LibFunc_frexpf:
case LibFunc_frexpl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
case LibFunc_fstatvfs:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_fgets:
case LibFunc_fgets_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
case LibFunc_fread:
case LibFunc_fread_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
return Changed;
case LibFunc_fwrite:
case LibFunc_fwrite_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
@@ -494,6 +687,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fputs:
case LibFunc_fputs_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -501,23 +695,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fscanf:
case LibFunc_fprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fgetpos:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_getc:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_getlogin_r:
+ Changed |= setRetAndArgsNoUndef(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
case LibFunc_getc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_getenv:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 0);
@@ -525,37 +731,45 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_gets:
case LibFunc_getchar:
case LibFunc_getchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_getitimer:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_getpwnam:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_ungetc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_uname:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_unlink:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_unsetenv:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_utime:
case LibFunc_utimes:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -564,30 +778,36 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_putc:
case LibFunc_putc_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_puts:
case LibFunc_printf:
case LibFunc_perror:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_pread:
// May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_pwrite:
// May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_putchar:
case LibFunc_putchar_unlocked:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
return Changed;
case LibFunc_popen:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -596,15 +816,18 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_pclose:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_vscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_vsscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -612,28 +835,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_vfscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_valloc:
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
case LibFunc_vprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_vfprintf:
case LibFunc_vsprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_vsnprintf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 2);
@@ -641,20 +871,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_open:
// May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_opendir:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_tmpfile:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
return Changed;
case LibFunc_times:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
@@ -666,24 +900,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotAccessMemory(F);
return Changed;
case LibFunc_lstat:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_lchown:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_qsort:
// May throw; places call through function pointer.
+    // Cannot be passed an undef pointer or size.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 3);
return Changed;
case LibFunc_dunder_strdup:
case LibFunc_dunder_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
@@ -693,14 +932,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_under_IO_getc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_under_IO_putc:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_dunder_isoc99_scanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
@@ -708,12 +950,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_stat64:
case LibFunc_lstat64:
case LibFunc_statvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_dunder_isoc99_sscanf:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -721,6 +965,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_fopen64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
@@ -730,20 +975,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_fseeko64:
case LibFunc_ftello64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_tmpfile64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
return Changed;
case LibFunc_fstat64:
case LibFunc_fstatvfs64:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_open64:
// May throw; "open" is a valid pthread cancellation point.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
@@ -751,21 +1000,67 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
// Currently some platforms have the restrict keyword on the arguments to
// gettimeofday. To be conservative, do not add noalias to gettimeofday's
// arguments.
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
+ case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow)
+ case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow)
+ case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow)
+ case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow)
+ Changed |= setDoesNotThrow(F);
+ LLVM_FALLTHROUGH;
+ case LibFunc_ZdlPv: // delete(void*)
+ case LibFunc_ZdlPvj: // delete(void*, unsigned int)
+ case LibFunc_ZdlPvm: // delete(void*, unsigned long)
+ case LibFunc_ZdaPv: // delete[](void*)
+ case LibFunc_ZdaPvj: // delete[](void*, unsigned int)
+ case LibFunc_ZdaPvm: // delete[](void*, unsigned long)
+ case LibFunc_ZdlPvSt11align_val_t: // delete(void*, align_val_t)
+ case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t)
+ case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t)
+ case LibFunc_ZdaPvSt11align_val_t: // delete[](void*, align_val_t)
+ case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t)
+  case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t)
+ Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
+ Changed |= setArgsNoUndef(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow)
+ case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow)
+ case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow)
+ case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow)
+ case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow)
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow)
+ case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow)
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow)
+    // Nothrow operator new may return a null pointer.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
+ Changed |= setRetNoUndef(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
+ return Changed;
case LibFunc_Znwj: // new(unsigned int)
case LibFunc_Znwm: // new(unsigned long)
case LibFunc_Znaj: // new[](unsigned int)
case LibFunc_Znam: // new[](unsigned long)
+ case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t)
+ case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t)
+ case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t)
+ case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t)
case LibFunc_msvc_new_int: // new(unsigned int)
case LibFunc_msvc_new_longlong: // new(unsigned long long)
case LibFunc_msvc_new_array_int: // new[](unsigned int)
case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
+ Changed |= setOnlyAccessesInaccessibleMemory(F);
// Operator new always returns a nonnull noalias pointer
+ Changed |= setRetNoUndef(F);
Changed |= setRetNonNull(F);
Changed |= setRetDoesNotAlias(F);
+ Changed |= setWillReturn(F);
return Changed;
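
A minimal caller-side sketch of how this inference gets applied; a Module M
and TargetLibraryInfo TLI are assumed to be in scope, and the mangled name
(operator new(unsigned long)) is only illustrative:

  if (Function *F = M.getFunction("_Znwm")) {
    // After this call the declaration carries the attributes set above:
    // noundef/nonnull/noalias return, willreturn, inaccessiblememonly.
    bool Changed = inferLibFuncAttributes(*F, TLI);
    (void)Changed;
  }
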
// TODO: add LibFunc entries for:
// case LibFunc_memset_pattern4:
@@ -773,15 +1068,155 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
+ case LibFunc_memset:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setWillReturn(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyWritesMemory(F, 0);
+ return Changed;
// int __nvvm_reflect(const char *)
case LibFunc_nvvm_reflect:
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotAccessMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
-
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ Changed |= setSignExtendedArg(F, 1);
+ Changed |= setWillReturn(F);
+ return Changed;
+ case LibFunc_abs:
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acosh:
+ case LibFunc_acoshf:
+ case LibFunc_acoshl:
+ case LibFunc_acosl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asinh:
+ case LibFunc_asinhf:
+ case LibFunc_asinhl:
+ case LibFunc_asinl:
+ case LibFunc_atan:
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ case LibFunc_atanf:
+ case LibFunc_atanh:
+ case LibFunc_atanhf:
+ case LibFunc_atanhl:
+ case LibFunc_atanl:
+ case LibFunc_cbrt:
+ case LibFunc_cbrtf:
+ case LibFunc_cbrtl:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ case LibFunc_cos:
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ case LibFunc_cospi:
+ case LibFunc_cospif:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_expm1:
+ case LibFunc_expm1f:
+ case LibFunc_expm1l:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_isascii:
+ case LibFunc_isdigit:
+ case LibFunc_labs:
+ case LibFunc_llabs:
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
+ case LibFunc_log1p:
+ case LibFunc_log1pf:
+ case LibFunc_log1pl:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ case LibFunc_logb:
+ case LibFunc_logbf:
+ case LibFunc_logbl:
+ case LibFunc_logf:
+ case LibFunc_logl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ case LibFunc_sin:
+ case LibFunc_sincospif_stret:
+ case LibFunc_sinf:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl:
+ case LibFunc_sinl:
+ case LibFunc_sinpi:
+ case LibFunc_sinpif:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_strnlen:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ case LibFunc_tanl:
+ case LibFunc_toascii:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotFreeMemory(F);
+ Changed |= setWillReturn(F);
+ return Changed;
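
A minimal sketch of the net effect on one of the math declarations above,
assuming F is, e.g., the declaration of cosf and the inference has run:

  assert(F.doesNotThrow());                        // nounwind
  assert(F.hasFnAttribute(Attribute::NoFree));     // nofree
  assert(F.hasFnAttribute(Attribute::WillReturn)); // willreturn
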
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
@@ -930,6 +1365,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return CI;
}
+Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(
+ LibFunc_mempcpy, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {Dst, Src, Len}, B, TLI);
+}
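
A minimal usage sketch for the new helper, assuming B is an IRBuilder
positioned inside a function, Dst/Src are i8* Values, Len has the target's
intptr type, and TLI is in scope:

  const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
  // Emits a call to mempcpy; the result is the pointer one past the last
  // byte written, or nullptr if the libcall is unavailable for this target.
  Value *End = emitMemPCpy(Dst, Src, Len, B, DL, TLI);
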
+
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
@@ -969,7 +1413,7 @@ Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
Args, B, TLI, /*IsVaArgs=*/true);
@@ -979,7 +1423,7 @@ Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
ArrayRef<Value *> VariadicArgs, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ llvm::append_range(Args, VariadicArgs);
return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
{B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
/*IsVaArgs=*/true);
@@ -1087,12 +1531,15 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
StringRef Name, IRBuilderBase &B,
- const AttributeList &Attrs) {
+ const AttributeList &Attrs,
+ const TargetLibraryInfo *TLI = nullptr) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
Op1->getType(), Op2->getType());
+ if (TLI != nullptr)
+ inferLibFuncAttributes(M, Name, *TLI);
CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1128,7 +1575,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
StringRef Name = getFloatFnName(TLI, Op1->getType(),
DoubleFn, FloatFn, LongDoubleFn);
- return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+ return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI);
}
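
A minimal usage sketch of the TLI-aware path added above, assuming X and Y
are floating-point Values of the same type and B is a positioned IRBuilder:

  // Selects fmin/fminf/fminl by operand type; because TLI is now threaded
  // through, the callee declaration also gets inferLibFuncAttributes applied.
  Value *Min = emitBinaryFloatFnCall(X, Y, TLI, LibFunc_fmin, LibFunc_fminf,
                                     LibFunc_fminl, B, AttributeList());
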
Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
index 52e859361c59..b2763900e154 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -96,11 +96,12 @@ void CallGraphUpdater::reanalyzeFunction(Function &Fn) {
}
}
-void CallGraphUpdater::registerOutlinedFunction(Function &NewFn) {
+void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn,
+ Function &NewFn) {
if (CG)
CG->addToCallGraph(&NewFn);
else if (LCG)
- LCG->addNewFunctionIntoSCC(NewFn, *SCC);
+ LCG->addSplitFunction(OriginalFn, NewFn);
}
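
A minimal caller-side sketch of the updated interface, assuming CGUpdater is
an initialized CallGraphUpdater and Outlined was extracted from Orig:

  // With the lazy call graph this registers Outlined as a function split
  // from Orig, instead of inserting it blindly into the current SCC.
  CGUpdater.registerOutlinedFunction(Orig, Outlined);
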
void CallGraphUpdater::removeFunction(Function &DeadFn) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 5a47c1fd0b6c..bf08bf274737 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -112,9 +112,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
Builder.SetInsertPoint(&MergeBlock->front());
PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : OrigInst->users())
- UsersToUpdate.push_back(U);
+ SmallVector<User *, 16> UsersToUpdate(OrigInst->users());
for (User *U : UsersToUpdate)
U->replaceUsesOfWith(OrigInst, Phi);
Phi->addIncoming(OrigInst, OrigInst->getParent());
@@ -165,9 +163,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
// Save the users of the calling instruction. These uses will be changed to
// use the bitcast after we create it.
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : CB.users())
- UsersToUpdate.push_back(U);
+ SmallVector<User *, 16> UsersToUpdate(CB.users());
// Determine an appropriate location to create the bitcast for the return
// value. The location depends on if we have a call or invoke instruction.
@@ -430,10 +426,11 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
}
}
for (; I < NumArgs; I++) {
- // Vararg functions can have more arguments than paramters.
+ // Vararg functions can have more arguments than parameters.
assert(Callee->isVarArg());
if (CB.paramHasAttr(I, Attribute::StructRet)) {
- *FailureReason = "SRet arg to vararg function";
+ if (FailureReason)
+ *FailureReason = "SRet arg to vararg function";
return false;
}
}
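
A minimal usage sketch showing why the added null check matters; the
FailureReason out-parameter is optional, and CB/Callee are assumed in scope:

  const char *Reason = nullptr;
  if (!isLegalToPromote(CB, Callee, &Reason))
    errs() << "not promotable: " << (Reason ? Reason : "unknown") << "\n";
  // Passing no out-parameter is now safe on this failure path as well.
  bool OK = isLegalToPromote(CB, Callee);
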
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 1ae17c64b8f6..1f649fe6c748 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -109,7 +109,7 @@ void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
auto *ValueToFr = U.get();
assert(L->contains(UserI->getParent()) &&
"Should not process an instruction that isn't inside the loop");
- if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, UserI, &DT))
+ if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT))
return;
LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n");
@@ -176,7 +176,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
assert(StepI && "Step instruction should have been found");
// Drop flags from the step instruction.
- if (!isGuaranteedNotToBeUndefOrPoison(StepI, StepI, &DT)) {
+ if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) {
LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n");
StepI->dropPoisonGeneratingFlags();
SE.forgetValue(StepI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 788983c15690..51a49574e55d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -36,6 +37,8 @@
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "clone-function"
+
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
@@ -137,15 +140,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
MD[SP].reset(SP);
}
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- OldFunc->getAllMetadata(MDs);
- for (auto MD : MDs) {
- NewFunc->addMetadata(
- MD.first,
- *MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
- }
+ // Everything else beyond this point deals with function instructions,
+ // so if we are dealing with a function declaration, we're done.
+ if (OldFunc->isDeclaration())
+ return;
// When we remap instructions, we want to avoid duplicating inlined
// DISubprograms, so record all subprograms we find as we duplicate
@@ -157,7 +155,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
- //
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
const BasicBlock &BB = *BI;
@@ -196,6 +193,19 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (DIType *Type : DIFinder.types())
VMap.MD()[Type].reset(Type);
+ // Duplicate the metadata that is attached to the cloned function.
+ // Subprograms/CUs/types that were already mapped to themselves won't be
+ // duplicated.
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ OldFunc->getAllMetadata(MDs);
+ for (auto MD : MDs) {
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
@@ -426,9 +436,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
- const Instruction *TI = BB->getTerminator();
- for (const BasicBlock *Succ : successors(TI))
- ToClone.push_back(Succ);
+ append_range(ToClone, successors(BB->getTerminator()));
}
if (CodeInfo) {
@@ -668,8 +676,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
- if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
- I->getSinglePredecessor() == &*I)) {
+ if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) {
BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
@@ -877,3 +884,108 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
return NewBB;
}
+
+void llvm::cloneNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ StringRef Ext, LLVMContext &Context) {
+ MDBuilder MDB(Context);
+
+ for (auto *ScopeList : NoAliasDeclScopes) {
+ for (auto &MDOperand : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+ AliasScopeNode SNANode(MD);
+
+ std::string Name;
+ auto ScopeName = SNANode.getName();
+ if (!ScopeName.empty())
+ Name = (Twine(ScopeName) + ":" + Ext).str();
+ else
+ Name = std::string(Ext);
+
+ MDNode *NewScope = MDB.createAnonymousAliasScope(
+ const_cast<MDNode *>(SNANode.getDomain()), Name);
+ ClonedScopes.insert(std::make_pair(MD, NewScope));
+ }
+ }
+ }
+}
+
+void llvm::adaptNoAliasScopes(
+ Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ LLVMContext &Context) {
+ auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
+ bool NeedsReplacement = false;
+ SmallVector<Metadata *, 8> NewScopeList;
+ for (auto &MDOp : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
+ if (auto *NewMD = ClonedScopes.lookup(MD)) {
+ NewScopeList.push_back(NewMD);
+ NeedsReplacement = true;
+ continue;
+ }
+ NewScopeList.push_back(MD);
+ }
+ }
+ if (NeedsReplacement)
+ return MDNode::get(Context, NewScopeList);
+ return nullptr;
+ };
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
+ if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+ Decl->setScopeList(NewScopeList);
+
+ auto replaceWhenNeeded = [&](unsigned MD_ID) {
+ if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
+ if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+ I->setMetadata(MD_ID, NewScopeList);
+ };
+ replaceWhenNeeded(LLVMContext::MD_noalias);
+ replaceWhenNeeded(LLVMContext::MD_alias_scope);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart,
+ Instruction *IEnd, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context);
+ // Identify instructions using metadata that needs adaptation
+  assert(IStart->getParent() == IEnd->getParent() && "different basic block?");
+ auto ItStart = IStart->getIterator();
+ auto ItEnd = IEnd->getIterator();
+ ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range
+ for (auto &I : llvm::make_range(ItStart, ItEnd))
+ adaptNoAliasScopes(&I, ClonedScopes, Context);
+}
+
+void llvm::identifyNoAliasScopesToClone(
+ ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+ for (BasicBlock *BB : BBs)
+ for (Instruction &I : *BB)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
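
A minimal end-to-end sketch of the three helpers above; OrigBlocks,
NewBlocks, and Ctx are assumed to be the blocks about to be duplicated,
their clones, and the LLVMContext:

  SmallVector<MDNode *, 8> DeclScopes;
  identifyNoAliasScopesToClone(OrigBlocks, DeclScopes);
  // ... duplicate OrigBlocks into NewBlocks (e.g. during unrolling) ...
  // Clones each scope, appending ":cloned" to its name, and rewrites the
  // !alias.scope/!noalias and scope-decl uses inside the new blocks.
  cloneAndAdaptNoAliasScopes(DeclScopes, NewBlocks, Ctx, "cloned");
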
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
index 2c8c3abb2922..a6327bbf21bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -117,10 +117,17 @@ std::unique_ptr<Module> llvm::CloneModule(
//
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I->getAllMetadata(MDs);
+ for (auto MD : MDs)
+ GV->addMetadata(MD.first,
+ *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+
if (I->isDeclaration())
continue;
- GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
if (!ShouldCloneDefinition(&*I)) {
// Skip after setting the correct linkage for an external reference.
GV->setLinkage(GlobalValue::ExternalLinkage);
@@ -129,12 +136,6 @@ std::unique_ptr<Module> llvm::CloneModule(
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- I->getAllMetadata(MDs);
- for (auto MD : MDs)
- GV->addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
-
copyComdat(GV, &*I);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 8cdbb9d35652..390925a03b73 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -535,6 +535,46 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
continue;
}
+  // Find bitcasts in the outlined region that have lifetime marker users
+  // outside that region. Replace each such lifetime marker use with a
+  // bitcast created outside of the region, to avoid unnecessary
+  // alloca/reload instructions and extra lifetime markers.
+ SmallVector<Instruction *, 2> LifetimeBitcastUsers;
+ for (User *U : AI->users()) {
+ if (!definedInRegion(Blocks, U))
+ continue;
+
+ if (U->stripInBoundsConstantOffsets() != AI)
+ continue;
+
+ Instruction *Bitcast = cast<Instruction>(U);
+ for (User *BU : Bitcast->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
+ if (!IntrInst)
+ continue;
+
+ if (!IntrInst->isLifetimeStartOrEnd())
+ continue;
+
+ if (definedInRegion(Blocks, IntrInst))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
+ << *Bitcast << " in out-of-region lifetime marker "
+ << *IntrInst << "\n");
+ LifetimeBitcastUsers.push_back(IntrInst);
+ }
+ }
+
+ for (Instruction *I : LifetimeBitcastUsers) {
+ Module *M = AIFunc->getParent();
+ LLVMContext &Ctx = M->getContext();
+ auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ CastInst *CastI =
+ CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
+ I->replaceUsesOfWith(I->getOperand(1), CastI);
+ }
+
// Follow any bitcasts.
SmallVector<Instruction *, 2> Bitcasts;
SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
@@ -728,8 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits(
NewBB = BasicBlock::Create(ExitBB->getContext(),
ExitBB->getName() + ".split",
ExitBB->getParent(), ExitBB);
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
- pred_end(ExitBB));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
for (BasicBlock *PredBB : Preds)
if (Blocks.count(PredBB))
PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
@@ -895,6 +934,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::WriteOnly:
case Attribute::ZExt:
case Attribute::ImmArg:
+ case Attribute::ByRef:
case Attribute::EndAttrKinds:
case Attribute::EmptyKey:
case Attribute::TombstoneKey:
@@ -902,9 +942,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
case Attribute::Cold:
+ case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
case Attribute::MinSize:
+ case Attribute::NoCallback:
case Attribute::NoDuplicate:
case Attribute::NoFree:
case Attribute::NoImplicitFloat:
@@ -930,6 +972,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StrictFP:
case Attribute::UWTable:
case Attribute::NoCfCheck:
+ case Attribute::MustProgress:
+ case Attribute::NoProfile:
break;
}
@@ -1434,7 +1478,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// function arguments, as the parameters don't correspond to anything at the
// source level.
assert(OldSP->getUnit() && "Missing compile unit for subprogram");
- DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolvedNodes=*/false,
+ DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
OldSP->getUnit());
auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
@@ -1505,7 +1549,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
// function.
for (Instruction &I : instructions(NewFunc)) {
if (const DebugLoc &DL = I.getDebugLoc())
- I.setDebugLoc(DebugLoc::get(DL.getLine(), DL.getCol(), NewSP));
+ I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
// Loop info metadata may contain line locations. Fix them up.
auto updateLoopInfoLoc = [&Ctx,
@@ -1516,7 +1560,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
}
if (!TheCall.getDebugLoc())
- TheCall.setDebugLoc(DebugLoc::get(0, 0, OldSP));
+ TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));
eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
}
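
A minimal sketch of the API migration applied above: DebugLoc::get(...) is
replaced by constructing the DILocation directly. An instruction I plus
Ctx/Line/Col/Scope are assumed:

  // DebugLoc converts implicitly from the returned DILocation *.
  I.setDebugLoc(DILocation::get(Ctx, Line, Col, Scope));
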
@@ -1739,7 +1783,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
const Function &NewFunc,
AssumptionCache *AC) {
for (auto AssumeVH : AC->assumptions()) {
- CallInst *I = dyn_cast_or_null<CallInst>(AssumeVH);
+ auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
if (!I)
continue;
@@ -1751,12 +1795,12 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
// that were previously in the old function, but that have now been moved
// to the new function.
for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
- CallInst *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
+ auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
if (!AffectedCI)
continue;
if (AffectedCI->getFunction() != &OldFunc)
return true;
- auto *AssumedInst = dyn_cast<Instruction>(AffectedCI->getOperand(0));
+ auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
if (AssumedInst->getFunction() != &OldFunc)
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 08047dc0f96e..ce982c7403aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -355,35 +355,32 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
  // Check if there exist instructions which may throw, may synchronize, or
  // may never return, from I to InsertPoint.
if (!isSafeToSpeculativelyExecute(&I))
- if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
- [](Instruction *I) {
- if (I->mayThrow())
- return true;
-
- const CallBase *CB = dyn_cast<CallBase>(I);
- if (!CB)
- return false;
- if (!CB->hasFnAttr(Attribute::WillReturn))
- return true;
- if (!CB->hasFnAttr(Attribute::NoSync))
- return true;
-
- return false;
- })) {
+ if (llvm::any_of(InstsToCheck, [](Instruction *I) {
+ if (I->mayThrow())
+ return true;
+
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+ if (!CB->hasFnAttr(Attribute::WillReturn))
+ return true;
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ return false;
+ })) {
return reportInvalidCandidate(I, MayThrowException);
}
// Check if I has any output/flow/anti dependences with instructions from \p
// StartInst to \p EndInst.
- if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
- [&DI, &I](Instruction *CurInst) {
- auto DepResult = DI->depends(&I, CurInst, true);
- if (DepResult &&
- (DepResult->isOutput() || DepResult->isFlow() ||
- DepResult->isAnti()))
- return true;
- return false;
- }))
+ if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) {
+ auto DepResult = DI->depends(&I, CurInst, true);
+ if (DepResult && (DepResult->isOutput() || DepResult->isFlow() ||
+ DepResult->isAnti()))
+ return true;
+ return false;
+ }))
return reportInvalidCandidate(I, HasDependences);
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
index 8f98d81a3d79..3e4d53c10dc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -198,6 +199,18 @@ bool llvm::applyDebugifyMetadata(
return true;
}
+static bool applyDebugify(Function &F) {
+ Module &M = *F.getParent();
+ auto FuncIt = F.getIterator();
+ return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ "FunctionDebugify: ", /*ApplyToMF=*/nullptr);
+}
+
+static bool applyDebugify(Module &M) {
+ return applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF=*/nullptr);
+}
+
bool llvm::stripDebugifyMetadata(Module &M) {
bool Changed = false;
@@ -226,9 +239,7 @@ bool llvm::stripDebugifyMetadata(Module &M) {
NamedMDNode *NMD = M.getModuleFlagsMetadata();
if (!NMD)
return Changed;
- SmallVector<MDNode *, 4> Flags;
- for (MDNode *Flag : NMD->operands())
- Flags.push_back(Flag);
+ SmallVector<MDNode *, 4> Flags(NMD->operands());
NMD->clearOperands();
for (MDNode *Flag : Flags) {
MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1));
@@ -383,10 +394,7 @@ bool checkDebugifyMetadata(Module &M,
/// ModulePass for attaching synthetic debug info to everything, used with the
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
- bool runOnModule(Module &M) override {
- return applyDebugifyMetadata(M, M.functions(),
- "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
- }
+ bool runOnModule(Module &M) override { return applyDebugify(M); }
DebugifyModulePass() : ModulePass(ID) {}
@@ -400,12 +408,7 @@ struct DebugifyModulePass : public ModulePass {
/// FunctionPass for attaching synthetic debug info to instructions within a
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
- bool runOnFunction(Function &F) override {
- Module &M = *F.getParent();
- auto FuncIt = F.getIterator();
- return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
- "FunctionDebugify: ", /*ApplyToMF*/ nullptr);
- }
+ bool runOnFunction(Function &F) override { return applyDebugify(F); }
DebugifyFunctionPass() : FunctionPass(ID) {}
@@ -472,9 +475,32 @@ private:
} // end anonymous namespace
-ModulePass *createDebugifyModulePass() { return new DebugifyModulePass(); }
+void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
+ std::error_code EC;
+ raw_fd_ostream OS{Path, EC};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
+ return;
+ }
+
+ OS << "Pass Name" << ',' << "# of missing debug values" << ','
+ << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
+ << "Missing/Expected location ratio" << '\n';
+ for (const auto &Entry : Map) {
+ StringRef Pass = Entry.first;
+ DebugifyStatistics Stats = Entry.second;
+
+ OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
+ << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
+ << Stats.getEmptyLocationRatio() << '\n';
+ }
+}
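
A minimal usage sketch, assuming StatsMap was populated by the
check-debugify passes and Path names a writable file:

  exportDebugifyStats(Path, StatsMap);
  // Emits one CSV row per pass:
  //   Pass Name,# of missing debug values,# of missing locations,
  //   Missing/Expected value ratio,Missing/Expected location ratio
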
+
+ModulePass *llvm::createDebugifyModulePass() {
+ return new DebugifyModulePass();
+}
-FunctionPass *createDebugifyFunctionPass() {
+FunctionPass *llvm::createDebugifyFunctionPass() {
return new DebugifyFunctionPass();
}
@@ -484,15 +510,15 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-ModulePass *createCheckDebugifyModulePass(bool Strip,
- StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
+ModulePass *llvm::createCheckDebugifyModulePass(bool Strip,
+ StringRef NameOfWrappedPass,
+ DebugifyStatsMap *StatsMap) {
return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
}
-FunctionPass *createCheckDebugifyFunctionPass(bool Strip,
- StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
+FunctionPass *
+llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass,
+ DebugifyStatsMap *StatsMap) {
return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
}
@@ -503,6 +529,41 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
return PreservedAnalyses::all();
}
+static bool isIgnoredPass(StringRef PassID) {
+ return isSpecialPass(PassID, {"PassManager", "PassAdaptor",
+ "AnalysisManagerProxy", "PrintFunctionPass",
+ "PrintModulePass", "BitcodeWriterPass",
+ "ThinLTOBitcodeWriterPass", "VerifierPass"});
+}
+
+void DebugifyEachInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) {
+ if (isIgnoredPass(P))
+ return;
+ if (any_isa<const Function *>(IR))
+ applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)));
+ else if (any_isa<const Module *>(IR))
+ applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)));
+ });
+ PIC.registerAfterPassCallback([this](StringRef P, Any IR,
+ const PreservedAnalyses &PassPA) {
+ if (isIgnoredPass(P))
+ return;
+ if (any_isa<const Function *>(IR)) {
+ auto &F = *const_cast<Function *>(any_cast<const Function *>(IR));
+ Module &M = *F.getParent();
+ auto It = F.getIterator();
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true, &StatsMap);
+ } else if (any_isa<const Module *>(IR)) {
+ auto &M = *const_cast<Module *>(any_cast<const Module *>(IR));
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, &StatsMap);
+ }
+ });
+}
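
A minimal registration sketch for the instrumentation above; the
PassInstrumentationCallbacks object is assumed to be the one later handed
to the PassBuilder:

  PassInstrumentationCallbacks PIC;
  DebugifyEachInstrumentation Debugify;
  // Before each non-skipped pass, synthetic debug info is attached; after
  // it, the info is checked and stripped, accumulating into the StatsMap.
  Debugify.registerCallbacks(PIC);
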
+
char DebugifyModulePass::ID = 0;
static RegisterPass<DebugifyModulePass> DM("debugify",
"Attach debug info to everything");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index f84ff9e5aad1..26f8e21952cc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -83,7 +83,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
if (!EntryFunc.empty()) {
DebugLoc DL;
if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
Changed = true;
@@ -97,19 +97,14 @@ static bool runOnFunction(Function &F, bool PostInlining) {
continue;
// If T is preceded by a musttail call, that's the real terminator.
- Instruction *Prev = T->getPrevNode();
- if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev))
- Prev = BCI->getPrevNode();
- if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) {
- if (CI->isMustTailCall())
- T = CI;
- }
+ if (CallInst *CI = BB.getTerminatingMustTailCall())
+ T = CI;
DebugLoc DL;
if (DebugLoc TerminatorDL = T->getDebugLoc())
DL = TerminatorDL;
else if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(0, 0, SP);
+ DL = DILocation::get(SP->getContext(), 0, 0, SP);
insertCall(F, ExitFunc, T, DL);
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index cae9d9ee6d70..accedd5b4ee0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -41,6 +41,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
+ if (CallInst *CI = CurBB->getTerminatingMustTailCall())
+ TI = CI;
Builder.SetInsertPoint(TI);
return &Builder;
}
@@ -54,11 +56,12 @@ IRBuilder<> *EscapeEnumerator::Next() {
return nullptr;
// Find all 'call' instructions that may throw.
+  // We cannot transform calls with the musttail tag.
SmallVector<Instruction *, 16> Calls;
for (BasicBlock &BB : F)
for (Instruction &II : BB)
if (CallInst *CI = dyn_cast<CallInst>(&II))
- if (!CI->doesNotThrow())
+ if (!CI->doesNotThrow() && !CI->isMustTailCall())
Calls.push_back(CI);
if (Calls.empty())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
index c5dfbf9d92d1..732b00635e29 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -183,11 +183,11 @@ evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
std::function<Constant *(Constant *)> Func) {
Constant *Val;
while (!(Val = Func(Ptr))) {
- // If Ty is a struct, we can convert the pointer to the struct
+ // If Ty is a non-opaque struct, we can convert the pointer to the struct
// into a pointer to its first member.
// FIXME: This could be extended to support arrays as well.
Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
- if (!isa<StructType>(Ty))
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque())
break;
IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
@@ -210,11 +210,7 @@ static Constant *getInitializer(Constant *C) {
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- auto findMemLoc = [this](Constant *Ptr) {
- DenseMap<Constant *, Constant *>::const_iterator I =
- MutatedMemory.find(Ptr);
- return I != MutatedMemory.end() ? I->second : nullptr;
- };
+ auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); };
if (Constant *Val = findMemLoc(P))
return Val;
@@ -551,6 +547,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
++CurInst;
continue;
+ } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) {
+ LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n");
+ ++CurInst;
+ continue;
}
LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 460ba9e97fc6..44af95eef67d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -66,6 +66,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/InitializePasses.h"
@@ -104,7 +105,7 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -304,11 +305,9 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
return Changed;
}
-bool FixIrreducible::runOnFunction(Function &F) {
+static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
@@ -318,13 +317,10 @@ bool FixIrreducible::runOnFunction(Function &F) {
// Any SCCs reduced are now already in the list of top-level loops, so simply
// add them all to the worklist.
- for (auto L : LI) {
- WorkList.push_back(L);
- }
+ append_range(WorkList, LI);
while (!WorkList.empty()) {
- auto L = WorkList.back();
- WorkList.pop_back();
+ auto L = WorkList.pop_back_val();
LLVM_DEBUG(dbgs() << "visiting loop with header "
<< L->getHeader()->getName() << "\n");
Changed |= makeReducible(LI, DT, *L);
@@ -335,3 +331,21 @@ bool FixIrreducible::runOnFunction(Function &F) {
return Changed;
}
+
+bool FixIrreducible::runOnFunction(Function &F) {
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return FixIrreducibleImpl(F, LI, DT);
+}
+
+PreservedAnalyses FixIrreduciblePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ if (!FixIrreducibleImpl(F, LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
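
A minimal pipeline sketch for the new-PM entry point above; FPM is assumed
to be a FunctionPassManager wired into a PassBuilder pipeline:

  FPM.addPass(FixIrreduciblePass());
  // Equivalent textual form, assuming the "fix-irreducible" name is
  // registered in the pass registry: -passes=fix-irreducible
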
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 101cb232d8ae..2696557a719f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -124,12 +124,17 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
Type *TyL = LA.getValueAsType();
Type *TyR = RA.getValueAsType();
- if (TyL && TyR)
- return cmpTypes(TyL, TyR);
+ if (TyL && TyR) {
+ if (int Res = cmpTypes(TyL, TyR))
+ return Res;
+ continue;
+ }
// Two pointers, at least one null, so the comparison result is
// independent of the value of a real pointer.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
+ if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR))
+ return Res;
+ continue;
}
if (LA < RA)
return -1;
@@ -286,6 +291,7 @@ int FunctionComparator::cmpConstants(const Constant *L,
switch (L->getValueID()) {
case Value::UndefValueVal:
+ case Value::PoisonValueVal:
case Value::ConstantTokenNoneVal:
return TypesRes;
case Value::ConstantIntVal: {
@@ -488,12 +494,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
case Type::ScalableVectorTyID: {
auto *STyL = cast<VectorType>(TyL);
auto *STyR = cast<VectorType>(TyR);
- if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable)
- return cmpNumbers(STyL->getElementCount().Scalable,
- STyR->getElementCount().Scalable);
- if (STyL->getElementCount().Min != STyR->getElementCount().Min)
- return cmpNumbers(STyL->getElementCount().Min,
- STyR->getElementCount().Min);
+ if (STyL->getElementCount().isScalable() !=
+ STyR->getElementCount().isScalable())
+ return cmpNumbers(STyL->getElementCount().isScalable(),
+ STyR->getElementCount().isScalable());
+ if (STyL->getElementCount() != STyR->getElementCount())
+ return cmpNumbers(STyL->getElementCount().getKnownMinValue(),
+ STyR->getElementCount().getKnownMinValue());
return cmpTypes(STyL->getElementType(), STyR->getElementType());
}
}
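
A minimal sketch of the accessor-based ElementCount API used above, which
replaces the removed Scalable/Min members; VTy is an assumed VectorType *:

  ElementCount EC = VTy->getElementCount();
  bool Scalable = EC.isScalable();         // was EC.Scalable
  unsigned MinVal = EC.getKnownMinValue(); // was EC.Min
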
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index fe58f0e0fe40..f782396be7b6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -136,7 +136,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::Stored;
}
}
- } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<AddrSpaceCastInst>(I)) {
// Skip over bitcasts and GEPs; we don't care about the type or offset
// of the pointer.
if (analyzeGlobalAux(I, GS, VisitedUsers))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 4cfc9358499a..4dbcbf80d3da 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -30,7 +30,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight(
void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
CallInst *Guard, bool UseWC) {
OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
- SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
+ SmallVector<Value *, 4> Args(drop_begin(Guard->args()));
auto *CheckBB = Guard->getParent();
auto *DeoptBlockTerm =
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
deleted file mode 100644
index ea93f99d69e3..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Generating inliner statistics for imported functions, mostly useful for
-// ThinLTO.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <iomanip>
-#include <sstream>
-using namespace llvm;
-
-ImportedFunctionsInliningStatistics::InlineGraphNode &
-ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
-
- auto &ValueLookup = NodesMap[F.getName()];
- if (!ValueLookup) {
- ValueLookup = std::make_unique<InlineGraphNode>();
- ValueLookup->Imported = F.hasMetadata("thinlto_src_module");
- }
- return *ValueLookup;
-}
-
-void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
- const Function &Callee) {
-
- InlineGraphNode &CallerNode = createInlineGraphNode(Caller);
- InlineGraphNode &CalleeNode = createInlineGraphNode(Callee);
- CalleeNode.NumberOfInlines++;
-
- if (!CallerNode.Imported && !CalleeNode.Imported) {
- // Direct inline from not imported callee to not imported caller, so we
- // don't have to add this to graph. It might be very helpful if you wanna
- // get the inliner statistics in compile step where there are no imported
- // functions. In this case the graph would be empty.
- CalleeNode.NumberOfRealInlines++;
- return;
- }
-
- CallerNode.InlinedCallees.push_back(&CalleeNode);
- if (!CallerNode.Imported) {
- // We could avoid second lookup, but it would make the code ultra ugly.
- auto It = NodesMap.find(Caller.getName());
- assert(It != NodesMap.end() && "The node should be already there.");
- // Save Caller as a starting node for traversal. The string has to be one
- // from map because Caller can disappear (and function name with it).
- NonImportedCallers.push_back(It->first());
- }
-}
-
-void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
- ModuleName = M.getName();
- for (const auto &F : M.functions()) {
- if (F.isDeclaration())
- continue;
- AllFunctions++;
- ImportedFunctions += int(F.hasMetadata("thinlto_src_module"));
- }
-}
-static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
- const char *PercentageOfMsg,
- bool LineEnd = true) {
- double Result = 0;
- if (All != 0)
- Result = 100 * static_cast<double>(Fraction) / All;
-
- std::stringstream Str;
- Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result
- << "% of " << PercentageOfMsg << "]";
- if (LineEnd)
- Str << "\n";
- return Str.str();
-}
-
-void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
- calculateRealInlines();
- NonImportedCallers.clear();
-
- int32_t InlinedImportedFunctionsCount = 0;
- int32_t InlinedNotImportedFunctionsCount = 0;
-
- int32_t InlinedImportedFunctionsToImportingModuleCount = 0;
- int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0;
-
- const auto SortedNodes = getSortedNodes();
- std::string Out;
- Out.reserve(5000);
- raw_string_ostream Ostream(Out);
-
- Ostream << "------- Dumping inliner stats for [" << ModuleName
- << "] -------\n";
-
- if (Verbose)
- Ostream << "-- List of inlined functions:\n";
-
- for (const auto &Node : SortedNodes) {
- assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines);
- if (Node->second->NumberOfInlines == 0)
- continue;
-
- if (Node->second->Imported) {
- InlinedImportedFunctionsCount++;
- InlinedImportedFunctionsToImportingModuleCount +=
- int(Node->second->NumberOfRealInlines > 0);
- } else {
- InlinedNotImportedFunctionsCount++;
- InlinedNotImportedFunctionsToImportingModuleCount +=
- int(Node->second->NumberOfRealInlines > 0);
- }
-
- if (Verbose)
- Ostream << "Inlined "
- << (Node->second->Imported ? "imported " : "not imported ")
- << "function [" << Node->first() << "]"
- << ": #inlines = " << Node->second->NumberOfInlines
- << ", #inlines_to_importing_module = "
- << Node->second->NumberOfRealInlines << "\n";
- }
-
- auto InlinedFunctionsCount =
- InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount;
- auto NotImportedFuncCount = AllFunctions - ImportedFunctions;
- auto ImportedNotInlinedIntoModule =
- ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount;
-
- Ostream << "-- Summary:\n"
- << "All functions: " << AllFunctions
- << ", imported functions: " << ImportedFunctions << "\n"
- << getStatString("inlined functions", InlinedFunctionsCount,
- AllFunctions, "all functions")
- << getStatString("imported functions inlined anywhere",
- InlinedImportedFunctionsCount, ImportedFunctions,
- "imported functions")
- << getStatString("imported functions inlined into importing module",
- InlinedImportedFunctionsToImportingModuleCount,
- ImportedFunctions, "imported functions",
- /*LineEnd=*/false)
- << getStatString(", remaining", ImportedNotInlinedIntoModule,
- ImportedFunctions, "imported functions")
- << getStatString("non-imported functions inlined anywhere",
- InlinedNotImportedFunctionsCount,
- NotImportedFuncCount, "non-imported functions")
- << getStatString(
- "non-imported functions inlined into importing module",
- InlinedNotImportedFunctionsToImportingModuleCount,
- NotImportedFuncCount, "non-imported functions");
- Ostream.flush();
- dbgs() << Out;
-}
-
-void ImportedFunctionsInliningStatistics::calculateRealInlines() {
- // Removing duplicated Callers.
- llvm::sort(NonImportedCallers);
- NonImportedCallers.erase(
- std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
- NonImportedCallers.end());
-
- for (const auto &Name : NonImportedCallers) {
- auto &Node = *NodesMap[Name];
- if (!Node.Visited)
- dfs(Node);
- }
-}
-
-void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) {
- assert(!GraphNode.Visited);
- GraphNode.Visited = true;
- for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) {
- InlinedFunctionNode->NumberOfRealInlines++;
- if (!InlinedFunctionNode->Visited)
- dfs(*InlinedFunctionNode);
- }
-}
-
-ImportedFunctionsInliningStatistics::SortedNodesTy
-ImportedFunctionsInliningStatistics::getSortedNodes() {
- SortedNodesTy SortedNodes;
- SortedNodes.reserve(NodesMap.size());
- for (const NodesMapTy::value_type& Node : NodesMap)
- SortedNodes.push_back(&Node);
-
- llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
- const SortedNodesTy::value_type &Rhs) {
- if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
- return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
- return Lhs->second->NumberOfRealInlines >
- Rhs->second->NumberOfRealInlines;
- return Lhs->first() < Rhs->first();
- });
- return SortedNodes;
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 9d8f59d62d6d..a2b72e4e7f03 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -77,7 +78,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
if (CI.isNoBuiltin() || !CI.getCalledFunction())
return;
- const std::string ScalarName = std::string(CI.getCalledFunction()->getName());
+ StringRef ScalarName = CI.getCalledFunction()->getName();
+
// Nothing to be done if the TLI thinks the function is not
// vectorizable.
if (!TLI.isFunctionVectorizable(ScalarName))
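
The hunk above swaps a std::string copy of the callee name for a StringRef view; the name's storage is owned by the Function, so a non-owning view is safe for the duration of the call. A small sketch of the cost difference, assuming only llvm::StringRef and a hypothetical wrapper:

#include "llvm/ADT/StringRef.h"
#include <string>

void nameCosts(llvm::StringRef Name) {
  std::string Copy = Name.str();  // allocates and copies the bytes
  llvm::StringRef View = Name;    // pointer + length, no allocation
  (void)Copy;
  (void)View;
}
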
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index b0b7ca484798..0ac8fa537f4e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -79,6 +79,12 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
cl::Hidden,
cl::desc("Convert noalias attributes to metadata during inlining."));
+static cl::opt<bool>
+ UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true),
+ cl::desc("Use the llvm.experimental.noalias.scope.decl "
+ "intrinsic during inlining."));
+
// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
@@ -771,146 +777,158 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
-/// When inlining a call site that has !llvm.mem.parallel_loop_access or
-/// llvm.access.group metadata, that metadata should be propagated to all
-/// memory-accessing cloned instructions.
-static void PropagateParallelLoopAccessMetadata(CallBase &CB,
- ValueToValueMapTy &VMap) {
- MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
- if (!M && !CallAccessGroup)
+/// When inlining a call site that has !llvm.mem.parallel_loop_access,
+/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
+/// be propagated to all memory-accessing cloned instructions.
+static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
+ MDNode *MemParallelLoopAccess =
+ CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
+ MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
+ if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
return;
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
VMI != VMIE; ++VMI) {
- if (!VMI->second)
+ // Check that key is an instruction, to skip the Argument mapping, which
+ // points to an instruction in the original function, not the inlined one.
+ if (!VMI->second || !isa<Instruction>(VMI->first))
continue;
Instruction *NI = dyn_cast<Instruction>(VMI->second);
if (!NI)
continue;
- if (M) {
- if (MDNode *PM =
- NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
- M = MDNode::concatenate(PM, M);
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- } else if (NI->mayReadOrWriteMemory()) {
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- }
- }
+ // This metadata is only relevant for instructions that access memory.
+ if (!NI->mayReadOrWriteMemory())
+ continue;
- if (NI->mayReadOrWriteMemory()) {
- MDNode *UnitedAccGroups = uniteAccessGroups(
- NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
- NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
+ if (MemParallelLoopAccess) {
+ // TODO: This probably should not overwrite MemParallelLoopAccess.
+ MemParallelLoopAccess = MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access),
+ MemParallelLoopAccess);
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+ MemParallelLoopAccess);
}
+
+ if (AccessGroup)
+ NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
+ NI->getMetadata(LLVMContext::MD_access_group), AccessGroup));
+
+ if (AliasScope)
+ NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope));
+
+ if (NoAlias)
+ NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_noalias), NoAlias));
}
}
-/// When inlining a function that contains noalias scope metadata,
-/// this metadata needs to be cloned so that the inlined blocks
-/// have different "unique scopes" at every call site. Were this not done, then
-/// aliasing scopes from a function inlined into a caller multiple times could
-/// not be differentiated (and this would lead to miscompiles because the
-/// non-aliasing property communicated by the metadata could have
-/// call-site-specific control dependencies).
-static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
- const Function *CalledFunc = CB.getCalledFunction();
+/// Utility for cloning !noalias and !alias.scope metadata. When a code region
+/// using scoped alias metadata is inlined, the aliasing relationships may not
+/// hold between the two versions. It is necessary to create a deep clone of the
+/// metadata, putting the two versions in separate scope domains.
+class ScopedAliasMetadataDeepCloner {
+ using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
SetVector<const MDNode *> MD;
-
- // Note: We could only clone the metadata if it is already used in the
- // caller. I'm omitting that check here because it might confuse
- // inter-procedural alias analysis passes. We can revisit this if it becomes
- // an efficiency or overhead problem.
-
- for (const BasicBlock &I : *CalledFunc)
- for (const Instruction &J : I) {
- if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
+ MetadataMap MDMap;
+ void addRecursiveMetadataUses();
+
+public:
+ ScopedAliasMetadataDeepCloner(const Function *F);
+
+ /// Create a new clone of the scoped alias metadata, which will be used by
+ /// subsequent remap() calls.
+ void clone();
+
+ /// Remap instructions in the given VMap from the original to the cloned
+ /// metadata.
+ void remap(ValueToValueMapTy &VMap);
+};
+
+ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
+ const Function *F) {
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
MD.insert(M);
- if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
+ if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
MD.insert(M);
- }
- if (MD.empty())
- return;
+ // We also need to clone the metadata in noalias intrinsics.
+ if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ MD.insert(Decl->getScopeList());
+ }
+ }
+ addRecursiveMetadataUses();
+}
- // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
- // the set.
+void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
while (!Queue.empty()) {
const MDNode *M = cast<MDNode>(Queue.pop_back_val());
- for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
- if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
- if (MD.insert(M1))
- Queue.push_back(M1);
+ for (const Metadata *Op : M->operands())
+ if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
+ if (MD.insert(OpMD))
+ Queue.push_back(OpMD);
}
+}
+
+void ScopedAliasMetadataDeepCloner::clone() {
+ assert(MDMap.empty() && "clone() already called?");
- // Now we have a complete set of all metadata in the chains used to specify
- // the noalias scopes and the lists of those scopes.
SmallVector<TempMDTuple, 16> DummyNodes;
- DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
for (const MDNode *I : MD) {
- DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None));
MDMap[I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
// metadata references with either a dummy node or an already-created new
// node.
+ SmallVector<Metadata *, 4> NewOps;
for (const MDNode *I : MD) {
- SmallVector<Metadata *, 4> NewOps;
- for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
- const Metadata *V = I->getOperand(i);
- if (const MDNode *M = dyn_cast<MDNode>(V))
+ for (const Metadata *Op : I->operands()) {
+ if (const MDNode *M = dyn_cast<MDNode>(Op))
NewOps.push_back(MDMap[M]);
else
- NewOps.push_back(const_cast<Metadata *>(V));
+ NewOps.push_back(const_cast<Metadata *>(Op));
}
- MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
+ MDNode *NewM = MDNode::get(I->getContext(), NewOps);
MDTuple *TempM = cast<MDTuple>(MDMap[I]);
assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
+ NewOps.clear();
}
+}
- // Now replace the metadata in the new inlined instructions with the
- // repacements from the map.
- for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
- VMI != VMIE; ++VMI) {
- if (!VMI->second)
+void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
+ if (MDMap.empty())
+ return; // Nothing to do.
+
+ for (auto Entry : VMap) {
+ // Check that key is an instruction, to skip the Argument mapping, which
+ // points to an instruction in the original function, not the inlined one.
+ if (!Entry->second || !isa<Instruction>(Entry->first))
continue;
- Instruction *NI = dyn_cast<Instruction>(VMI->second);
- if (!NI)
+ Instruction *I = dyn_cast<Instruction>(Entry->second);
+ if (!I)
continue;
- if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
- MDNode *NewMD = MDMap[M];
- // If the call site also had alias scope metadata (a list of scopes to
- // which instructions inside it might belong), propagate those scopes to
- // the inlined instructions.
- if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope))
- NewMD = MDNode::concatenate(NewMD, CSM);
- NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
- } else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope))
- NI->setMetadata(LLVMContext::MD_alias_scope, M);
- }
+ if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
+ I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
- if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
- MDNode *NewMD = MDMap[M];
- // If the call site also had noalias metadata (a list of scopes with
- // which instructions inside it don't alias), propagate those scopes to
- // the inlined instructions.
- if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias))
- NewMD = MDNode::concatenate(NewMD, CSM);
- NI->setMetadata(LLVMContext::MD_noalias, NewMD);
- } else if (NI->mayReadOrWriteMemory()) {
- if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias))
- NI->setMetadata(LLVMContext::MD_noalias, M);
- }
+ if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
+ I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
+ Decl->setScopeList(MDMap[Decl->getScopeList()]);
}
}
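
A sketch of the order in which the new ScopedAliasMetadataDeepCloner is intended to be driven, matching its use later in this patch: snapshot the callee's metadata before anything is mutated, then clone() and remap() once the VMap has been populated. The driver function is hypothetical, and the cloning step is elided:

#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

// Hypothetical driver; the CloneFunctionInto(...) call is elided.
void cloneScopedAliasMetadata(llvm::CallBase &CB,
                              llvm::ValueToValueMapTy &VMap) {
  // Construct before other changes to the function, so that inlining a
  // function into itself (caller == callee) still sees the original MD.
  ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());

  // ... clone the callee's body into the caller, filling VMap ...

  SAMetadataCloner.clone();     // fresh scope domains for this call site
  SAMetadataCloner.remap(VMap); // retarget the cloned instructions
}
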
@@ -967,6 +985,17 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
// property of the callee, but also all control dependencies in the caller.
MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
NewScopes.insert(std::make_pair(A, NewScope));
+
+ if (UseNoAliasIntrinsic) {
+ // Introduce a llvm.experimental.noalias.scope.decl for the noalias
+ // argument.
+ MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
+ auto *NoAliasDecl =
+ IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
+ // Ignore the result for now. The result will be used when the
+ // llvm.noalias intrinsic is introduced.
+ (void)NoAliasDecl;
+ }
}
// Iterate over all new instructions in the map; for all memory-access
@@ -1037,7 +1066,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
SmallVector<const Value *, 4> Objects;
- GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
+ getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
for (const Value *O : Objects)
ObjSet.insert(O);
@@ -1245,7 +1274,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
Function *CalledFunc = CB.getCalledFunction();
for (Argument &Arg : CalledFunc->args()) {
unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
- if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) {
+ if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) {
if (!DTCalculated) {
DT.recalculate(*CB.getCaller());
DTCalculated = true;
@@ -1448,8 +1477,8 @@ static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
LLVMContext &Ctx,
DenseMap<const MDNode *, MDNode *> &IANodes) {
auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
- return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
- IA);
+ return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
+ OrigDL.getScope(), IA);
}
/// Update inlined instructions' line numbers to
@@ -1573,8 +1602,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
int64_t CallCount =
- std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
- CalleeEntryCount.getCount());
+ std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount());
updateProfileCallee(Callee, -CallCount, &VMap);
}
@@ -1765,6 +1793,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Keep a list of pair (dst, src) to emit byval initializations.
SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ // When inlining a function that contains noalias scope metadata,
+ // this metadata needs to be cloned so that the inlined blocks
+ // have different "unique scopes" at every call site.
+ // Track the metadata that must be cloned. Do this before other changes to
+ // the function, so that we do not get in trouble when inlining caller ==
+ // callee.
+ ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
+
auto &DL = Caller->getParent()->getDataLayout();
// Calculate the vector of arguments to pass into the function cloner, which
@@ -1855,11 +1891,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
ChildOB.Inputs.size());
- MergedDeoptArgs.insert(MergedDeoptArgs.end(),
- ParentDeopt->Inputs.begin(),
- ParentDeopt->Inputs.end());
- MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
- ChildOB.Inputs.end());
+ llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
+ llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
}
@@ -1885,8 +1918,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
fixupLineNumbers(Caller, FirstNewBlock, &CB,
CalledFunc->getSubprogram() != nullptr);
- // Clone existing noalias metadata if necessary.
- CloneAliasScopeMetadata(CB, VMap);
+ // Now clone the inlined noalias scope metadata.
+ SAMetadataCloner.clone();
+ SAMetadataCloner.remap(VMap);
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
@@ -1895,8 +1929,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// function which feed into its return value.
AddReturnAttributes(CB, VMap);
- // Propagate llvm.mem.parallel_loop_access if necessary.
- PropagateParallelLoopAccessMetadata(CB, VMap);
+ // Propagate metadata on the callsite if necessary.
+ PropagateCallSiteMetadata(CB, VMap);
// Register any cloned assumptions.
if (IFI.GetAssumptionCache)
@@ -2061,7 +2095,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
dyn_cast<ConstantInt>(AI->getArraySize())) {
auto &DL = Caller->getParent()->getDataLayout();
Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
// Don't add markers for zero-sized allocas.
@@ -2070,9 +2104,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Check that array size doesn't saturate uint64_t and doesn't
// overflow when it's multiplied by type size.
- if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+ if (!AllocaTypeSize.isScalable() &&
+ AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
- AllocaTypeSize) {
+ AllocaTypeSize.getFixedSize()) {
AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
AllocaArraySize * AllocaTypeSize);
}
@@ -2198,10 +2233,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// match the callee's return type, we also need to change the return type of
// the intrinsic.
if (Caller->getReturnType() == CB.getType()) {
- auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
+ llvm::erase_if(Returns, [](ReturnInst *RI) {
return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
});
- Returns.erase(NewEnd, Returns.end());
} else {
SmallVector<ReturnInst *, 8> NormalReturns;
Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
@@ -2225,8 +2259,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
auto *CurBB = RI->getParent();
RI->eraseFromParent();
- SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
- DeoptCall->arg_end());
+ SmallVector<Value *, 4> CallArgs(DeoptCall->args());
SmallVector<OperandBundleDef, 1> OpBundles;
DeoptCall->getOperandBundlesAsDefs(OpBundles);
@@ -2463,7 +2496,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// If we inlined any musttail calls and the original return is now
// unreachable, delete it. It can only contain a bitcast and ret.
- if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ if (InlinedMustTailCalls && pred_empty(AfterCallBB))
AfterCallBB->eraseFromParent();
// We should always be able to fold the entry block of the function into the
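
The lifetime-marker hunk above now keeps DL.getTypeAllocSize() as a TypeSize so scalable-vector allocas are skipped rather than given a bogus fixed size. A reduced sketch of that guard; the helper name and return convention are assumptions for illustration:

#include "llvm/ADT/Optional.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"
#include <cstdint>
#include <limits>

// Hypothetical helper: byte size for lifetime markers, or None if unknown
// (scalable element type, or the multiply would overflow uint64_t).
llvm::Optional<uint64_t> allocaSizeForMarkers(const llvm::DataLayout &DL,
                                              llvm::Type *ElemTy,
                                              uint64_t ArraySize) {
  llvm::TypeSize Sz = DL.getTypeAllocSize(ElemTy);
  if (Sz.isScalable())
    return llvm::None; // size depends on vscale, unknown at compile time
  uint64_t Fixed = Sz.getFixedSize();
  if (ArraySize == 0 || Fixed == 0)
    return UINT64_C(0); // zero-sized allocas get no markers anyway
  if (ArraySize == std::numeric_limits<uint64_t>::max() ||
      std::numeric_limits<uint64_t>::max() / ArraySize < Fixed)
    return llvm::None; // saturated or overflowing size
  return Fixed * ArraySize;
}
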
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 8e339fe46d45..f3499c9c8aed 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -13,43 +13,52 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/InstructionNamer.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
+
using namespace llvm;
namespace {
- struct InstNamer : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {
- initializeInstNamerPass(*PassRegistry::getPassRegistry());
- }
+void nameInstructions(Function &F) {
+ for (auto &Arg : F.args()) {
+ if (!Arg.hasName())
+ Arg.setName("arg");
+ }
- void getAnalysisUsage(AnalysisUsage &Info) const override {
- Info.setPreservesAll();
+ for (BasicBlock &BB : F) {
+ if (!BB.hasName())
+ BB.setName("bb");
+
+ for (Instruction &I : BB) {
+ if (!I.hasName() && !I.getType()->isVoidTy())
+ I.setName("i");
}
+ }
+}
- bool runOnFunction(Function &F) override {
- for (auto &Arg : F.args())
- if (!Arg.hasName())
- Arg.setName("arg");
+struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
- for (BasicBlock &BB : F) {
- if (!BB.hasName())
- BB.setName("bb");
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
+ Info.setPreservesAll();
+ }
- for (Instruction &I : BB)
- if (!I.hasName() && !I.getType()->isVoidTy())
- I.setName("i");
- }
- return true;
- }
- };
+ bool runOnFunction(Function &F) override {
+ nameInstructions(F);
+ return true;
+ }
+};
char InstNamer::ID = 0;
-}
+ } // namespace
INITIALIZE_PASS(InstNamer, "instnamer",
"Assign names to anonymous instructions", false, false)
@@ -61,3 +70,9 @@ char &llvm::InstructionNamerID = InstNamer::ID;
FunctionPass *llvm::createInstructionNamerPass() {
return new InstNamer();
}
+
+PreservedAnalyses InstructionNamerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ nameInstructions(F);
+ return PreservedAnalyses::all();
+}
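
With the port above, instnamer can be run through the new pass manager as well as the legacy wrapper. A sketch of invoking it directly from a hypothetical wrapper; since the pass uses no analyses, a locally constructed FunctionAnalysisManager is assumed to be sufficient:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/InstructionNamer.h"

void nameEverything(llvm::Function &F) {
  llvm::FunctionAnalysisManager FAM; // unused by this pass
  llvm::InstructionNamerPass Namer;
  Namer.run(F, FAM); // args -> "arg", blocks -> "bb", non-void values -> "i"
}
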
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index b1a1c564d217..7437701f5339 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
@@ -77,12 +78,15 @@ static bool isExitBlock(BasicBlock *BB,
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
const DominatorTree &DT, const LoopInfo &LI,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, IRBuilderBase &Builder,
+ SmallVectorImpl<PHINode *> *PHIsToRemove) {
SmallVector<Use *, 16> UsesToRewrite;
- SmallSetVector<PHINode *, 16> PHIsToRemove;
+ SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
+ IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
+
// Cache the Loop ExitBlocks across this loop. We expect to get a lot of
// instructions within the same loops, computing the exit blocks is
// expensive, and we're not mutating the loop structure.
@@ -107,6 +111,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(U);
@@ -151,12 +159,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// If we already inserted something for this BB, don't reprocess it.
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
-
- PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa", &ExitBB->front());
+ Builder.SetInsertPoint(&ExitBB->front());
+ PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa");
// Get the debug location from the original instruction.
PN->setDebugLoc(I->getDebugLoc());
- // Add inputs from inside the loop for this PHI.
+
+ // Add inputs from inside the loop for this PHI. This is valid
+ // because `I` dominates `ExitBB` (checked above). This implies
+ // that every incoming block/edge is dominated by `I` as well,
+ // i.e. we can add uses of `I` on those incoming edges (appending to the
+ // incoming blocks) without violating the SSA dominance property.
for (BasicBlock *Pred : PredCache.get(ExitBB)) {
PN->addIncoming(I, Pred);
@@ -190,15 +203,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Rewrite all uses outside the loop in terms of the new PHIs we just
// inserted.
for (Use *UseToRewrite : UsesToRewrite) {
- // If this use is in an exit block, rewrite to use the newly inserted PHI.
- // This is required for correctness because SSAUpdate doesn't handle uses
- // in the same block. It assumes the PHI we inserted is at the end of the
- // block.
Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
+
+ // For practical purposes, we consider that the use in a PHI
+ // occurs in the respective predecessor block. For more info,
+ // see the `phi` doc in LangRef and the LCSSA doc.
if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UseToRewrite);
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses
+ // in the same block. It assumes the PHI we inserted is at the end of the
+ // block.
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
UseToRewrite->set(&UserBB->front());
continue;
@@ -248,27 +265,29 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Worklist.push_back(PostProcessPN);
// Keep track of PHI nodes that we want to remove because they did not have
- // any uses rewritten. If the new PHI is used, store it so that we can
- // try to propagate dbg.value intrinsics to it.
- SmallVector<PHINode *, 2> NeedDbgValues;
+ // any uses rewritten.
for (PHINode *PN : AddedPHIs)
if (PN->use_empty())
- PHIsToRemove.insert(PN);
- else
- NeedDbgValues.push_back(PN);
- insertDebugValuesForPHIs(InstBB, NeedDbgValues);
+ LocalPHIsToRemove.insert(PN);
+
Changed = true;
}
- // Remove PHI nodes that did not have any uses rewritten. We need to redo the
- // use_empty() check here, because even if the PHI node wasn't used when added
- // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
- // not guaranteed to handle trees/cycles of PHI nodes that only are used by
- // each other. Such situations has only been noticed when the input IR
- // contains unreachable code, and leaving some extra redundant PHI nodes in
- // such situations is considered a minor problem.
- for (PHINode *PN : PHIsToRemove)
- if (PN->use_empty())
- PN->eraseFromParent();
+
+ // Remove PHI nodes that did not have any uses rewritten or add them to
+ // PHIsToRemove, so the caller can remove them after some additional cleanup.
+ // We need to redo the use_empty() check here, because even if the PHI node
+ // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
+ // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
+ // nodes that are only used by each other. Such situations have only been
+ // noticed when the input IR contains unreachable code, and leaving some extra
+ // redundant PHI nodes in such situations is considered a minor problem.
+ if (PHIsToRemove) {
+ PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
+ } else {
+ for (PHINode *PN : LocalPHIsToRemove)
+ if (PN->use_empty())
+ PN->eraseFromParent();
+ }
return Changed;
}
@@ -276,12 +295,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
static void computeBlocksDominatingExits(
Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
- SmallVector<BasicBlock *, 8> BBWorklist;
-
// We start from the exit blocks, as every block trivially dominates itself
// (not strictly).
- for (BasicBlock *BB : ExitBlocks)
- BBWorklist.push_back(BB);
+ SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
while (!BBWorklist.empty()) {
BasicBlock *BB = BBWorklist.pop_back_val();
@@ -369,7 +385,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
Worklist.push_back(&I);
}
}
- Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
+
+ IRBuilder<> Builder(L.getHeader()->getContext());
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
// If we modified the code, remove any caches about the loop from SCEV to
// avoid dangling entries.
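
The new formLCSSAForInstructions() parameters let a caller share one IRBuilder (its insertion point is saved and restored by the InsertPointGuard above) and defer the deletion of unused .lcssa PHIs. A sketch of a hypothetical caller exercising both, under the assumption that Worklist already holds the instructions needing LCSSA rewrites:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

bool rewriteToLCSSA(llvm::SmallVectorImpl<llvm::Instruction *> &Worklist,
                    const llvm::DominatorTree &DT, const llvm::LoopInfo &LI,
                    llvm::LLVMContext &Ctx) {
  llvm::IRBuilder<> Builder(Ctx);
  // Collect dead PHIs instead of erasing them inside the utility, so this
  // caller can run its own cleanup afterwards.
  llvm::SmallVector<llvm::PHINode *, 8> DeadPHIs;
  bool Changed = llvm::formLCSSAForInstructions(
      Worklist, DT, LI, /*SE=*/nullptr, Builder, &DeadPHIs);
  for (llvm::PHINode *PN : DeadPHIs)
    if (PN->use_empty()) // later PHIs may have picked up uses
      PN->eraseFromParent();
  return Changed;
}
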
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index da40c342af3a..477ea458c763 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -91,6 +91,24 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "local"
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd");
+
+static cl::opt<bool> PHICSEDebugHash(
+ "phicse-debug-hash",
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true),
+#else
+ cl::init(false),
+#endif
+ cl::Hidden,
+ cl::desc("Perform extra assertion checking to verify that PHINodes's hash "
+ "function is well-behaved w.r.t. its isEqual predicate"));
+
+static cl::opt<unsigned> PHICSENumPHISmallSize(
+ "phicse-num-phi-smallsize", cl::init(32), cl::Hidden,
+ cl::desc(
+ "When the basic block contains not more than this number of PHI nodes, "
+ "perform a (faster!) exhaustive search instead of set-driven one."));
// Max recursion depth for collectBitParts used when detecting bswap and
// bitreverse idioms
@@ -116,27 +134,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Branch - See if we are conditional jumping on constant
if (auto *BI = dyn_cast<BranchInst>(T)) {
if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+
BasicBlock *Dest1 = BI->getSuccessor(0);
BasicBlock *Dest2 = BI->getSuccessor(1);
- if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
- // Are we branching on constant?
- // YES. Change to unconditional branch...
- BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
- BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
-
- // Let the basic block know that we are letting go of it. Based on this,
- // it will adjust it's PHI nodes.
- OldDest->removePredecessor(BB);
-
- // Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Destination);
- BI->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
- return true;
- }
-
if (Dest2 == Dest1) { // Conditional branch to same location?
// This branch matches something like this:
// br bool %cond, label %Dest, label %Dest
@@ -154,6 +155,25 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
+
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}});
+ return true;
+ }
+
return false;
}
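
The reordered hunk above handles the two branch cases separately: identical successors first, then a constant condition. A reduced sketch of the constant-condition fold in a hypothetical helper, using only the IR facilities named in the diff (the same-destination case is assumed to have been handled already):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

bool foldConstantCondBranch(llvm::BranchInst *BI) {
  if (BI->isUnconditional())
    return false;
  auto *Cond = llvm::dyn_cast<llvm::ConstantInt>(BI->getCondition());
  if (!Cond || BI->getSuccessor(0) == BI->getSuccessor(1))
    return false; // non-constant, or same-destination case handled earlier
  llvm::BasicBlock *Keep = Cond->getZExtValue() ? BI->getSuccessor(0)
                                                : BI->getSuccessor(1);
  llvm::BasicBlock *Drop = Cond->getZExtValue() ? BI->getSuccessor(1)
                                                : BI->getSuccessor(0);
  // PHIs in the dropped successor must forget this predecessor first.
  Drop->removePredecessor(BI->getParent());
  llvm::IRBuilder<> Builder(BI);
  Builder.CreateBr(Keep);
  BI->eraseFromParent();
  return true;
}
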
@@ -170,6 +190,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
TheOnlyDest = SI->case_begin()->getCaseSuccessor();
}
+ bool Changed = false;
+
// Figure out which case it goes to.
for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
// Found case matching a constant operand?
@@ -208,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
- if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, ParentBB, DefaultDest}});
+ Changed = true;
continue;
}
@@ -236,19 +256,19 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(SI->getNumSuccessors() - 1);
+
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
// Remove entries from PHI nodes which we no longer branch to...
+ BasicBlock *SuccToKeep = TheOnlyDest;
for (BasicBlock *Succ : successors(SI)) {
+ if (DTU && Succ != TheOnlyDest)
+ RemovedSuccessors.insert(Succ);
// Found case matching a constant operand?
- if (Succ == TheOnlyDest) {
- TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
+ if (Succ == SuccToKeep) {
+ SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest
} else {
Succ->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
}
}
@@ -257,8 +277,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -296,7 +321,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
return true;
}
- return false;
+ return Changed;
}
if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {
@@ -304,22 +329,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (auto *BA =
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(IBI->getNumDestinations() - 1);
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
+ BasicBlock *SuccToKeep = TheOnlyDest;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- if (IBI->getDestination(i) == TheOnlyDest) {
- TheOnlyDest = nullptr;
+ BasicBlock *DestBB = IBI->getDestination(i);
+ if (DTU && DestBB != TheOnlyDest)
+ RemovedSuccessors.insert(DestBB);
+ if (IBI->getDestination(i) == SuccToKeep) {
+ SuccToKeep = nullptr;
} else {
- BasicBlock *ParentBB = IBI->getParent();
- BasicBlock *DestBB = IBI->getDestination(i);
- DestBB->removePredecessor(ParentBB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
+ DestBB->removePredecessor(BB);
}
}
Value *Address = IBI->getAddress();
@@ -336,13 +359,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
// 'unreachable' instruction.
- if (TheOnlyDest) {
+ if (SuccToKeep) {
BB->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), BB);
}
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
}
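
Several hunks in this file trade applyUpdatesPermissive() for the strict applyUpdates(), which expects each deleted CFG edge to be reported exactly once and only if it really disappears; the SmallSetVector above provides the deduplication, since a switch or indirectbr can list one successor many times. A distilled sketch of the idiom in a hypothetical helper:

#include "llvm/ADT/SmallSetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

// Report every edge from BB except the one to KeptDest as deleted,
// collapsing duplicate CFG edges before handing them to the DTU.
void reportDeletedEdges(llvm::BasicBlock *BB, llvm::BasicBlock *KeptDest,
                        llvm::DomTreeUpdater *DTU) {
  if (!DTU)
    return;
  llvm::SmallSetVector<llvm::BasicBlock *, 8> Removed;
  for (llvm::BasicBlock *Succ : llvm::successors(BB))
    if (Succ != KeptDest)
      Removed.insert(Succ); // set semantics drop repeated successors
  llvm::SmallVector<llvm::DominatorTree::UpdateType, 8> Updates;
  Updates.reserve(Removed.size());
  for (llvm::BasicBlock *Succ : Removed)
    Updates.push_back({llvm::DominatorTree::Delete, BB, Succ});
  DTU->applyUpdates(Updates);
}
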
@@ -392,6 +420,14 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
}
+ if (auto *CB = dyn_cast<CallBase>(I)) {
+ // Treat calls that may not return as alive.
+ // TODO: Remove the intrinsic escape hatch once all intrinsics set
+ // willreturn properly.
+ if (!CB->willReturn() && !isa<IntrinsicInst>(I))
+ return false;
+ }
+
if (!I->mayHaveSideEffects())
return true;
@@ -453,21 +489,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
- Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<WeakTrackingVH, 16> DeadInsts;
DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
return true;
}
bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
unsigned S = 0, E = DeadInsts.size(), Alive = 0;
for (; S != E; ++S) {
auto *I = cast<Instruction>(DeadInsts[S]);
@@ -478,13 +517,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
}
if (Alive == E)
return false;
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+ AboutToDeleteCallback);
return true;
}
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ std::function<void(Value *)> AboutToDeleteCallback) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Value *V = DeadInsts.pop_back_val();
@@ -498,6 +539,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
// Don't lose the debug info while deleting the instructions.
salvageDebugInfo(*I);
+ if (AboutToDeleteCallback)
+ AboutToDeleteCallback(I);
+
// Null out all of the instruction's operands to see if any operand becomes
// dead as we go.
for (Use &OpU : I->operands()) {
@@ -675,34 +719,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Control Flow Graph Restructuring.
//
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DomTreeUpdater *DTU) {
- // This only adjusts blocks with PHI nodes.
- if (!isa<PHINode>(BB->begin()))
- return;
-
- // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
- // them down. This will leave us with single entry phi nodes and other phis
- // that can be removed.
- BB->removePredecessor(Pred, true);
-
- WeakTrackingVH PhiIt = &BB->front();
- while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
- PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
- Value *OldPhiIt = PhiIt;
-
- if (!recursivelySimplifyInstruction(PN))
- continue;
-
- // If recursive simplification ended up deleting the next PHI node we would
- // iterate to, then our iterator is invalid, restart scanning from the top
- // of the block.
- if (PhiIt != OldPhiIt) PhiIt = &BB->front();
- }
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
-}
-
void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
DomTreeUpdater *DTU) {
@@ -727,13 +743,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, PredBB});
// This predecessor of PredBB may already have DestBB as a successor.
- if (llvm::find(successors(*I), DestBB) == succ_end(*I))
+ if (!llvm::is_contained(successors(*I), DestBB))
Updates.push_back({DominatorTree::Insert, *I, DestBB});
+ Updates.push_back({DominatorTree::Delete, *I, PredBB});
}
+ Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
}
// Zap anything that took the address of DestBB. Not doing this will give the
@@ -1038,14 +1054,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- Updates.push_back({DominatorTree::Delete, BB, Succ});
// All predecessors of BB will be moved to Succ.
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, BB});
+ SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB));
+ Updates.reserve(Updates.size() + 2 * Predecessors.size());
+ for (auto *Predecessor : Predecessors) {
// This predecessor of BB may already have Succ as a successor.
- if (llvm::find(successors(*I), Succ) == succ_end(*I))
- Updates.push_back({DominatorTree::Insert, *I, Succ});
+ if (!llvm::is_contained(successors(Predecessor), Succ))
+ Updates.push_back({DominatorTree::Insert, Predecessor, Succ});
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
}
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
if (isa<PHINode>(Succ->begin())) {
@@ -1101,7 +1119,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
"applying corresponding DTU updates.");
if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
+ DTU->applyUpdates(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
@@ -1109,7 +1127,39 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
return true;
}
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) {
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ bool Changed = false;
+
+ // Examine each PHI.
+ // Note that increment of I must *NOT* be in the iteration_expression, since
+ // we don't want to immediately advance when we restart from the beginning.
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) {
+ ++I;
+ // Is there an identical PHI node in this basic block?
+ // Note that we only look at the PHIs following PN (the upper triangle
+ // of the comparison matrix); earlier PHIs were already checked against it.
+ for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) {
+ if (!DuplicatePN->isIdenticalToWhenDefined(PN))
+ continue;
+ // A duplicate. Replace this PHI with the base PHI.
+ ++NumPHICSEs;
+ DuplicatePN->replaceAllUsesWith(PN);
+ DuplicatePN->eraseFromParent();
+ Changed = true;
+
+ // The RAUW can change PHIs that we already visited.
+ I = BB->begin();
+ break; // Start over from the beginning.
+ }
+ }
+ return Changed;
+}
+
+static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) {
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
@@ -1123,7 +1173,13 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return DenseMapInfo<PHINode *>::getTombstoneKey();
}
- static unsigned getHashValue(PHINode *PN) {
+ static bool isSentinel(PHINode *PN) {
+ return PN == getEmptyKey() || PN == getTombstoneKey();
+ }
+
+ // WARNING: this logic must be kept in sync with
+ // Instruction::isIdenticalToWhenDefined()!
+ static unsigned getHashValueImpl(PHINode *PN) {
// Compute a hash value on the operands. Instcombine will likely have
// sorted them, which helps expose duplicates, but we have to check all
// the operands to be safe in case instcombine hasn't run.
@@ -1132,16 +1188,37 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
hash_combine_range(PN->block_begin(), PN->block_end())));
}
- static bool isEqual(PHINode *LHS, PHINode *RHS) {
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
+ static unsigned getHashValue(PHINode *PN) {
+#ifndef NDEBUG
+ // If -phicse-debug-hash was specified, return a constant -- this
+ // will force all hashing to collide, so we'll exhaustively search
+ // the table for a match, and the assertion in isEqual will fire if
+ // there's a bug causing equal keys to hash differently.
+ if (PHICSEDebugHash)
+ return 0;
+#endif
+ return getHashValueImpl(PN);
+ }
+
+ static bool isEqualImpl(PHINode *LHS, PHINode *RHS) {
+ if (isSentinel(LHS) || isSentinel(RHS))
return LHS == RHS;
return LHS->isIdenticalTo(RHS);
}
+
+ static bool isEqual(PHINode *LHS, PHINode *RHS) {
+ // These comparisons are nontrivial, so assert that equality implies
+ // hash equality (DenseMap demands this as an invariant).
+ bool Result = isEqualImpl(LHS, RHS);
+ assert(!Result || (isSentinel(LHS) && LHS == RHS) ||
+ getHashValueImpl(LHS) == getHashValueImpl(RHS));
+ return Result;
+ }
};
// Set of unique PHINodes.
DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
+ PHISet.reserve(4 * PHICSENumPHISmallSize);
// Examine each PHI.
bool Changed = false;
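
PHIDenseMapInfo above splits hashing and equality into getHashValueImpl/isEqualImpl so that isEqual() can assert the DenseMap invariant: keys that compare equal must hash equal. A self-contained sketch of the same checked-traits pattern on a hypothetical key type:

#include <cassert>
#include <cstddef>
#include <functional>

// Hypothetical key: an unordered pair, so (a,b) must equal (b,a).
struct UnorderedPair { int A, B; };

// The hash must be order-insensitive; otherwise equal keys could hash
// differently, which breaks any hash container.
static std::size_t hashImpl(const UnorderedPair &P) {
  return std::hash<int>()(P.A) ^ std::hash<int>()(P.B); // commutative
}
static bool equalImpl(const UnorderedPair &L, const UnorderedPair &R) {
  return (L.A == R.A && L.B == R.B) || (L.A == R.B && L.B == R.A);
}
// Equality checked against the invariant, in the spirit of isEqual() above.
static bool checkedEqual(const UnorderedPair &L, const UnorderedPair &R) {
  bool Result = equalImpl(L, R);
  assert(!Result || hashImpl(L) == hashImpl(R));
  return Result;
}
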
@@ -1149,6 +1226,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
auto Inserted = PHISet.insert(PN);
if (!Inserted.second) {
// A duplicate. Replace this PHI with its duplicate.
+ ++NumPHICSEs;
PN->replaceAllUsesWith(*Inserted.first);
PN->eraseFromParent();
Changed = true;
@@ -1163,54 +1241,63 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return Changed;
}
-/// enforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-static Align enforceKnownAlignment(Value *V, Align Alignment, Align PrefAlign,
- const DataLayout &DL) {
- assert(PrefAlign > Alignment);
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ if (
+#ifndef NDEBUG
+ !PHICSEDebugHash &&
+#endif
+ hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize))
+ return EliminateDuplicatePHINodesNaiveImpl(BB);
+ return EliminateDuplicatePHINodesSetBasedImpl(BB);
+}
+/// If the specified pointer points to an object that we control, try to modify
+/// the object's alignment to PrefAlign. Returns a minimum known alignment of
+/// the value after the operation, which may be lower than PrefAlign.
+///
+/// Increasing value alignment isn't often possible though. If alignment is
+/// important, a more reliable approach is to simply align all global variables
+/// and allocation instructions to their preferred alignment from the beginning.
+static Align tryEnforceAlignment(Value *V, Align PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- // TODO: ideally, computeKnownBits ought to have used
- // AllocaInst::getAlignment() in its computation already, making
- // the below max redundant. But, as it turns out,
- // stripPointerCasts recurses through infinite layers of bitcasts,
- // while computeKnownBits is not allowed to traverse more than 6
- // levels.
- Alignment = std::max(AI->getAlign(), Alignment);
- if (PrefAlign <= Alignment)
- return Alignment;
+ // TODO: Ideally, this function would not be called if PrefAlign is smaller
+ // than the current alignment, as the known bits calculation should have
+ // already taken it into account. However, this is not always the case,
+ // as computeKnownBits() has a depth limit, while stripPointerCasts()
+ // doesn't.
+ Align CurrentAlign = AI->getAlign();
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
if (DL.exceedsNaturalStackAlignment(PrefAlign))
- return Alignment;
+ return CurrentAlign;
AI->setAlignment(PrefAlign);
return PrefAlign;
}
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// TODO: as above, this shouldn't be necessary.
- Alignment = max(GO->getAlign(), Alignment);
- if (PrefAlign <= Alignment)
- return Alignment;
+ Align CurrentAlign = GO->getPointerAlignment(DL);
+ if (PrefAlign <= CurrentAlign)
+ return CurrentAlign;
// If there is a large requested alignment and we can, bump up the alignment
// of the global. If the memory we set aside for the global may not be the
// memory used by the final program then it is impossible for us to reliably
// enforce the preferred alignment.
if (!GO->canIncreaseAlignment())
- return Alignment;
+ return CurrentAlign;
GO->setAlignment(PrefAlign);
return PrefAlign;
}
- return Alignment;
+ return Align(1);
}
Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
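
EliminateDuplicatePHINodes() now dispatches on the PHI count: a quadratic scan below -phicse-num-phi-smallsize (no hashing, better constants) and the hash-set version above it. A standalone sketch of the same size-based dispatch over plain integers; the threshold value is only illustrative:

#include <cstddef>
#include <unordered_set>
#include <utility>
#include <vector>

// Remove duplicates while keeping first occurrences. For small inputs the
// exhaustive scan typically beats the hash set's setup and hashing cost.
void dedup(std::vector<int> &V, std::size_t SmallSize = 32) {
  std::vector<int> Out;
  Out.reserve(V.size());
  if (V.size() <= SmallSize) {
    for (int X : V) {
      bool Seen = false;
      for (int Y : Out)
        if (X == Y) {
          Seen = true;
          break;
        }
      if (!Seen)
        Out.push_back(X);
    }
  } else {
    std::unordered_set<int> Seen;
    for (int X : V)
      if (Seen.insert(X).second)
        Out.push_back(X);
  }
  V = std::move(Out);
}
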
@@ -1232,7 +1319,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
if (PrefAlign && *PrefAlign > Alignment)
- Alignment = enforceKnownAlignment(V, Alignment, *PrefAlign, DL);
+ Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL));
// We don't need to make any adjustment.
return Alignment;
@@ -1270,16 +1357,22 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// least n bits.
static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
- uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
- if (auto FragmentSize = DII->getFragmentSizeInBits())
- return ValueSize >= *FragmentSize;
+ TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
+ if (Optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) {
+ assert(!ValueSize.isScalable() &&
+ "Fragments don't work on scalable types.");
+ return ValueSize.getFixedSize() >= *FragmentSize;
+ }
// We can't always calculate the size of the DI variable (e.g. if it is a
// VLA). Try to use the size of the alloca that the dbg intrinsic describes
// instead.
if (DII->isAddressOfVariable())
if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
- if (auto FragmentSize = AI->getAllocationSizeInBits(DL))
- return ValueSize >= *FragmentSize;
+ if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
+ assert(ValueSize.isScalable() == FragmentSize->isScalable() &&
+ "Both sizes should agree on the scalable flag.");
+ return TypeSize::isKnownGE(ValueSize, *FragmentSize);
+ }
// Could not determine size of variable. Conservatively return false.
return false;
}
@@ -1294,7 +1387,7 @@ static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
MDNode *Scope = DeclareLoc.getScope();
DILocation *InlinedAt = DeclareLoc.getInlinedAt();
// Produce an unknown location with the correct scope / inlinedAt fields.
- return DebugLoc::get(0, 0, Scope, InlinedAt);
+ return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt);
}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
@@ -1911,8 +2004,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return false;
}
-unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
+std::pair<unsigned, unsigned>
+llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
+ unsigned NumDeadDbgInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
@@ -1925,30 +2020,31 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
EndInst = Inst;
continue;
}
- if (!isa<DbgInfoIntrinsic>(Inst))
+ if (isa<DbgInfoIntrinsic>(Inst))
+ ++NumDeadDbgInst;
+ else
++NumDeadInst;
Inst->eraseFromParent();
}
- return NumDeadInst;
+ return {NumDeadInst, NumDeadDbgInst};
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
bool PreserveLCSSA, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
BasicBlock *BB = I->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
if (MSSAU)
MSSAU->changeToUnreachable(I);
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
+
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- if (DTU)
- Updates.reserve(BB->getTerminator()->getNumSuccessors());
for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
+ UniqueSuccessors.insert(Successor);
}
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
@@ -1970,13 +2066,18 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.reserve(UniqueSuccessors.size());
+ for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+ DTU->applyUpdates(Updates);
+ }
return NumInstrsRemoved;
}
CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) {
- SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<Value *, 8> Args(II->args());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
CallInst *NewCall = CallInst::Create(II->getFunctionType(),
@@ -2017,7 +2118,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -2033,7 +2134,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BB->getInstList().pop_back();
// Create the new invoke instruction.
- SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<Value *, 8> InvokeArgs(CI->args());
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -2164,8 +2265,7 @@ static bool markAliveBlocks(Function &F,
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, BB, UnwindDestBB}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
} else
changeToCall(II, DTU);
Changed = true;
@@ -2194,6 +2294,7 @@ static bool markAliveBlocks(Function &F,
}
};
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
// Set of unique CatchPads.
SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
@@ -2203,14 +2304,22 @@ static bool markAliveBlocks(Function &F,
E = CatchSwitch->handler_end();
I != E; ++I) {
BasicBlock *HandlerBB = *I;
+ ++NumPerSuccessorCases[HandlerBB];
auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ --NumPerSuccessorCases[HandlerBB];
CatchSwitch->removeHandler(I);
--I;
--E;
Changed = true;
}
}
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, BB, I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
@@ -2254,7 +2363,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}});
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2270,28 +2379,39 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return Changed;
assert(Reachable.size() < F.size());
- NumRemoved += F.size() - Reachable.size();
- SmallSetVector<BasicBlock *, 8> DeadBlockSet;
+ // Are there any blocks left to actually delete?
+ SmallSetVector<BasicBlock *, 8> BlocksToRemove;
for (BasicBlock &BB : F) {
// Skip reachable basic blocks
if (Reachable.count(&BB))
continue;
- DeadBlockSet.insert(&BB);
+ // Skip already-deleted blocks
+ if (DTU && DTU->isBBPendingDeletion(&BB))
+ continue;
+ BlocksToRemove.insert(&BB);
}
+ if (BlocksToRemove.empty())
+ return Changed;
+
+ Changed = true;
+ NumRemoved += BlocksToRemove.size();
+
if (MSSAU)
- MSSAU->removeBlocks(DeadBlockSet);
+ MSSAU->removeBlocks(BlocksToRemove);
- // Loop over all of the basic blocks that are not reachable, dropping all of
+ // Loop over all of the basic blocks that are up for removal, dropping all of
// their internal references. Update DTU if available.
std::vector<DominatorTree::UpdateType> Updates;
- for (auto *BB : DeadBlockSet) {
+ for (auto *BB : BlocksToRemove) {
+ SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
for (BasicBlock *Successor : successors(BB)) {
- if (!DeadBlockSet.count(Successor))
+ // Only remove references to BB in reachable successors of BB.
+ if (Reachable.count(Successor))
Successor->removePredecessor(BB);
if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
+ UniqueSuccessors.insert(Successor);
}
BB->dropAllReferences();
if (DTU) {
@@ -2305,27 +2425,22 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
new UnreachableInst(BB->getContext(), BB);
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
+ Updates.reserve(Updates.size() + UniqueSuccessors.size());
+ for (auto *UniqueSuccessor : UniqueSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
}
}
if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
- bool Deleted = false;
- for (auto *BB : DeadBlockSet) {
- if (DTU->isBBPendingDeletion(BB))
- --NumRemoved;
- else
- Deleted = true;
+ DTU->applyUpdates(Updates);
+ for (auto *BB : BlocksToRemove)
DTU->deleteBB(BB);
- }
- if (!Deleted)
- return false;
} else {
- for (auto *BB : DeadBlockSet)
+ for (auto *BB : BlocksToRemove)
BB->eraseFromParent();
}
- return true;
+ return Changed;
}
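The rewrite above replaces applyUpdatesPermissive with the strict applyUpdates, which expects each Delete update to be emitted exactly once; hence the UniqueSuccessors set. A minimal sketch of that deduplication (plain C++, illustrative names):

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // A terminator may list the same successor several times.
      std::vector<std::string> Successors = {"exit", "exit", "cont"};
      std::set<std::string> UniqueSuccessors(Successors.begin(),
                                             Successors.end());
      // One Delete update per *unique* successor edge.
      assert(UniqueSuccessors.size() == 2);
    }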
void llvm::combineMetadata(Instruction *K, const Instruction *J,
@@ -2570,10 +2685,13 @@ bool llvm::callsGCLeafFunction(const CallBase *Call,
if (F->hasFnAttribute("gc-leaf-function"))
return true;
- if (auto IID = F->getIntrinsicID())
+ if (auto IID = F->getIntrinsicID()) {
// Most LLVM intrinsics do not take safepoints.
return IID != Intrinsic::experimental_gc_statepoint &&
- IID != Intrinsic::experimental_deoptimize;
+ IID != Intrinsic::experimental_deoptimize &&
+ IID != Intrinsic::memcpy_element_unordered_atomic &&
+ IID != Intrinsic::memmove_element_unordered_atomic;
+ }
}
// Lib calls can be materialized by some passes, and won't be
@@ -2701,7 +2819,7 @@ struct BitPart {
/// Analyze the specified subexpression and see if it is capable of providing
/// pieces of a bswap or bitreverse. The subexpression provides a potential
-/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in
/// the output of the expression came from a corresponding bit in some other
/// value. This function is recursive, and the end result is a mapping of
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
@@ -2713,6 +2831,10 @@ struct BitPart {
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
/// [0-7].
///
+/// For vector types, all analysis is performed at the per-element level. No
+/// cross-element analysis is supported (shuffle/insertion/reduction), and all
+/// constant masks must be splatted across all elements.
+///
/// To avoid revisiting values, the BitPart results are memoized into the
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
@@ -2730,7 +2852,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return I->second;
auto &Result = BPS[V] = None;
- auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ auto BitWidth = V->getType()->getScalarSizeInBits();
// Prevent stack overflow by limiting the recursion depth
if (Depth == BitPartRecursionMaxDepth) {
@@ -2738,13 +2860,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
}
- if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Value *X, *Y;
+ const APInt *C;
+
// If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ if (match(V, m_Or(m_Value(X), m_Value(Y)))) {
+ const auto &A =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &B =
+ collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!A || !B)
return Result;
@@ -2753,31 +2878,31 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
Result = BitPart(A->Provider, BitWidth);
- for (unsigned i = 0; i < A->Provenance.size(); ++i) {
- if (A->Provenance[i] != BitPart::Unset &&
- B->Provenance[i] != BitPart::Unset &&
- A->Provenance[i] != B->Provenance[i])
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) {
+ if (A->Provenance[BitIdx] != BitPart::Unset &&
+ B->Provenance[BitIdx] != BitPart::Unset &&
+ A->Provenance[BitIdx] != B->Provenance[BitIdx])
return Result = None;
- if (A->Provenance[i] == BitPart::Unset)
- Result->Provenance[i] = B->Provenance[i];
+ if (A->Provenance[BitIdx] == BitPart::Unset)
+ Result->Provenance[BitIdx] = B->Provenance[BitIdx];
else
- Result->Provenance[i] = A->Provenance[i];
+ Result->Provenance[BitIdx] = A->Provenance[BitIdx];
}
return Result;
}
// If this is a logical shift by a constant, recurse then shift the result.
- if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
- unsigned BitShift =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) {
+ const APInt &BitShift = *C;
+
// Ensure the shift amount is defined.
- if (BitShift > BitWidth)
+ if (BitShift.uge(BitWidth))
return Result;
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2785,11 +2910,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// Perform the "shift" on BitProvenance.
auto &P = Result->Provenance;
if (I->getOpcode() == Instruction::Shl) {
- P.erase(std::prev(P.end(), BitShift), P.end());
- P.insert(P.begin(), BitShift, BitPart::Unset);
+ P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end());
+ P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset);
} else {
- P.erase(P.begin(), std::next(P.begin(), BitShift));
- P.insert(P.end(), BitShift, BitPart::Unset);
+ P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue()));
+ P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset);
}
return Result;
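A standalone model of the provenance "shift" above (a sketch, not the LLVM code): Provenance is indexed by bit number, so a shl discards the top BitShift entries and prepends Unset, while a logical shift right does the inverse.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    constexpr int8_t Unset = -1;

    void shiftProvenance(std::vector<int8_t> &P, unsigned BitShift,
                         bool IsShl) {
      if (IsShl) {
        P.erase(P.end() - BitShift, P.end());
        P.insert(P.begin(), BitShift, Unset);
      } else {
        P.erase(P.begin(), P.begin() + BitShift);
        P.insert(P.end(), BitShift, Unset);
      }
    }

    int main() {
      std::vector<int8_t> P = {0, 1, 2, 3}; // identity provenance, 4 bits
      shiftProvenance(P, 1, /*IsShl=*/true);
      assert((P == std::vector<int8_t>{Unset, 0, 1, 2}));
    }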
@@ -2797,44 +2922,102 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is a logical 'and' with a mask that clears bits, recurse then
// unset the appropriate bits.
- if (I->getOpcode() == Instruction::And &&
- isa<ConstantInt>(I->getOperand(1))) {
- APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
- const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (match(V, m_And(m_Value(X), m_APInt(C)))) {
+ const APInt &AndMask = *C;
// Check that the mask allows a multiple of 8 bits for a bswap, for an
// early exit.
unsigned NumMaskedBits = AndMask.countPopulation();
- if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+ if (!MatchBitReversals && (NumMaskedBits % 8) != 0)
return Result;
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
- for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
// If the AndMask is zero for this bit, clear the bit.
- if ((AndMask & Bit) == 0)
- Result->Provenance[i] = BitPart::Unset;
+ if (AndMask[BitIdx] == 0)
+ Result->Provenance[BitIdx] = BitPart::Unset;
return Result;
}
// If this is a zext instruction zero extend the result.
- if (I->getOpcode() == Instruction::ZExt) {
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
+ if (match(V, m_ZExt(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = BitPart(Res->Provider, BitWidth);
- auto NarrowBitWidth =
- cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth();
- for (unsigned i = 0; i < NarrowBitWidth; ++i)
- Result->Provenance[i] = Res->Provenance[i];
- for (unsigned i = NarrowBitWidth; i < BitWidth; ++i)
- Result->Provenance[i] = BitPart::Unset;
+ auto NarrowBitWidth = X->getType()->getScalarSizeInBits();
+ for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = Res->Provenance[BitIdx];
+ for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitPart::Unset;
+ return Result;
+ }
+
+ // BITREVERSE - most likely because we previously matched a partial
+ // bitreverse.
+ if (match(V, m_BitReverse(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx];
+ return Result;
+ }
+
+ // BSWAP - most likely because we previously matched a partial bswap.
+ if (match(V, m_BSwap(m_Value(X)))) {
+ const auto &Res =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ if (!Res)
+ return Result;
+
+ unsigned ByteWidth = BitWidth / 8;
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) {
+ unsigned ByteBitOfs = ByteIdx * 8;
+ for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx)
+ Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] =
+ Res->Provenance[ByteBitOfs + BitIdx];
+ }
+ return Result;
+ }
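The same remap written as a standalone function over a provenance vector (sketch only): byte j of the result takes its provenance from byte ByteWidth-1-j of the operand, preserving bit order within each byte.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<int8_t> bswapProvenance(const std::vector<int8_t> &In) {
      unsigned BW = In.size();
      assert(BW % 8 == 0);
      std::vector<int8_t> Out(BW);
      for (unsigned ByteIdx = 0; ByteIdx < BW / 8; ++ByteIdx)
        for (unsigned Bit = 0; Bit < 8; ++Bit)
          Out[(BW - 8 - ByteIdx * 8) + Bit] = In[ByteIdx * 8 + Bit];
      return Out;
    }

    int main() {
      std::vector<int8_t> P(16);
      for (int i = 0; i < 16; ++i) P[i] = i; // identity provenance, i16
      auto R = bswapProvenance(P);
      assert(R[8] == 0 && R[0] == 8); // low byte <-> high byte
    }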
+
+ // Funnel ('double') shifts take 3 operands: 2 inputs and the shift
+ // amount, which is taken modulo the bit width.
+ // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) ||
+ match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) {
+ // We can treat fshr as a fshl by flipping the modulo amount.
+ unsigned ModAmt = C->urem(BitWidth);
+ if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr)
+ ModAmt = BitWidth - ModAmt;
+
+ const auto &LHS =
+ collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &RHS =
+ collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+
+ // Check we have both sources and they are from the same provider.
+ if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider)
+ return Result;
+
+ unsigned StartBitRHS = BitWidth - ModAmt;
+ Result = BitPart(LHS->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx)
+ Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx];
+ for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx)
+ Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS];
return Result;
}
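The comment's formulas can be sanity-checked on concrete integers; a minimal standalone sketch, with BW fixed at 32 and Z reduced modulo BW as the intrinsic does (the Z == 0 guard avoids an out-of-range shift):

    #include <cassert>
    #include <cstdint>

    uint32_t fshl(uint32_t X, uint32_t Y, unsigned Z) {
      Z %= 32;
      return Z ? (X << Z) | (Y >> (32 - Z)) : X;
    }

    int main() {
      uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
      unsigned Z = 8;
      uint32_t R = fshl(X, Y, Z);
      // Bit k of fshl(X, Y, Z) is X's bit (k - Z) for k >= Z,
      // else Y's bit (k + BW - Z), matching the provenance remap above.
      for (unsigned k = 0; k < 32; ++k) {
        bool Expect = k >= Z ? ((X >> (k - Z)) & 1) : ((Y >> (k + 32 - Z)) & 1);
        assert(((R >> k) & 1) == Expect);
      }
    }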
}
@@ -2842,8 +3025,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
// the input value to the bswap/bitreverse.
Result = BitPart(V, BitWidth);
- for (unsigned i = 0; i < BitWidth; ++i)
- Result->Provenance[i] = i;
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = BitIdx;
return Result;
}
@@ -2870,65 +3053,92 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
return false;
if (!MatchBSwaps && !MatchBitReversals)
return false;
- IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
- if (!ITy || ITy->getBitWidth() > 128)
- return false; // Can't do vectors or integers > 128 bits.
- unsigned BW = ITy->getBitWidth();
-
- unsigned DemandedBW = BW;
- IntegerType *DemandedTy = ITy;
- if (I->hasOneUse()) {
- if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) {
- DemandedTy = cast<IntegerType>(Trunc->getType());
- DemandedBW = DemandedTy->getBitWidth();
- }
- }
+ Type *ITy = I->getType();
+ if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
+ return false; // Can't do integer/elements > 128 bits.
+
+ Type *DemandedTy = ITy;
+ if (I->hasOneUse())
+ if (auto *Trunc = dyn_cast<TruncInst>(I->user_back()))
+ DemandedTy = Trunc->getType();
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
if (!Res)
return false;
- auto &BitProvenance = Res->Provenance;
+ ArrayRef<int8_t> BitProvenance = Res->Provenance;
+ assert(all_of(BitProvenance,
+ [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) &&
+ "Illegal bit provenance index");
+
+ // If the upper bits are zero, then attempt to perform the op on a truncated type.
+ if (BitProvenance.back() == BitPart::Unset) {
+ while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset)
+ BitProvenance = BitProvenance.drop_back();
+ if (BitProvenance.empty())
+ return false; // TODO - handle null value?
+ DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size());
+ if (auto *IVecTy = dyn_cast<VectorType>(ITy))
+ DemandedTy = VectorType::get(DemandedTy, IVecTy);
+ }
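A sketch of the narrowing step: trailing Unset provenance entries mean the idiom only defines the low bits, so the demanded width is whatever remains after trimming them (a result of 0 would mean the value is known zero, the TODO above).

    #include <cassert>
    #include <cstdint>
    #include <vector>

    constexpr int8_t Unset = -1;

    unsigned demandedWidth(std::vector<int8_t> P) {
      while (!P.empty() && P.back() == Unset)
        P.pop_back();
      return P.size();
    }

    int main() {
      // An i8 value whose top 4 bits are known zero: demand only i4.
      assert(demandedWidth({7, 6, 5, 4, Unset, Unset, Unset, Unset}) == 4);
    }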
+
+ // Check BitProvenance hasn't found a source larger than the result type.
+ unsigned DemandedBW = DemandedTy->getScalarSizeInBits();
+ if (DemandedBW > ITy->getScalarSizeInBits())
+ return false;
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
- bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true;
- for (unsigned i = 0; i < DemandedBW; ++i) {
- OKForBSwap &=
- bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW);
- OKForBitReverse &=
- bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW);
+ APInt DemandedMask = APInt::getAllOnesValue(DemandedBW);
+ bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
+ bool OKForBitReverse = MatchBitReversals;
+ for (unsigned BitIdx = 0;
+ (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) {
+ if (BitProvenance[BitIdx] == BitPart::Unset) {
+ DemandedMask.clearBit(BitIdx);
+ continue;
+ }
+ OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx,
+ DemandedBW);
+ OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx],
+ BitIdx, DemandedBW);
}
Intrinsic::ID Intrin;
- if (OKForBSwap && MatchBSwaps)
+ if (OKForBSwap)
Intrin = Intrinsic::bswap;
- else if (OKForBitReverse && MatchBitReversals)
+ else if (OKForBitReverse)
Intrin = Intrinsic::bitreverse;
else
return false;
- if (ITy != DemandedTy) {
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
- Value *Provider = Res->Provider;
- IntegerType *ProviderTy = cast<IntegerType>(Provider->getType());
- // We may need to truncate the provider.
- if (DemandedTy != ProviderTy) {
- auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy,
- "trunc", I);
- InsertedInsts.push_back(Trunc);
- Provider = Trunc;
- }
- auto *CI = CallInst::Create(F, Provider, "rev", I);
- InsertedInsts.push_back(CI);
- auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
+ Value *Provider = Res->Provider;
+
+ // We may need to truncate the provider.
+ if (DemandedTy != Provider->getType()) {
+ auto *Trunc =
+ CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I);
+ InsertedInsts.push_back(Trunc);
+ Provider = Trunc;
+ }
+
+ Instruction *Result = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(Result);
+
+ if (!DemandedMask.isAllOnesValue()) {
+ auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
+ Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
+ InsertedInsts.push_back(Result);
+ }
+
+ // We may need to zeroextend back to the result type.
+ if (ITy != Result->getType()) {
+ auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I);
InsertedInsts.push_back(ExtInst);
- return true;
}
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
- InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
return true;
}
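Putting the pieces together on plain integers, a sketch of the emitted chain for a 32-bit value whose recognized idiom is a 16-bit byte swap (names and widths are illustrative): trunc, then the intrinsic, then the DemandedMask 'and' (skipped when all-ones), then zext.

    #include <cassert>
    #include <cstdint>

    uint32_t rewritten(uint32_t Provider) {
      uint16_t T = static_cast<uint16_t>(Provider);              // trunc
      uint16_t Rev = static_cast<uint16_t>((T << 8) | (T >> 8)); // rev
      // Mask step omitted: DemandedMask is all-ones here.
      return Rev;                                                // zext to i32
    }

    int main() { assert(rewritten(0xAABB1122u) == 0x2211u); }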
@@ -3020,44 +3230,6 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
}
}
-using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
-AllocaInst *llvm::findAllocaForValue(Value *V,
- AllocaForValueMapTy &AllocaForValue) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI;
- // See if we've already calculated (or started to calculate) alloca for a
- // given value.
- AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
- // Store 0 while we're calculating alloca for value V to avoid
- // infinite recursion if the value references itself.
- AllocaForValue[V] = nullptr;
- AllocaInst *Res = nullptr;
- if (CastInst *CI = dyn_cast<CastInst>(V))
- Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : PN->incoming_values()) {
- // Allow self-referencing phi-nodes.
- if (IncValue == PN)
- continue;
- AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
- // AI for incoming values should exist and should all be equal.
- if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
- return nullptr;
- Res = IncValueAI;
- }
- } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
- Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
- } else {
- LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
- << *V << "\n");
- }
- if (Res)
- AllocaForValue[V] = Res;
- return Res;
-}
-
Value *llvm::invertCondition(Value *Condition) {
// First: Check if it's a constant
if (Constant *C = dyn_cast<Constant>(Condition))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index c653aacbee6c..cb5fee7d28e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -1,4 +1,4 @@
-//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
+//===- LoopPeel.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements some loop unrolling utilities for peeling loops
-// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
-// unrolling loops with compile-time constant trip counts.
-//
+// Loop Peeling Utilities.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -49,10 +47,24 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "loop-unroll"
+#define DEBUG_TYPE "loop-peel"
STATISTIC(NumPeeled, "Number of loops peeled");
+static cl::opt<unsigned> UnrollPeelCount(
+ "unroll-peel-count", cl::Hidden,
+ cl::desc("Set the unroll peeling count, for testing purposes"));
+
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
+static cl::opt<bool>
+ UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
+ cl::init(false), cl::Hidden,
+ cl::desc("Allows loop nests to be peeled."));
+
static cl::opt<unsigned> UnrollPeelMaxCount(
"unroll-peel-max-count", cl::init(7), cl::Hidden,
cl::desc("Max average trip count which will cause loop peeling."));
@@ -103,7 +115,12 @@ bool llvm::canPeel(Loop *L) {
// This can be an indication of two different things:
// 1) The loop is not rotated.
// 2) The loop contains irreducible control flow that involves the latch.
- if (L->getLoopLatch() != L->getExitingBlock())
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (Latch != L->getExitingBlock())
+ return false;
+
+ // Peeling is only supported if the latch is a branch.
+ if (!isa<BranchInst>(Latch->getTerminator()))
return false;
return true;
@@ -215,11 +232,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// consider AddRecs of the loop we are trying to peel.
if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
continue;
- bool Increasing;
if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
- !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+ !SE.getMonotonicPredicateType(LeftAR, Pred))
continue;
- (void)Increasing;
// Check if extending the current DesiredPeelCount lets us evaluate Pred
// or !Pred in the loop body statically.
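Source-level intuition for this heuristic, as a sketch: below, the compare i < 1 can only be true on iteration 0, so peeling one iteration leaves a loop body in which the predicate is statically false.

    // Before peeling: the branch is evaluated every iteration.
    void before(int *A, int N) {
      for (int i = 0; i < N; ++i)
        A[i] = (i < 1) ? 0 : A[i] + 1;
    }

    // After peeling one iteration: the remaining loop is branch-free.
    void after(int *A, int N) {
      if (N > 0)
        A[0] = 0;                // peeled iteration, i == 0
      for (int i = 1; i < N; ++i)
        A[i] = A[i] + 1;         // (i < 1) is now statically false
    }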
@@ -278,9 +293,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE) {
+ unsigned &TripCount, ScalarEvolution &SE,
+ unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -292,7 +307,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
- if (!PP.AllowLoopNestsPeeling && !L->empty())
+ if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
return;
// If the user provided a peel count, use that.
@@ -322,7 +337,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// maximum number of iterations among these values, thus turning all those
// Phis into invariants.
// First, check that we can peel at least one iteration.
- if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
// Store the pre-calculated values here.
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
// Now go through all Phis to calculate the number of iterations they
@@ -342,7 +357,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// Pay respect to limitations implied by loop size and the max peel count.
unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
DesiredPeelCount = std::max(DesiredPeelCount,
countToEliminateCompares(*L, MaxPeelCount, SE));
@@ -385,7 +400,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (*PeelCount) {
if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
- (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
+ (LoopSize * (*PeelCount + 1) <= Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
PP.PeelCount = *PeelCount;
@@ -396,7 +411,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
<< "\n");
- LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
}
}
}
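Worked numbers for the two size checks above (a sketch; the constants are made up): peeling K iterations keeps roughly LoopSize * (K + 1) instructions, which must stay within the threshold.

    #include <algorithm>
    #include <cassert>

    int main() {
      unsigned LoopSize = 20, Threshold = 100, UnrollPeelMaxCount = 7;
      // First check: at least one peeled iteration must fit.
      assert(2 * LoopSize <= Threshold);
      // Then cap the peel count by both the flag and the size budget.
      unsigned MaxPeelCount =
          std::min(UnrollPeelMaxCount, Threshold / LoopSize - 1);
      assert(MaxPeelCount == 4); // 100/20 - 1 == 4, below the flag's 7
    }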
@@ -491,7 +506,7 @@ static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
- SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
LoopInfo *LI) {
@@ -599,6 +614,40 @@ static void cloneLoopBlocks(
LVMap[KV.first] = KV.second;
}
+TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
+ TargetTransformInfo::PeelingPreferences PP;
+
+ // Set the default values.
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+
+ // Get the target specific values.
+ TTI.getPeelingPreferences(L, SE, PP);
+
+ // User specified values using cl::opt.
+ if (UnrollingSpecficValues) {
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ PP.PeelCount = UnrollPeelCount;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ PP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+ }
+
+ // User specified values provided by argument.
+ if (UserAllowPeeling.hasValue())
+ PP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+ return PP;
+}
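The layering above resolves conflicting sources in a fixed order; a condensed sketch with illustrative names, using std::optional in place of llvm::Optional: built-in defaults, then the target hook, then cl::opt flags (only when called from the unroller), then explicit caller arguments win last.

    #include <optional>

    struct PeelPrefs { unsigned PeelCount = 0; bool AllowPeeling = true; };

    PeelPrefs gather(std::optional<bool> UserAllowPeeling, bool UseFlagValues,
                     std::optional<bool> FlagAllowPeeling) {
      PeelPrefs PP;                            // 1. defaults
      // 2. a target hook would adjust PP here
      if (UseFlagValues && FlagAllowPeeling)
        PP.AllowPeeling = *FlagAllowPeeling;   // 3. command-line flags
      if (UserAllowPeeling)
        PP.AllowPeeling = *UserAllowPeeling;   // 4. explicit argument wins
      return PP;
    }

    int main() {
      // Flag says false, explicit argument says true: the argument wins.
      PeelPrefs PP = gather(/*UserAllowPeeling=*/true, /*UseFlagValues=*/true,
                            /*FlagAllowPeeling=*/false);
      return PP.AllowPeeling ? 0 : 1;
    }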
+
/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
@@ -609,8 +658,8 @@ static void cloneLoopBlocks(
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, bool PreserveLCSSA) {
+ ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA) {
assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 8804bba975b6..b678efdc8d88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -12,7 +12,6 @@
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -36,6 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -44,6 +44,8 @@ using namespace llvm;
#define DEBUG_TYPE "loop-rotate"
+STATISTIC(NumNotRotatedDueToHeaderSize,
+ "Number of loops not rotated due to the header size");
STATISTIC(NumRotated, "Number of loops rotated");
static cl::opt<bool>
@@ -64,15 +66,17 @@ class LoopRotate {
const SimplifyQuery &SQ;
bool RotationOnly;
bool IsUtilMode;
+ bool PrepareForLTO;
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
+ bool PrepareForLTO)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
- IsUtilMode(IsUtilMode) {}
+ IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
bool processLoop(Loop *L);
private:
@@ -300,7 +304,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
if (Metrics.notDuplicatable) {
LLVM_DEBUG(
dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
@@ -320,8 +324,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
<< " instructions, which is more than the threshold ("
<< MaxHeaderSize << " instructions): ";
L->dump());
+ ++NumNotRotatedDueToHeaderSize;
return Rotated;
}
+
+ // When preparing for LTO, avoid rotating loops with calls that could be
+ // inlined during the LTO stage.
+ if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+ return Rotated;
}
// Now, this loop is suitable for rotation.
@@ -391,6 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
break;
}
+ // Remember the local noalias scope declarations in the header. After the
+ // rotation, they must be duplicated and the scope must be cloned. This
+ // avoids unwanted interaction across iterations.
+ SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions;
+ for (Instruction &I : *OrigHeader)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclInstructions.push_back(Decl);
+
while (I != E) {
Instruction *Inst = &*I++;
@@ -451,6 +469,69 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
}
}
+ if (!NoAliasDeclInstructions.empty()) {
+ // There are noalias scope declarations:
+ // (general):
+ // Original: OrigPre { OrigHeader NewHeader ... Latch }
+ // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+ //
+ // with D: llvm.experimental.noalias.scope.decl,
+ // U: !noalias or !alias.scope depending on D
+ // ... { D U1 U2 } can transform into:
+ // (0) : ... { D U1 U2 } // no relevant rotation for this part
+ // (1) : ... D' { U1 U2 D } // D is part of OrigHeader
+ // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+ //
+ // We now want to transform:
+ // (1) -> : ... D' { D U1 U2 D'' }
+ // (2) -> : ... D' U1' { D U2 D'' U1'' }
+ // D: original llvm.experimental.noalias.scope.decl
+ // D', U1': duplicate with replaced scopes
+ // D'', U1'': different duplicate with replaced scopes
+ // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+ // as U1'' and U1' scopes will not be compatible w.r.t. the local restrict.
+
+ // Clone the llvm.experimental.noalias.decl again for the NewHeader.
+ Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
+ LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
+ << *NAD << "\n");
+ Instruction *NewNAD = NAD->clone();
+ NewNAD->insertBefore(NewHeaderInsertionPoint);
+ }
+
+ // Scopes must now be duplicated, once for OrigHeader and once for
+ // OrigPreHeader'.
+ {
+ auto &Context = NewHeader->getContext();
+
+ SmallVector<MDNode *, 8> NoAliasDeclScopes;
+ for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
+ NoAliasDeclScopes.push_back(NAD->getScopeList());
+
+ LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+ "h.rot");
+ LLVM_DEBUG(OrigHeader->dump());
+
+ // Keep the compile time impact low by only adapting the inserted block
+ // of instructions in the OrigPreHeader. This might result in slightly
+ // more aliasing between these instructions and those that were already
+ // present, but it will be much faster when the original PreHeader is
+ // large.
+ LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
+ auto *FirstDecl =
+ cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+ auto *LastInst = &OrigPreheader->back();
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+ Context, "pre.rot");
+ LLVM_DEBUG(OrigPreheader->dump());
+
+ LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
+ LLVM_DEBUG(NewHeader->dump());
+ }
+ }
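A very rough sketch of the scope duplication (illustrative strings, not the metadata API): every scope declared in the old header is cloned once per copy of the code, so accesses in OrigHeader and in the pre-header copy stop sharing a scope and safely degrade to may-alias.

    #include <string>
    #include <vector>

    std::vector<std::string>
    cloneScopes(const std::vector<std::string> &Scopes,
                const std::string &Suffix) {
      std::vector<std::string> Clones;
      for (const auto &S : Scopes)
        Clones.push_back(S + "." + Suffix); // e.g. "scope0.h.rot"
      return Clones;
    }

    int main() {
      auto H = cloneScopes({"scope0"}, "h.rot");   // for OrigHeader
      auto P = cloneScopes({"scope0"}, "pre.rot"); // for the preheader copy
      return H[0] == P[0]; // distinct scopes -> returns 0
    }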
+
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
@@ -496,12 +577,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
- DT->applyUpdates(Updates);
if (MSSAU) {
- MSSAU->applyUpdates(Updates, *DT);
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
+ } else {
+ DT->applyUpdates(Updates);
}
}
@@ -575,7 +657,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
+ bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+ if (DidMerge)
+ RemoveRedundantDbgInstrs(PredBB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -739,13 +824,8 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
- bool IsUtilMode = true) {
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ bool IsUtilMode = true, bool PrepareForLTO) {
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
- IsUtilMode);
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
+ IsUtilMode, PrepareForLTO);
return LR.processLoop(L);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index a8445e94e55a..2e104334ad96 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -163,7 +163,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
/// if it's not already in there. Stop predecessor traversal when we reach
/// StopBlock.
static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
- std::set<BasicBlock*> &Blocks) {
+ SmallPtrSetImpl<BasicBlock *> &Blocks) {
SmallVector<BasicBlock *, 8> Worklist;
Worklist.push_back(InputBB);
do {
@@ -171,10 +171,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
if (Blocks.insert(BB).second && BB != StopBlock)
// If BB is not already processed and it is not a stop block then
// insert its predecessors into the work list
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- BasicBlock *WBB = *I;
- Worklist.push_back(WBB);
- }
+ append_range(Worklist, predecessors(BB));
} while (!Worklist.empty());
}
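A standalone sketch of the same worklist walk over a generic predecessor map (illustrative types): insert a block, then keep pulling work until the set is closed, never expanding past the stop block.

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Graph = std::map<std::string, std::vector<std::string>>;

    void addBlockAndPreds(const std::string &InputBB, const std::string &Stop,
                          const Graph &Preds, std::set<std::string> &Blocks) {
      std::vector<std::string> Worklist{InputBB};
      do {
        std::string BB = Worklist.back();
        Worklist.pop_back();
        // Only expand blocks we have not seen yet, and never walk past Stop.
        if (Blocks.insert(BB).second && BB != Stop) {
          auto It = Preds.find(BB);
          if (It != Preds.end())
            Worklist.insert(Worklist.end(), It->second.begin(),
                            It->second.end());
        }
      } while (!Worklist.empty());
    }

    int main() {
      Graph Preds = {{"latch", {"body"}}, {"body", {"header"}}};
      std::set<std::string> Blocks;
      addBlockAndPreds("latch", "header", Preds, Blocks);
      return Blocks.size() == 3 ? 0 : 1; // latch, body, header
    }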
@@ -308,9 +305,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Determine which blocks should stay in L and which should be moved out to
// the Outer loop now.
- std::set<BasicBlock*> BlocksInL;
- for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
- BasicBlock *P = *PI;
+ SmallPtrSet<BasicBlock *, 4> BlocksInL;
+ for (BasicBlock *P : predecessors(Header)) {
if (DT->dominates(Header, P))
addBlockAndPredsToSet(P, Header, BlocksInL);
}
@@ -683,7 +679,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, MSSAU))
+ if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,
@@ -691,7 +687,7 @@ ReprocessLoop:
LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
<< ExitingBlock->getName() << "\n");
- assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ assert(pred_empty(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -836,8 +832,8 @@ bool LoopSimplify::runOnFunction(Function &F) {
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
+ for (auto *L : *LI)
+ Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
#ifndef NDEBUG
if (PreserveLCSSA) {
@@ -866,9 +862,9 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
// after simplifying the loops. MemorySSA is preserved if it exists.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ for (auto *L : *LI)
Changed |=
- simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
+ simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 3875c631f839..d4cd57405239 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,6 +59,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -108,14 +109,15 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
/// insert a phi-node, otherwise LCSSA will be broken.
/// The function is just a helper function for llvm::UnrollLoop that returns
/// true if this situation occurs, indicating that LCSSA needs to be fixed.
-static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
+static bool needToInsertPhisForLCSSA(Loop *L,
+ const std::vector<BasicBlock *> &Blocks,
LoopInfo *LI) {
for (BasicBlock *BB : Blocks) {
if (LI->getLoopFor(BB) == L)
continue;
for (Instruction &I : *BB) {
for (Use &U : I.operands()) {
- if (auto Def = dyn_cast<Instruction>(U)) {
+ if (const auto *Def = dyn_cast<Instruction>(U)) {
Loop *DefLoop = LI->getLoopFor(Def->getParent());
if (!DefLoop)
continue;
@@ -286,14 +288,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA, Loop **RemainderLoop) {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
+ if (!L->getLoopPreheader()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock) {
+ if (!L->getLoopLatch()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
return LoopUnrollResult::Unmodified;
}
@@ -304,37 +304,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can unroll loops that have
- // (1) single latch; and
- // (2a) latch is unconditional; or
- // (2b) latch is conditional and is an exiting block
- // FIXME: The implementation can be extended to work with more complicated
- // cases, e.g. loops with multiple latches.
- BasicBlock *Header = L->getHeader();
- BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
- // A conditional branch which exits the loop, which can be optimized to an
- // unconditional branch in the unrolled loop in some cases.
- BranchInst *ExitingBI = nullptr;
- bool LatchIsExiting = L->isLoopExiting(LatchBlock);
- if (LatchIsExiting)
- ExitingBI = LatchBI;
- else if (BasicBlock *ExitingBlock = L->getExitingBlock())
- ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
- LLVM_DEBUG(
- dbgs() << "Can't unroll; a conditional latch must exit the loop");
- return LoopUnrollResult::Unmodified;
- }
- LLVM_DEBUG({
- if (ExitingBI)
- dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
- << "\n";
- else
- dbgs() << " No single exiting block\n";
- });
-
- if (Header->hasAddressTaken()) {
+ if (L->getHeader()->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
LLVM_DEBUG(
dbgs() << " Won't unroll loop: address of header block is taken.\n");
@@ -363,20 +333,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = ULO.Count == ULO.TripCount;
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
-
- // Go through all exits of L and see if there are any phi-nodes there. We just
- // conservatively assume that they're inserted to preserve LCSSA form, which
- // means that complete unrolling might break this form. We need to either fix
- // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
- // now we just recompute LCSSA for the outer loop, but it should be possible
- // to fix it in-place.
- bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
- any_of(ExitBlocks, [](const BasicBlock *BB) {
- return isa<PHINode>(BB->begin());
- });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
@@ -401,12 +357,63 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
BasicBlock *ExitingBlock = L->getLoopLatch();
assert(ExitingBlock && "Loop without exiting block?");
assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
- Preheader = L->getLoopPreheader();
ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
}
+ // All these values should be taken only after peeling because they might have
+ // changed.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+
+ // Go through all exits of L and see if there are any phi-nodes there. We just
+ // conservatively assume that they're inserted to preserve LCSSA form, which
+ // means that complete unrolling might break this form. We need to either fix
+ // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+ // now we just recompute LCSSA for the outer loop, but it should be possible
+ // to fix it in-place.
+ bool NeedToFixLCSSA =
+ PreserveLCSSA && CompletelyUnroll &&
+ any_of(ExitBlocks,
+ [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
+
+ // The current loop unroll pass can unroll loops that have
+ // (1) single latch; and
+ // (2a) latch is unconditional; or
+ // (2b) latch is conditional and is an exiting block
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
+ BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ // A conditional branch which exits the loop, which can be optimized to an
+ // unconditional branch in the unrolled loop in some cases.
+ BranchInst *ExitingBI = nullptr;
+ bool LatchIsExiting = L->isLoopExiting(LatchBlock);
+ if (LatchIsExiting)
+ ExitingBI = LatchBI;
+ else if (BasicBlock *ExitingBlock = L->getExitingBlock())
+ ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
+ // If the peeling guard is changed this assert may be relaxed or even
+ // deleted.
+ assert(!Peeled && "Peeling guard changed!");
+ LLVM_DEBUG(
+ dbgs() << "Can't unroll; a conditional latch must exit the loop");
+ return LoopUnrollResult::Unmodified;
+ }
+ LLVM_DEBUG({
+ if (ExitingBI)
+ dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
+ << "\n";
+ else
+ dbgs() << " No single exiting block\n";
+ });
+
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
LLVM_DEBUG(
@@ -583,6 +590,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
+ // Identify what noalias metadata is inside the loop: if it is inside the
+ // loop, the associated metadata must be cloned for each iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -676,6 +688,15 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
AC->registerAssumption(II);
}
}
+
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string ext = (Twine("It") + Twine(It)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), ext);
+ }
}
// Loop over the PHI nodes in the original block, setting incoming values.
@@ -863,9 +884,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
- UnrolledLoopBlocks.end(), Dest),
- UnrolledLoopBlocks.end());
+ llvm::erase_value(UnrolledLoopBlocks, Dest);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index dd628f3e7e0c..6e32a2b865aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -148,8 +148,7 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
}
while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
if (!Visit(I))
return false;
@@ -459,14 +458,6 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
// finish up connecting the blocks and phi nodes. At this point LastValueMap
// is the last unrolled iterations values.
- // Update Phis in BB from OldBB to point to NewBB
- auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
- BasicBlock *NewBB) {
- for (PHINode &Phi : BB->phis()) {
- int I = Phi.getBasicBlockIndex(OldBB);
- Phi.setIncomingBlock(I, NewBB);
- }
- };
// Update Phis in BB from OldBB to point to NewBB and use the latest value
// from LastValueMap
auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
@@ -525,10 +516,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
- updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
for (unsigned It = 1; It != Count; It++) {
// Replace the conditional branch of the previous iteration subloop with an
@@ -538,10 +529,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
SubTerm->eraseFromParent();
- updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
+ ForeBlocksLast.back());
+ SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
}
@@ -555,8 +546,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
"Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
}
- updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
+ AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
+ SubLoopBlocksLast.back());
for (unsigned It = 1; It != Count; It++) {
// Replace the conditional branch of the previous iteration subloop with an
@@ -566,8 +557,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
BranchInst::Create(AftBlocksFirst[It], AftTerm);
AftTerm->eraseFromParent();
- updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
+ AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
+ SubLoopBlocksLast.back());
movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 2515b1676cb9..0abf62be156f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,11 +22,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
@@ -505,6 +505,32 @@ static bool canProfitablyUnrollMultiExitLoop(
// know of kinds of multiexit loops that would benefit from unrolling.
}
+// Assign the maximum possible trip count as the back edge weight for the
+// remainder loop if the original loop comes with a branch weight.
+static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
+ Loop *RemainderLoop,
+ uint64_t UnrollFactor) {
+ uint64_t TrueWeight, FalseWeight;
+ BranchInst *LatchBR =
+ cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+ if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
+ ? FalseWeight
+ : TrueWeight;
+ assert(UnrollFactor > 1);
+ uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
+ BasicBlock *Header = RemainderLoop->getHeader();
+ BasicBlock *Latch = RemainderLoop->getLoopLatch();
+ auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(RemainderLatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+}
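Worked example of the weight assignment (a sketch with made-up numbers): an unroll factor of 4 leaves a remainder of at most 3 iterations, so the remainder latch gets a backedge weight of (UnrollFactor - 1) * ExitWeight against ExitWeight on the exit edge.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t ExitWeight = 10, UnrollFactor = 4;
      uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
      assert(BackEdgeWeight == 30); // remainder: ~3 iterations per entry
    }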
+
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
@@ -788,6 +814,11 @@ bool llvm::UnrollRuntimeLoopRemainder(
InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+ // Assign the maximum possible trip count as the back edge weight for the
+ // remainder loop if the original loop comes with a branch weight.
+ if (remainderLoop && !UnrollRemainder)
+ updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
+
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
F->getBasicBlockList(),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 43363736684e..f0f423e9812a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -63,6 +63,7 @@ static cl::opt<bool> ForceReductionIntrinsic(
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
+static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -297,10 +298,24 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
llvm_unreachable("unexpected number of options");
}
-static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
}
+Optional<ElementCount>
+llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+ Optional<int> Width =
+ getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
+
+ if (Width.hasValue()) {
+ Optional<int> IsScalable = getOptionalIntLoopAttribute(
+ TheLoop, "llvm.loop.vectorize.scalable.enable");
+ return ElementCount::get(*Width, IsScalable.getValueOr(false));
+ }
+
+ return None;
+}
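A sketch of what the returned value encodes (a simplified model, not the llvm::ElementCount class): a minimum width plus a flag saying whether the real width is that minimum scaled by the runtime vscale.

    #include <cassert>

    struct ElementCountSketch {
      unsigned Min;  // e.g. llvm.loop.vectorize.width
      bool Scalable; // e.g. llvm.loop.vectorize.scalable.enable
      bool isScalar() const { return !Scalable && Min == 1; }
      bool isVector() const { return Min > 1 || Scalable; }
    };

    int main() {
      ElementCountSketch Fixed4{4, false}, ScalableVF{4, true};
      assert(Fixed4.isVector() && ScalableVF.isVector());
      assert(ElementCountSketch{1, false}.isScalar());
    }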
+
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
StringRef Name) {
const MDOperand *AttrMD =
@@ -334,7 +349,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID(
bool Changed = false;
if (InheritAllAttrs || InheritSomeAttrs) {
- for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
MDNode *Op = cast<MDNode>(Existing.get());
auto InheritThisAttribute = [InheritSomeAttrs,
@@ -371,7 +386,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID(
continue;
HasAnyFollowup = true;
- for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
MDs.push_back(Option.get());
Changed = true;
}
@@ -404,6 +419,10 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}
+bool llvm::hasMustProgress(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
+}
+
TransformationMode llvm::hasUnrollTransformation(Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
@@ -450,14 +469,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == false)
return TM_SuppressedByUser;
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<ElementCount> VectorizeWidth =
+ getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
// 'Forcing' vector width and interleave count to one effectively disables
// this transformation.
- if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+ if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
+ InterleaveCount == 1)
return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
@@ -466,10 +486,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == true)
return TM_ForcedByUser;
- if (VectorizeWidth == 1 && InterleaveCount == 1)
+ if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
return TM_Disable;
- if (VectorizeWidth > 1 || InterleaveCount > 1)
+ if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
return TM_Enable;
if (hasDisableAllTransformsHint(L))
@@ -542,10 +562,6 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
if (SE)
SE->forgetLoop(L);
- auto *ExitBlock = L->getUniqueExitBlock();
- assert(ExitBlock && "Should have a unique exit block!");
- assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
-
auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
assert(OldBr && "Preheader must end with a branch");
assert(OldBr->isUnconditional() && "Preheader must have a single successor");
@@ -575,48 +591,63 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// deleting the backedge of the outer loop). If the outer loop is indeed a
  // non-loop, it will be deleted in a future iteration of the loop deletion pass.
IRBuilder<> Builder(OldBr);
- Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
- // Remove the old branch. The conditional branch becomes a new terminator.
- OldBr->eraseFromParent();
-
- // Rewrite phis in the exit block to get their inputs from the Preheader
- // instead of the exiting block.
- for (PHINode &P : ExitBlock->phis()) {
- // Set the zero'th element of Phi to be from the preheader and remove all
- // other incoming values. Given the loop has dedicated exits, all other
- // incoming values must be from the exiting blocks.
- int PredIndex = 0;
- P.setIncomingBlock(PredIndex, Preheader);
- // Removes all incoming values from all other exiting blocks (including
- // duplicate values from an exiting block).
- // Nuke all entries except the zero'th entry which is the preheader entry.
- // NOTE! We need to remove Incoming Values in the reverse order as done
- // below, to keep the indices valid for deletion (removeIncomingValues
- // updates getNumIncomingValues and shifts all values down into the operand
- // being deleted).
- for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
- P.removeIncomingValue(e - i, false);
-
- assert((P.getNumIncomingValues() == 1 &&
- P.getIncomingBlock(PredIndex) == Preheader) &&
- "Should have exactly one value and that's from the preheader!");
- }
+ auto *ExitBlock = L->getUniqueExitBlock();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- if (DT) {
- DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
- if (MSSA) {
- MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, *DT);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
+ if (ExitBlock) {
+ assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+ Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+ // Remove the old branch. The conditional branch becomes a new terminator.
+ OldBr->eraseFromParent();
+
+ // Rewrite phis in the exit block to get their inputs from the Preheader
+ // instead of the exiting block.
+ for (PHINode &P : ExitBlock->phis()) {
+ // Set the zero'th element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P.setIncomingBlock(PredIndex, Preheader);
+ // Removes all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zero'th entry which is the preheader entry.
+ // NOTE! We need to remove Incoming Values in the reverse order as done
+ // below, to keep the indices valid for deletion (removeIncomingValues
+ // updates getNumIncomingValues and shifts all values down into the
+ // operand being deleted).
+ for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
+ P.removeIncomingValue(e - i, false);
+
+ assert((P.getNumIncomingValues() == 1 &&
+ P.getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
+ }
+
+ if (DT) {
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
+ if (MSSA) {
+ MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
+ *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
}
- }
- // Disconnect the loop body by branching directly to its exit.
- Builder.SetInsertPoint(Preheader->getTerminator());
- Builder.CreateBr(ExitBlock);
- // Remove the old branch.
- Preheader->getTerminator()->eraseFromParent();
+ // Disconnect the loop body by branching directly to its exit.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ Builder.CreateBr(ExitBlock);
+ // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent();
+ } else {
+    assert(L->hasNoExitBlocks() &&
+           "Loop should have either zero or one exit block.");
+
+ Builder.SetInsertPoint(OldBr);
+ Builder.CreateUnreachable();
+ Preheader->getTerminator()->eraseFromParent();
+ }
if (DT) {
DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
@@ -635,54 +666,58 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
- // Given LCSSA form is satisfied, we should not have users of instructions
- // within the dead loop outside of the loop. However, LCSSA doesn't take
- // unreachable uses into account. We handle them here.
- // We could do it after drop all references (in this case all users in the
- // loop will be already eliminated and we have less work to do but according
- // to API doc of User::dropAllReferences only valid operation after dropping
- // references, is deletion. So let's substitute all usages of
- // instruction from the loop with undef value of corresponding type first.
- for (auto *Block : L->blocks())
- for (Instruction &I : *Block) {
- auto *Undef = UndefValue::get(I.getType());
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
- if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
- if (L->contains(Usr->getParent()))
- continue;
- // If we have a DT then we can check that uses outside a loop only in
- // unreachable block.
- if (DT)
- assert(!DT->isReachableFromEntry(U) &&
- "Unexpected user in reachable block");
- U.set(Undef);
+ if (ExitBlock) {
+ // Given LCSSA form is satisfied, we should not have users of instructions
+ // within the dead loop outside of the loop. However, LCSSA doesn't take
+ // unreachable uses into account. We handle them here.
+    // We could do this after dropping all references (then every user inside
+    // the loop would already be eliminated, leaving less work to do), but per
+    // the API doc of User::dropAllReferences, the only valid operation after
+    // dropping references is deletion. So first substitute all uses of
+    // instructions from the loop with undef values of the corresponding type.
+ for (auto *Block : L->blocks())
+ for (Instruction &I : *Block) {
+ auto *Undef = UndefValue::get(I.getType());
+ for (Value::use_iterator UI = I.use_begin(), E = I.use_end();
+ UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
+ if (L->contains(Usr->getParent()))
+ continue;
+        // If we have a DT, we can check that uses outside the loop occur only
+        // in unreachable blocks.
+ if (DT)
+ assert(!DT->isReachableFromEntry(U) &&
+ "Unexpected user in reachable block");
+ U.set(Undef);
+ }
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ auto Key =
+ DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
+ if (Key != DeadDebugSet.end())
+ continue;
+ DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
+ DeadDebugInst.push_back(DVI);
}
- auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
- if (!DVI)
- continue;
- auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
- if (Key != DeadDebugSet.end())
- continue;
- DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
- DeadDebugInst.push_back(DVI);
- }
- // After the loop has been deleted all the values defined and modified
- // inside the loop are going to be unavailable.
- // Since debug values in the loop have been deleted, inserting an undef
- // dbg.value truncates the range of any dbg.value before the loop where the
- // loop used to be. This is particularly important for constant values.
- DIBuilder DIB(*ExitBlock->getModule());
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
- "There should be a non-PHI instruction in exit block, else these "
- "instructions will have no parent.");
- for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
- DVI->getVariable(), DVI->getExpression(),
- DVI->getDebugLoc(), InsertDbgValueBefore);
+ // After the loop has been deleted all the values defined and modified
+ // inside the loop are going to be unavailable.
+ // Since debug values in the loop have been deleted, inserting an undef
+ // dbg.value truncates the range of any dbg.value before the loop where the
+ // loop used to be. This is particularly important for constant values.
+ DIBuilder DIB(*ExitBlock->getModule());
+ Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+ assert(InsertDbgValueBefore &&
+ "There should be a non-PHI instruction in exit block, else these "
+ "instructions will have no parent.");
+ for (auto *DVI : DeadDebugInst)
+ DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+ DVI->getVariable(), DVI->getExpression(),
+ DVI->getDebugLoc(), InsertDbgValueBefore);
+ }
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
@@ -726,6 +761,51 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
}
}
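The reverse-order deletion in the phi rewrite above is subtle enough to deserve a standalone model. A minimal sketch with a plain std::vector standing in for the PHI's incoming list (no LLVM types assumed): erasing from the back never shifts an index that is still pending.

#include <vector>

// Toy model of the phi rewrite: keep only entry 0 (the preheader) by erasing
// entries back-to-front, exactly as the loop in deleteDeadLoop does.
void keepOnlyFirst(std::vector<int> &Incoming) {
  if (Incoming.empty())
    return; // the real code always has at least the preheader entry
  for (size_t i = 0, e = Incoming.size() - 1; i != e; ++i)
    Incoming.erase(Incoming.begin() + (e - i)); // indices < e - i stay valid
}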
+static Loop *getOutermostLoop(Loop *L) {
+ while (Loop *Parent = L->getParentLoop())
+ L = Parent;
+ return L;
+}
+
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA) {
+ auto *Latch = L->getLoopLatch();
+ assert(Latch && "multiple latches not yet supported");
+ auto *Header = L->getHeader();
+ Loop *OutermostLoop = getOutermostLoop(L);
+
+ SE.forgetLoop(L);
+
+  // Note: By splitting the backedge and then explicitly making it unreachable,
+  // we gracefully handle corner cases such as non-bottom-tested loops and the
+  // like. We also get to reuse existing, well-tested code. It might be worth
+  // special-casing the common bottom-tested case at some point to avoid code
+  // churn.
+
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
+ /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+
+ // Erase (and destroy) this loop instance. Handles relinking sub-loops
+ // and blocks within the loop as needed.
+ LI.erase(L);
+
+ // If the loop we broke had a parent, then changeToUnreachable might have
+ // caused a block to be removed from the parent loop (see loop_nest_lcssa
+ // test case in zero-btc.ll for an example), thus changing the parent's
+ // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
+  // loop which might have had a block removed.
+ if (OutermostLoop != L)
+ formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
+}
+
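A plausible (hypothetical) call site for the new helper; the precondition, that the backedge is never taken, would typically come from SCEV:

// Sketch only: L, DT, SE, and LI are assumed from the surrounding pass.
if (SE.getBackedgeTakenCount(L)->isZero())
  breakLoopBackedge(L, DT, SE, LI, /*MSSA=*/nullptr);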
/// Checks if \p L has single exit through latch block except possibly
/// "deoptimizing" exits. Returns branch instruction terminating the loop
/// latch if above check is successful, nullptr otherwise.
@@ -838,30 +918,29 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createMinMaxOp(IRBuilderBase &Builder,
- RecurrenceDescriptor::MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right) {
+ CmpInst::Predicate Pred;
switch (RK) {
default:
llvm_unreachable("Unknown min/max recurrence kind");
- case RecurrenceDescriptor::MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
+ case RecurKind::UMin:
+ Pred = CmpInst::ICMP_ULT;
break;
- case RecurrenceDescriptor::MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
+ case RecurKind::UMax:
+ Pred = CmpInst::ICMP_UGT;
break;
- case RecurrenceDescriptor::MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
+ case RecurKind::SMin:
+ Pred = CmpInst::ICMP_SLT;
break;
- case RecurrenceDescriptor::MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
+ case RecurKind::SMax:
+ Pred = CmpInst::ICMP_SGT;
break;
- case RecurrenceDescriptor::MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
+ case RecurKind::FMin:
+ Pred = CmpInst::FCMP_OLT;
break;
- case RecurrenceDescriptor::MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
+ case RecurKind::FMax:
+ Pred = CmpInst::FCMP_OGT;
break;
}
@@ -871,17 +950,15 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder,
FastMathFlags FMF;
FMF.setFast();
Builder.setFastMathFlags(FMF);
- Value *Cmp = Builder.CreateCmp(P, Left, Right, "rdx.minmax.cmp");
+ Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
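As a reference point, here is what the emitted compare+select computes for the signed-max kind, reduced to scalar C++ (a sketch; the real helper also handles vectors and sets fast-math flags for the FP kinds):

// Scalar model of createMinMaxOp(Builder, RecurKind::SMax, L, R):
//   %rdx.minmax.cmp = icmp sgt %L, %R
//   %rdx.minmax.select = select %rdx.minmax.cmp, %L, %R
int smax(int L, int R) { return L > R ? L : R; }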
// Helper to generate an ordered reduction.
-Value *
-llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
- unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
+Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
+ unsigned Op, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// Extract and apply reduction ops in ascending order:
@@ -895,9 +972,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
"bin.rdx");
} else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
"Invalid min/max");
- Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
+ Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
}
if (!RedOps.empty())
@@ -908,10 +985,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
}
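The "ordered" form is the strictly sequential reduction; modeled as scalar C++ (float elements assumed), it preserves lane order and therefore FP rounding:

// Sequential in-order FP add: the result matches the scalar source loop
// bit-for-bit, unlike the log2 shuffle reduction below, which reassociates.
float orderedFAdd(const float *Lane, unsigned VF, float Acc) {
  for (unsigned I = 0; I < VF; ++I)
    Acc += Lane[I];
  return Acc;
}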
// Helper to generate a log2 shuffle reduction.
-Value *
-llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
+Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
+ unsigned Op, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
@@ -928,17 +1004,16 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
// Fill the rest of the mask with undef.
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
- Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf");
+ Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
// The builder propagates its fast-math-flags setting.
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
} else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
"Invalid min/max");
- TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
+ TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
@@ -952,124 +1027,62 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
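A scalar model of the shuffle reduction (assuming the power-of-two VF the code requires): each round folds the upper half of the live lanes into the lower half, so only log2(VF) combine steps are needed.

// log2(VF) rounds; after the last round the result sits in lane 0.
int shuffleReduceAdd(int *Lane, unsigned VF) {
  for (unsigned Half = VF / 2; Half >= 1; Half /= 2)
    for (unsigned I = 0; I != Half; ++I)
      Lane[I] += Lane[I + Half];
  return Lane[0];
}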
-/// Create a simple vector reduction specified by an opcode and some
-/// flags (if generating min/max reductions).
-Value *llvm::createSimpleTargetReduction(
- IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
- Value *Src, TargetTransformInfo::ReductionFlags Flags,
- ArrayRef<Value *> RedOps) {
- auto *SrcVTy = cast<VectorType>(Src->getType());
-
- std::function<Value *()> BuildFunc;
- using RD = RecurrenceDescriptor;
- RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-
- switch (Opcode) {
- case Instruction::Add:
- BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
- break;
- case Instruction::Mul:
- BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
- break;
- case Instruction::And:
- BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
- break;
- case Instruction::Or:
- BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
- break;
- case Instruction::Xor:
- BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
- break;
- case Instruction::FAdd:
- BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(
- Constant::getNullValue(SrcVTy->getElementType()), Src);
- return Rdx;
- };
- break;
- case Instruction::FMul:
- BuildFunc = [&]() {
- Type *Ty = SrcVTy->getElementType();
- auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
- return Rdx;
- };
- break;
- case Instruction::ICmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
- BuildFunc = [&]() {
- return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
- };
- } else {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
- BuildFunc = [&]() {
- return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
- };
- }
- break;
- case Instruction::FCmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = RD::MRK_FloatMax;
- BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
- } else {
- MinMaxKind = RD::MRK_FloatMin;
- BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
- }
- break;
+Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src, RecurKind RdxKind,
+ ArrayRef<Value *> RedOps) {
+ unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
+ TargetTransformInfo::ReductionFlags RdxFlags;
+ RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
+ RdxKind == RecurKind::FMax;
+ RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
+ if (!ForceReductionIntrinsic &&
+ !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
+ return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
+
+ auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
+ switch (RdxKind) {
+ case RecurKind::Add:
+ return Builder.CreateAddReduce(Src);
+ case RecurKind::Mul:
+ return Builder.CreateMulReduce(Src);
+ case RecurKind::And:
+ return Builder.CreateAndReduce(Src);
+ case RecurKind::Or:
+ return Builder.CreateOrReduce(Src);
+ case RecurKind::Xor:
+ return Builder.CreateXorReduce(Src);
+ case RecurKind::FAdd:
+ return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
+ Src);
+ case RecurKind::FMul:
+ return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
+ case RecurKind::SMax:
+ return Builder.CreateIntMaxReduce(Src, true);
+ case RecurKind::SMin:
+ return Builder.CreateIntMinReduce(Src, true);
+ case RecurKind::UMax:
+ return Builder.CreateIntMaxReduce(Src, false);
+ case RecurKind::UMin:
+ return Builder.CreateIntMinReduce(Src, false);
+ case RecurKind::FMax:
+ return Builder.CreateFPMaxReduce(Src);
+ case RecurKind::FMin:
+ return Builder.CreateFPMinReduce(Src);
default:
llvm_unreachable("Unhandled opcode");
- break;
}
- if (ForceReductionIntrinsic ||
- TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
- return BuildFunc();
- return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}
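A hedged call-site sketch for the reworked entry point (Builder, TTI, and the vector value Vec are assumptions; RedOps is presumed to default to empty in the header):

// After this change the caller passes a RecurKind instead of opcode + flags.
Value *Sum = createSimpleTargetReduction(Builder, TTI, Vec, RecurKind::Add);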
-/// Create a vector reduction using a given recurrence descriptor.
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src,
- bool NoNaN) {
+ RecurrenceDescriptor &Desc, Value *Src) {
// TODO: Support in-order reductions based on the recurrence descriptor.
- using RD = RecurrenceDescriptor;
- RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
- TargetTransformInfo::ReductionFlags Flags;
- Flags.NoNaN = NoNaN;
-
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
-
- switch (RecKind) {
- case RD::RK_FloatAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
- case RD::RK_FloatMult:
- return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
- case RD::RK_IntegerAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
- case RD::RK_IntegerMult:
- return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
- case RD::RK_IntegerAnd:
- return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
- case RD::RK_IntegerOr:
- return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
- case RD::RK_IntegerXor:
- return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
- case RD::RK_IntegerMinMax: {
- RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
- Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
- Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
- return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
- }
- case RD::RK_FloatMinMax: {
- Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
- return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
- }
- default:
- llvm_unreachable("Unhandled RecKind");
- }
+ return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
}
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
@@ -1145,7 +1158,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
// producing an expression involving multiple pointers. Until then, we must
// bail out here.
//
- // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject
// because it understands lcssa phis while SCEV does not.
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
@@ -1162,7 +1175,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
// SCEV may have rewritten an expression that produces the GEP's pointer
// operand. That's ok as long as the pointer operand has the same base
- // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 16bd08c704ee..599bd1feb2bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -16,8 +16,12 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -32,29 +36,22 @@ static cl::opt<bool>
cl::desc("Add no-alias annotation for instructions that "
"are disambiguated by memchecks"));
-LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT, ScalarEvolution *SE,
- bool UseLAIChecks)
- : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
+LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
+ ArrayRef<RuntimePointerCheck> Checks, Loop *L,
+ LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE)
+ : VersionedLoop(L), NonVersionedLoop(nullptr),
+ AliasChecks(Checks.begin(), Checks.end()),
+ Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
- assert(L->getExitBlock() && "No single exit block");
- assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form");
- if (UseLAIChecks) {
- setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
- setSCEVChecks(LAI.getPSE().getUnionPredicate());
- }
-}
-
-void LoopVersioning::setAliasChecks(ArrayRef<RuntimePointerCheck> Checks) {
- AliasChecks = {Checks.begin(), Checks.end()};
-}
-
-void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
- Preds = std::move(Check);
+ assert(L->getUniqueExitBlock() && "No single exit block");
}
void LoopVersioning::versionLoop(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ assert(VersionedLoop->isLoopSimplifyForm() &&
+ "Loop is not in loop-simplify form");
+
Instruction *FirstCheckInst;
Instruction *MemRuntimeCheck;
Value *SCEVRuntimeCheck;
@@ -67,11 +64,10 @@ void LoopVersioning::versionLoop(
addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
AliasChecks, RtPtrChecking.getSE());
- const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
SCEVRuntimeCheck =
- Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
+ Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator());
auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
// Discard the SCEV runtime check if it is always true.
@@ -122,6 +118,11 @@ void LoopVersioning::versionLoop(
// Adds the necessary PHI nodes for the versioned loops based on the
// loop-defined values used outside of the loop.
addPHINodes(DefsUsedOutside);
+ formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true);
+ formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true);
+ assert(NonVersionedLoop->isLoopSimplifyForm() &&
+ VersionedLoop->isLoopSimplifyForm() &&
+ "The versioned loops should be in simplify form.");
}
void LoopVersioning::addPHINodes(
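Conceptually, versionLoop emits the following control-flow shape; a toy C++ model (all names illustrative, not LLVM API):

void versionedLoopBody();    // clone with no-alias annotations
void nonVersionedLoopBody(); // original conservative loop

// RuntimeChecksPass stands in for the emitted memchecks + SCEV predicate.
void run(bool RuntimeChecksPass) {
  if (RuntimeChecksPass)
    versionedLoopBody();
  else
    nonVersionedLoopBody();
}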
@@ -253,47 +254,59 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
}
namespace {
+bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA,
+ DominatorTree *DT, ScalarEvolution *SE) {
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->isInnermost())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ if (!L->isLoopSimplifyForm() || !L->isRotatedForm() ||
+ !L->getExitingBlock())
+ continue;
+ const LoopAccessInfo &LAI = GetLAA(*L);
+ if (!LAI.hasConvergentOp() &&
+ (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
+ LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Also expose this as a pass. Currently this is only used for
/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
/// array accesses from the loop.
-class LoopVersioningPass : public FunctionPass {
+class LoopVersioningLegacyPass : public FunctionPass {
public:
- LoopVersioningPass() : FunctionPass(ID) {
- initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+ LoopVersioningLegacyPass() : FunctionPass(ID) {
+ initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+ return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L);
+ };
+
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- // Build up a worklist of inner-loops to version. This is necessary as the
- // act of versioning a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
- (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
- LoopVersioning LVer(LAI, L, LI, DT, SE);
- LVer.versionLoop();
- LVer.annotateLoopWithNoAlias();
- Changed = true;
- }
- }
-
- return Changed;
+ return runImpl(LI, GetLAA, DT, SE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -312,18 +325,45 @@ public:
#define LVER_OPTION "loop-versioning"
#define DEBUG_TYPE LVER_OPTION
-char LoopVersioningPass::ID;
+char LoopVersioningLegacyPass::ID;
static const char LVer_name[] = "Loop Versioning";
-INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+ false)
namespace llvm {
-FunctionPass *createLoopVersioningPass() {
- return new LoopVersioningPass();
+FunctionPass *createLoopVersioningLegacyPass() {
+ return new LoopVersioningLegacyPass();
}
+
+PreservedAnalyses LoopVersioningPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, MSSA};
+ return LAM.getResult<LoopAccessAnalysis>(L, AR);
+ };
+
+ if (runImpl(&LI, GetLAA, &DT, &SE))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 0b225e8abc4e..fe0ff5899d8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -48,7 +48,7 @@ static bool runImpl(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
- SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end());
+ SmallVector<Value *, 16> CallArgs(II->args());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 34e836d9660f..ec8d7a7074cd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -26,6 +27,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,9 +57,9 @@ namespace {
} // end anonymous namespace
+namespace {
// Return true iff R is covered by Ranges.
-static bool IsInRanges(const IntRange &R,
- const std::vector<IntRange> &Ranges) {
+bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
// Note: Ranges must be sorted, non-overlapping and non-adjacent.
// Find the first range whose High field is >= R.High,
@@ -68,120 +70,34 @@ static bool IsInRanges(const IntRange &R,
return I != Ranges.end() && I->Low <= R.Low;
}
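A self-contained restatement of that lookup with a toy range type (same precondition: sorted, non-overlapping, non-adjacent ranges):

#include <algorithm>
#include <vector>

struct ToyRange { long Low, High; };

// Binary-search for the first range whose High reaches R.High; R is covered
// iff that range also starts at or below R.Low.
bool covered(const ToyRange &R, const std::vector<ToyRange> &Ranges) {
  auto I = std::lower_bound(
      Ranges.begin(), Ranges.end(), R,
      [](const ToyRange &A, const ToyRange &B) { return A.High < B.High; });
  return I != Ranges.end() && I->Low <= R.Low;
}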
-namespace {
-
- /// Replace all SwitchInst instructions with chained branch instructions.
- class LowerSwitch : public FunctionPass {
- public:
- // Pass identification, replacement for typeid
- static char ID;
-
- LowerSwitch() : FunctionPass(ID) {
- initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LazyValueInfoWrapperPass>();
- }
-
- struct CaseRange {
- ConstantInt* Low;
- ConstantInt* High;
- BasicBlock* BB;
-
- CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
- : Low(low), High(high), BB(bb) {}
- };
-
- using CaseVector = std::vector<CaseRange>;
- using CaseItr = std::vector<CaseRange>::iterator;
-
- private:
- void processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI);
-
- BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges);
- BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- BasicBlock *OrigBlock, BasicBlock *Default);
- unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
- };
-
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const LowerSwitch::CaseRange& C1,
- const LowerSwitch::CaseRange& C2) {
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
-} // end anonymous namespace
-
-char LowerSwitch::ID = 0;
-
-// Publicly exposed interface to pass...
-char &llvm::LowerSwitchID = LowerSwitch::ID;
-
-INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-
-// createLowerSwitchPass - Interface to this file...
-FunctionPass *llvm::createLowerSwitchPass() {
- return new LowerSwitch();
-}
-
-bool LowerSwitch::runOnFunction(Function &F) {
- LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
- AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
-
- bool Changed = false;
- SmallPtrSet<BasicBlock*, 8> DeleteList;
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
-
- // If the block is a dead Default block that will be deleted later, don't
- // waste time processing it.
- if (DeleteList.count(Cur))
- continue;
-
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
- Changed = true;
- processSwitchInst(SI, DeleteList, AC, LVI);
- }
- }
-
- for (BasicBlock* BB: DeleteList) {
- LVI->eraseBlock(BB);
- DeleteDeadBlock(BB);
+struct CaseRange {
+ ConstantInt *Low;
+ ConstantInt *High;
+ BasicBlock *BB;
+
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
+};
+
+using CaseVector = std::vector<CaseRange>;
+using CaseItr = std::vector<CaseRange>::iterator;
+
+/// The comparison function for sorting the switch case values in the vector.
+/// WARNING: Case ranges should be disjoint!
+struct CaseCmp {
+ bool operator()(const CaseRange &C1, const CaseRange &C2) {
+ const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt *CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
}
-
- return Changed;
-}
+};
/// Used for debugging purposes.
LLVM_ATTRIBUTE_USED
-static raw_ostream &operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) {
+raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) {
O << "[";
- for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
- B != E;) {
+ for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) {
O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
if (++B != E)
O << ", ";
@@ -200,9 +116,9 @@ static raw_ostream &operator<<(raw_ostream &O,
/// 2) Removed if subsequent incoming values now share the same case, i.e.,
/// multiple outcome edges are condensed into one. This is necessary to keep the
/// number of phi values equal to the number of branches to SuccBB.
-static void
-fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
- const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
+void FixPhis(
+ BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
for (BasicBlock::iterator I = SuccBB->begin(),
IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
@@ -233,17 +149,80 @@ fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
+BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, BasicBlock *OrigBlock,
+ BasicBlock *Default) {
+ Function *F = OrigBlock->getParent();
+ BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
+
+ // Emit comparison
+ ICmpInst *Comp = nullptr;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp =
+ new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (Leaf.Low == LowerBound) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (Leaf.High == UpperBound) {
+ // Val <= Max && Val >= Lo --> Val >= Lo
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+ "SwitchLeaf");
+ } else if (Leaf.Low->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction *Add = BinaryOperator::CreateAdd(
+ Val, NegLo, Val->getName() + ".off", NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock *Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
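The final arm of the comparison ladder relies on the classic unsigned range-check trick; as a standalone sketch with plain integers:

// Lo <= V && V <= Hi  <=>  (unsigned)(V - Lo) <= (unsigned)(Hi - Lo),
// so one subtract plus one unsigned compare replaces two signed compares.
bool inRange(int V, int Lo, int Hi) {
  return (unsigned)V - (unsigned)Lo <= (unsigned)Hi - (unsigned)Lo;
}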
/// Convert the switch statement into a binary lookup of the case values.
/// The function recursively builds this tree. LowerBound and UpperBound are
/// used to keep track of the bounds for Val that have already been checked by
/// a block emitted by one of the previous calls to switchConvert in the call
/// stack.
-BasicBlock *
-LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor, BasicBlock *OrigBlock,
- BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges) {
+BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
assert(LowerBound && UpperBound && "Bounds must be initialized");
unsigned Size = End - Begin;
@@ -255,10 +234,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
unsigned NumMergedCases = 0;
NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
- fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
+ FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
return Begin->BB;
}
- return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+ return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
Default);
}
@@ -305,12 +284,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
- BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
- NewUpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
- BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
- UpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
+ BasicBlock *LBranch =
+ SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
+ BasicBlock *RBranch =
+ SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val,
+ NewNode, OrigBlock, Default, UnreachableRanges);
F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
NewNode->getInstList().push_back(Comp);
@@ -319,78 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
return NewNode;
}
-/// Create a new leaf block for the binary lookup tree. It checks if the
-/// switch's value == the case's value. If not, then it jumps to the default
-/// branch. At this point in the tree, the value can't be another valid case
-/// value, so the jump to the "default" branch is warranted.
-BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
- Function* F = OrigBlock->getParent();
- BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
- F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
-
- // Emit comparison
- ICmpInst* Comp = nullptr;
- if (Leaf.Low == Leaf.High) {
- // Make the seteq instruction...
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
- Leaf.Low, "SwitchLeaf");
- } else {
- // Make range comparison
- if (Leaf.Low == LowerBound) {
- // Val >= Min && Val <= Hi --> Val <= Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
- "SwitchLeaf");
- } else if (Leaf.High == UpperBound) {
- // Val <= Max && Val >= Lo --> Val >= Lo
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
- "SwitchLeaf");
- } else if (Leaf.Low->isZero()) {
- // Val >= 0 && Val <= Hi --> Val <=u Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
- "SwitchLeaf");
- } else {
- // Emit V-Lo <=u Hi-Lo
- Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
- Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
- Val->getName()+".off",
- NewLeaf);
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
- "SwitchLeaf");
- }
- }
-
- // Make the conditional branch...
- BasicBlock* Succ = Leaf.BB;
- BranchInst::Create(Succ, Default, Comp, NewLeaf);
-
- // If there were any PHI nodes in this successor, rewrite one entry
- // from OrigBlock to come from NewLeaf.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode* PN = cast<PHINode>(I);
- // Remove all but one incoming entries from the cluster
- uint64_t Range = Leaf.High->getSExtValue() -
- Leaf.Low->getSExtValue();
- for (uint64_t j = 0; j < Range; ++j) {
- PN->removeIncomingValue(OrigBlock);
- }
-
- int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
- assert(BlockIdx != -1 && "Switch didn't go to this successor??");
- PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
- }
-
- return NewLeaf;
-}
-
/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
/// \post \p Cases wouldn't contain references to \p SI's default BB.
/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
-unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) {
unsigned NumSimpleCases = 0;
// Start with "simple" cases
@@ -431,9 +342,9 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
/// Replace the specified switch instruction with a sequence of chained if-then
/// insts in a balanced binary search.
-void LowerSwitch::processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI) {
+void ProcessSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI) {
BasicBlock *OrigBlock = SI->getParent();
Function *F = OrigBlock->getParent();
Value *Val = SI->getCondition(); // The value we are switching on...
@@ -458,7 +369,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
if (Cases.empty()) {
BranchInst::Create(Default, OrigBlock);
// Remove all the references from Default's PHIs to OrigBlock, but one.
- fixPhis(Default, OrigBlock, OrigBlock);
+ FixPhis(Default, OrigBlock, OrigBlock);
SI->eraseFromParent();
return;
}
@@ -489,7 +400,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// TODO Shouldn't this create a signed range?
ConstantRange KnownBitsRange =
ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
- const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
+ const ConstantRange LVIRange = LVI->getConstantRange(Val, SI);
ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
// We delegate removal of unreachable non-default cases to other passes. In
// the unlikely event that some of them survived, we just conservatively
@@ -563,10 +474,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// cases.
assert(MaxPop > 0 && PopSucc);
Default = PopSucc;
- Cases.erase(
- llvm::remove_if(
- Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
- Cases.end());
+ llvm::erase_if(Cases,
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; });
// If there are no cases left, just branch.
if (Cases.empty()) {
@@ -592,12 +501,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
BranchInst::Create(Default, NewDefault);
BasicBlock *SwitchBlock =
- switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
// If there are entries in any PHI nodes for the default edge, make sure
// to update them as well.
- fixPhis(Default, OrigBlock, NewDefault);
+ FixPhis(Default, OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
@@ -607,6 +516,84 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
OrigBlock->getInstList().erase(SI);
// If the Default block has no more predecessors just add it to DeleteList.
- if (pred_begin(OldDefault) == pred_end(OldDefault))
+ if (pred_empty(OldDefault))
DeleteList.insert(OldDefault);
}
+
+bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
+ bool Changed = false;
+ SmallPtrSet<BasicBlock *, 8> DeleteList;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *Cur =
+ &*I++; // Advance over block so we don't traverse new blocks
+
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(Cur))
+ continue;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ ProcessSwitchInst(SI, DeleteList, AC, LVI);
+ }
+ }
+
+ for (BasicBlock *BB : DeleteList) {
+ LVI->eraseBlock(BB);
+ DeleteDeadBlock(BB);
+ }
+
+ return Changed;
+}
+
+/// Replace all SwitchInst instructions with chained branch instructions.
+class LowerSwitchLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ LowerSwitchLegacyPass() : FunctionPass(ID) {
+ initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+};
+
+} // end anonymous namespace
+
+char LowerSwitchLegacyPass::ID = 0;
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID;
+
+INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitchLegacyPass();
+}
+
+bool LowerSwitchLegacyPass::runOnFunction(Function &F) {
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+ return LowerSwitch(F, LVI, AC);
+}
+
+PreservedAnalyses LowerSwitchPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
+ AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F);
+ return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
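A minimal new-pass-manager usage sketch (FPM is an assumed FunctionPassManager; the required analyses must be registered by the surrounding PassBuilder setup):

FunctionPassManager FPM;
FPM.addPass(LowerSwitchPass()); // lower all switches to branch chains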
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp
new file mode 100644
index 000000000000..6a137630deeb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -0,0 +1,104 @@
+//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for generating tiled loops for matrix operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MatrixUtils.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
+ Value *Bound, Value *Step, StringRef Name,
+ IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
+ LoopInfo &LI) {
+ LLVMContext &Ctx = Preheader->getContext();
+ BasicBlock *Header = BasicBlock::Create(
+ Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit);
+ BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body",
+ Header->getParent(), Exit);
+ BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch",
+ Header->getParent(), Exit);
+
+  Type *I64Ty = Type::getInt64Ty(Ctx);
+  BranchInst::Create(Body, Header);
+  BranchInst::Create(Latch, Body);
+  PHINode *IV =
+      PHINode::Create(I64Ty, 2, Name + ".iv", Header->getTerminator());
+  IV->addIncoming(ConstantInt::get(I64Ty, 0), Preheader);
+
+ B.SetInsertPoint(Latch);
+ Value *Inc = B.CreateAdd(IV, Step, Name + ".step");
+ Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
+ BranchInst::Create(Header, Exit, Cond, Latch);
+ IV->addIncoming(Inc, Latch);
+
+ BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
+ BasicBlock *Tmp = PreheaderBr->getSuccessor(0);
+ PreheaderBr->setSuccessor(0, Header);
+ DTU.applyUpdatesPermissive({
+ {DominatorTree::Delete, Preheader, Tmp},
+ {DominatorTree::Insert, Header, Body},
+ {DominatorTree::Insert, Body, Latch},
+ {DominatorTree::Insert, Latch, Header},
+ {DominatorTree::Insert, Latch, Exit},
+ {DominatorTree::Insert, Preheader, Header},
+ });
+
+ L->addBasicBlockToLoop(Header, LI);
+ L->addBasicBlockToLoop(Body, LI);
+ L->addBasicBlockToLoop(Latch, LI);
+ return Body;
+}
+
+// Creates the following loop nest skeleton:
+// for C = 0; C < NumColumns; C += TileSize
+// for R = 0; R < NumRows; R += TileSize
+// for K = 0; K < Inner ; K += TileSize
+BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, DomTreeUpdater &DTU,
+ LoopInfo &LI) {
+ Loop *ColLoop = LI.AllocateLoop();
+ Loop *RowLoop = LI.AllocateLoop();
+ Loop *InnerLoop = LI.AllocateLoop();
+ RowLoop->addChildLoop(InnerLoop);
+ ColLoop->addChildLoop(RowLoop);
+ if (Loop *ParentL = LI.getLoopFor(Start))
+ ParentL->addChildLoop(ColLoop);
+ else
+ LI.addTopLevelLoop(ColLoop);
+
+ BasicBlock *ColBody =
+ CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize),
+ "cols", B, DTU, ColLoop, LI);
+ BasicBlock *ColLatch = ColBody->getSingleSuccessor();
+ BasicBlock *RowBody =
+ CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize),
+ "rows", B, DTU, RowLoop, LI);
+ RowLoopLatch = RowBody->getSingleSuccessor();
+
+ BasicBlock *InnerBody =
+ CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner),
+ B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI);
+ InnerLoopLatch = InnerBody->getSingleSuccessor();
+ ColumnLoopHeader = ColBody->getSinglePredecessor();
+ RowLoopHeader = RowBody->getSinglePredecessor();
+ InnerLoopHeader = InnerBody->getSinglePredecessor();
+ CurrentRow = &*RowLoopHeader->begin();
+ CurrentCol = &*ColumnLoopHeader->begin();
+ CurrentK = &*InnerLoopHeader->begin();
+
+ return InnerBody;
+}
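For orientation, the emitted skeleton corresponds to this nest if written directly in C++ (a sketch; NumColumns, NumRows, NumInner, and TileSize are the TileInfo fields):

// The innermost body block is what CreateTiledLoops returns; the caller fills
// in the actual tile computation there.
void tiledNest(unsigned NumColumns, unsigned NumRows, unsigned NumInner,
               unsigned TileSize) {
  for (unsigned C = 0; C < NumColumns; C += TileSize)
    for (unsigned R = 0; R < NumRows; R += TileSize)
      for (unsigned K = 0; K < NumInner; K += TileSize) {
        // tile body emitted by the caller
      }
}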
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 7f961dbaf4b4..e350320e7569 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
@@ -25,6 +26,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/InitializePasses.h"
@@ -40,123 +42,125 @@ static const char *const metaNames[] = {
};
namespace {
+// This PRNG is from the ISO C spec. It is intentionally simple and
+// unsuitable for cryptographic use. We're just looking for enough
+// variety to surprise and delight users.
+struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) { next = seed; }
+
+ int rand() {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+};
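A quick determinism check for the hoisted PRNG (a sketch; the seed value is arbitrary):

#include <cassert>

// Same seed, same sequence -- renaming stays reproducible across runs.
void prngIsDeterministic() {
  PRNG A, B;
  A.srand(5381);
  B.srand(5381);
  for (int I = 0; I != 4; ++I)
    assert(A.rand() == B.rand());
}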
- // This PRNG is from the ISO C spec. It is intentionally simple and
- // unsuitable for cryptographic use. We're just looking for enough
- // variety to surprise and delight users.
- struct PRNG {
- unsigned long next;
+struct Renamer {
+ Renamer(unsigned int seed) { prng.srand(seed); }
- void srand(unsigned int seed) {
- next = seed;
- }
+ const char *newName() {
+ return metaNames[prng.rand() % array_lengthof(metaNames)];
+ }
- int rand() {
- next = next * 1103515245 + 12345;
- return (unsigned int)(next / 65536) % 32768;
- }
- };
+ PRNG prng;
+};
- struct Renamer {
- Renamer(unsigned int seed) {
- prng.srand(seed);
- }
+void MetaRename(Function &F) {
+ for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
+ if (!AI->getType()->isVoidTy())
+ AI->setName("arg");
- const char *newName() {
- return metaNames[prng.rand() % array_lengthof(metaNames)];
- }
+ for (auto &BB : F) {
+ BB.setName("bb");
- PRNG prng;
- };
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
+ }
+}
- struct MetaRenamer : public ModulePass {
- // Pass identification, replacement for typeid
- static char ID;
-
- MetaRenamer() : ModulePass(ID) {
- initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- // Seed our PRNG with simple additive sum of ModuleID. We're looking to
- // simply avoid always having the same function names, and we need to
- // remain deterministic.
- unsigned int randSeed = 0;
- for (auto C : M.getModuleIdentifier())
- randSeed += C;
-
- Renamer renamer(randSeed);
-
- // Rename all aliases
- for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
- StringRef Name = AI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- AI->setName("alias");
- }
-
- // Rename all global variables
- for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
- StringRef Name = GI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- GI->setName("global");
- }
-
- // Rename all struct types
- TypeFinder StructTypes;
- StructTypes.run(M, true);
- for (StructType *STy : StructTypes) {
- if (STy->isLiteral() || STy->getName().empty()) continue;
-
- SmallString<128> NameStorage;
- STy->setName((Twine("struct.") +
- renamer.newName()).toStringRef(NameStorage));
- }
-
- // Rename all functions
- for (auto &F : M) {
- StringRef Name = F.getName();
- LibFunc Tmp;
- // Leave library functions alone because their presence or absence could
- // affect the behavior of other passes.
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc(
- F, Tmp))
- continue;
-
- // Leave @main alone. The output of -metarenamer might be passed to
- // lli for execution and the latter needs a main entry point.
- if (Name != "main")
- F.setName(renamer.newName());
-
- runOnFunction(F);
- }
- return true;
- }
-
- bool runOnFunction(Function &F) {
- for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
- if (!AI->getType()->isVoidTy())
- AI->setName("arg");
-
- for (auto &BB : F) {
- BB.setName("bb");
-
- for (auto &I : BB)
- if (!I.getType()->isVoidTy())
- I.setName("tmp");
- }
- return true;
- }
- };
+void MetaRename(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+ // Seed our PRNG with a simple additive sum of ModuleID. We're looking to
+ // simply avoid always having the same function names, and we need to
+ // remain deterministic.
+ unsigned int randSeed = 0;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
+
+ Renamer renamer(randSeed);
+
+ // Rename all aliases
+ for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ AI->setName("alias");
+ }
+
+ // Rename all global variables
+ for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ GI->setName("global");
+ }
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (StructType *STy : StructTypes) {
+ if (STy->isLiteral() || STy->getName().empty())
+ continue;
+
+ SmallString<128> NameStorage;
+ STy->setName(
+ (Twine("struct.") + renamer.newName()).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ for (auto &F : M) {
+ StringRef Name = F.getName();
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ GetTLI(F).getLibFunc(F, Tmp))
+ continue;
+
+ // Leave @main alone. The output of -metarenamer might be passed to
+ // lli for execution and the latter needs a main entry point.
+ if (Name != "main")
+ F.setName(renamer.newName());
+
+ MetaRename(F);
+ }
+}
+
+struct MetaRenamer : public ModulePass {
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ MetaRename(M, GetTLI);
+ return true;
+ }
+};
} // end anonymous namespace
@@ -175,3 +179,14 @@ INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
ModulePass *llvm::createMetaRenamerPass() {
return new MetaRenamer();
}
+
+PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ MetaRename(M, GetTLI);
+
+ return PreservedAnalyses::all();
+}
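A standalone sketch of the seeding and naming scheme above. The four-entry metaNames table here is hypothetical (the real table lives earlier in MetaRenamer.cpp); the point is that the same ModuleID always produces the same name sequence:

    #include <cstdio>
    #include <string>

    static const char *const metaNames[] = {"foo", "bar", "baz", "quux"};

    struct PRNG { // same ISO C recurrence as the struct above
      unsigned long next;
      void srand(unsigned int seed) { next = seed; }
      int rand() {
        next = next * 1103515245 + 12345;
        return (unsigned int)(next / 65536) % 32768;
      }
    };

    int main() {
      std::string ModuleID = "example.ll"; // stand-in module identifier
      unsigned int seed = 0;
      for (char C : ModuleID) // additive sum: deterministic per module
        seed += C;
      PRNG prng;
      prng.srand(seed);
      for (int i = 0; i < 4; ++i)
        std::printf("%s\n", metaNames[prng.rand() % 4]);
    }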
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
deleted file mode 100644
index a16ca1fb8efa..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit warnings for potentially incorrect usage of the
-// llvm.expect intrinsic. This utility extracts the threshold values from
-// metadata associated with the instrumented Branch or Switch instruction. The
-// threshold values are then used to determine if a warning should be emitted.
-//
-// MisExpect metadata is generated when llvm.expect intrinsics are lowered; see
-// LowerExpectIntrinsic.cpp
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/MisExpect.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormatVariadic.h"
-#include <cstdint>
-#include <functional>
-#include <numeric>
-
-#define DEBUG_TYPE "misexpect"
-
-using namespace llvm;
-using namespace misexpect;
-
-namespace llvm {
-
-// Command line option to enable/disable the warning when profile data suggests
-// a mismatch with the use of the llvm.expect intrinsic
-static cl::opt<bool> PGOWarnMisExpect(
- "pgo-warn-misexpect", cl::init(false), cl::Hidden,
- cl::desc("Use this option to turn on/off "
- "warnings about incorrect usage of llvm.expect intrinsics."));
-
-} // namespace llvm
-
-namespace {
-
-Instruction *getOprndOrInst(Instruction *I) {
- assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
- Instruction *Ret = nullptr;
- if (auto *B = dyn_cast<BranchInst>(I)) {
- Ret = dyn_cast<Instruction>(B->getCondition());
- }
- // TODO: Find a way to resolve condition location for switches
- // Using the condition of the switch seems to often resolve to an earlier
- // point in the program, i.e. the calculation of the switch condition, rather
- // than the switches location in the source code. Thus, we should use the
- // instruction to get source code locations rather than the condition to
- // improve diagnostic output, such as the caret. If the same problem exists
- // for branch instructions, then we should remove this function and directly
- // use the instruction
- //
- // else if (auto S = dyn_cast<SwitchInst>(I)) {
- // Ret = I;
- //}
- return Ret ? Ret : I;
-}
-
-void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
- uint64_t ProfCount, uint64_t TotalCount) {
- double PercentageCorrect = (double)ProfCount / TotalCount;
- auto PerString =
- formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
- auto RemStr = formatv(
- "Potential performance regression from use of the llvm.expect intrinsic: "
- "Annotation was correct on {0} of profiled executions.",
- PerString);
- Twine Msg(PerString);
- Instruction *Cond = getOprndOrInst(I);
- if (PGOWarnMisExpect)
- Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
- OptimizationRemarkEmitter ORE(I->getParent()->getParent());
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
-}
-
-} // namespace
-
-namespace llvm {
-namespace misexpect {
-
-void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
- LLVMContext &Ctx) {
- if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) {
- auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
- if (MisExpectDataName &&
- MisExpectDataName->getString().equals("misexpect")) {
- LLVM_DEBUG(llvm::dbgs() << "------------------\n");
- LLVM_DEBUG(llvm::dbgs()
- << "Function: " << I->getFunction()->getName() << "\n");
- LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
- LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
- llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
- });
-
- // extract values from misexpect metadata
- const auto *IndexCint =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
- const auto *LikelyCInt =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
- const auto *UnlikelyCInt =
- mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
-
- if (!IndexCint || !LikelyCInt || !UnlikelyCInt)
- return;
-
- const uint64_t Index = IndexCint->getZExtValue();
- const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue();
- const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue();
- const uint64_t ProfileCount = Weights[Index];
- const uint64_t CaseTotal = std::accumulate(
- Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>());
- const uint64_t NumUnlikelyTargets = Weights.size() - 1;
-
- const uint64_t TotalBranchWeight =
- LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
-
- const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
- TotalBranchWeight);
- uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
-
- LLVM_DEBUG(llvm::dbgs()
- << "Unlikely Targets: " << NumUnlikelyTargets << ":\n");
- LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n");
- LLVM_DEBUG(llvm::dbgs()
- << "Scaled Threshold: " << ScaledThreshold << ":\n");
- LLVM_DEBUG(llvm::dbgs() << "------------------\n");
- if (ProfileCount < ScaledThreshold)
- emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal);
- }
- }
-}
-
-void checkFrontendInstrumentation(Instruction &I) {
- if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) {
- unsigned NOps = MD->getNumOperands();
-
- // Only emit misexpect diagnostics if at least 2 branch weights are present.
- // Less than 2 branch weights means that the profiling metadata is:
- // 1) incorrect/corrupted
- // 2) not branch weight metadata
- // 3) completely deterministic
- // In these cases we should not emit any diagnostic related to misexpect.
- if (NOps < 3)
- return;
-
- // Operand 0 is a string tag "branch_weights"
- if (MDString *Tag = cast<MDString>(MD->getOperand(0))) {
- if (Tag->getString().equals("branch_weights")) {
- SmallVector<uint32_t, 4> RealWeights(NOps - 1);
- for (unsigned i = 1; i < NOps; i++) {
- ConstantInt *Value =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
- RealWeights[i - 1] = Value->getZExtValue();
- }
- verifyMisExpect(&I, RealWeights, I.getContext());
- }
- }
- }
-}
-
-} // namespace misexpect
-} // namespace llvm
-#undef DEBUG_TYPE
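A worked numeric example (standalone; the weights and counts are made up) of the scaled-threshold check in the removed verifyMisExpect. BranchProbability::scale does the proportional math in the real code; plain integer arithmetic approximates it here:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assume llvm.expect lowering recorded weights 2000 (likely) : 1
      // (unlikely), and the switch has two unlikely targets.
      uint64_t LikelyBranchWeight = 2000, UnlikelyBranchWeight = 1;
      uint64_t NumUnlikelyTargets = 2;
      uint64_t TotalBranchWeight =
          LikelyBranchWeight + UnlikelyBranchWeight * NumUnlikelyTargets; // 2002

      // Profile: the annotated target ran on 10 of 1000 executions.
      uint64_t CaseTotal = 1000, ProfileCount = 10;
      uint64_t ScaledThreshold =
          LikelyBranchWeight * CaseTotal / TotalBranchWeight; // ~999
      if (ProfileCount < ScaledThreshold) // 10 < 999: annotation looks wrong
        std::puts("misexpect: profile contradicts the llvm.expect annotation");
    }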
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 99b64a7462f6..3312a6f9459b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -53,6 +53,10 @@ static cl::opt<bool> VerifyPredicateInfo(
DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
"Controls which variables are renamed with predicateinfo");
+// Maximum number of conditions considered for renaming for each branch/assume.
+// This limits renaming of deep and/or chains.
+static const unsigned MaxCondsPerBranch = 8;
+
namespace {
// Given a predicate info that is a type of branching terminator, get the
// branching block.
@@ -367,6 +371,13 @@ void PredicateInfoBuilder::convertUsesToDFSOrdered(
}
}
+bool shouldRename(Value *V) {
+ // Only want real values, not constants. Additionally, operands with one use
+ // are only being used in the comparison, which means they will not be useful
+ // for us to consider for predicateinfo.
+ return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse();
+}
+
// Collect relevant operations from Comparison that we may want to insert copies
// for.
void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
@@ -374,15 +385,9 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
auto *Op1 = Comparison->getOperand(1);
if (Op0 == Op1)
return;
- CmpOperands.push_back(Comparison);
- // Only want real values, not constants. Additionally, operands with one use
- // are only being used in the comparison, which means they will not be useful
- // for us to consider for predicateinfo.
- //
- if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
- CmpOperands.push_back(Op0);
- if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
- CmpOperands.push_back(Op1);
+
+ CmpOperands.push_back(Op0);
+ CmpOperands.push_back(Op1);
}
// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
@@ -400,38 +405,32 @@ void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename,
void PredicateInfoBuilder::processAssume(
IntrinsicInst *II, BasicBlock *AssumeBB,
SmallVectorImpl<Value *> &OpsToRename) {
- // See if we have a comparison we support
- SmallVector<Value *, 8> CmpOperands;
- SmallVector<Value *, 2> ConditionsToProcess;
- CmpInst::Predicate Pred;
- Value *Operand = II->getOperand(0);
- if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))
- .match(II->getOperand(0))) {
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
- ConditionsToProcess.push_back(Operand);
- } else if (isa<CmpInst>(Operand)) {
-
- ConditionsToProcess.push_back(Operand);
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands) {
- auto *PA = new PredicateAssume(Op, II, Cmp);
- addInfoFor(OpsToRename, Op, PA);
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(II->getOperand(0));
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
+ continue;
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
+
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ auto *PA = new PredicateAssume(V, II, Cond);
+ addInfoFor(OpsToRename, V, PA);
}
- CmpOperands.clear();
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // Otherwise, it should be an AND.
- assert(BinOp->getOpcode() == Instruction::And &&
- "Should have been an AND");
- auto *PA = new PredicateAssume(BinOp, II, BinOp);
- addInfoFor(OpsToRename, BinOp, PA);
- } else {
- llvm_unreachable("Unknown type of condition");
}
}
}
@@ -443,68 +442,46 @@ void PredicateInfoBuilder::processBranch(
SmallVectorImpl<Value *> &OpsToRename) {
BasicBlock *FirstBB = BI->getSuccessor(0);
BasicBlock *SecondBB = BI->getSuccessor(1);
- SmallVector<BasicBlock *, 2> SuccsToProcess;
- SuccsToProcess.push_back(FirstBB);
- SuccsToProcess.push_back(SecondBB);
- SmallVector<Value *, 2> ConditionsToProcess;
-
- auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
- for (auto *Succ : SuccsToProcess) {
- // Don't try to insert on a self-edge. This is mainly because we will
- // eliminate during renaming anyway.
- if (Succ == BranchBB)
- continue;
- bool TakenEdge = (Succ == FirstBB);
- // For and, only insert on the true edge
- // For or, only insert on the false edge
- if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
+
+ for (BasicBlock *Succ : {FirstBB, SecondBB}) {
+ bool TakenEdge = Succ == FirstBB;
+ // Don't try to insert on a self-edge. This is mainly because we will
+ // eliminate it during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+
+ SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ Worklist.push_back(BI->getCondition());
+ while (!Worklist.empty()) {
+ Value *Cond = Worklist.pop_back_val();
+ if (!Visited.insert(Cond).second)
continue;
- PredicateBase *PB =
- new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
- addInfoFor(OpsToRename, Op, PB);
- if (!Succ->getSinglePredecessor())
- EdgeUsesOnly.insert({BranchBB, Succ});
- }
- };
+ if (Visited.size() > MaxCondsPerBranch)
+ break;
+
+ Value *Op0, *Op1;
+ if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))
+ : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op0);
+ }
- // Match combinations of conditions.
- CmpInst::Predicate Pred;
- bool isAnd = false;
- bool isOr = false;
- SmallVector<Value *, 8> CmpOperands;
- if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))) ||
- match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value())))) {
- auto *BinOp = cast<BinaryOperator>(BI->getCondition());
- if (BinOp->getOpcode() == Instruction::And)
- isAnd = true;
- else if (BinOp->getOpcode() == Instruction::Or)
- isOr = true;
- ConditionsToProcess.push_back(BinOp->getOperand(0));
- ConditionsToProcess.push_back(BinOp->getOperand(1));
- ConditionsToProcess.push_back(BI->getCondition());
- } else if (isa<CmpInst>(BI->getCondition())) {
- ConditionsToProcess.push_back(BI->getCondition());
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands)
- InsertHelper(Op, isAnd, isOr, Cmp);
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // This must be an AND or an OR.
- assert((BinOp->getOpcode() == Instruction::And ||
- BinOp->getOpcode() == Instruction::Or) &&
- "Should have been an AND or an OR");
- // The actual value of the binop is not subject to the same restrictions
- // as the comparison. It's either true or false on the true/false branch.
- InsertHelper(BinOp, false, false, BinOp);
- } else {
- llvm_unreachable("Unknown type of condition");
+ SmallVector<Value *, 4> Values;
+ Values.push_back(Cond);
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond))
+ collectCmpOps(Cmp, Values);
+
+ for (Value *V : Values) {
+ if (shouldRename(V)) {
+ PredicateBase *PB =
+ new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, V, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ }
}
- CmpOperands.clear();
}
}
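Both processAssume and processBranch above walk the condition tree with the same capped worklist; here is a standalone analogue over a toy expression type (Node and collectConditions are invented for illustration), honoring the MaxCondsPerBranch limit of 8:

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node { // stand-in for a Value: a leaf, or a logical and/or
      Node *Op0 = nullptr, *Op1 = nullptr;
      bool isLogicalOp() const { return Op0 && Op1; }
    };

    static const unsigned MaxCondsPerBranch = 8;

    // Depth-first, de-duplicated walk that gives up after visiting
    // MaxCondsPerBranch conditions, mirroring the loops in the pass.
    std::vector<Node *> collectConditions(Node *Root) {
      std::vector<Node *> Worklist{Root}, Conds;
      std::set<Node *> Visited;
      while (!Worklist.empty()) {
        Node *Cond = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(Cond).second)
          continue;
        if (Visited.size() > MaxCondsPerBranch)
          break;
        if (Cond->isLogicalOp()) {
          Worklist.push_back(Cond->Op1);
          Worklist.push_back(Cond->Op0); // Op0 is visited first
        }
        Conds.push_back(Cond);
      }
      return Conds;
    }

    int main() {
      Node A, B, C;
      Node AndBC{&B, &C}, Root{&A, &AndBC}; // (A op (B op C))
      assert(collectConditions(&Root).size() == 5);
    }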
// Process a block terminating switch, and place relevant operations to be
@@ -822,6 +799,56 @@ PredicateInfo::~PredicateInfo() {
}
}
+Optional<PredicateConstraint> PredicateBase::getConstraint() const {
+ switch (Type) {
+ case PT_Assume:
+ case PT_Branch: {
+ bool TrueEdge = true;
+ if (auto *PBranch = dyn_cast<PredicateBranch>(this))
+ TrueEdge = PBranch->TrueEdge;
+
+ if (Condition == RenamedOp) {
+ return {{CmpInst::ICMP_EQ,
+ TrueEdge ? ConstantInt::getTrue(Condition->getType())
+ : ConstantInt::getFalse(Condition->getType())}};
+ }
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(Condition);
+ if (!Cmp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ CmpInst::Predicate Pred;
+ Value *OtherOp;
+ if (Cmp->getOperand(0) == RenamedOp) {
+ Pred = Cmp->getPredicate();
+ OtherOp = Cmp->getOperand(1);
+ } else if (Cmp->getOperand(1) == RenamedOp) {
+ Pred = Cmp->getSwappedPredicate();
+ OtherOp = Cmp->getOperand(0);
+ } else {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ // Invert predicate along false edge.
+ if (!TrueEdge)
+ Pred = CmpInst::getInversePredicate(Pred);
+
+ return {{Pred, OtherOp}};
+ }
+ case PT_Switch:
+ if (Condition != RenamedOp) {
+ // TODO: Make this an assertion once RenamedOp is fully accurate.
+ return None;
+ }
+
+ return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}};
+ }
+ llvm_unreachable("Unknown predicate type");
+}
+
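A standalone model (plain enums, no LLVM types) of the swap/invert steps in getConstraint: when the renamed value is the icmp's second operand the predicate is swapped first, and along the false edge it is inverted:

    #include <cassert>

    enum Pred { ULT, ULE, UGT, UGE };

    Pred inverse(Pred P) { // negation: !(a < b) is a >= b
      switch (P) {
      case ULT: return UGE;
      case UGE: return ULT;
      case ULE: return UGT;
      case UGT: return ULE;
      }
      return P;
    }

    Pred swapped(Pred P) { // operand swap: a < b is b > a
      switch (P) {
      case ULT: return UGT;
      case UGT: return ULT;
      case ULE: return UGE;
      case UGE: return ULE;
      }
      return P;
    }

    int main() {
      // For "%c = icmp ult %x, %y; br %c, %T, %F":
      assert(inverse(ULT) == UGE);          // %x in %F: x uge y
      assert(swapped(ULT) == UGT);          // %y in %T: y ugt x
      assert(inverse(swapped(ULT)) == ULE); // %y in %F: y ule x
    }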
void PredicateInfo::verifyPredicateInfo() const {}
char PredicateInfoPrinterLegacyPass::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index c7e9c919ec47..86bbb6a889e6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -62,10 +62,6 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
- // FIXME: If the memory unit is of pointer or integer type, we can permit
- // assignments to subsections of the memory unit.
- unsigned AS = AI->getType()->getAddressSpace();
-
// Only allow direct and non-volatile loads and stores...
for (const User *U : AI->users()) {
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -81,19 +77,18 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (!II->isLifetimeStartOrEnd())
+ if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
- if (!onlyUsedByLifetimeMarkers(BCI))
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
if (!GEPI->hasAllZeroIndices())
return false;
- if (!onlyUsedByLifetimeMarkers(GEPI))
+ if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))
+ return false;
+ } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
+ if (!onlyUsedByLifetimeMarkers(ASCI))
return false;
} else {
return false;
@@ -106,6 +101,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
namespace {
struct AllocaInfo {
+ using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
+
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
@@ -113,7 +110,7 @@ struct AllocaInfo {
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
- TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
+ DbgUserVec DbgUsers;
void clear() {
DefiningBlocks.clear();
@@ -121,7 +118,7 @@ struct AllocaInfo {
OnlyStore = nullptr;
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- DbgDeclares.clear();
+ DbgUsers.clear();
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -132,8 +129,8 @@ struct AllocaInfo {
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
+ for (User *U : AI->users()) {
+ Instruction *User = cast<Instruction>(U);
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
@@ -154,7 +151,7 @@ struct AllocaInfo {
}
}
- DbgDeclares = FindDbgAddrUses(AI);
+ findDbgUsers(DbgUsers, AI);
}
};
@@ -252,7 +249,7 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
+ SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -312,23 +309,37 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
AC->registerAssumption(CI);
}
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+static void removeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
- for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
- Instruction *I = cast<Instruction>(*UI);
+ for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) {
+ Instruction *I = cast<Instruction>(UI->getUser());
+ Use &U = *UI;
++UI;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
continue;
+ // Drop the use of AI in droppable instructions.
+ if (I->isDroppable()) {
+ I->dropDroppableUse(U);
+ continue;
+ }
+
if (!I->getType()->isVoidTy()) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
- for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
- Instruction *Inst = cast<Instruction>(*UUI);
+ for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) {
+ Instruction *Inst = cast<Instruction>(UUI->getUser());
+ Use &UU = *UUI;
++UUI;
+
+ // Drop the use of I in droppable instructions.
+ if (Inst->isDroppable()) {
+ Inst->dropDroppableUse(UU);
+ continue;
+ }
Inst->eraseFromParent();
}
}
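Both loops above advance the use iterator before they mutate the use list; make_early_inc_range, used further down in this file, packages the same idiom. A standalone analogue over a std::list:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Uses = {1, 2, 3, 4};
      for (auto It = Uses.begin(); It != Uses.end();) {
        auto Cur = It++;   // step past Cur first, so erasing it is safe
        if (*Cur % 2 == 0)
          Uses.erase(Cur); // drop the "use" without invalidating It
      }
      assert(Uses.size() == 2); // 1 and 3 remain
    }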
@@ -355,8 +366,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *UserInst = cast<Instruction>(*UI++);
+ for (User *U : make_early_inc_range(AI->users())) {
+ Instruction *UserInst = cast<Instruction>(U);
if (UserInst == OnlyStore)
continue;
LoadInst *LI = cast<LoadInst>(UserInst);
@@ -412,10 +423,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
- DII->eraseFromParent();
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
+ DII->eraseFromParent();
+ } else if (DII->getExpression()->startsWithDeref()) {
+ DII->eraseFromParent();
+ }
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
@@ -465,8 +480,8 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ for (User *U : make_early_inc_range(AI->users())) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI)
continue;
@@ -510,9 +525,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
+ if (DII->isAddressOfVariable()) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ }
}
SI->eraseFromParent();
LBI.deleteValue(SI);
@@ -521,8 +538,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
AI->eraseFromParent();
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
- DII->eraseFromParent();
+ for (DbgVariableIntrinsic *DII : Info.DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
++NumLocalPromoted;
return true;
@@ -531,7 +549,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- AllocaDbgDeclares.resize(Allocas.size());
+ AllocaDbgUsers.resize(Allocas.size());
AllocaInfo Info;
LargeBlockInfo LBI;
@@ -544,7 +562,7 @@ void PromoteMem2Reg::run() {
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
- removeLifetimeIntrinsicUsers(AI);
+ removeIntrinsicUsers(AI);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
@@ -589,8 +607,8 @@ void PromoteMem2Reg::run() {
}
// Remember the dbg.declare intrinsic describing this alloca, if any.
- if (!Info.DbgDeclares.empty())
- AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares;
+ if (!Info.DbgUsers.empty())
+ AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
@@ -663,9 +681,11 @@ void PromoteMem2Reg::run() {
}
 // Remove alloca's dbg.declare intrinsics from the function.
- for (auto &Declares : AllocaDbgDeclares)
- for (auto *DII : Declares)
- DII->eraseFromParent();
+ for (auto &DbgUsers : AllocaDbgUsers) {
+ for (auto *DII : DbgUsers)
+ if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
+ DII->eraseFromParent();
+ }
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
@@ -720,7 +740,7 @@ void PromoteMem2Reg::run() {
continue;
// Get the preds for BB.
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ SmallVector<BasicBlock *, 16> Preds(predecessors(BB));
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
@@ -887,7 +907,7 @@ NextIteration:
// operands so far. Remember this count.
unsigned NewPHINumOperands = APN->getNumOperands();
- unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
+ unsigned NumEdges = llvm::count(successors(Pred), BB);
assert(NumEdges && "Must be at least one edge from Pred to BB!");
// Add entries for all the phis.
@@ -905,8 +925,9 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
- ConvertDebugDeclareToDebugValue(DII, APN, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
++PNI;
@@ -965,8 +986,9 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
+ for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
+ if (DII->isAddressOfVariable())
+ ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 57df2334c750..c210d1c46077 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -64,8 +64,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
}
Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const {
- AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB);
- return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr;
+ return getAvailableVals(AV).lookup(BB);
}
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
@@ -254,12 +253,10 @@ public:
// We can get our predecessor info by walking the pred_iterator list,
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- Preds->append(SomePhi->block_begin(), SomePhi->block_end());
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Preds->push_back(*PI);
- }
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin()))
+ append_range(*Preds, SomePhi->blocks());
+ else
+ append_range(*Preds, predecessors(BB));
}
/// GetUndefVal - Get an undefined value of the same type as the value
@@ -283,12 +280,6 @@ public:
PHI->addIncoming(Val, Pred);
}
- /// InstrIsPHI - Check if an instruction is a PHI.
- ///
- static PHINode *InstrIsPHI(Instruction *I) {
- return dyn_cast<PHINode>(I);
- }
-
/// ValueIsPHI - Check if a value is a PHI.
static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
return dyn_cast<PHINode>(Val);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 71b48482f26a..6dbfb0b61fea 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -38,8 +39,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
using namespace PatternMatch;
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
-/// reusing an existing cast if a suitable one exists, moving an existing
-/// cast if a suitable one exists but isn't in the right place, or
+/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
@@ -58,40 +58,38 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction *Ret = nullptr;
// Check to see if there is already a cast!
- for (User *U : V->users())
- if (U->getType() == Ty)
- if (CastInst *CI = dyn_cast<CastInst>(U))
- if (CI->getOpcode() == Op) {
- // If the cast isn't where we want it, create a new cast at IP.
- // Likewise, do not reuse a cast at BIP because it must dominate
- // instructions that might be inserted before BIP.
- if (BasicBlock::iterator(CI) != IP || BIP == IP) {
- // Create a new cast, and leave the old cast in place in case
- // it is being used as an insert point.
- Ret = CastInst::Create(Op, V, Ty, "", &*IP);
- Ret->takeName(CI);
- CI->replaceAllUsesWith(Ret);
- break;
- }
- Ret = CI;
- break;
- }
+ for (User *U : V->users()) {
+ if (U->getType() != Ty)
+ continue;
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (!CI || CI->getOpcode() != Op)
+ continue;
+
+ // Found a suitable cast that is at IP or comes before IP. Use it. Note that
+ // the cast must also properly dominate the Builder's insertion point.
+ if (IP->getParent() == CI->getParent() && &*BIP != CI &&
+ (&*IP == CI || CI->comesBefore(&*IP))) {
+ Ret = CI;
+ break;
+ }
+ }
// Create a new cast.
- if (!Ret)
+ if (!Ret) {
Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
+ rememberInstruction(Ret);
+ }
// We assert at the end of the function since IP might point to an
// instruction with different dominance properties than a cast
// (an invoke for example) and not dominate BIP (but the cast does).
assert(SE.DT.dominates(Ret, &*BIP));
- rememberInstruction(Ret);
return Ret;
}
-static BasicBlock::iterator findInsertPointAfter(Instruction *I,
- BasicBlock *MustDominate) {
+BasicBlock::iterator
+SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) {
BasicBlock::iterator IP = ++I->getIterator();
if (auto *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
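The reuse test in the rewritten ReuseOrCreateCast loop above reduces, within a single block, to a position comparison; a sketch that models instruction positions as indices (a simplification of comesBefore, which the real code uses):

    #include <cassert>

    // A cast at CastPos is reusable only if it sits at or before the
    // insertion point IP and is not the Builder's insertion point BIP.
    bool canReuseCast(int CastPos, int IP, int BIP) {
      return CastPos != BIP && (CastPos == IP || CastPos < IP);
    }

    int main() {
      assert(canReuseCast(2, 5, 7));  // earlier in the block: dominates IP
      assert(!canReuseCast(6, 5, 7)); // after IP: cannot be reused
      assert(!canReuseCast(7, 7, 7)); // at BIP: must not be reused
    }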
@@ -102,11 +100,17 @@ static BasicBlock::iterator findInsertPointAfter(Instruction *I,
if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
++IP;
} else if (isa<CatchSwitchInst>(IP)) {
- IP = MustDominate->getFirstInsertionPt();
+ IP = MustDominate->getParent()->getFirstInsertionPt();
} else {
assert(!IP->isEHPad() && "unexpected eh pad!");
}
+ // Adjust insert point to be after instructions inserted by the expander, so
+ // we can re-use already inserted instructions. Avoid skipping past the
+ // original \p MustDominate, in case it is an inserted instruction.
+ while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
+ ++IP;
+
return IP;
}
@@ -122,6 +126,22 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
"InsertNoopCastOfTo cannot change sizes!");
+ // inttoptr only works for integral pointers. For non-integral pointers, we
+ // can create a GEP on i8* null with the integral value as index. Note that
+ // it is safe to use GEP of null instead of inttoptr here, because only
+ // expressions already based on a GEP of null should be converted to pointers
+ // during expansion.
+ if (Op == Instruction::IntToPtr) {
+ auto *PtrTy = cast<PointerType>(Ty);
+ if (DL.isNonIntegralPointerType(PtrTy)) {
+ auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
+ assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
+ "alloc size of i8 must by 1 byte for the GEP to be correct");
+ auto *GEP = Builder.CreateGEP(
+ Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
+ return Builder.CreateBitCast(GEP, Ty);
+ }
+ }
// Short-circuit unnecessary bitcasts.
if (Op == Instruction::BitCast) {
if (V->getType() == Ty)
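The GEP-on-null trick above works because i8 has an allocation size of one byte, so the GEP degenerates to plain address arithmetic; a standalone illustration with integer addresses (deliberately avoiding real null-pointer arithmetic):

    #include <cassert>
    #include <cstdint>

    int main() {
      std::uintptr_t NullBase = 0; // i8* null, viewed as an address
      std::uintptr_t V = 0x1234;   // integral value being turned into a pointer
      // "gep i8, i8* null, V" computes base + V * alloc-size(i8) = 0 + V * 1.
      std::uintptr_t Addr = NullBase + V * sizeof(std::uint8_t);
      assert(Addr == V); // same address an inttoptr of V would denote
    }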
@@ -166,7 +186,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
// Cast the instruction immediately after the instruction.
Instruction *I = cast<Instruction>(V);
- BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
+ BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint());
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -238,7 +258,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
BO->setHasNoUnsignedWrap();
if (Flags & SCEV::FlagNSW)
BO->setHasNoSignedWrap();
- rememberInstruction(BO);
return BO;
}
@@ -290,7 +309,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
if (!C->getAPInt().srem(FC->getAPInt())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ SmallVector<const SCEV *, 4> NewMulOps(M->operands());
NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
S = SE.getMulExpr(NewMulOps);
return true;
@@ -462,9 +481,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// we didn't find any operands that could be factored, tentatively
// assume that element zero was selected (since the zero offset
// would obviously be folded away).
- Value *Scaled = ScaledOps.empty() ?
- Constant::getNullValue(Ty) :
- expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ Value *Scaled =
+ ScaledOps.empty()
+ ? Constant::getNullValue(Ty)
+ : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
GepIndices.push_back(Scaled);
// Collect struct field index operands.
@@ -523,7 +543,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
// Expand the operands for a plain byte offset.
- Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false);
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
@@ -564,10 +584,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
- rememberInstruction(GEP);
-
- return GEP;
+ return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
}
{
@@ -598,7 +615,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Casted = InsertNoopCastOfTo(Casted, PTy);
Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
Ops.push_back(SE.getUnknown(GEP));
- rememberInstruction(GEP);
}
return expand(SE.getAddExpr(Ops));
@@ -748,14 +764,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
- Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false);
Sum = InsertNoopCastOfTo(Sum, Ty);
Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
- Value *W = expandCodeFor(Op, Ty);
+ Value *W = expandCodeForImpl(Op, Ty, false);
Sum = InsertNoopCastOfTo(Sum, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Sum)) std::swap(Sum, W);
@@ -807,7 +823,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
// that are needed into the result.
- Value *P = expandCodeFor(I->second, Ty);
+ Value *P = expandCodeForImpl(I->second, Ty, false);
Value *Result = nullptr;
if (Exponent & 1)
Result = P;
@@ -866,7 +882,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
@@ -875,7 +891,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
- Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false);
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
@@ -895,7 +911,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<const SCEV *, 8> NewAddOps(A->operands());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -1073,15 +1089,12 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
GEPPtrTy->getAddressSpace());
IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
+ if (IncV->getType() != PN->getType())
IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
} else {
IncV = useSubtract ?
Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
}
return IncV;
}
@@ -1193,6 +1206,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (!SE.isSCEVable(PN.getType()))
continue;
+ // We should not look for an incomplete PHI. Getting SCEV for an incomplete
+ // PHI has no meaning at all.
+ if (!PN.isComplete()) {
+ DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
+ continue;
+ }
+
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
if (!PhiSCEV)
continue;
@@ -1253,6 +1274,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
InsertedValues.insert(AddRecPhiMatch);
// Remember the increment.
rememberInstruction(IncV);
+ // Those values were not actually inserted but re-used.
+ ReusedValues.insert(AddRecPhiMatch);
+ ReusedValues.insert(IncV);
return AddRecPhiMatch;
}
}
@@ -1273,8 +1297,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand code for the start value into the loop preheader.
assert(L->getLoopPreheader() &&
"Can't expand add recurrences without a loop preheader!");
- Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
- L->getLoopPreheader()->getTerminator());
+ Value *StartV =
+ expandCodeForImpl(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator(), false);
// StartV must have been be inserted into L's preheader to dominate the new
// phi.
@@ -1292,7 +1317,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
// Expand the step somewhere that dominates the loop header.
- Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ Value *StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);
// The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
// we actually do emit an addition. It does not apply if we emit a
@@ -1306,7 +1332,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
Twine(IVName) + ".iv");
- rememberInstruction(PN);
// Create the step instructions and populate the PHI.
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
@@ -1415,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
Result = PN->getIncomingValueForBlock(LatchBlock);
+ // We might be introducing a new use of the post-inc IV that is not poison
+ // safe, in which case we should drop poison generating flags. Only keep
+ // those flags for which SCEV has proven that they always hold.
+ if (isa<OverflowingBinaryOperator>(Result)) {
+ auto *I = cast<Instruction>(Result);
+ if (!S->hasNoUnsignedWrap())
+ I->setHasNoUnsignedWrap(false);
+ if (!S->hasNoSignedWrap())
+ I->setHasNoSignedWrap(false);
+ }
+
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
@@ -1438,7 +1474,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
{
// Expand the step somewhere that dominates the loop header.
SCEVInsertPointGuard Guard(Builder, this);
- StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ StepV = expandCodeForImpl(
+ Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);
}
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
@@ -1452,16 +1489,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (ResTy != SE.getEffectiveSCEVType(ResTy))
Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
// Truncate the result.
- if (TruncTy != Result->getType()) {
+ if (TruncTy != Result->getType())
Result = Builder.CreateTrunc(Result, TruncTy);
- rememberInstruction(Result);
- }
+
// Invert the result.
- if (InvertStep) {
- Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
- Result);
- rememberInstruction(Result);
- }
+ if (InvertStep)
+ Result = Builder.CreateSub(
+ expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result);
}
// Re-apply any non-loop-dominating scale.
@@ -1469,24 +1503,22 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
- expandCodeFor(PostLoopScale, IntTy));
- rememberInstruction(Result);
+ expandCodeForImpl(PostLoopScale, IntTy, false));
}
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
if (Result->getType()->isIntegerTy()) {
- Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
+ Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false);
Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
} else {
Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
}
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
- Result = Builder.CreateAdd(Result,
- expandCodeFor(PostLoopOffset, IntTy));
- rememberInstruction(Result);
+ Result = Builder.CreateAdd(
+ Result, expandCodeForImpl(PostLoopOffset, IntTy, false));
}
}
@@ -1527,15 +1559,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
- findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
- V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
- &*NewInsertPt);
+ findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
+ V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
+ &*NewInsertPt, false);
return V;
}
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ SmallVector<const SCEV *, 4> NewOps(S->operands());
NewOps[0] = SE.getConstant(Ty, 0);
const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
S->getNoWrapFlags(SCEV::FlagNW));
@@ -1642,31 +1674,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
return expand(T);
}
+Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
+ Value *V =
+ expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false);
+ return Builder.CreatePtrToInt(V, S->getType());
+}
+
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateTrunc(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateTrunc(V, Ty);
}
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateZExt(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateZExt(V, Ty);
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
- Value *V = expandCodeFor(S->getOperand(),
- SE.getEffectiveSCEVType(S->getOperand()->getType()));
- Value *I = Builder.CreateSExt(V, Ty);
- rememberInstruction(I);
- return I;
+ Value *V = expandCodeForImpl(
+ S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()),
+ false);
+ return Builder.CreateSExt(V, Ty);
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
@@ -1680,11 +1715,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1705,11 +1738,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1730,11 +1761,9 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1755,11 +1784,9 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
- Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
- rememberInstruction(ICmp);
Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
- rememberInstruction(Sel);
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1769,15 +1796,45 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
return LHS;
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
- Instruction *IP) {
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
+ Instruction *IP, bool Root) {
setInsertPoint(IP);
- return expandCodeFor(SH, Ty);
+ Value *V = expandCodeForImpl(SH, Ty, Root);
+ return V;
}
-Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
+Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
// Expand the code for this SCEV.
Value *V = expand(SH);
+
+ if (PreserveLCSSA) {
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ // Create a temporary instruction at the current insertion point, so we
+ // can hand it off to the helper to create LCSSA PHIs if required for the
+ // new use.
+ // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
+ // would accept an insertion point and return an LCSSA phi for that
+ // insertion point, so there is no need to insert & remove the temporary
+ // instruction.
+ Instruction *Tmp;
+ if (Inst->getType()->isIntegerTy())
+ Tmp =
+ cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
+ else {
+ assert(Inst->getType()->isPointerTy());
+ Tmp = cast<Instruction>(
+ Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
+ }
+ V = fixupLCSSAFormFor(Tmp, 0);
+
+ // Clean up temporary instruction.
+ InsertedValues.erase(Tmp);
+ InsertedPostIncValues.erase(Tmp);
+ Tmp->eraseFromParent();
+ }
+ }
+
+ InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V;
if (Ty) {
assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
"non-trivial casts should be done with the SCEVs directly!");
@@ -1861,20 +1918,17 @@ Value *SCEVExpander::expand(const SCEV *S) {
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = &*L->getHeader()->getFirstInsertionPt();
+
while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
(isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt)))
+ isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = &*std::next(InsertPt->getIterator());
+ }
break;
}
}
}
- // IndVarSimplify sometimes sets the insertion point at the block start, even
- // when there are PHIs at that point. We must correct for this.
- if (isa<PHINode>(*InsertPt))
- InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
-
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
if (I != InsertedExpressions.end())
@@ -1922,32 +1976,25 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
void SCEVExpander::rememberInstruction(Value *I) {
- if (!PostIncLoops.empty())
- InsertedPostIncValues.insert(I);
- else
- InsertedValues.insert(I);
-}
-
-/// getOrInsertCanonicalInductionVariable - This method returns the
-/// canonical induction variable of the specified type for the specified
-/// loop (inserting one if there is none). A canonical induction variable
-/// starts at zero and steps by one on each iteration.
-PHINode *
-SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
- Type *Ty) {
- assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
-
- // Build a SCEV for {0,+,1}<L>.
- // Conservatively use FlagAnyWrap for now.
- const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
- SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+ auto DoInsert = [this](Value *V) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(V);
+ else
+ InsertedValues.insert(V);
+ };
+ DoInsert(I);
- // Emit code for it.
- SCEVInsertPointGuard Guard(Builder, this);
- PHINode *V =
- cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
+ if (!PreserveLCSSA)
+ return;
- return V;
+ if (auto *Inst = dyn_cast<Instruction>(I)) {
+ // A new instruction has been added, which might introduce new uses outside
+ // a defining loop. Fix LCSSA form for each operand of the new instruction,
+ // if required.
+ for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd;
+ OpIdx++)
+ fixupLCSSAFormFor(Inst, OpIdx);
+ }
}
/// replaceCongruentIVs - Check for congruent phis in this loop header and
@@ -1970,8 +2017,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
- return RHS->getType()->getPrimitiveSizeInBits() <
- LHS->getType()->getPrimitiveSizeInBits();
+ return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() <
+ LHS->getType()->getPrimitiveSizeInBits().getFixedSize();
});
unsigned NumElim = 0;
@@ -2079,6 +2126,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
<< *Phi << '\n');
+ DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: "
+ << *OrigPhiRef << '\n');
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
@@ -2092,15 +2141,6 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
- const Instruction *At, Loop *L) {
- Optional<ScalarEvolution::ValueOffsetPair> VO =
- getRelatedExistingExpansion(S, At, L);
- if (VO && VO.getValue().second == nullptr)
- return VO.getValue().first;
- return nullptr;
-}
-
Optional<ScalarEvolution::ValueOffsetPair>
SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
Loop *L) {
@@ -2139,15 +2179,156 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
return None;
}
+template<typename T> static int costAndCollectOperands(
+ const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
+ TargetTransformInfo::TargetCostKind CostKind,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
+
+ const T *S = cast<T>(WorkItem.S);
+ int Cost = 0;
+ // Object to help map SCEV operands to expanded IR instructions.
+ struct OperationIndices {
+ OperationIndices(unsigned Opc, size_t min, size_t max) :
+ Opcode(Opc), MinIdx(min), MaxIdx(max) { }
+ unsigned Opcode;
+ size_t MinIdx;
+ size_t MaxIdx;
+ };
+
+ // Collect the operations of all the instructions that will be needed to
+ // expand the SCEVExpr. This is so that when we come to cost the operands,
+ // we know what the generated user(s) will be.
+ SmallVector<OperationIndices, 2> Operations;
+
+ auto CastCost = [&](unsigned Opcode) {
+ Operations.emplace_back(Opcode, 0, 0);
+ return TTI.getCastInstrCost(Opcode, S->getType(),
+ S->getOperand(0)->getType(),
+ TTI::CastContextHint::None, CostKind);
+ };
+
+ auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
+ unsigned MinIdx = 0, unsigned MaxIdx = 1) {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ return NumRequired *
+ TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
+ };
+
+ auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired,
+ unsigned MinIdx, unsigned MaxIdx) {
+ Operations.emplace_back(Opcode, MinIdx, MaxIdx);
+ Type *OpType = S->getOperand(0)->getType();
+ return NumRequired * TTI.getCmpSelInstrCost(
+ Opcode, OpType, CmpInst::makeCmpResultType(OpType),
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ };
+
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ case scConstant:
+ return 0;
+ case scPtrToInt:
+ Cost = CastCost(Instruction::PtrToInt);
+ break;
+ case scTruncate:
+ Cost = CastCost(Instruction::Trunc);
+ break;
+ case scZeroExtend:
+ Cost = CastCost(Instruction::ZExt);
+ break;
+ case scSignExtend:
+ Cost = CastCost(Instruction::SExt);
+ break;
+ case scUDivExpr: {
+ unsigned Opcode = Instruction::UDiv;
+ if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
+ if (SC->getAPInt().isPowerOf2())
+ Opcode = Instruction::LShr;
+ Cost = ArithCost(Opcode, 1);
+ break;
+ }
+ case scAddExpr:
+ Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
+ break;
+ case scMulExpr:
+ // TODO: this is a very pessimistic cost modelling for Mul,
+ // because of the Bin Pow algorithm actually used by the expander,
+ // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+ Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
+ break;
+ case scSMaxExpr:
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr: {
+ Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
+ Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
+ break;
+ }
+ case scAddRecExpr: {
+ // In this polynomial, we may have some zero operands, and we shouldn't
+ // really charge for those. So how many non-zero coefficients are there?
+ int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
+ return !Op->isZero();
+ });
+
+ assert(NumTerms >= 1 && "Polynomial should have at least one term.");
+ assert(!(*std::prev(S->operands().end()))->isZero() &&
+ "Last operand should not be zero");
+
+ // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
+ int NumNonZeroDegreeNonOneTerms =
+ llvm::count_if(S->operands(), [](const SCEV *Op) {
+ auto *SConst = dyn_cast<SCEVConstant>(Op);
+ return !SConst || SConst->getAPInt().ugt(1);
+ });
+
+ // Much like with normal add expr, the polynomial will require
+ // one less addition than the number of its terms.
+ int AddCost = ArithCost(Instruction::Add, NumTerms - 1,
+ /*MinIdx*/1, /*MaxIdx*/1);
+ // Here, *each* one of those will require a multiplication.
+ int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
+ Cost = AddCost + MulCost;
+
+ // What is the degree of this polynomial?
+ int PolyDegree = S->getNumOperands() - 1;
+ assert(PolyDegree >= 1 && "Should be at least affine.");
+
+ // The final term will be:
+ // Op_{PolyDegree} * x ^ {PolyDegree}
+ // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
+ // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
+ // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
+ // FIXME: this is conservatively correct, but might be overly pessimistic.
+ Cost += MulCost * (PolyDegree - 1);
+ break;
+ }
+ }
+
+ for (auto &CostOp : Operations) {
+ for (auto SCEVOp : enumerate(S->operands())) {
+ // Clamp the index to account for multiple IR operations being chained.
+ size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
+ size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
+ Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
+ }
+ }
+ return Cost;
+}
+
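The AddRec arithmetic above can be sanity-checked in isolation. A hedged, LLVM-free model in which coefficients are plain integers and two ints stand in for the TTI cost queries (everything here is an assumption for illustration):

#include <cassert>
#include <cstdio>
#include <vector>

int addRecExpansionCost(const std::vector<unsigned long> &Coeffs,
                        int AddCost, int MulCost) {
  assert(Coeffs.size() >= 2 && "polynomial should be at least affine");
  int NumTerms = 0, NumNonOneTerms = 0;
  for (unsigned long C : Coeffs) {
    if (C != 0)
      ++NumTerms;       // every non-zero term joins the chain of adds
    if (C > 1)
      ++NumNonOneTerms; // coefficients u> 1 each cost one multiply
  }
  int PolyDegree = (int)Coeffs.size() - 1;
  return AddCost * (NumTerms - 1)      // adds between the terms
       + MulCost * NumNonOneTerms      // coefficient * x^i multiplies
       + MulCost * (PolyDegree - 1);   // building x^2 .. x^PolyDegree
}

int main() {
  // {3,+,4,+,1}: three non-zero terms, two coefficients > 1, degree 2.
  std::printf("%d\n", addRecExpansionCost({3, 4, 1}, 1, 3)); // prints 11
}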
bool SCEVExpander::isHighCostExpansionHelper(
- const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining,
- const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed,
- SmallVectorImpl<const SCEV *> &Worklist) {
+ const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
+ int &BudgetRemaining, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const SCEV *> &Processed,
+ SmallVectorImpl<SCEVOperand> &Worklist) {
if (BudgetRemaining < 0)
return true; // Already run out of budget, give up.
+ const SCEV *S = WorkItem.S;
// Was the cost of expansion of this expression already accounted for?
- if (!Processed.insert(S).second)
+ if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
return false; // We have already accounted for this expression.
// If we can find an existing value for this scev available at the point "At"
@@ -2155,52 +2336,37 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (getRelatedExistingExpansion(S, &At, L))
return false; // Consider the expression to be free.
- switch (S->getSCEVType()) {
- case scUnknown:
- case scConstant:
- return false; // Assume to be zero-cost.
- }
-
TargetTransformInfo::TargetCostKind CostKind =
- TargetTransformInfo::TCK_RecipThroughput;
+ L->getHeader()->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_RecipThroughput;
- if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
- unsigned Opcode;
- switch (S->getSCEVType()) {
- case scTruncate:
- Opcode = Instruction::Trunc;
- break;
- case scZeroExtend:
- Opcode = Instruction::ZExt;
- break;
- case scSignExtend:
- Opcode = Instruction::SExt;
- break;
- default:
- llvm_unreachable("There are no other cast types.");
- }
- const SCEV *Op = CastExpr->getOperand();
- BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
- /*Src=*/Op->getType(), CostKind);
- Worklist.emplace_back(Op);
+ switch (S->getSCEVType()) {
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ case scUnknown:
+ // Assume to be zero-cost.
+ return false;
+ case scConstant: {
+ // Only evaluate the costs of constants when optimizing for size.
+ if (CostKind != TargetTransformInfo::TCK_CodeSize)
+ return false;
+ const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
+ Type *Ty = S->getType();
+ BudgetRemaining -= TTI.getIntImmCostInst(
+ WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
+ return BudgetRemaining < 0;
+ }
+ case scTruncate:
+ case scPtrToInt:
+ case scZeroExtend:
+ case scSignExtend: {
+ int Cost =
+ costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
+ BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
-
- if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
- // If the divisor is a power of two count this as a logical right-shift.
- if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
- if (SC->getAPInt().isPowerOf2()) {
- BudgetRemaining -=
- TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
- CostKind);
- // Note that we don't count the cost of RHS, because it is a constant,
- // and we consider those to be free. But if that changes, we would need
- // to log2() it first before calling isHighCostExpansionHelper().
- Worklist.emplace_back(UDivExpr->getLHS());
- return false; // Will answer upon next entry into this function.
- }
- }
-
+ case scUDivExpr: {
// UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
// HowManyLessThans produced to compute a precise expression, rather than a
// UDiv from the user's code. If we can't find a UDiv in the code with some
@@ -2213,117 +2379,36 @@ bool SCEVExpander::isHighCostExpansionHelper(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
return false; // Consider it to be free.
+ int Cost =
+ costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
// Need to count the cost of this UDiv.
- BudgetRemaining -=
- TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
- CostKind);
- Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
+ BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
-
- if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) {
- Type *OpType = NAry->getType();
-
- assert(NAry->getNumOperands() >= 2 &&
- "Polynomial should be at least linear");
-
- int AddCost =
- TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
- int MulCost =
- TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
-
- // In this polynominal, we may have some zero operands, and we shouldn't
- // really charge for those. So how many non-zero coeffients are there?
- int NumTerms = llvm::count_if(NAry->operands(),
- [](const SCEV *S) { return !S->isZero(); });
- assert(NumTerms >= 1 && "Polynominal should have at least one term.");
- assert(!(*std::prev(NAry->operands().end()))->isZero() &&
- "Last operand should not be zero");
-
- // Much like with normal add expr, the polynominal will require
- // one less addition than the number of it's terms.
- BudgetRemaining -= AddCost * (NumTerms - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // Ignoring constant term (operand 0), how many of the coeffients are u> 1?
- int NumNonZeroDegreeNonOneTerms =
- llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()),
- [](const SCEV *S) {
- auto *SConst = dyn_cast<SCEVConstant>(S);
- return !SConst || SConst->getAPInt().ugt(1);
- });
- // Here, *each* one of those will require a multiplication.
- BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms;
- if (BudgetRemaining < 0)
- return true;
-
- // What is the degree of this polynominal?
- int PolyDegree = NAry->getNumOperands() - 1;
- assert(PolyDegree >= 1 && "Should be at least affine.");
-
- // The final term will be:
- // Op_{PolyDegree} * x ^ {PolyDegree}
- // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
- // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
- // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
- // FIXME: this is conservatively correct, but might be overly pessimistic.
- BudgetRemaining -= MulCost * (PolyDegree - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // And finally, the operands themselves should fit within the budget.
- Worklist.insert(Worklist.end(), NAry->operands().begin(),
- NAry->operands().end());
- return false; // So far so good, though ops may be too costly?
- }
-
- if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
- Type *OpType = NAry->getType();
-
- int PairCost;
- switch (S->getSCEVType()) {
- case scAddExpr:
- PairCost =
- TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
- break;
- case scMulExpr:
- // TODO: this is a very pessimistic cost modelling for Mul,
- // because of Bin Pow algorithm actually used by the expander,
- // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
- PairCost =
- TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
- break;
- case scSMaxExpr:
- case scUMaxExpr:
- case scSMinExpr:
- case scUMinExpr:
- PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
- CmpInst::makeCmpResultType(OpType),
- CostKind) +
- TTI.getCmpSelInstrCost(Instruction::Select, OpType,
- CmpInst::makeCmpResultType(OpType),
- CostKind);
- break;
- default:
- llvm_unreachable("There are no other variants here.");
- }
-
- assert(NAry->getNumOperands() > 1 &&
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
+ assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
"Nary expr should have more than 1 operand.");
// The simple nary expr will require one less op (or pair of ops)
// than the number of it's terms.
- BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1);
- if (BudgetRemaining < 0)
- return true;
-
- // And finally, the operands themselves should fit within the budget.
- Worklist.insert(Worklist.end(), NAry->operands().begin(),
- NAry->operands().end());
- return false; // So far so good, though ops may be too costly?
+ int Cost =
+ costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
+ BudgetRemaining -= Cost;
+ return BudgetRemaining < 0;
}
-
- llvm_unreachable("No other scev expressions possible.");
+ case scAddRecExpr: {
+ assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
+ "Polynomial should be at least linear");
+ BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>(
+ WorkItem, TTI, CostKind, Worklist);
+ return BudgetRemaining < 0;
+ }
+ }
+ llvm_unreachable("Unknown SCEV kind!");
}
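For context, a hedged reconstruction of the worklist driver that consumes this helper; the entry point lives in the header, so the exact signature below is an assumption:

bool SCEVExpander::isHighCostExpansion(const SCEV *Expr, Loop *L,
                                       unsigned Budget,
                                       const TargetTransformInfo &TTI,
                                       const Instruction &At) {
  SmallVector<SCEVOperand, 8> Worklist;
  SmallPtrSet<const SCEV *, 8> Processed;
  int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic;
  Worklist.emplace_back(-1, -1, Expr); // root item: no parent instruction
  while (!Worklist.empty()) {
    SCEVOperand WorkItem = Worklist.pop_back_val();
    if (isHighCostExpansionHelper(WorkItem, L, At, BudgetRemaining, TTI,
                                  Processed, Worklist))
      return true; // budget went negative somewhere in the tree
  }
  return false; // the whole expression fits within Budget
}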
Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
@@ -2344,8 +2429,10 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
Instruction *IP) {
- Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
- Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+ Value *Expr0 =
+ expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false);
+ Value *Expr1 =
+ expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false);
Builder.SetInsertPoint(IP);
auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
@@ -2361,7 +2448,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *ExitCount =
SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
- assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
+ assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
@@ -2377,15 +2464,16 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
Builder.SetInsertPoint(Loc);
- Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
+ Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false);
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
- Value *StepValue = expandCodeFor(Step, Ty, Loc);
- Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
+ Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
+ Value *NegStepValue =
+ expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
+ Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2445,8 +2533,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
}
- EndCheck = Builder.CreateOr(EndCheck, OfMul);
- return EndCheck;
+ return Builder.CreateOr(EndCheck, OfMul);
}
Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
@@ -2489,6 +2576,34 @@ Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
return Check;
}
+Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) {
+ assert(PreserveLCSSA);
+ SmallVector<Instruction *, 1> ToUpdate;
+
+ auto *OpV = User->getOperand(OpIdx);
+ auto *OpI = dyn_cast<Instruction>(OpV);
+ if (!OpI)
+ return OpV;
+
+ Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent());
+ Loop *UseLoop = SE.LI.getLoopFor(User->getParent());
+ if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
+ return OpV;
+
+ ToUpdate.push_back(OpI);
+ SmallVector<PHINode *, 16> PHIsToRemove;
+ formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
+ for (PHINode *PN : PHIsToRemove) {
+ if (!PN->use_empty())
+ continue;
+ InsertedValues.erase(PN);
+ InsertedPostIncValues.erase(PN);
+ PN->eraseFromParent();
+ }
+
+ return User->getOperand(OpIdx);
+}
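A hedged illustration of the case this helper handles; the IR shape lives in the comments, and every name is made up:

// loop:                                   ; DefLoop
//   %iv.next = add i64 %iv, 1
//   br i1 %cond, label %loop, label %exit
// exit:                                   ; not contained in DefLoop
//   %use = add i64 %iv.next, 7            ; operand defined inside the loop
//
// Rewriting operand 0 of %use routes it through a phi in the exit block:
//
// exit:
//   %iv.next.lcssa = phi i64 [ %iv.next, %loop ]
//   %use = add i64 %iv.next.lcssa, 7
Value *Fixed = fixupLCSSAFormFor(UseInst, /*OpIdx=*/0); // the .lcssa phi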
+
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
@@ -2566,4 +2681,40 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
}
return false;
}
+
+SCEVExpanderCleaner::~SCEVExpanderCleaner() {
+ // Result is used, nothing to remove.
+ if (ResultUsed)
+ return;
+
+ auto InsertedInstructions = Expander.getAllInsertedInstructions();
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
+ InsertedInstructions.end());
+ (void)InsertedSet;
+#endif
+ // Remove sets with value handles.
+ Expander.clear();
+
+ // Sort so that earlier instructions do not dominate later instructions.
+ stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) {
+ return DT.dominates(B, A);
+ });
+ // Remove all inserted instructions.
+ for (Instruction *I : InsertedInstructions) {
+
+#ifndef NDEBUG
+ assert(all_of(I->users(),
+ [&InsertedSet](Value *U) {
+ return InsertedSet.contains(cast<Instruction>(U));
+ }) &&
+ "removed instruction should only be used by instructions inserted "
+ "during expansion");
+#endif
+ assert(!I->getType()->isVoidTy() &&
+ "inserted instruction should have non-void types");
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+}
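A hedged usage sketch for the new RAII cleaner; markResultUsed() matches the ResultUsed flag checked above, but the surrounding profitability check is an assumed caller, not code from this patch:

SCEVExpander Expander(SE, DL, "expander");
SCEVExpanderCleaner Cleaner(Expander, DT);
Value *Bound = Expander.expandCodeFor(BoundSCEV, CountTy, InsertPt);
if (stillProfitable(Bound)) // caller-specific decision (assumed)
  Cleaner.markResultUsed();  // keep the expanded IR
// Otherwise the destructor erases everything the expander inserted, users
// first, after replacing interim uses with undef.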
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b450d71c996c..7cfe17618cde 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -13,8 +13,11 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -58,6 +61,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -67,6 +71,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -85,6 +90,12 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
+cl::opt<bool> llvm::RequireAndPreserveDomTree(
+ "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
+ "into preserving DomTree,"));
+
// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
@@ -105,6 +116,10 @@ static cl::opt<bool> DupRet(
cl::desc("Duplicate return instructions into unconditional branches"));
static cl::opt<bool>
+ HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
+ cl::desc("Hoist common instructions up to the parent block"));
+
+static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
@@ -138,6 +153,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
cl::desc("Max size of a block which is still considered "
"small enough to thread through"));
+// Two is chosen to allow one negation and a logical combine.
+static cl::opt<unsigned>
+ BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
+ cl::init(2),
+ cl::desc("Maximum cost of combining conditions when "
+ "folding branches"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -147,9 +169,22 @@ STATISTIC(
NumLookupTablesHoles,
"Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
-STATISTIC(NumSinkCommons,
+STATISTIC(NumFoldValueComparisonIntoPredecessors,
+ "Number of value comparisons folded into predecessor basic blocks");
+STATISTIC(NumFoldBranchToCommonDest,
+ "Number of branches folded into predecessor basic block");
+STATISTIC(
+ NumHoistCommonCode,
+ "Number of common instruction 'blocks' hoisted up to the begin block");
+STATISTIC(NumHoistCommonInstrs,
+ "Number of common instructions hoisted up to the begin block");
+STATISTIC(NumSinkCommonCode,
+ "Number of common instruction 'blocks' sunk down to the end block");
+STATISTIC(NumSinkCommonInstrs,
"Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumInvokes,
+ "Number of invokes with empty resume blocks simplified into calls");
namespace {
@@ -182,8 +217,9 @@ struct ValueEqualityComparisonCase {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ DomTreeUpdater *DTU;
const DataLayout &DL;
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
+ ArrayRef<WeakVH> LoopHeaders;
const SimplifyCFGOptions &Options;
bool Resimplify;
@@ -193,6 +229,9 @@ class SimplifyCFGOpt {
bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
+ bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
+ Instruction *PTI,
+ IRBuilder<> &Builder);
bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
@@ -225,13 +264,18 @@ class SimplifyCFGOpt {
bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
+ const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
const SimplifyCFGOptions &Opts)
- : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
+ : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
+ assert((!DTU || !DTU->hasPostDomTree()) &&
+ "SimplifyCFG is not yet capable of maintaining validity of a "
+ "PostDomTree, so don't ask for it.");
+ }
- bool run(BasicBlock *BB);
bool simplifyOnce(BasicBlock *BB);
+ bool simplifyOnceImpl(BasicBlock *BB);
+ bool run(BasicBlock *BB);
// Helper to set Resimplify and return change indication.
bool requestResimplify() {
@@ -273,46 +317,6 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
return !Fail;
}
-/// Return true if it is safe and profitable to merge these two terminator
-/// instructions together, where SI1 is an unconditional branch. PhiNodes will
-/// store all PHI nodes in common successors.
-static bool
-isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
- Instruction *Cond,
- SmallVectorImpl<PHINode *> &PhiNodes) {
- if (SI1 == SI2)
- return false; // Can't merge with self!
- assert(SI1->isUnconditional() && SI2->isConditional());
-
- // We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
- // 1> We have a constant incoming value for the conditional branch;
- // 2> We have "Cond" as the incoming value for the unconditional branch;
- // 3> SI2->getCondition() and Cond have same operands.
- CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
- if (!Ci2)
- return false;
- if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
- Cond->getOperand(1) == Ci2->getOperand(1)) &&
- !(Cond->getOperand(0) == Ci2->getOperand(1) &&
- Cond->getOperand(1) == Ci2->getOperand(0)))
- return false;
-
- BasicBlock *SI1BB = SI1->getParent();
- BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
- !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
- return false;
- PhiNodes.push_back(PN);
- }
- return true;
-}
-
/// Update PHI nodes in Succ to indicate that there will now be entries in it
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
/// will be the same as those coming in from ExistPred, an existing predecessor
@@ -651,7 +655,7 @@ private:
/// vector.
/// One "Extra" case is allowed to differ from the other.
void gather(Value *V) {
- bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
+ bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
// Keep a stack (SmallVector for efficiency) for depth-first traversal
SmallVector<Value *, 8> DFT;
@@ -666,11 +670,14 @@ private:
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If it is a || (or && depending on isEQ), process the operands.
- if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
- if (Visited.insert(I->getOperand(1)).second)
- DFT.push_back(I->getOperand(1));
- if (Visited.insert(I->getOperand(0)).second)
- DFT.push_back(I->getOperand(0));
+ Value *Op0, *Op1;
+ if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
+ : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
+ if (Visited.insert(Op1).second)
+ DFT.push_back(Op1);
+ if (Visited.insert(Op0).second)
+ DFT.push_back(Op0);
+
continue;
}
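The switch to the m_Logical matchers widens what gather() walks. A hedged sketch of the two forms each matcher accepts (PatternMatch.h):

Value *Op0, *Op1;
if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
  // matches the bitwise form:   %v = or i1 %a, %b
  // and the poison-safe select: %v = select i1 %a, i1 true, i1 %b
}
if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
  // matches:                    %v = and i1 %a, %b
  // and:                        %v = select i1 %a, i1 %b, i1 false
}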
@@ -765,7 +772,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
static void
EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
+ llvm::erase_value(Cases, BB);
}
/// Return true if there are any keys in C1 that exist in C2 as well.
@@ -875,13 +882,18 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
(void)NI;
// Remove PHI node entries for the dead edge.
- ThisCases[0].Dest->removePredecessor(TI->getParent());
+ ThisCases[0].Dest->removePredecessor(PredDef);
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
EraseTerminatorAndDCECond(TI);
+
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
+
return true;
}
@@ -894,13 +906,25 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
+ auto *Successor = i->getCaseSuccessor();
+ ++NumPerSuccessorCases[Successor];
if (DeadCases.count(i->getCaseValue())) {
- i->getCaseSuccessor()->removePredecessor(TI->getParent());
+ Successor->removePredecessor(PredDef);
SI.removeCase(i);
+ --NumPerSuccessorCases[Successor];
}
}
+
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, PredDef, I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
@@ -930,12 +954,16 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
if (!TheRealDest)
TheRealDest = ThisDef;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccs;
+
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
for (BasicBlock *Succ : successors(TIBB))
- if (Succ != CheckEdge)
+ if (Succ != CheckEdge) {
+ if (Succ != TheRealDest)
+ RemovedSuccs.insert(Succ);
Succ->removePredecessor(TIBB);
- else
+ } else
CheckEdge = nullptr;
// Insert the new branch.
@@ -947,6 +975,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "\n");
EraseTerminatorAndDCECond(TI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -1014,219 +1049,300 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
}
}
-/// The specified terminator is a value equality comparison instruction
-/// (either a switch or a branch on "X == c").
-/// See if any of the predecessors of the terminator block are value comparisons
-/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
- IRBuilder<> &Builder) {
- BasicBlock *BB = TI->getParent();
- Value *CV = isValueEqualityComparison(TI); // CondVal
- assert(CV && "Not a comparison?");
- bool Changed = false;
+static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
+ BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
+ Instruction *PTI = PredBlock->getTerminator();
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.pop_back_val();
+ // If we have bonus instructions, clone them into the predecessor block.
+ // Note that there may be multiple predecessor blocks, so we cannot move
+ // bonus instructions to a predecessor block.
+ for (Instruction &BonusInst : *BB) {
+ if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
+ continue;
- // See if the predecessor is a comparison with the same value.
- Instruction *PTI = Pred->getTerminator();
- Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ Instruction *NewBonusInst = BonusInst.clone();
- if (PCV == CV && TI != PTI) {
- SmallSetVector<BasicBlock*, 4> FailBlocks;
- if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
- for (auto *Succ : FailBlocks) {
- if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
- return false;
- }
- }
+ if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
+ // Unless the instruction has the same !dbg location as the original
+ // branch, drop it. When we fold the bonus instructions we want to make
+ // sure we reset their debug locations in order to avoid stepping on
+ // dead code caused by folding dead branches.
+ NewBonusInst->setDebugLoc(DebugLoc());
+ }
- // Figure out which 'cases' to copy from SI to PSI.
- std::vector<ValueEqualityComparisonCase> BBCases;
- BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
-
- std::vector<ValueEqualityComparisonCase> PredCases;
- BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
-
- // Based on whether the default edge from PTI goes to BB or not, fill in
- // PredCases and PredDefault with the new switch cases we would like to
- // build.
- SmallVector<BasicBlock *, 8> NewSuccessors;
-
- // Update the branch weight metadata along the way
- SmallVector<uint64_t, 8> Weights;
- bool PredHasWeights = HasBranchWeights(PTI);
- bool SuccHasWeights = HasBranchWeights(TI);
-
- if (PredHasWeights) {
- GetBranchWeights(PTI, Weights);
- // branch-weight metadata is inconsistent here.
- if (Weights.size() != 1 + PredCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (SuccHasWeights)
- // If there are no predecessor weights but there are successor weights,
- // populate Weights with 1, which will later be scaled to the sum of
- // successor's weights
- Weights.assign(1 + PredCases.size(), 1);
-
- SmallVector<uint64_t, 8> SuccWeights;
- if (SuccHasWeights) {
- GetBranchWeights(TI, SuccWeights);
- // branch-weight metadata is inconsistent here.
- if (SuccWeights.size() != 1 + BBCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (PredHasWeights)
- SuccWeights.assign(1 + BBCases.size(), 1);
-
- if (PredDefault == BB) {
- // If this is the default destination from PTI, only the edges in TI
- // that don't occur in PTI, or that branch to BB will be activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest != BB)
- PTIHandled.insert(PredCases[i].Value);
- else {
- // The default destination is BB, we don't need explicit targets.
- std::swap(PredCases[i], PredCases.back());
-
- if (PredHasWeights || SuccHasWeights) {
- // Increase weight for the default case.
- Weights[0] += Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
+ RemapInstruction(NewBonusInst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ VMap[&BonusInst] = NewBonusInst;
+
+ // If we moved a load, we can no longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand. We *can* preserve !annotation, because
+ // it is tied to the instruction itself, not the value or position.
+ NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation);
+
+ PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst);
+ NewBonusInst->takeName(&BonusInst);
+ BonusInst.setName(NewBonusInst->getName() + ".old");
+
+ // Update (liveout) uses of bonus instructions,
+ // now that the bonus instruction has been cloned into predecessor.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(BonusInst.getType(),
+ (NewBonusInst->getName() + ".merge").str());
+ SSAUpdate.AddAvailableValue(BB, &BonusInst);
+ SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
+ for (Use &U : make_early_inc_range(BonusInst.uses()))
+ SSAUpdate.RewriteUseAfterInsertions(U);
+ }
+}
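The SSAUpdater dance at the end of this helper is a reusable pattern. A hedged minimal form, with Orig defined in OrigBB and its clone Clone in PredBB (illustrative names):

SSAUpdater Updater;
Updater.Initialize(Orig->getType(), "merged");
Updater.AddAvailableValue(OrigBB, Orig);  // original definition
Updater.AddAvailableValue(PredBB, Clone); // cloned definition
for (Use &U : make_early_inc_range(Orig->uses()))
  Updater.RewriteUseAfterInsertions(U);   // inserts phis at join points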
- PredCases.pop_back();
- --i;
- --e;
- }
+bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
+ Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ BasicBlock *Pred = PTI->getParent();
+
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ // Figure out which 'cases' to copy from SI to PSI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
- // Reconstruct the new switch statement we will be building.
- if (PredDefault != BBDefault) {
- PredDefault->removePredecessor(Pred);
- PredDefault = BBDefault;
- NewSuccessors.push_back(BBDefault);
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
}
- unsigned CasesFromPred = Weights.size();
- uint64_t ValidTotalSuccWeight = 0;
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (!PTIHandled.count(BBCases[i].Value) &&
- BBCases[i].Dest != BBDefault) {
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- if (SuccHasWeights || PredHasWeights) {
- // The default weight is at index 0, so weight for the ith case
- // should be at index i+1. Scale the cases from successor by
- // PredDefaultWeight (Weights[0]).
- Weights.push_back(Weights[0] * SuccWeights[i + 1]);
- ValidTotalSuccWeight += SuccWeights[i + 1];
- }
- }
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ if (PredDefault != BB)
+ Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
+ PredDefault = BBDefault;
+ ++NewSuccessors[BBDefault];
+ }
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
if (SuccHasWeights || PredHasWeights) {
- ValidTotalSuccWeight += SuccWeights[0];
- // Scale the cases from predecessor by ValidTotalSuccWeight.
- for (unsigned i = 1; i < CasesFromPred; ++i)
- Weights[i] *= ValidTotalSuccWeight;
- // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
- Weights[0] *= SuccWeights[0];
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]);
+ ValidTotalSuccWeight += SuccWeights[i + 1];
}
- } else {
- // If this is not the default destination from PSI, only the edges
- // in SI that occur in PSI with a destination of BB will be
- // activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- std::map<ConstantInt *, uint64_t> WeightsForHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest == BB) {
- PTIHandled.insert(PredCases[i].Value);
-
- if (PredHasWeights || SuccHasWeights) {
- WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
-
- std::swap(PredCases[i], PredCases.back());
- PredCases.pop_back();
- --i;
- --e;
- }
+ }
- // Okay, now we know which constants were sent to BB from the
- // predecessor. Figure out where they will all go now.
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (PTIHandled.count(BBCases[i].Value)) {
- // If this is one we are capable of getting...
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[BBCases[i].Value]);
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- PTIHandled.erase(
- BBCases[i].Value); // This constant is taken care of
- }
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PSI, only the edges
+ // in SI that occur in PSI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt *, uint64_t> WeightsForHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
- // If there are any constants vectored to BB that TI doesn't handle,
- // they must go to the default destination of TI.
- for (ConstantInt *I : PTIHandled) {
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[I]);
- PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
- NewSuccessors.push_back(BBDefault);
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
}
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i;
+ --e;
}
- // Okay, at this point, we know which new successor Pred will get. Make
- // sure we update the number of entries in the PHI nodes for these
- // successors.
- for (BasicBlock *NewSuccessor : NewSuccessors)
- AddPredecessorToBlock(NewSuccessor, Pred, BB);
-
- Builder.SetInsertPoint(PTI);
- // Convert pointer to int before we switch.
- if (CV->getType()->isPointerTy()) {
- CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
- "magicptr");
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ ++NewSuccessors[BBCases[i].Dest];
+ PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
}
- // Now that the successors are updated, create the new Switch instruction.
- SwitchInst *NewSI =
- Builder.CreateSwitch(CV, PredDefault, PredCases.size());
- NewSI->setDebugLoc(PTI->getDebugLoc());
- for (ValueEqualityComparisonCase &V : PredCases)
- NewSI->addCase(V.Value, V.Dest);
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (ConstantInt *I : PTIHandled) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[I]);
+ PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
+ ++NewSuccessors[BBDefault];
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
+ NewSuccessors) {
+ for (auto I : seq(0, NewSuccessor.second)) {
+ (void)I;
+ AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
+ }
+ if (!is_contained(successors(Pred), NewSuccessor.first))
+ Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
+ }
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ CV =
+ Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- if (PredHasWeights || SuccHasWeights) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(Weights);
+ setBranchWeights(NewSI, MDWeights);
+ }
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ EraseTerminatorAndDCECond(PTI);
- setBranchWeights(NewSI, MDWeights);
+ // Okay, last check. If BB is still a successor of PSI, then we must
+ // have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = nullptr;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (!InfLoopBlock) {
+ // Insert it at the end of the function, because it's either cold code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
}
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
- EraseTerminatorAndDCECond(PTI);
-
- // Okay, last check. If BB is still a successor of PSI, then we must
- // have an infinite loop case. If so, add an infinitely looping block
- // to handle the case to preserve the behavior of the code.
- BasicBlock *InfLoopBlock = nullptr;
- for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
- if (NewSI->getSuccessor(i) == BB) {
- if (!InfLoopBlock) {
- // Insert it at the end of the function, because it's either code,
- // or it won't matter if it's hot. :)
- InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
- BB->getParent());
- BranchInst::Create(InfLoopBlock, InfLoopBlock);
- }
- NewSI->setSuccessor(i, InfLoopBlock);
- }
+ if (InfLoopBlock)
+ Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
- Changed = true;
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
+ ++NumFoldValueComparisonIntoPredecessors;
+ return true;
+}
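The DomTreeUpdater discipline visible here recurs throughout the patch: mutate the IR first, queue the edge changes, then apply them in one batch. A hedged minimal form:

std::vector<DominatorTree::UpdateType> Updates;
Updates.push_back({DominatorTree::Insert, Pred, NewSucc}); // edge added above
Updates.push_back({DominatorTree::Delete, Pred, BB});      // edge removed above
if (DTU)
  DTU->applyUpdates(Updates); // deduplicated, then applied to the DomTree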
+
+/// The specified terminator is a value equality comparison instruction
+/// (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+
+ bool Changed = false;
+
+ SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+ Instruction *PTI = Pred->getTerminator();
+
+ // Don't try to fold into itself.
+ if (Pred == BB)
+ continue;
+
+ // See if the predecessor is a comparison with the same value.
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ if (PCV != CV)
+ continue;
+
+ SmallSetVector<BasicBlock *, 4> FailBlocks;
+ if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
+ for (auto *Succ : FailBlocks) {
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
+ return false;
+ }
}
+
+ PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
+ Changed = true;
}
return Changed;
}
@@ -1248,7 +1364,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
return true;
}
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
@@ -1285,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
BasicBlock *BIParent = BI->getParent();
bool Changed = false;
+
+ auto _ = make_scope_exit([&]() {
+ if (Changed)
+ ++NumHoistCommonCode;
+ });
+
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1353,6 +1475,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
I2->eraseFromParent();
Changed = true;
}
+ ++NumHoistCommonInstrs;
I1 = &*BB1_Itr++;
I2 = &*BB2_Itr++;
@@ -1407,6 +1530,8 @@ HoistTerminator:
I2->replaceAllUsesWith(NT);
NT->takeName(I1);
}
+ Changed = true;
+ ++NumHoistCommonInstrs;
// Ensure terminator gets a debug location, even an unknown one, in case
// it involves inlinable calls.
@@ -1448,12 +1573,20 @@ HoistTerminator:
}
}
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+
// Update any PHI nodes in our new successors.
- for (BasicBlock *Succ : successors(BB1))
+ for (BasicBlock *Succ : successors(BB1)) {
AddPredecessorToBlock(Succ, BIParent, BB1);
+ Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ }
+ for (BasicBlock *Succ : successors(BI))
+ Updates.push_back({DominatorTree::Delete, BIParent, Succ});
EraseTerminatorAndDCECond(BI);
- return true;
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ return Changed;
}
// Check lifetime markers.
@@ -1744,7 +1877,8 @@ namespace {
/// true, sink any common code from the predecessors to BB.
/// We also allow one predecessor to end with a conditional branch (but no more
/// than one).
-static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
+static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
// We support two situations:
// (1) all incoming arcs are unconditional
// (2) one incoming arc is conditional
@@ -1800,7 +1934,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
if (UnconditionalPreds.size() < 2)
return false;
- bool Changed = false;
// We take a two-step approach to tail sinking. First we scan from the end of
// each block upwards in lockstep. If the n'th instruction from the end of each
// block can be sunk, those instructions are added to ValuesToSink and we
@@ -1820,6 +1953,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
--LRI;
}
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+
+ bool Changed = false;
+
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
unsigned NumPHIdValues = 0;
for (auto *I : *LRI)
@@ -1834,7 +1973,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
return NumPHIInsts <= 1;
};
- if (ScanIdx > 0 && Cond) {
+ if (Cond) {
// Check if we would actually sink anything first! This mutates the CFG and
// adds an extra block. The goal in doing this is to allow instructions that
// couldn't be sunk before to be sunk - obviously, speculatable instructions
@@ -1857,7 +1996,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
- if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
+ if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
// Edges couldn't be split.
return false;
Changed = true;
@@ -1875,7 +2014,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
// sink presuming a later value will also be sunk, but stop half way through
// and never actually sink it which means we produce more PHIs than intended.
// This is unlikely in practice though.
- for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
+ unsigned SinkIdx = 0;
+ for (; SinkIdx != ScanIdx; ++SinkIdx) {
LLVM_DEBUG(dbgs() << "SINK: Sink: "
<< *UnconditionalPreds[0]->getTerminator()->getPrevNode()
<< "\n");
@@ -1890,11 +2030,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
break;
}
- if (!sinkLastInstruction(UnconditionalPreds))
- return Changed;
- NumSinkCommons++;
+ if (!sinkLastInstruction(UnconditionalPreds)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "SINK: stopping here, failed to actually sink instruction!\n");
+ break;
+ }
+
+ NumSinkCommonInstrs++;
Changed = true;
}
+ if (SinkIdx != 0)
+ ++NumSinkCommonCode;
return Changed;
}
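// --- Illustrative sketch, not part of the upstream patch ---
// The "scan from the end of each block upwards in lockstep" idea behind
// ScanIdx: count how many trailing instructions match across all
// predecessors. The real code uses canSinkInstructions(), which is far more
// permissive than isIdenticalTo(); this hypothetical helper only shows the
// lockstep walk itself.
static unsigned countLockstepMatches(ArrayRef<BasicBlock *> Preds) {
  unsigned ScanIdx = 0;
  for (;;) {
    SmallVector<Instruction *, 4> Cands;
    for (BasicBlock *Pred : Preds) {
      Instruction *I = Pred->getTerminator();
      // Step ScanIdx + 1 instructions up from the terminator.
      for (unsigned N = 0; N <= ScanIdx; ++N)
        if (!(I = I->getPrevNode()))
          return ScanIdx; // Ran out of instructions in this block.
      Cands.push_back(I);
    }
    for (Instruction *I : Cands)
      if (!I->isIdenticalTo(Cands[0]))
        return ScanIdx; // Mismatch at the n'th position from the end.
    ++ScanIdx;
  }
}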
@@ -1938,7 +2085,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Look for a store to the same pointer in BrBB.
unsigned MaxNumInstToLookAt = 9;
- for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
+ // Skip pseudo probe intrinsic calls, which do not really kill any memory
+ // accesses.
+ for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
if (!MaxNumInstToLookAt)
break;
--MaxNumInstToLookAt;
@@ -1959,6 +2108,65 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return nullptr;
}
+/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
+/// converted to selects.
+static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
+ BasicBlock *EndBB,
+ unsigned &SpeculatedInstructions,
+ int &BudgetRemaining,
+ const TargetTransformInfo &TTI) {
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize()
+ ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
+
+ bool HaveRewritablePHIs = false;
+ for (PHINode &PN : EndBB->phis()) {
+ Value *OrigV = PN.getIncomingValueForBlock(BB);
+ Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
+
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ BudgetRemaining -=
+ TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ // Don't convert to selects if we could remove undefined behavior instead.
+ if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
+ passingValueIsAlwaysUndefined(ThenV, &PN))
+ return false;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
+ continue; // Known safe and cheap.
+
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
+ return false;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
+ unsigned MaxCost =
+ 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculatedInstructions;
+ if (SpeculatedInstructions > 1)
+ return false;
+ }
+
+ return HaveRewritablePHIs;
+}
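// --- Illustrative sketch, not part of the upstream patch ---
// How the select-cost budget above is consumed: every non-trivial PHI in the
// merge block is priced as one select via TTI, and SpeculativelyExecuteBB
// (below) refuses the transform once BudgetRemaining goes negative. The
// helper name is hypothetical; the TTI call mirrors the one above.
static void chargeSelectCosts(BasicBlock *BB, BasicBlock *ThenBB,
                              BasicBlock *EndBB,
                              const TargetTransformInfo &TTI,
                              TargetTransformInfo::TargetCostKind CostKind,
                              int &BudgetRemaining) {
  for (PHINode &PN : EndBB->phis()) {
    // A PHI with the same value on both edges needs no select at all.
    if (PN.getIncomingValueForBlock(BB) == PN.getIncomingValueForBlock(ThenBB))
      continue;
    BudgetRemaining -=
        TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
                               CmpInst::BAD_ICMP_PREDICATE, CostKind);
  }
}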
+
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -2005,6 +2213,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
BasicBlock *BB = BI->getParent();
BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+ int BudgetRemaining =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
// If ThenBB is actually on the false edge of the conditional branch, remember
// to swap the select operands later.
@@ -2037,6 +2247,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
continue;
}
+ // Skip pseudo probes. The consequence is we lose track of the branch
+ // probability for ThenBB, which is fine since the optimization here takes
+ // place regardless of the branch probability.
+ if (isa<PseudoProbeInst>(I)) {
+ SpeculatedDbgIntrinsics.push_back(I);
+ continue;
+ }
+
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculatedInstructions;
@@ -2082,50 +2300,13 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return false;
}
- // Check that the PHI nodes can be converted to selects.
- bool HaveRewritablePHIs = false;
- for (PHINode &PN : EndBB->phis()) {
- Value *OrigV = PN.getIncomingValueForBlock(BB);
- Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
-
- // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
- // Skip PHIs which are trivial.
- if (ThenV == OrigV)
- continue;
-
- // Don't convert to selects if we could remove undefined behavior instead.
- if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
- passingValueIsAlwaysUndefined(ThenV, &PN))
- return false;
-
- HaveRewritablePHIs = true;
- ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
- ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
- if (!OrigCE && !ThenCE)
- continue; // Known safe and cheap.
-
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
- return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
- unsigned MaxCost =
- 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- if (OrigCost + ThenCost > MaxCost)
- return false;
-
- // Account for the cost of an unfolded ConstantExpr which could end up
- // getting expanded into Instructions.
- // FIXME: This doesn't account for how many operations are combined in the
- // constant expression.
- ++SpeculatedInstructions;
- if (SpeculatedInstructions > 1)
- return false;
- }
-
- // If there are no PHIs to process, bail early. This helps ensure idempotence
- // as well.
- if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
+ // Check that we can insert the selects and that it's not too expensive to do
+ // so.
+ bool Convert = SpeculatedStore != nullptr;
+ Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
+ SpeculatedInstructions,
+ BudgetRemaining, TTI);
+ if (!Convert || BudgetRemaining < 0)
return false;
// If we get here, we can hoist the instruction and if-convert.
@@ -2199,6 +2380,12 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
if (Size > MaxSmallBlockSize)
return false; // Don't clone large BB's.
+
+ // Can't fold blocks that contain noduplicate or convergent calls.
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return false;
+
// We will delete Phis while threading, so Phis should not be accounted in
// block's size
if (!isa<PHINode>(I))
@@ -2221,8 +2408,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
- AssumptionCache *AC) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL, AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2240,13 +2427,6 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (!BlockIsSimpleEnoughToThreadThrough(BB))
return false;
- // Can't fold blocks that contain noduplicate or convergent calls.
- if (any_of(*BB, [](const Instruction &I) {
- const CallInst *CI = dyn_cast<CallInst>(&I);
- return CI && (CI->cannotDuplicate() || CI->isConvergent());
- }))
- return false;
-
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -2265,6 +2445,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
continue;
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
@@ -2273,6 +2455,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
RealDest->getParent(), RealDest);
BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
// Update PHI nodes.
@@ -2331,8 +2514,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
PredBBTI->setSuccessor(i, EdgeBB);
}
+ Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) || true;
+ return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
}
return false;
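// --- Illustrative sketch, not part of the upstream patch ---
// The edge-splitting trick used above, with its matching DT updates: rather
// than reasoning about PHIs and other predecessors in RealDest, insert a
// fresh block that only branches there. The helper name is hypothetical.
static BasicBlock *insertForwardingBlock(BasicBlock *Pred, BasicBlock *BB,
                                         BasicBlock *RealDest,
                                         DomTreeUpdater *DTU) {
  BasicBlock *EdgeBB =
      BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
                         RealDest->getParent(), RealDest);
  BranchInst::Create(RealDest, EdgeBB);
  Pred->getTerminator()->replaceUsesOfWith(BB, EdgeBB);
  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, EdgeBB, RealDest},
                       {DominatorTree::Insert, Pred, EdgeBB},
                       {DominatorTree::Delete, Pred, BB}});
  return EdgeBB;
}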
@@ -2341,7 +2530,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
- const DataLayout &DL) {
+ DomTreeUpdater *DTU, const DataLayout &DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -2374,11 +2563,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
int BudgetRemaining =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
+ Changed = true;
continue;
}
@@ -2386,7 +2577,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
BudgetRemaining, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
BudgetRemaining, TTI))
- return false;
+ return Changed;
}
// If we folded the first phi, PN dangles at this point. Refresh it. If
@@ -2413,7 +2604,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
isa<BinaryOperator>(IfCond)) &&
!CanHoistNotFromBothValues(PN->getIncomingValue(0),
PN->getIncomingValue(1)))
- return false;
+ return Changed;
// If all PHI nodes are promotable, check to make sure that all instructions
// in the predecessor blocks can be promoted as well. If not, we won't be able
@@ -2427,11 +2618,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
+ !isa<PseudoProbeInst>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
- return false;
+ return Changed;
}
}
@@ -2440,11 +2632,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
+ !isa<PseudoProbeInst>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
- return false;
+ return Changed;
}
}
assert(DomBlock && "Failed to find root DomBlock");
@@ -2487,7 +2680,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
Instruction *OldTI = DomBlock->getTerminator();
Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
+
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, DomBlock, BB});
+ for (auto *Successor : successors(DomBlock))
+ Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
+ }
+
OldTI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
return true;
}
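// --- Illustrative sketch, not part of the upstream patch ---
// The end state FoldTwoEntryPHINode drives at: each surviving PHI becomes a
// select on the branch condition. This hypothetical helper assumes incoming
// value 0 flows in on the true edge; the real code derives that from the
// branch's successor order.
static void phiToSelect(PHINode *PN, Value *IfCond, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  Value *TrueV = PN->getIncomingValue(0);
  Value *FalseV = PN->getIncomingValue(1);
  Value *Sel = Builder.CreateSelect(
      IfCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName());
  PN->replaceAllUsesWith(Sel);
  PN->eraseFromParent();
}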
@@ -2496,9 +2700,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
/// introducing a select if the return values disagree.
bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
IRBuilder<> &Builder) {
+ auto *BB = BI->getParent();
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // NOTE: destinations may match; this could be a degenerate uncond branch.
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
@@ -2515,10 +2721,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// there is no return value for this function, just change the
// branch into a return.
if (FalseRet->getNumOperands() == 0) {
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
+ TrueSucc->removePredecessor(BB);
+ FalseSucc->removePredecessor(BB);
Builder.CreateRetVoid();
EraseTerminatorAndDCECond(BI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
+ if (TrueSucc != FalseSucc)
+ Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -2530,10 +2743,10 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// Unwrap any PHI nodes in the return blocks.
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
if (TVPN->getParent() == TrueSucc)
- TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ TrueValue = TVPN->getIncomingValueForBlock(BB);
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
if (FVPN->getParent() == FalseSucc)
- FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+ FalseValue = FVPN->getIncomingValueForBlock(BB);
// In order for this transformation to be safe, we must be able to
// unconditionally execute both operands to the return. This is
@@ -2549,8 +2762,8 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
// Okay, we collected all the mapped values and checked them for sanity, and
// decided to really do this transformation. First, update the CFG.
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
+ TrueSucc->removePredecessor(BB);
+ FalseSucc->removePredecessor(BB);
// Insert select instructions where needed.
Value *BrCond = BI->getCondition();
@@ -2575,27 +2788,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
<< *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc);
EraseTerminatorAndDCECond(BI);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
+ if (TrueSucc != FalseSucc)
+ Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
+ DTU->applyUpdates(Updates);
+ }
return true;
}
-/// Return true if the given instruction is available
-/// in its predecessor block. If yes, the instruction will be removed.
-static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
- if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
- return false;
- for (Instruction &I : *PB) {
- Instruction *PBI = &I;
- // Check whether Inst and PBI generate the same value.
- if (Inst->isIdenticalTo(PBI)) {
- Inst->replaceAllUsesWith(PBI);
- Inst->eraseFromParent();
- return true;
- }
- }
- return false;
-}
-
/// Return true if either PBI or BI has branch weight available, and store
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
/// not have branch weight, use 1:1 as its weight.
@@ -2619,63 +2822,174 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}
+// Determine if the two branches share a common destination, and deduce the
+// glue (the binary opcode, and whether the predecessor's condition must be
+// inverted first) needed to join the branches' conditions to arrive at it.
+static Optional<std::pair<Instruction::BinaryOps, bool>>
+CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
+ assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
+ "Both blocks must end with a conditional branches.");
+ assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
+ "PredBB must be a predecessor of BB.");
+
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ return {{Instruction::Or, false}};
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ return {{Instruction::And, false}};
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ return {{Instruction::And, true}};
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ return {{Instruction::Or, true}};
+ return None;
+}
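// --- Usage sketch, not part of the upstream patch ---
// How the four successor pairings decode: a shared true-successor means the
// conditions are OR'ed, a shared false-successor means AND, and the two
// "crossed" cases additionally require inverting PBI's condition first.
// The function name below is hypothetical.
void decodeGlue(BranchInst *BI, BranchInst *PBI) {
  if (auto Glue = CheckIfCondBranchesShareCommonDestination(BI, PBI)) {
    Instruction::BinaryOps Opc = Glue->first; // Instruction::Or or ::And.
    bool InvertPredCond = Glue->second;       // Flip PBI's condition first?
    (void)Opc;
    (void)InvertPredCond;
  }
}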
+
+static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *PredBlock = PBI->getParent();
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ std::tie(Opc, InvertPredCond) =
+ *CheckIfCondBranchesShareCommonDestination(BI, PBI);
+
+ LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+
+ IRBuilder<> Builder(PBI);
+ // The builder is used to create instructions to eliminate the branch in BB.
+ // If BB's terminator has !annotation metadata, add it to the new
+ // instructions.
+ Builder.CollectMetadataToCopy(BB->getTerminator(),
+ {LLVMContext::MD_annotation});
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond =
+ Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ BasicBlock *UniqueSucc =
+ PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
+
+ // Before cloning instructions, notify the successor basic block that it
+ // is about to have a new predecessor. This will update PHI nodes,
+ // which will allow us to update live-out uses of bonus instructions.
+ AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
+
+ // Try to update branch weights.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight)) {
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, UniqueSucc, FalseDest
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
+ } else {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, UniqueSucc
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
+
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
+ setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
+
+ // Now, update the CFG.
+ PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
+
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
+ {DominatorTree::Delete, PredBlock, BB}});
+
+ // If BI was a loop latch, it may have had associated loop metadata.
+ // We need to copy it to the new latch, that is, PBI.
+ if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
+ PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
+
+ ValueToValueMapTy VMap; // maps original values to cloned values
+ CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+
+ // Now that the Cond was cloned into the predecessor basic block,
+ // or/and the two conditions together.
+ Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(
+ Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond"));
+ PBI->setCondition(NewCond);
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I)) {
+ Instruction *NewI = I.clone();
+ RemapInstruction(NewI, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ NewI->insertBefore(PBI);
+ }
+ }
+
+ ++NumFoldBranchToCommonDest;
+ return true;
+}
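// --- Worked example, not part of the upstream patch ---
// The weight math above with hypothetical counts: PBI favours BB 3:1 and BI
// is an even 1:1 coin flip. After folding, the common destination should be
// taken with probability 3/4 * 1/2 = 3/8, and indeed the split is 3 : 5.
static void demoFoldedBranchWeights() {
  uint64_t PredTrue = 3, PredFalse = 1; // PBI: br i1 %x, BB, FalseDest
  uint64_t SuccTrue = 1, SuccFalse = 1; // BI:  br i1 %y, UniqueSucc, FalseDest
  uint64_t NewTrue = PredTrue * SuccTrue;                        // 3 * 1 = 3
  uint64_t NewFalse =
      PredFalse * (SuccTrue + SuccFalse) + PredTrue * SuccFalse; // 2 + 3 = 5
  assert(NewTrue == 3 && NewFalse == 5 && "expected a 3:5 split");
  (void)NewTrue;
  (void)NewFalse;
}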
+
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
+ // If this block ends with an unconditional branch,
+ // let SpeculativelyExecuteBB() deal with it.
+ if (!BI->isConditional())
+ return false;
+
BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB);
bool Changed = false;
- Instruction *Cond = nullptr;
- if (BI->isConditional())
- Cond = dyn_cast<Instruction>(BI->getCondition());
- else {
- // For unconditional branch, check for a simple CFG pattern, where
- // BB has a single predecessor and BB's successor is also its predecessor's
- // successor. If such pattern exists, check for CSE between BB and its
- // predecessor.
- if (BasicBlock *PB = BB->getSinglePredecessor())
- if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
- if (PBI->isConditional() &&
- (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
- BI->getSuccessor(0) == PBI->getSuccessor(1))) {
- for (auto I = BB->instructionsWithoutDebug().begin(),
- E = BB->instructionsWithoutDebug().end();
- I != E;) {
- Instruction *Curr = &*I++;
- if (isa<CmpInst>(Curr)) {
- Cond = Curr;
- break;
- }
- // Quit if we can't remove this instruction.
- if (!tryCSEWithPredecessor(Curr, PB))
- return Changed;
- Changed = true;
- }
- }
+ TargetTransformInfo::TargetCostKind CostKind =
+ BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
+ : TargetTransformInfo::TCK_SizeAndLatency;
- if (!Cond)
- return Changed;
- }
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return Changed;
- // Make sure the instruction after the condition is the cond branch.
- BasicBlock::iterator CondIt = ++Cond->getIterator();
-
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(CondIt))
- ++CondIt;
-
- if (&*CondIt != BI)
- return Changed;
-
// Only allow this transformation if computing the condition doesn't involve
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
@@ -2683,19 +2997,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
- for (auto I = BB->begin(); Cond != &*I; ++I) {
- // Ignore dbg intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
+ for (Instruction &I : *BB) {
+ // Don't check the branch condition comparison itself.
+ if (&I == Cond)
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
- return Changed;
- // I has only one use and can be executed unconditionally.
- Instruction *User = dyn_cast<Instruction>(I->user_back());
- if (User == nullptr || User->getParent() != BB)
+ // Ignore dbg intrinsics, and the terminator.
+ if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
+ continue;
+ // I must be safe to execute unconditionally.
+ if (!isSafeToSpeculativelyExecute(&I))
return Changed;
- // I is used in the same BB. Since BI uses Cond and doesn't have more slots
- // to use any other instruction, User must be an instruction between next(I)
- // and Cond.
// Account for the cost of duplicating this instruction into each
// predecessor.
@@ -2715,9 +3026,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
return Changed;
// Finally, don't infinitely unroll conditional loops.
- BasicBlock *TrueDest = BI->getSuccessor(0);
- BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
- if (TrueDest == BB || FalseDest == BB)
+ if (is_contained(successors(BB), BB))
return Changed;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -2727,222 +3036,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
- SmallVector<PHINode *, 4> PHIs;
- if (!PBI || PBI->isUnconditional() ||
- (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
- (!BI->isConditional() &&
- !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
+ if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
continue;
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
- bool InvertPredCond = false;
-
- if (BI->isConditional()) {
- if (PBI->getSuccessor(0) == TrueDest) {
- Opc = Instruction::Or;
- } else if (PBI->getSuccessor(1) == FalseDest) {
- Opc = Instruction::And;
- } else if (PBI->getSuccessor(0) == FalseDest) {
- Opc = Instruction::And;
- InvertPredCond = true;
- } else if (PBI->getSuccessor(1) == TrueDest) {
- Opc = Instruction::Or;
- InvertPredCond = true;
- } else {
- continue;
- }
- } else {
- if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- Changed = true;
-
- IRBuilder<> Builder(PBI);
-
- // If we need to invert the condition in the pred block to match, do so now.
- if (InvertPredCond) {
- Value *NewCond = PBI->getCondition();
-
- if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
- CmpInst *CI = cast<CmpInst>(NewCond);
- CI->setPredicate(CI->getInversePredicate());
- } else {
- NewCond =
- Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
- }
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond;
+ if (auto Recipe = CheckIfCondBranchesShareCommonDestination(BI, PBI))
+ std::tie(Opc, InvertPredCond) = *Recipe;
+ else
+ continue;
- PBI->setCondition(NewCond);
- PBI->swapSuccessors();
- }
+ // Check the cost of inserting the necessary logic before performing the
+ // transformation.
+ if (TTI) {
+ Type *Ty = BI->getCondition()->getType();
+ unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+ if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
+ !isa<CmpInst>(PBI->getCondition())))
+ Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
- // If we have bonus instructions, clone them into the predecessor block.
- // Note that there may be multiple predecessor blocks, so we cannot move
- // bonus instructions to a predecessor block.
- ValueToValueMapTy VMap; // maps original values to cloned values
- // We already make sure Cond is the last instruction before BI. Therefore,
- // all instructions before Cond other than DbgInfoIntrinsic are bonus
- // instructions.
- for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
- if (isa<DbgInfoIntrinsic>(BonusInst))
+ if (Cost > BranchFoldThreshold)
continue;
- Instruction *NewBonusInst = BonusInst->clone();
-
- // When we fold the bonus instructions we want to make sure we
- // reset their debug locations in order to avoid stepping on dead
- // code caused by folding dead branches.
- NewBonusInst->setDebugLoc(DebugLoc());
-
- RemapInstruction(NewBonusInst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- VMap[&*BonusInst] = NewBonusInst;
-
- // If we moved a load, we cannot any longer claim any knowledge about
- // its potential value. The previous information might have been valid
- // only given the branch precondition.
- // For an analogous reason, we must also drop all the metadata whose
- // semantics we don't understand.
- NewBonusInst->dropUnknownNonDebugMetadata();
-
- PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
- NewBonusInst->takeName(&*BonusInst);
- BonusInst->setName(BonusInst->getName() + ".old");
}
- // Clone Cond into the predecessor basic block, and or/and the
- // two conditions together.
- Instruction *CondInPred = Cond->clone();
-
- // Reset the condition debug location to avoid jumping on dead code
- // as the result of folding dead branches.
- CondInPred->setDebugLoc(DebugLoc());
-
- RemapInstruction(CondInPred, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
- CondInPred->takeName(Cond);
- Cond->setName(CondInPred->getName() + ".old");
-
- if (BI->isConditional()) {
- Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
- PBI->setCondition(NewCond);
-
- uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- bool HasWeights =
- extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
- SuccTrueWeight, SuccFalseWeight);
- SmallVector<uint64_t, 8> NewWeights;
-
- if (PBI->getSuccessor(0) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, BB, FalseDest
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
- // TrueWeight for PBI * FalseWeight for BI.
- // We assume that total weights of a BranchInst can fit into 32 bits.
- // Therefore, we will not have overflow using 64-bit arithmetic.
- NewWeights.push_back(PredFalseWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredTrueWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(0, TrueDest);
- }
- if (PBI->getSuccessor(1) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, TrueDest, BB
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
- // FalseWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredFalseWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
- NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(1, FalseDest);
- }
- if (NewWeights.size() == 2) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(NewWeights);
-
- SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
- NewWeights.end());
- setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
- } else
- PBI->setMetadata(LLVMContext::MD_prof, nullptr);
- } else {
- // Update PHI nodes in the common successors.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- ConstantInt *PBI_C = cast<ConstantInt>(
- PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
- assert(PBI_C->getType()->isIntegerTy(1));
- Instruction *MergedCond = nullptr;
- if (PBI->getSuccessor(0) == TrueDest) {
- // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
- // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
- // is false: !PBI_Cond and BI_Value
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
- "and.cond"));
- if (PBI_C->isOne())
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
- } else {
- // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
- // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
- // is false: PBI_Cond and BI_Value
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
- if (PBI_C->isOne()) {
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, NotCond, MergedCond, "or.cond"));
- }
- }
- // Update PHI Node.
- PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
- }
-
- // PBI is changed to branch to TrueDest below. Remove itself from
- // potential phis from all other successors.
- if (MSSAU)
- MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
-
- // Change PBI from Conditional to Unconditional.
- BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorAndDCECond(PBI, MSSAU);
- PBI = New_PBI;
- }
-
- // If BI was a loop latch, it may have had associated loop metadata.
- // We need to copy it to the new latch, that is, PBI.
- if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
- PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
-
- // TODO: If BB is reachable from all paths through PredBlock, then we
- // could replace PBI's branch probabilities with BI's.
-
- // Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB) {
- if (isa<DbgInfoIntrinsic>(I)) {
- Instruction *NewI = I.clone();
- RemapInstruction(NewI, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- NewI->insertBefore(PBI);
- }
- }
-
- return Changed;
+ return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
}
return Changed;
}
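// --- Usage sketch, not part of the upstream patch ---
// All analysis updaters in the new signature are optional; nullptr simply
// skips the corresponding incremental update. The threshold value and the
// wrapper name here are hypothetical.
bool tryFoldIntoPredecessors(BranchInst *BI, DomTreeUpdater &DTU,
                             const TargetTransformInfo &TTI) {
  return FoldBranchToCommonDest(BI, &DTU, /*MSSAU=*/nullptr, &TTI,
                                /*BonusInstThreshold=*/1);
}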
@@ -3015,12 +3133,10 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
return PHI;
}
-static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
- BasicBlock *QTB, BasicBlock *QFB,
- BasicBlock *PostBB, Value *Address,
- bool InvertPCond, bool InvertQCond,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+static bool mergeConditionalStoreToAddress(
+ BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
+ DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
// For every pointer, there must be exactly two stores, one coming from
// PTB or PFB, and the other from QTB or QFB. We don't support more than one
// store (to any address) in PTB,PFB or QTB,QFB.
@@ -3095,7 +3211,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
return true;
};
- const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore};
+ const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
if (!MergeCondStoresAggressively &&
(!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
!IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
@@ -3109,8 +3225,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
// If QTB does not exist, then QFB's only predecessor has a conditional
// branch to QFB and PostBB.
BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
- BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred},
- "condstore.split");
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
if (!NewBB)
return false;
PostBB = NewBB;
@@ -3139,8 +3255,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
QPred = QB.CreateNot(QPred);
Value *CombinedPred = QB.CreateOr(PPred, QPred);
- auto *T =
- SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+ auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
+ /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
AAMDNodes AAMD;
@@ -3160,7 +3277,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
}
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
// The intention here is to find diamonds or triangles (see below) where each
// conditional block contains a store to the same address. Both of these
@@ -3262,16 +3379,17 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
bool Changed = false;
for (auto *Address : CommonAddresses)
- Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI);
+ Changed |=
+ mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
+ InvertPCond, InvertQCond, DTU, DL, TTI);
return Changed;
}
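// --- Illustrative sketch, not part of the upstream patch ---
// The shape of the merged store that mergeConditionalStoreToAddress emits:
// OR the two (possibly inverted) store predicates, split off a guarded
// block, and store the PHI'd value once. Parameter names are hypothetical;
// the SplitBlockAndInsertIfThen call mirrors the one above.
static void emitMergedStore(Value *PPred, Value *QPred, Value *StoredVal,
                            Value *Address, Instruction *InsertBefore,
                            DomTreeUpdater *DTU) {
  IRBuilder<> QB(InsertBefore);
  Value *CombinedPred = QB.CreateOr(PPred, QPred);
  Instruction *T = SplitBlockAndInsertIfThen(
      CombinedPred, &*QB.GetInsertPoint(), /*Unreachable=*/false,
      /*BranchWeights=*/nullptr, DTU);
  QB.SetInsertPoint(T);
  QB.CreateStore(StoredVal, Address);
}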
-
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hosting safety.
-static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU) {
// TODO: This can be generalized in two important ways:
// 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
// values from the PBI edge.
@@ -3294,15 +3412,25 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
NoSideEffects(*BI->getParent())) {
- BI->getSuccessor(1)->removePredecessor(BI->getParent());
+ auto *OldSuccessor = BI->getSuccessor(1);
+ OldSuccessor->removePredecessor(BI->getParent());
BI->setSuccessor(1, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
return true;
}
if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
NoSideEffects(*BI->getParent())) {
- BI->getSuccessor(0)->removePredecessor(BI->getParent());
+ auto *OldSuccessor = BI->getSuccessor(0);
+ OldSuccessor->removePredecessor(BI->getParent());
BI->setSuccessor(0, IfFalseBB);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
+ {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
return true;
}
return false;
@@ -3313,6 +3441,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(PBI->isConditional() && BI->isConditional());
@@ -3366,7 +3495,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If the previous block ended with a widenable branch, determine if reusing
// the target block is profitable and legal. This will have the effect of
// "widening" PBI, but doesn't require us to reason about hosting safety.
- if (tryWidenCondBranchToCondBranch(PBI, BI))
+ if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
@@ -3376,7 +3505,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI))
+ if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return true;
// If this is a conditional branch in an empty block, and if any
@@ -3419,6 +3548,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// case, it would be unsafe to hoist the operation into a select instruction.
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+ BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
++II, ++NumPhis) {
@@ -3444,6 +3574,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
+ SmallVector<DominatorTree::UpdateType, 5> Updates;
+
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -3457,6 +3589,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
BasicBlock *InfLoopBlock =
BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
OtherDest = InfLoopBlock;
}
@@ -3483,6 +3616,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
+ Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
+ Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// Update branch weight for PBI.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
@@ -3562,6 +3701,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *FalseBB,
uint32_t TrueWeight,
uint32_t FalseWeight) {
+ auto *BB = OldTerm->getParent();
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -3569,6 +3709,8 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *KeepEdge1 = TrueBB;
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
+
// Then remove the rest.
for (BasicBlock *Succ : successors(OldTerm)) {
// Make sure only to keep exactly one copy of each edge.
@@ -3576,9 +3718,13 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
KeepEdge2 = nullptr;
- else
- Succ->removePredecessor(OldTerm->getParent(),
+ else {
+ Succ->removePredecessor(BB,
/*KeepOneInputPHIs=*/true);
+
+ if (Succ != TrueBB && Succ != FalseBB)
+ RemovedSuccessors.insert(Succ);
+ }
}
IRBuilder<> Builder(OldTerm);
@@ -3586,11 +3732,11 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
// Insert an appropriate new terminator.
if (!KeepEdge1 && !KeepEdge2) {
- if (TrueBB == FalseBB)
+ if (TrueBB == FalseBB) {
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
Builder.CreateBr(TrueBB);
- else {
+ } else {
// We found both of the successors we were looking for.
// Create a conditional branch sharing the condition of the select.
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
@@ -3605,15 +3751,25 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
// One of the selected values was a successor, but the other wasn't.
// Insert an unconditional branch to the one that was found;
// the edge to the one that wasn't must be unreachable.
- if (!KeepEdge1)
+ if (!KeepEdge1) {
// Only TrueBB was found.
Builder.CreateBr(TrueBB);
- else
+ } else {
// Only FalseBB was found.
Builder.CreateBr(FalseBB);
+ }
}
EraseTerminatorAndDCECond(OldTerm);
+
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.reserve(RemovedSuccessors.size());
+ for (auto *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+
return true;
}
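// --- Illustrative sketch, not part of the upstream patch ---
// Why a SmallSetVector is used above: a terminator such as a switch can list
// the same successor several times, but the DomTreeUpdater must be told
// about each deleted edge exactly once. The helper name is hypothetical.
static void recordDeletedEdgesOnce(BasicBlock *BB,
                                   ArrayRef<BasicBlock *> NoLongerSuccs,
                                   DomTreeUpdater *DTU) {
  SmallSetVector<BasicBlock *, 2> Removed(NoLongerSuccs.begin(),
                                          NoLongerSuccs.end());
  SmallVector<DominatorTree::UpdateType, 2> Updates;
  Updates.reserve(Removed.size());
  for (BasicBlock *Succ : Removed)
    Updates.push_back({DominatorTree::Delete, BB, Succ});
  if (DTU)
    DTU->applyUpdates(Updates);
}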
@@ -3768,6 +3924,8 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(DefaultCst);
ICI->eraseFromParent();
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
// Okay, the switch goes to this block on a default value. Add an edge from
// the switch to the merge point on the compared value.
BasicBlock *NewBB =
@@ -3781,13 +3939,17 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
SIW.setSuccessorWeight(0, *NewW);
}
SIW.addCase(Cst, NewBB, NewW);
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
}
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
Builder.CreateBr(SuccBlock);
+ Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
PHIUse->addIncoming(NewCst, NewBB);
+ if (DTU)
+ DTU->applyUpdates(Updates);
return true;
}
@@ -3821,7 +3983,7 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
if (UsedICmps <= 1)
return false;
- bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
+ bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
// There might be duplicate constants in the list, which the switch
// instruction can't handle, remove them now.
@@ -3853,12 +4015,15 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
<< " cases into SWITCH. BB is:\n"
<< *BB);
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
if (ExtraCase) {
- BasicBlock *NewBB =
- BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
+ BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "switch.early.test");
+
// Remove the uncond branch added to the old block.
Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
@@ -3870,6 +4035,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
OldTI->eraseFromParent();
+ Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
+
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
@@ -3905,6 +4072,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
// Erase the old branch instruction.
EraseTerminatorAndDCECond(BI);
+ if (DTU)
+ DTU->applyUpdates(Updates);
LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
@@ -3921,17 +4090,36 @@ bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
}
+// Check if a cleanup block is "empty", i.e. contains only debug info and
+// lifetime-end intrinsics.
+static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
+ for (Instruction &I : R) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ return false;
+
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
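// --- Usage sketch, not part of the upstream patch ---
// The same predicate now serves both resume-simplification paths by taking
// an explicit half-open range instead of a (first, last) instruction pair.
// The wrapper name is hypothetical; the call mirrors the ones below.
static bool resumeBlockIsTrivial(LandingPadInst *LPInst, ResumeInst *RI) {
  // Everything strictly between the landingpad and the resume must be benign.
  return isCleanupBlockEmpty(
      make_range<Instruction *>(LPInst->getNextNode(), RI));
}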
+
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
- // Check that there are no other instructions except for debug intrinsics
- // between the phi of landing pads (RI->getValue()) and resume instruction.
- BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
- E = RI->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I))
- return false;
+ // Check that there are no other instructions except for debug and lifetime
+ // intrinsics between the phis and the resume instruction.
+ if (!isCleanupBlockEmpty(
+ make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
+ return false;
SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
auto *PhiLPInst = cast<PHINode>(RI->getValue());
@@ -3952,17 +4140,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
if (IncomingValue != LandingPad)
continue;
- bool isTrivial = true;
-
- I = IncomingBB->getFirstNonPHI()->getIterator();
- E = IncomingBB->getTerminator()->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I)) {
- isTrivial = false;
- break;
- }
-
- if (isTrivial)
+ if (isCleanupBlockEmpty(
+ make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
TrivialUnwindBlocks.insert(IncomingBB);
}
@@ -3981,7 +4160,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
PI != PE;) {
BasicBlock *Pred = *PI++;
- removeUnwindEdge(Pred);
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
}
// In each SimplifyCFG run, only the current processed block can be erased.
@@ -3991,37 +4171,21 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
// predecessors.
TrivialBB->getTerminator()->eraseFromParent();
new UnreachableInst(RI->getContext(), TrivialBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
}
// Delete the resume block if all its predecessors have been removed.
- if (pred_empty(BB))
- BB->eraseFromParent();
+ if (pred_empty(BB)) {
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
+ }
return !TrivialUnwindBlocks.empty();
}
-// Check if cleanup block is empty
-static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) {
- BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator();
- while (++I != E) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II)
- return false;
-
- Intrinsic::ID IntrinsicID = II->getIntrinsicID();
- switch (IntrinsicID) {
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::lifetime_end:
- break;
- default:
- return false;
- }
- }
- return true;
-}
-
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
@@ -4030,23 +4194,26 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
"Resume must unwind the exception that caused control to here");
// Check that there are no other instructions except for debug and lifetime
// intrinsics.
- if (!isCleanupBlockEmpty(LPInst, RI))
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(LPInst->getNextNode(), RI)))
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
BasicBlock *Pred = *PI++;
- removeUnwindEdge(Pred);
+ removeUnwindEdge(Pred, DTU);
+ ++NumInvokes;
}
// The landingpad is now unreachable. Zap it.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
return true;
}
-static bool removeEmptyCleanup(CleanupReturnInst *RI) {
+static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
// If this is a trivial cleanup pad that executes no instructions, it can be
// eliminated. If the cleanup pad continues to the caller, any predecessor
// that is an EH pad will be updated to continue to the caller and any
@@ -4067,7 +4234,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
return false;
// Check that there are no other instructions except for benign intrinsics.
- if (!isCleanupBlockEmpty(CPInst, RI))
+ if (!isCleanupBlockEmpty(
+ make_range<Instruction *>(CPInst->getNextNode(), RI)))
return false;
// If the cleanup return we are simplifying unwinds to the caller, this will
@@ -4152,19 +4320,32 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
}
}
+ std::vector<DominatorTree::UpdateType> Updates;
+
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
// The iterator must be updated here because we are removing this pred.
BasicBlock *PredBB = *PI++;
if (UnwindDest == nullptr) {
- removeUnwindEdge(PredBB);
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(PredBB, DTU);
+ ++NumInvokes;
} else {
Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
+ Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
}
- // The cleanup pad is now unreachable. Zap it.
- BB->eraseFromParent();
+ if (DTU) {
+ DTU->applyUpdates(Updates);
+ DTU->deleteBB(BB);
+ } else
+ // The cleanup pad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
return true;
}
@@ -4211,7 +4392,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
if (mergeCleanupPad(RI))
return true;
- if (removeEmptyCleanup(RI))
+ if (removeEmptyCleanup(RI, DTU))
return true;
return false;
@@ -4242,15 +4423,16 @@ bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU);
}
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
}
return true;
@@ -4330,18 +4512,26 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
if (&BB->front() != UI)
return Changed;
- SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
+ std::vector<DominatorTree::UpdateType> Updates;
+
+ SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- Instruction *TI = Preds[i]->getTerminator();
+ auto *Predecessor = Preds[i];
+ Instruction *TI = Predecessor->getTerminator();
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isUnconditional()) {
- assert(BI->getSuccessor(0) == BB && "Incorrect CFG");
+ // We could either have a proper unconditional branch,
+ // or a degenerate conditional branch with matching destinations.
+ if (all_of(BI->successors(),
+ [BB](auto *Successor) { return Successor == BB; })) {
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
} else {
+ assert(BI->isConditional() && "Can't get here with an uncond branch.");
Value* Cond = BI->getCondition();
+ assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "The destinations are guaranteed to be different here.");
if (BI->getSuccessor(0) == BB) {
Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
@@ -4353,6 +4543,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
EraseTerminatorAndDCECond(BI);
Changed = true;
}
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
SwitchInstProfUpdateWrapper SU(*SI);
for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
@@ -4365,14 +4556,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
e = SU->case_end();
Changed = true;
}
+ // Note that the default destination can't be removed!
+ if (SI->getDefaultDest() != BB)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
}
} else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
if (CSI->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
continue;
}
@@ -4387,35 +4587,53 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
}
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
if (CSI->getNumHandlers() == 0) {
- BasicBlock *CatchSwitchBB = CSI->getParent();
if (CSI->hasUnwindDest()) {
- // Redirect preds to the unwind dest
- CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
+ // Redirect all predecessors of the block containing CatchSwitchInst
+ // to instead branch to the CatchSwitchInst's unwind destination.
+ for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
+ Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor,
+ CSI->getUnwindDest()});
+ Updates.push_back(
+ {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor});
+ }
+ Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
} else {
// Rewrite all preds to unwind to caller (or from invoke to call).
- SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
+ if (DTU)
+ DTU->applyUpdates(Updates);
+ Updates.clear();
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
for (BasicBlock *EHPred : EHPreds)
- removeUnwindEdge(EHPred);
+ removeUnwindEdge(EHPred, DTU);
}
// The catchswitch is no longer reachable.
new UnreachableInst(CSI->getContext(), CSI);
CSI->eraseFromParent();
Changed = true;
}
- } else if (isa<CleanupReturnInst>(TI)) {
+ } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ (void)CRI;
+ assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
+ "Expected to always have an unwind to BB.");
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
}
}
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
return true;
}
@@ -4433,15 +4651,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
-static void createUnreachableSwitchDefault(SwitchInst *Switch) {
+static void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefaultBlock =
- SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), "");
+ auto *BB = Switch->getParent();
+ BasicBlock *NewDefaultBlock = SplitBlockPredecessors(
+ Switch->getDefaultDest(), Switch->getParent(), "", DTU);
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
Switch->setDefaultDest(&*NewDefaultBlock);
- SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front());
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock},
+ {DominatorTree::Delete, BB, OrigDefaultBlock}});
+ SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ for (auto *Successor : successors(NewDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
auto *NewTerminator = NewDefaultBlock->getTerminator();
new UnreachableInst(Switch->getContext(), NewTerminator);
EraseTerminatorAndDCECond(NewTerminator);
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
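// For intuition, the net effect of the function above (block names invented;
// the real code gets here via SplitBlockPredecessors + SplitBlock rather than
// by building a block directly):
//
//   Before:  switch i32 %x, label %default [...]
//   After:   switch i32 %x, label %default.unr [...]
//            default.unr:                         ; fresh block
//              unreachable
//
// The original %default keeps its PHIs and code but loses its edge from the
// switch, which is why the BB->OrigDefaultBlock edge is deleted above.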
/// Turn a switch with two reachable destinations into an integer range
@@ -4453,6 +4682,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ auto *BB = SI->getParent();
+
// Partition the cases into two sets with different destinations.
BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
BasicBlock *DestB = nullptr;
@@ -4556,17 +4787,23 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
// Clean up the default block - it may have phis or other instructions before
// the unreachable terminator.
if (!HasDefault)
- createUnreachableSwitchDefault(SI);
+ createUnreachableSwitchDefault(SI, DTU);
+
+ auto *UnreachableDefault = SI->getDefaultDest();
// Drop the switch.
SI->eraseFromParent();
+ if (!HasDefault && DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
+
return true;
}
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
+ AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
@@ -4580,11 +4817,15 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
+ SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
for (auto &Case : SI->cases()) {
+ auto *Successor = Case.getCaseSuccessor();
+ ++NumPerSuccessorCases[Successor];
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
+ --NumPerSuccessorCases[Successor];
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
<< " is dead.\n");
}
@@ -4602,7 +4843,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
- createUnreachableSwitchDefault(SI);
+ createUnreachableSwitchDefault(SI, DTU);
return true;
}
@@ -4619,6 +4860,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SIW.removeCase(CaseI);
}
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
+ if (DTU)
+ DTU->applyUpdates(Updates);
+
return true;
}
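The per-successor counting above matters because a switch can reach the same successor through several case values; the DominatorTree edge may only be deleted once the last such case dies. A self-contained sketch of the same bookkeeping (hypothetical helper; assumes every value in DeadCases is a real case of SI, and ignores profile metadata, which the real code maintains through SwitchInstProfUpdateWrapper):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Instructions.h"
#include <vector>
using namespace llvm;

// Remove the given (known-dead) case values from SI, deleting the DT edge to
// a successor only once its last remaining case has been removed.
static void removeDeadCases(SwitchInst *SI, ArrayRef<ConstantInt *> DeadCases,
                            DomTreeUpdater *DTU) {
  SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
  for (auto &Case : SI->cases())
    ++NumPerSuccessorCases[Case.getCaseSuccessor()];

  for (ConstantInt *DeadCase : DeadCases) {
    auto CaseI = SI->findCaseValue(DeadCase);
    --NumPerSuccessorCases[CaseI->getCaseSuccessor()];
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SI->removeCase(CaseI);
  }

  std::vector<DominatorTree::UpdateType> Updates;
  for (const auto &I : NumPerSuccessorCases)
    if (I.second == 0)
      Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
  if (DTU)
    DTU->applyUpdates(Updates);
}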
@@ -4974,30 +5222,41 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
// a select, fixing up PHI nodes and basic blocks.
static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
Value *SelectValue,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder,
+ DomTreeUpdater *DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+
BasicBlock *SelectBB = SI->getParent();
+ BasicBlock *DestBB = PHI->getParent();
+
+ if (!is_contained(predecessors(DestBB), SelectBB))
+ Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
+ Builder.CreateBr(DestBB);
+
+ // Remove the switch.
+
while (PHI->getBasicBlockIndex(SelectBB) >= 0)
PHI->removeIncomingValue(SelectBB);
PHI->addIncoming(SelectValue, SelectBB);
- Builder.CreateBr(PHI->getParent());
-
- // Remove the switch.
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
- if (Succ == PHI->getParent())
+ if (Succ == DestBB)
continue;
Succ->removePredecessor(SelectBB);
+ Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
}
SI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
}
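// Shape of the conversion performed above (values and block names invented):
//
//   Before:  switch i32 %x, label %bb.def [ i32 1, label %bb.case ]
//            ...
//            %p = phi i32 [ 10, %bb.case ], [ 20, %bb.def ]
//
//   After:   %cmp = icmp eq i32 %x, 1
//            %sel = select i1 %cmp, i32 10, i32 20
//            br label %merge
//            ...
//            %p = phi i32 [ %sel, %entry ]
//
// Only the SelectBB->DestBB edge survives; every other successor edge of the
// switch is torn down, which is exactly the Updates list assembled above.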
/// If the switch is only used to initialize one or more
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
@@ -5017,7 +5276,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
Value *SelectValue =
ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
if (SelectValue) {
- RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
+ RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
return true;
}
// The switch couldn't be converted into a select.
@@ -5402,11 +5661,12 @@ static void reuseTableCompare(
/// successor block with different constant values, replace the switch with
/// lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
+ DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
- Function *Fn = SI->getParent()->getParent();
+ BasicBlock *BB = SI->getParent();
+ Function *Fn = BB->getParent();
// Only build lookup table when we have a target that supports it or the
// attribute is not set.
if (!TTI.shouldBuildLookupTables() ||
@@ -5500,6 +5760,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
+ std::vector<DominatorTree::UpdateType> Updates;
+
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(
@@ -5532,6 +5794,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
// Note: We call removePredecessor later since we need to be able to get the
// PHI value for the default case in case we're using a bit mask.
} else {
@@ -5539,6 +5802,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
}
// Populate the BB that does the lookups.
@@ -5576,16 +5840,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *LoBit = Builder.CreateTrunc(
Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
-
+ Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
+ Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
Builder.SetInsertPoint(LookupBB);
- AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
}
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
- SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ SI->getDefaultDest()->removePredecessor(BB,
/*KeepOneInputPHIs=*/true);
+ Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
}
bool ReturnedEarly = false;
@@ -5622,19 +5888,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
PHI->addIncoming(Result, LookupBB);
}
- if (!ReturnedEarly)
+ if (!ReturnedEarly) {
Builder.CreateBr(CommonDest);
+ Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
+ }
// Remove the switch.
+ SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == SI->getDefaultDest())
continue;
- Succ->removePredecessor(SI->getParent());
+ Succ->removePredecessor(BB);
+ RemovedSuccessors.insert(Succ);
}
SI->eraseFromParent();
+ if (DTU) {
+ for (BasicBlock *RemovedSuccessor : RemovedSuccessors)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ DTU->applyUpdates(Updates);
+ }
+
++NumLookupTables;
if (NeedMask)
++NumLookupTablesHoles;
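// End state of the transform, with invented names: a dense switch becomes a
// range check plus a load from a private constant table:
//
//   %idx = sub i32 %x, %min                 ; TableIndex
//   %inrange = icmp ult i32 %idx, 4         ; omitted for covered tables
//   br i1 %inrange, label %switch.lookup, label %default
// switch.lookup:
//   %gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table,
//                                 i32 0, i32 %idx
//   %val = load i32, i32* %gep
//   br label %common.dest
//
// The Updates recorded above mirror precisely these added and removed edges.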
@@ -5770,10 +6046,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
return requestResimplify();
// Remove unreachable cases.
- if (eliminateDeadSwitchCases(SI, Options.AC, DL))
+ if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
return requestResimplify();
- if (switchToSelect(SI, Builder, DL, TTI))
+ if (switchToSelect(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
@@ -5785,7 +6061,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DL, TTI))
+ SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
@@ -5800,9 +6076,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
+ SmallSetVector<BasicBlock *, 8> RemovedSuccs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+ if (!Dest->hasAddressTaken())
+ RemovedSuccs.insert(Dest);
Dest->removePredecessor(BB);
IBI->removeDestination(i);
--i;
@@ -5811,6 +6090,14 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
}
}
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(RemovedSuccs.size());
+ for (auto *RemovedSucc : RemovedSuccs)
+ Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
+ DTU->applyUpdates(Updates);
+ }
+
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
new UnreachableInst(IBI->getContext(), IBI);
@@ -5854,7 +6141,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
- BasicBlock *BB) {
+ BasicBlock *BB, DomTreeUpdater *DTU) {
auto Succ = BB->getUniqueSuccessor();
assert(Succ);
// If there's a phi in the successor block, we'd likely have to introduce
@@ -5875,6 +6162,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
if (!BI2 || !BI2->isIdenticalTo(BI))
continue;
+ std::vector<DominatorTree::UpdateType> Updates;
+
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
SmallPtrSet<BasicBlock *, 16> Preds;
@@ -5884,6 +6173,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
"unexpected successor");
II->setUnwindDest(OtherPred);
+ Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
}
// The debug info in OtherPred doesn't cover the merged control flow that
@@ -5899,11 +6190,14 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
Succs.insert(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
Succ->removePredecessor(BB);
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
IRBuilder<> Builder(BI);
Builder.CreateUnreachable();
BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates(Updates);
return true;
}
return false;
@@ -5928,11 +6222,11 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
- (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
+ (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
+ (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
- !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
return true;
// If the only instruction in the block is a seteq/setne comparison against a
@@ -5951,7 +6245,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
- if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
+ if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
return true;
}
@@ -5959,7 +6253,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
return requestResimplify();
return false;
}
@@ -6022,7 +6317,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ Options.BonusInstThreshold))
return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
@@ -6031,8 +6327,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, TTI))
- return requestResimplify();
+ if (HoistCommon && Options.HoistCommonInsts)
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
@@ -6056,14 +6353,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL, Options.AC))
+ if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI))
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
return requestResimplify();
// Look for diamond patterns.
@@ -6071,14 +6368,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI, DL, TTI))
+ if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();
return false;
}
/// Check if passing a value to an instruction will cause undefined behavior.
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false) {
Constant *C = dyn_cast<Constant>(V);
if (!C)
return false;
@@ -6101,12 +6398,15 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Look through GEPs. A load from a GEP derived from NULL is still undefined
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
- if (GEP->getPointerOperand() == I)
- return passingValueIsAlwaysUndefined(V, GEP);
+ if (GEP->getPointerOperand() == I) {
+ if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
+ PtrValueMayBeModified = true;
+ return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
+ }
// Look through bitcasts.
if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
- return passingValueIsAlwaysUndefined(V, BC);
+ return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
@@ -6121,24 +6421,51 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
SI->getPointerAddressSpace())) &&
SI->getPointerOperand() == I;
- // A call to null is undefined.
- if (auto *CB = dyn_cast<CallBase>(Use))
- return !NullPointerIsDefined(CB->getFunction()) &&
- CB->getCalledOperand() == I;
+ if (auto *CB = dyn_cast<CallBase>(Use)) {
+ if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
+ return false;
+ // A call to null is undefined.
+ if (CB->getCalledOperand() == I)
+ return true;
+
+ if (C->isNullValue()) {
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) &&
+ CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ // Passing null to a nonnull+noundef argument is undefined.
+ return !PtrValueMayBeModified;
+ }
+ }
+ } else if (isa<UndefValue>(C)) {
+ // Passing undef to a noundef argument is undefined.
+ for (const llvm::Use &Arg : CB->args())
+ if (Arg == I) {
+ unsigned ArgIdx = CB->getArgOperandNo(&Arg);
+ if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ // Passing undef to a noundef argument is undefined.
+ return true;
+ }
+ }
+ }
+ }
}
return false;
}
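// Illustration of the new CallBase reasoning (invented IR): if %p is null on
// the edge from %pred, the call below is immediate UB, so
// removeUndefIntroducingPredecessor (just below) may delete that edge:
//
//   bb:
//     %p = phi i8* [ null, %pred ], [ %q, %other ]
//     call void @f(i8* nonnull noundef %p)
//
// PtrValueMayBeModified guards the null case: once the value has passed
// through a GEP that is not inbounds-with-all-zero-indices, it may no longer
// be null when it reaches the call, so only the unmodified-pointer case may
// conclude UB. Undef, by contrast, survives such operations, which is why the
// undef branch above does not consult the flag.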
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
-static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
+ DomTreeUpdater *DTU) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
+ BasicBlock *Predecessor = PHI.getIncomingBlock(i);
+ Instruction *T = Predecessor->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
- BB->removePredecessor(PHI.getIncomingBlock(i));
+ BB->removePredecessor(Predecessor);
// Turn unconditional branches into unreachables and remove the dead
// destination from conditional branches.
if (BI->isUnconditional())
@@ -6147,6 +6474,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
: BI->getSuccessor(0));
BI->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
return true;
}
// TODO: SwitchInst.
@@ -6155,7 +6484,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
return false;
}
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6166,28 +6495,29 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
- DeleteDeadBlock(BB);
+ DeleteDeadBlock(BB, DTU);
return true;
}
// Check to see if we can constant propagate this terminator instruction
// away...
- Changed |= ConstantFoldTerminator(BB, true);
+ Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
+ /*TLI=*/nullptr, DTU);
// Check for and eliminate duplicate PHI nodes in this block.
Changed |= EliminateDuplicatePHINodes(BB);
// Check for and remove branches that will always cause undefined behavior.
- Changed |= removeUndefIntroducingPredecessor(BB);
+ Changed |= removeUndefIntroducingPredecessor(BB, DTU);
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
- if (MergeBlockIntoPredecessor(BB))
+ if (MergeBlockIntoPredecessor(BB, DTU))
return true;
if (SinkCommon && Options.SinkCommonInsts)
- Changed |= SinkCommonCodeFromPredecessors(BB);
+ Changed |= SinkCommonCodeFromPredecessors(BB, DTU);
IRBuilder<> Builder(BB);
@@ -6196,7 +6526,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL);
}
Instruction *Terminator = BB->getTerminator();
@@ -6228,7 +6558,23 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
return Changed;
}
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
+ bool Changed = simplifyOnceImpl(BB);
+
+ assert((!RequireAndPreserveDomTree ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Failed to maintain validity of domtree!");
+
+ return Changed;
+}
+
bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ assert((!RequireAndPreserveDomTree ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
bool Changed = false;
// Repeated simplify BB as long as resimplification is requested.
@@ -6244,9 +6590,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
}
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const SimplifyCFGOptions &Options,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
- return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
- Options)
+ DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
+ ArrayRef<WeakVH> LoopHeaders) {
+ return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr,
+ BB->getModule()->getDataLayout(), LoopHeaders, Options)
.run(BB);
}
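A sketch of how a caller might drive the new entry point (hypothetical wrapper; header paths and the defaults for the trailing parameters are assumptions from this import, and note that the definition above forwards DTU only when RequireAndPreserveDomTree is set):

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
using namespace llvm;

static bool simplifyOneBlock(BasicBlock &BB, DominatorTree &DT,
                             const TargetTransformInfo &TTI) {
  // The lazy strategy lets the many small applyUpdates() batches coalesce;
  // the updater flushes pending updates before DT is queried or destroyed.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  return simplifyCFG(&BB, TTI, &DTU, SimplifyCFGOptions());
}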
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d3d0c3341908..290c04a7ad10 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -191,15 +191,15 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
- ICmpInst::Predicate InvariantPredicate;
- const SCEV *InvariantLHS, *InvariantRHS;
-
auto *PN = dyn_cast<PHINode>(IVOperand);
if (!PN)
return false;
- if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
- InvariantLHS, InvariantRHS))
+ auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L);
+ if (!LIP)
return false;
+ ICmpInst::Predicate InvariantPredicate = LIP->Pred;
+ const SCEV *InvariantLHS = LIP->LHS;
+ const SCEV *InvariantRHS = LIP->RHS;
// Rewrite the comparison to a loop invariant comparison if it can be done
// cheaply, where cheaply means "we don't need to emit any new
@@ -477,6 +477,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
if (WO->use_empty())
WO->eraseFromParent();
+ Changed = true;
return true;
}
@@ -967,3 +968,1122 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
}
} // namespace llvm
+
+//===----------------------------------------------------------------------===//
+// Widen Induction Variables - Extend the width of an IV to cover its
+// widest uses.
+//===----------------------------------------------------------------------===//
+
+class WidenIV {
+ // Parameters
+ PHINode *OrigPhi;
+ Type *WideType;
+
+ // Context
+ LoopInfo *LI;
+ Loop *L;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
+ bool UsePostIncrementRanges;
+
+ // Statistics
+ unsigned NumElimExt = 0;
+ unsigned NumWidened = 0;
+
+ // Result
+ PHINode *WidePhi = nullptr;
+ Instruction *WideInc = nullptr;
+ const SCEV *WideIncExpr = nullptr;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
+
+ SmallPtrSet<Instruction *, 16> Widened;
+
+ enum ExtendKind { ZeroExtended, SignExtended, Unknown };
+
+ // A map tracking the kind of extension used to widen each narrow IV
+ // and narrow IV user.
+ // Key: pointer to a narrow IV or IV user.
+ // Value: the kind of extension used to widen this Instruction.
+ DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
+
+ using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
+
+ // A map with control-dependent ranges for post increment IV uses. The key is
+ // a pair of IV def and a use of this def denoting the context. The value is
+ // a ConstantRange representing possible values of the def at the given
+ // context.
+ DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+ Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+ Instruction *UseI) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ return It == PostIncRangeInfos.end()
+ ? Optional<ConstantRange>(None)
+ : Optional<ConstantRange>(It->second);
+ }
+
+ void calculatePostIncRanges(PHINode *OrigPhi);
+ void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+
+ void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ if (It == PostIncRangeInfos.end())
+ PostIncRangeInfos.insert({Key, R});
+ else
+ It->second = R.intersectWith(It->second);
+ }
+
+public:
+ /// Record a link in the Narrow IV def-use chain along with the WideIV that
+ /// computes the same value as the Narrow IV def. This avoids caching Use*
+ /// pointers.
+ struct NarrowIVDefUse {
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
+ };
+
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges = true);
+
+ PHINode *createWideIV(SCEVExpander &Rewriter);
+
+ unsigned getNumElimExt() { return NumElimExt; }
+ unsigned getNumWidened() { return NumWidened; }
+
+protected:
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
+
+ ExtendKind getExtendKind(Instruction *I);
+
+ using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
+
+ WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
+
+ WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
+
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const;
+
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+
+ bool widenLoopCompare(NarrowIVDefUse DU);
+ bool widenWithVariantUse(NarrowIVDefUse DU);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
+
+private:
+ SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
+};
+
+
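+// The transform in one picture (invented IR): when sext(%iv) or zext(%iv) is
+// itself an AddRec on the loop, the phi gets a wide twin and the extends die:
+//
+//   loop:                               loop:
+//     %iv = phi i32 [ 0, ... ]            %iv.wide = phi i64 [ 0, ... ]
+//     %e = sext i32 %iv to i64     ==>    ; uses of %e take %iv.wide directly
+//     use(i64 %e)                         use(i64 %iv.wide)
+//     %inc = add nsw i32 %iv, 1           %inc.wide = add nsw i64 %iv.wide, 1
+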
+/// Determine the insertion point for this user. By default, insert immediately
+/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
+/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
+static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
+ DominatorTree *DT, LoopInfo *LI) {
+ PHINode *PHI = dyn_cast<PHINode>(User);
+ if (!PHI)
+ return User;
+
+ Instruction *InsertPt = nullptr;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ if (PHI->getIncomingValue(i) != Def)
+ continue;
+
+ BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+ if (!DT->isReachableFromEntry(InsertBB))
+ continue;
+
+ if (!InsertPt) {
+ InsertPt = InsertBB->getTerminator();
+ continue;
+ }
+ InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
+ InsertPt = InsertBB->getTerminator();
+ }
+
+ // If we have skipped all inputs, it means that Def only comes to Phi from
+ // unreachable blocks.
+ if (!InsertPt)
+ return nullptr;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
+}
+
+WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards, bool UsePostIncrementRanges)
+ : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+ HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
+ DeadInsts(DI) {
+ assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
+}
+
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
+}
+
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// nullptr for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ unsigned Opcode = DU.NarrowUse->getOpcode();
+ switch (Opcode) {
+ default:
+ return nullptr;
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse =
+ getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
+ }
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
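+// Worked instance of the guess above (invented IR): suppose %iv is widened
+// by sext and the narrow use is
+//   %u = sub nsw i32 %b, %iv
+// The first guess is X = sext(%b): SCEV is asked whether
+//   sext(%b) - sext(%iv), computed at i64,
+// equals the AddRec already established for the widened %u. If not, the zext
+// guess is tried; only when both fail does cloneArithmeticIVUser return
+// nullptr and leave this use unwidened.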
+
+WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
+ auto It = ExtendKindMap.find(I);
+ assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
+ return It->second;
+}
+
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const {
+ switch (OpCode) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Sub:
+ return SE->getMinusSCEV(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ case Instruction::UDiv:
+ return SE->getUDivExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unsupported opcode.");
+ }
+}
+
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy
+WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = DU.NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported yet.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return {nullptr, Unknown};
+
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ const unsigned ExtendOperIdx =
+ DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+ if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return {nullptr, Unknown};
+
+ // When creating this SCEV expr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
+ const SCEV *lhs = SE->getSCEV(DU.WideDef);
+ const SCEV *rhs = ExtendOperExpr;
+
+ // Let's swap operands to the initial order for the case of non-commutative
+ // operations, like SUB. See PR21014.
+ if (ExtendOperIdx == 0)
+ std::swap(lhs, rhs);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
+
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, Unknown};
+
+ return {AddRec, ExtKind};
+}
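+// Canonical case this catches (invented IR): with %iv widened by sext, the
+// increment
+//   %inc = add nsw i32 %iv, %n
+// widens to the AddRec of
+//   add i64 %iv.wide, sext(%n)
+// because nsw licenses sign-extending the non-IV operand. As the comment in
+// the body explains, the nsw/nuw flags are deliberately not transferred to
+// the wide SCEV, since the narrow flags may be control-dependent.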
+
+/// Is this instruction potentially interesting for further simplification
+/// after widening its type? In other words, can the extend be safely hoisted
+/// out of the loop, with SCEV reducing the value to a recurrence on the same
+/// loop? If so, return the extended recurrence and the kind of extension used.
+/// Otherwise return {nullptr, Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
+ if (!SE->isSCEVable(DU.NarrowUse->getType()))
+ return {nullptr, Unknown};
+
+ const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+ SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return {nullptr, Unknown};
+ }
+
+ const SCEV *WideExpr;
+ ExtendKind ExtKind;
+ if (DU.NeverNegative) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ if (isa<SCEVAddRecExpr>(WideExpr))
+ ExtKind = SignExtended;
+ else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
+ } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ ExtKind = SignExtended;
+ } else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return {nullptr, Unknown};
+ return {AddRec, ExtKind};
+}
+
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
+ LoopInfo *LI) {
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return;
+ LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
+ << *DU.NarrowUse << "\n");
+ IRBuilder<> Builder(InsertPt);
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand). The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+ if (!Cmp)
+ return false;
+
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is always positive (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is always positive. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+ bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
+ return false;
+
+ Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+ unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+ // Widen the compare instruction.
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return false;
+ IRBuilder<> Builder(InsertPt);
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+ // Widen the other operand of the compare, if necessary.
+ if (CastWidth < IVWidth) {
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
+ DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+ }
+ return true;
+}
+
+// widenIVUse avoids generating a trunc by evaluating the use as an AddRec.
+// This will not work when:
+// 1) SCEV traces back to an instruction inside the loop that SCEV can not
+// expand, e.g. add %indvar, (load %addr)
+// 2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
+// When SCEV fails to avoid the trunc, we can still try an instruction
+// combining approach to prove that the trunc is not required. This can be
+// further extended with other instruction combining checks, but for now we
+// handle the following case ("sub" can also be "add" or "mul"; "nsw + sext"
+// can also be "nuw + zext"):
+//
+// Src:
+// %c = sub nsw %b, %indvar
+// %d = sext %c to i64
+// Dst:
+// %indvar.ext1 = sext %indvar to i64
+// %m = sext %b to i64
+// %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of the add/sub/mul is extended to the wide
+// type, no trunc is required regardless of how %b is generated. This pattern
+// is common when calculating addresses on 64-bit architectures.
+bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ assert((NarrowUse->getOperand(0) == NarrowDef ||
+ NarrowUse->getOperand(1) == NarrowDef) &&
+ "bad DU");
+
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
+ bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
+ auto AnotherOpExtKind = ExtKind;
+
+ // Check that all uses are either:
+ // - narrow def (in case of we are widening the IV increment);
+ // - single-input LCSSA Phis;
+ // - comparison of the chosen type;
+ // - extend of the chosen type (raison d'etre).
+ SmallVector<Instruction *, 4> ExtUsers;
+ SmallVector<PHINode *, 4> LCSSAPhiUsers;
+ SmallVector<ICmpInst *, 4> ICmpUsers;
+ for (Use &U : NarrowUse->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (User == NarrowDef)
+ continue;
+ if (!L->contains(User)) {
+ auto *LCSSAPhi = cast<PHINode>(User);
+ // Make sure there is only 1 input, so that we don't have to split
+ // critical edges.
+ if (LCSSAPhi->getNumOperands() != 1)
+ return false;
+ LCSSAPhiUsers.push_back(LCSSAPhi);
+ continue;
+ }
+ if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
+ auto Pred = ICmp->getPredicate();
+ // We have 3 types of predicates: signed, unsigned and equality
+ // predicates. For equality, it's legal to widen the icmp for either sign
+ // or zero extend. For sign extend, we can also do so for signed predicates;
+ // likewise, for zero extend we can widen the icmp for unsigned predicates.
+ if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+ return false;
+ if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+ return false;
+ ICmpUsers.push_back(ICmp);
+ continue;
+ }
+ if (ExtKind == SignExtended)
+ User = dyn_cast<SExtInst>(User);
+ else
+ User = dyn_cast<ZExtInst>(User);
+ if (!User || User->getType() != WideType)
+ return false;
+ ExtUsers.push_back(User);
+ }
+ if (ExtUsers.empty()) {
+ DeadInsts.emplace_back(NarrowUse);
+ return true;
+ }
+
+ // We'll prove some facts that should be true in the context of ext users. If
+ // there are no users, we are done now. If there are some, pick their common
+ // dominator as context.
+ Instruction *Context = nullptr;
+ for (auto *Ext : ExtUsers) {
+ if (!Context || DT->dominates(Ext, Context))
+ Context = Ext;
+ else if (!DT->dominates(Context, Ext))
+ // For users that don't have dominance relation, use common dominator.
+ Context =
+ DT->findNearestCommonDominator(Context->getParent(), Ext->getParent())
+ ->getTerminator();
+ }
+ assert(Context && "Context not found?");
+
+ if (!CanSignExtend && !CanZeroExtend) {
+ // Because InstCombine turns 'sub nuw' into 'add', losing the no-wrap flag,
+ // we will most likely not see it. Let's try to prove it.
+ if (OpCode != Instruction::Add)
+ return false;
+ if (ExtKind != ZeroExtended)
+ return false;
+ const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
+ // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
+ if (NarrowUse->getOperand(0) != NarrowDef)
+ return false;
+ if (!SE->isKnownNegative(RHS))
+ return false;
+ bool ProvedSubNUW = SE->isKnownPredicateAt(
+ ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context);
+ if (!ProvedSubNUW)
+ return false;
+ // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
+ // neg(zext(neg(op))), which is basically sext(op).
+ AnotherOpExtKind = SignExtended;
+ }
+
+ // Verify that the defining operand is an AddRec.
+ const SCEV *Op1 = SE->getSCEV(WideDef);
+ const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+ if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ // Generate the widened use instruction.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ AnotherOpExtKind, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ AnotherOpExtKind, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ ExtendKindMap[NarrowUse] = ExtKind;
+
+ for (Instruction *User : ExtUsers) {
+ assert(User->getType() == WideType && "Checked before!");
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (PHINode *User : LCSSAPhiUsers) {
+ assert(User->getNumOperands() == 1 && "Checked before!");
+ Builder.SetInsertPoint(User);
+ auto *WidePN =
+ Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
+ BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
+ assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
+ "Not a LCSSA Phi?");
+ WidePN->addIncoming(WideBO, LoopExitingBlock);
+ Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt());
+ auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
+ User->replaceAllUsesWith(TruncPN);
+ DeadInsts.emplace_back(User);
+ }
+
+ for (ICmpInst *User : ICmpUsers) {
+ Builder.SetInsertPoint(User);
+ auto ExtendedOp = [&](Value *V) -> Value * {
+ if (V == NarrowUse)
+ return WideBO;
+ if (ExtKind == ZeroExtended)
+ return Builder.CreateZExt(V, WideBO->getType());
+ else
+ return Builder.CreateSExt(V, WideBO->getType());
+ };
+ auto Pred = User->getPredicate();
+ auto *LHS = ExtendedOp(User->getOperand(0));
+ auto *RHS = ExtendedOp(User->getOperand(1));
+ auto *WideCmp =
+ Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
+ User->replaceAllUsesWith(WideCmp);
+ DeadInsts.emplace_back(User);
+ }
+
+ return true;
+}
+
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+ assert(ExtendKindMap.count(DU.NarrowDef) &&
+ "Should already know the kind of extension used to widen NarrowDef");
+
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+ if (LI->getLoopFor(UsePhi->getParent()) != L) {
+ // For LCSSA phis, sink the truncate outside the loop.
+ // After SimplifyCFG most loop exit targets have a single predecessor.
+ // Otherwise fall back to a truncate within the loop.
+ if (UsePhi->getNumOperands() != 1)
+ truncateIVUse(DU, DT, LI);
+ else {
+ // Widening the PHI requires us to insert a trunc. The logical place
+ // for this trunc is in the same BB as the PHI. This is not possible if
+ // the BB is terminated by a catchswitch.
+ if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
+ return nullptr;
+
+ PHINode *WidePhi =
+ PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+ UsePhi);
+ WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ UsePhi->replaceAllUsesWith(Trunc);
+ DeadInsts.emplace_back(UsePhi);
+ LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
+ << *WidePhi << "\n");
+ }
+ return nullptr;
+ }
+ }
+
+ // This narrow use can be widened by a sext if it's non-negative or its narrow
+ // def was widened by a sext. Same for zext.
+ auto canWidenBySExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
+ };
+ auto canWidenByZExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
+ };
+
+ // Our raison d'etre! Eliminate sign and zero extension.
+ if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+ (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
+ Value *NewDef = DU.WideDef;
+ if (DU.NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(DU.NarrowUse);
+ NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
+ }
+ } else {
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *DU.NarrowUse
+ << "\n");
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+ NewDef = DU.NarrowUse;
+ }
+ }
+ if (NewDef != DU.NarrowUse) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
+ << " replaced by " << *DU.WideDef << "\n");
+ ++NumElimExt;
+ DU.NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.emplace_back(DU.NarrowUse);
+ }
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
+
+ // No further widening is needed. The deceased [sz]ext had done it for us.
+ return nullptr;
+ }
+
+ // Does this user itself evaluate to a recurrence after widening?
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
+
+ assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
+ if (!WideAddRec.first) {
+ // If use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (widenLoopCompare(DU))
+ return nullptr;
+
+    // At this point we are about to generate a truncate instruction that may
+    // hurt performance because the scalar evolution expression computed
+    // earlier in WideAddRec.first does not indicate a polynomial induction
+    // expression. In that case, look at the operands of the use instruction
+    // to determine if we can still widen the use instead of truncating its
+    // operand.
+ if (widenWithVariantUse(DU))
+ return nullptr;
+
+ // This user does not evaluate to a recurrence after widening, so don't
+ // follow it. Instead insert a Trunc to kill off the original use,
+ // eventually isolating the original narrow IV so it can be removed.
+ truncateIVUse(DU, DT, LI);
+ return nullptr;
+ }
+  // Assume block terminators cannot evaluate to a recurrence. We can't insert
+  // a Trunc after a terminator if there happens to be a critical edge.
+ assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
+ // Reuse the IV increment that SCEVExpander created as long as it dominates
+ // NarrowUse.
+ Instruction *WideUse = nullptr;
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ WideUse = WideInc;
+ else {
+ WideUse = cloneIVUser(DU, WideAddRec.first);
+ if (!WideUse)
+ return nullptr;
+ }
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
+ // evaluates to the same expression as the extended narrow use, but doesn't
+ // absolutely guarantee it. Hence the following failsafe check. In rare cases
+ // where it fails, we simply throw away the newly created wide use.
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
+ LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
+ << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
+ << "\n");
+ DeadInsts.emplace_back(WideUse);
+ return nullptr;
+ }
+
+  // If we reached this point then we are going to replace DU.NarrowUse with
+  // WideUse, so reattach any debug values now.
+ replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
+
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
+ // Returning WideUse pushes it on the worklist.
+ return WideUse;
+}
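At the source level, the effect widenIVUse is chasing is the removal of per-iteration extends. A minimal illustration (a hypothetical C++ example, not taken from the patch):

    // Before widening: the narrow 32-bit IV is sign-extended on every use.
    long sumBefore(const long *A, int N) {
      long S = 0;
      for (int I = 0; I < N; ++I)     // narrow IV
        S += A[static_cast<long>(I)]; // sext i32 -> i64 each iteration
      return S;
    }

    // After widening: the IV itself is 64-bit, so the extend disappears.
    long sumAfter(const long *A, int N) {
      long S = 0;
      for (long I = 0; I < static_cast<long>(N); ++I) // wide IV
        S += A[I];
      return S;
    }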
+
+/// Add eligible users of NarrowDef to NarrowIVUsers.
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NonNegativeDef =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getZero(NarrowSCEV->getType()));
+ for (User *U : NarrowDef->users()) {
+ Instruction *NarrowUser = cast<Instruction>(U);
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(NarrowUser).second)
+ continue;
+
+ bool NonNegativeUse = false;
+ if (!NonNegativeDef) {
+      // We might have control-dependent range information for this context.
+ if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+ NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+ }
+
+ NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+ NonNegativeDef || NonNegativeUse);
+ }
+}
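The NeverNegative flag computed here matters because sign- and zero-extension agree exactly on non-negative values, so either extension kind can widen such a use. A self-contained check of that fact (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t X = 42; // non-negative: sext and zext produce the same bits
      assert(static_cast<int32_t>(X) ==
             static_cast<int32_t>(static_cast<uint8_t>(X)));
      int8_t Y = -1; // negative: the two extensions diverge
      assert(static_cast<int32_t>(Y) == -1);                        // sext
      assert(static_cast<int32_t>(static_cast<uint8_t>(Y)) == 255); // zext
      return 0;
    }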
+
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
+///
+/// It would be simpler to delete uses as they are processed, but we must avoid
+/// invalidating SCEV expressions.
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
+ // Is this phi an induction variable?
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
+ if (!AddRec)
+ return nullptr;
+
+ // Widen the induction variable expression.
+ const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
+ ? SE->getSignExtendExpr(AddRec, WideType)
+ : SE->getZeroExtendExpr(AddRec, WideType);
+
+ assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
+ "Expect the new IV expression to preserve its type");
+
+ // Can the IV be extended outside the loop without overflow?
+ AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return nullptr;
+
+ // An AddRec must have loop-invariant operands. Since this AddRec is
+ // materialized by a loop header phi, the expression cannot have any post-loop
+ // operands, so they must dominate the loop header.
+ assert(
+ SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
+ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
+ "Loop header phi recurrence inputs do not dominate the loop");
+
+ // Iterate over IV uses (including transitive ones) looking for IV increments
+ // of the form 'add nsw %iv, <const>'. For each increment and each use of
+  // the increment, calculate control-dependent range information based on
+  // dominating conditions inside of the loop (e.g. a range check inside of
+  // the loop). Calculated ranges are stored in the PostIncRangeInfos map.
+ //
+ // Control-dependent range information is later used to prove that a narrow
+ // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+ // this on demand because when pushNarrowIVUsers needs this information some
+ // of the dominating conditions might be already widened.
+ if (UsePostIncrementRanges)
+ calculatePostIncRanges(OrigPhi);
+
+ // The rewriter provides a value for the desired IV expression. This may
+ // either find an existing phi or materialize a new one. Either way, we
+  // expect a well-formed cyclic phi-with-increments, i.e., any operand not
+  // part of the phi-SCC dominates the loop entry.
+ Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
+  // If the expanded value is not a phi node (for example, a cast such as
+  // bitcast, inttoptr, or ptrtoint), just skip for now.
+ if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
+    // If the cast node is an inserted instruction without any user, remove it
+    // to make sure the pass doesn't touch the function, as we cannot widen
+    // the phi.
+ if (ExpandInst->hasNUses(0) &&
+ Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
+ DeadInsts.emplace_back(ExpandInst);
+ return nullptr;
+ }
+
+ // Remembering the WideIV increment generated by SCEVExpander allows
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // employ a general reuse mechanism because the call above is the only call to
+ // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ WideInc =
+ cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
+ WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop increment
+ // to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
+ }
+
+ LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
+ ++NumWidened;
+
+ // Traverse the def-use chain using a worklist starting at the original IV.
+  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
+
+ while (!NarrowIVUsers.empty()) {
+ WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
+
+ // Process a def-use edge. This may replace the use, so don't hold a
+ // use_iterator across it.
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
+
+ // Follow all def-use edges from the previous narrow use.
+ if (WideUse)
+ pushNarrowIVUsers(DU.NarrowUse, WideUse);
+
+ // widenIVUse may have removed the def-use edge.
+ if (DU.NarrowDef->use_empty())
+ DeadInsts.emplace_back(DU.NarrowDef);
+ }
+
+ // Attach any debug information to the new PHI.
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
+
+ return WidePhi;
+}
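The Widened set plus the NarrowIVUsers vector implement a standard visited-set worklist over the def-use graph; a generic sketch of the same pattern, stripped of the IV specifics (hypothetical types):

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Users; };

    // Visit every transitive user of Root exactly once, tolerating cycles
    // (the analogue of the "bizarre phi cycles" guarded against above).
    template <typename Fn> void forEachTransitiveUser(Node *Root, Fn Visit) {
      std::set<Node *> Seen{Root};
      std::vector<Node *> Worklist{Root};
      while (!Worklist.empty()) {
        Node *Def = Worklist.back();
        Worklist.pop_back();
        for (Node *User : Def->Users)
          if (Seen.insert(User).second) {
            Visit(User);
            Worklist.push_back(User);
          }
      }
    }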
+
+/// Calculates the control-dependent range for the given def at the given
+/// context by looking at dominating conditions inside of the loop.
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+ Instruction *NarrowUser) {
+ using namespace llvm::PatternMatch;
+
+ Value *NarrowDefLHS;
+ const APInt *NarrowDefRHS;
+ if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+ m_APInt(NarrowDefRHS))) ||
+ !NarrowDefRHS->isNonNegative())
+ return;
+
+ auto UpdateRangeFromCondition = [&] (Value *Condition,
+ bool TrueDest) {
+ CmpInst::Predicate Pred;
+ Value *CmpRHS;
+ if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
+ m_Value(CmpRHS))))
+ return;
+
+ CmpInst::Predicate P =
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+
+ auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+ auto CmpConstrainedLHSRange =
+ ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+ auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
+ *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
+
+ updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+ };
+
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
+ UpdateRangeFromGuards(NarrowUser);
+
+ BasicBlock *NarrowUserBB = NarrowUser->getParent();
+  // If NarrowUserBB is statically unreachable, asking dominator queries may
+  // yield surprising results (e.g. the block may not have a dom tree node).
+ if (!DT->isReachableFromEntry(NarrowUserBB))
+ return;
+
+ for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+ L->contains(DTB->getBlock());
+ DTB = DTB->getIDom()) {
+ auto *BB = DTB->getBlock();
+ auto *TI = BB->getTerminator();
+ UpdateRangeFromGuards(TI);
+
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ continue;
+
+ auto *TrueSuccessor = BI->getSuccessor(0);
+ auto *FalseSuccessor = BI->getSuccessor(1);
+
+ auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
+ return BBE.isSingleEdge() &&
+ DT->dominates(BBE, NarrowUser->getParent());
+ };
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
+ }
+}
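As a concrete instance of the range arithmetic above: if a dominating branch establishes `x <s 16` for an i8 value and the narrow def is `x +nsw 4`, the allowed region for `x` is [-128, 16) and the no-signed-wrap add shifts it to [-124, 20). A sketch using the same ConstantRange calls as the function (assumes the usual LLVM headers are available):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    ConstantRange postIncRangeExample() {
      ConstantRange CmpRHS(APInt(8, 16)); // the constant 16
      ConstantRange X =
          ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_SLT, CmpRHS);
      // X is [-128, 16); adding 4 with nsw yields [-124, 20).
      return X.addWithNoWrap(APInt(8, 4),
                             OverflowingBinaryOperator::NoSignedWrap);
    }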
+
+/// Calculates the PostIncRangeInfos map for the given IV.
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 6> Worklist;
+ Worklist.push_back(OrigPhi);
+ Visited.insert(OrigPhi);
+
+ while (!Worklist.empty()) {
+ Instruction *NarrowDef = Worklist.pop_back_val();
+
+ for (Use &U : NarrowDef->uses()) {
+ auto *NarrowUser = cast<Instruction>(U.getUser());
+
+ // Don't go looking outside the current loop.
+ auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
+ if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
+ continue;
+
+ if (!Visited.insert(NarrowUser).second)
+ continue;
+
+ Worklist.push_back(NarrowUser);
+
+ calculatePostIncRange(NarrowDef, NarrowUser);
+ }
+ }
+}
+
+PHINode *llvm::createWideIV(const WideIVInfo &WI,
+ LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
+ DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ unsigned &NumElimExt, unsigned &NumWidened,
+ bool HasGuards, bool UsePostIncrementRanges) {
+ WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
+ PHINode *WidePHI = Widener.createWideIV(Rewriter);
+ NumElimExt = Widener.getNumElimExt();
+ NumWidened = Widener.getNumWidened();
+ return WidePHI;
+}
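A caller such as IndVarSimplify would drive this entry point roughly as follows (a hypothetical sketch: the WideIVInfo list and the analyses are assumed to be gathered elsewhere, and only the createWideIV signature comes from the patch):

    static unsigned widenAllIVs(ArrayRef<WideIVInfo> WideIVs, LoopInfo *LI,
                                ScalarEvolution *SE, SCEVExpander &Rewriter,
                                DominatorTree *DT,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
      unsigned TotalWidened = 0;
      for (const WideIVInfo &WI : WideIVs) {
        unsigned NumElimExt = 0, NumWidened = 0;
        if (createWideIV(WI, LI, SE, Rewriter, DT, DeadInsts, NumElimExt,
                         NumWidened, /*HasGuards=*/false,
                         /*UsePostIncrementRanges=*/true))
          TotalWidened += NumWidened;
      }
      return TotalWidened; // DeadInsts is cleaned up by the caller
    }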
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cfcc3454a210..f9a9dd237b6c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/CaptureTracking.h"
@@ -542,6 +541,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
@@ -569,6 +570,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return DstEnd;
}
@@ -609,15 +612,27 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
return Dst;
}
- // Let strncpy handle the zero padding
- if (Len > SrcLen + 1)
- return nullptr;
+ // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4)
+ if (Len > SrcLen + 1) {
+ if (Len <= 128) {
+ StringRef Str;
+ if (!getConstantStringInfo(Src, Str))
+ return nullptr;
+ std::string SrcStr = Str.str();
+ SrcStr.resize(Len, '\0');
+ Src = B.CreateGlobalString(SrcStr, "str");
+ } else {
+ return nullptr;
+ }
+ }
Type *PT = Callee->getFunctionType()->getParamType(0);
// strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
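The new case folds the zero padding into the copied constant; in source terms (illustrative C++, not from the patch):

    #include <cstring>

    char Buf[4];
    // strncpy copies the single byte 'a' and zero-pads the remaining three:
    void before() { strncpy(Buf, "a", 4); }
    // ...which is equivalent to one constant-length memcpy whose source
    // string already carries the padding ("a\0\0\0" is five bytes; only the
    // first four are copied):
    void after() { memcpy(Buf, "a\0\0\0", 4); }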
@@ -684,8 +699,6 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
Offset);
}
}
-
- return nullptr;
}
// strlen(x?"foo":"bars") --> x ? 3 : 4
@@ -1095,6 +1108,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1143,7 +1158,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
// mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
CallInst *NewCI =
B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
+ // Propagate attributes, but memcpy has no return value, so make sure that
+ // any return attributes are compliant.
+ // TODO: Attach return value attributes to the 1st operand to preserve them?
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
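mempcpy returns a pointer one past the last byte written, which is exactly what the trailing inbounds GEP reconstructs. Equivalently (illustrative sketch; mempcpy is a GNU extension):

    #include <cstring>

    void *before(void *Dst, const void *Src, size_t N) {
      return mempcpy(Dst, Src, N); // GNU extension
    }
    void *after(void *Dst, const void *Src, size_t N) {
      memcpy(Dst, Src, N);
      return static_cast<char *>(Dst) + N; // same value as mempcpy's result
    }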
@@ -1157,6 +1177,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1217,6 +1239,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1629,6 +1653,14 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
return nullptr;
+ // If we have a pow() library call (accesses memory) and we can't guarantee
+ // that the base is not an infinity, give up:
+  // pow(-Inf, 0.5) is required (where C Annex F applies) to return +Inf
+  // without setting errno, but sqrt(-Inf) is required by various standards to
+  // set errno.
+ if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
+ !isKnownNeverInfinity(Base, TLI))
+ return nullptr;
+
Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
if (!Sqrt)
return nullptr;
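A worked example of the divergence the new guard protects against (behavior per C Annex F/POSIX where implemented; whether errno is actually set is implementation-defined via math_errhandling):

    #include <cerrno>
    #include <cmath>
    #include <cstdio>

    int main() {
      errno = 0;
      double A = std::pow(-INFINITY, 0.5); // +inf, errno left untouched
      std::printf("pow : %f errno=%d\n", A, errno);

      errno = 0;
      double B = std::sqrt(-INFINITY); // NaN; errno may be set to EDOM
      std::printf("sqrt: %f errno=%d\n", B, errno);
      return 0;
    }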
@@ -1715,7 +1747,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
// pow(x, n) -> x * x * x * ...
const APFloat *ExpoF;
- if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
+ !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
// If the exponent is an integer+0.5 we generate a call to sqrt and an
// additional fmul.
@@ -1741,6 +1774,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
Pow->doesNotAccessMemory(), M, B, TLI);
+ if (!Sqrt)
+ return nullptr;
}
// We will memoize intermediate products of the Addition Chain.
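The memoized addition chain is essentially exponentiation by squaring; a standalone sketch of the arithmetic the emitted multiply sequence performs (a hypothetical helper, not the patch's code generator):

    // Computes X^N with O(log N) multiplications by reusing the running
    // square -- the same intermediate products the addition chain memoizes.
    double powByRepeatedSquaring(double X, unsigned N) {
      double Result = 1.0;
      double Square = X; // X^(2^K) for the current bit K
      while (N != 0) {
        if (N & 1)
          Result *= Square;
        Square *= Square;
        N >>= 1;
      }
      return Result;
    }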
@@ -2164,7 +2199,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
- if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ if (SinCalls.empty() || CosCalls.empty())
return nullptr;
Value *Sin, *Cos, *SinCos;
@@ -2190,7 +2225,7 @@ void LibCallSimplifier::classifyArgUse(
SmallVectorImpl<CallInst *> &SinCosCalls) {
CallInst *CI = dyn_cast<CallInst>(Val);
- if (!CI)
+ if (!CI || CI->use_empty())
return;
// Don't consider calls in other functions.
@@ -2487,6 +2522,30 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
+ if (CI->use_empty())
+ // sprintf(dest, "%s", str) -> strcpy(dest, str)
+ return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI);
+
+ uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
+ if (SrcLen) {
+ B.CreateMemCpy(
+ CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
+    // sprintf returns the number of characters written, not counting the
+    // terminating null character.
+ return ConstantInt::get(CI->getType(), SrcLen - 1);
+ } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ B, TLI)) {
+ // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
+ Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0));
+ return B.CreateIntCast(PtrDiff, CI->getType(), false);
+ }
+
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ if (OptForSize)
+ return nullptr;
+
Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
return nullptr;
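Summarizing the three sprintf(dest, "%s", str) rewrites added above (an illustrative sketch; stpcpy is POSIX, and L stands for the statically known strlen of the source):

    #include <cstring>

    int demo(char *D, const char *S) {
      // Result unused:       sprintf(D, "%s", S)     -> strcpy(D, S)
      // strlen(S) known = L: N = sprintf(D, "%s", S) -> memcpy(D, S, L + 1); N = L
      // Length unknown:      N = sprintf(D, "%s", S) -> N = (int)(stpcpy(D, S) - D)
      return static_cast<int>(stpcpy(D, S) - D);
    }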
@@ -3219,6 +3278,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3231,6 +3292,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3245,11 +3308,29 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
}
+Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
+ IRBuilderBase &B) {
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ if (isFortifiedCallFoldable(CI, 3, 2))
+ if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, DL, TLI)) {
+ CallInst *NewCI = cast<CallInst>(Call);
+ NewCI->setAttributes(CI->getAttributes());
+ NewCI->removeAttributes(
+ AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCI->getType()));
+ return NewCI;
+ }
+ return nullptr;
+}
+
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilderBase &B,
LibFunc Func) {
@@ -3330,7 +3411,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4), VariadicArgs, B, TLI);
}
@@ -3341,7 +3422,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+ SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
B, TLI);
}
@@ -3439,6 +3520,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
switch (Func) {
case LibFunc_memcpy_chk:
return optimizeMemCpyChk(CI, Builder);
+ case LibFunc_mempcpy_chk:
+ return optimizeMemPCpyChk(CI, Builder);
case LibFunc_memmove_chk:
return optimizeMemMoveChk(CI, Builder);
case LibFunc_memset_chk:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
index e257c5a015f5..beeb60698f04 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -43,11 +43,6 @@ cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO(
cl::desc("Apply the profile guided size optimizations only "
"to cold code under partial-profile sample PGO."));
-cl::opt<bool> PGSOIRPassOrTestOnly(
- "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
- cl::desc("Apply the profile guided size optimizations only"
- "to the IR passes or tests."));
-
cl::opt<bool> ForcePGSO(
"force-pgso", cl::Hidden, cl::init(false),
cl::desc("Force the (profiled-guided) size optimizations. "));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index b559811d120b..1fa574f04c37 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -13,6 +13,7 @@
// present.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -24,22 +25,7 @@
using namespace llvm;
-namespace {
-struct StripGCRelocates : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- StripGCRelocates() : FunctionPass(ID) {
- initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {}
-
- bool runOnFunction(Function &F) override;
-
-};
-char StripGCRelocates::ID = 0;
-}
-
-bool StripGCRelocates::runOnFunction(Function &F) {
+static bool stripGCRelocates(Function &F) {
// Nothing to do for declarations.
if (F.isDeclaration())
return false;
@@ -71,6 +57,32 @@ bool StripGCRelocates::runOnFunction(Function &F) {
return !GCRelocates.empty();
}
-INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
+PreservedAnalyses StripGCRelocates::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!stripGCRelocates(F))
+ return PreservedAnalyses::all();
+
+  // Removing gc.relocate preserves the CFG, but most other analyses probably
+  // need to re-run.
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+struct StripGCRelocatesLegacy : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StripGCRelocatesLegacy() : FunctionPass(ID) {
+ initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+ bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); }
+};
+char StripGCRelocatesLegacy::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates",
"Strip gc.relocates inserted through RewriteStatepointsForGC",
true, false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 21cbbfb140b6..10fda4df51ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -17,10 +18,11 @@ namespace {
/// This pass strips all debug info that is not related to line tables.
/// The result will be the same as if the program were compiled with
/// -gline-tables-only.
-struct StripNonLineTableDebugInfo : public ModulePass {
+struct StripNonLineTableDebugLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- StripNonLineTableDebugInfo() : ModulePass(ID) {
- initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry());
+ StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
+ initializeStripNonLineTableDebugLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -33,10 +35,17 @@ struct StripNonLineTableDebugInfo : public ModulePass {
};
}
-char StripNonLineTableDebugInfo::ID = 0;
-INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo",
+char StripNonLineTableDebugLegacyPass::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
+ "strip-nonlinetable-debuginfo",
"Strip all debug info except linetables", false, false)
-ModulePass *llvm::createStripNonLineTableDebugInfoPass() {
- return new StripNonLineTableDebugInfo();
+ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
+ return new StripNonLineTableDebugLegacyPass();
+}
+
+PreservedAnalyses
+StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
+ llvm::stripNonLineTableDebugInfo(M);
+ return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9af39d9a0dd1..3631733713ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -6,10 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass is used to ensure that functions have at most one return
-// instruction in them. Additionally, it keeps track of which node is the new
-// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
-// method will return a null pointer.
+// This pass is used to ensure that functions have at most one return and one
+// unreachable instruction in them.
//
//===----------------------------------------------------------------------===//
@@ -22,73 +20,66 @@
#include "llvm/Transforms/Utils.h"
using namespace llvm;
-char UnifyFunctionExitNodes::ID = 0;
+char UnifyFunctionExitNodesLegacyPass::ID = 0;
-UnifyFunctionExitNodes::UnifyFunctionExitNodes() : FunctionPass(ID) {
- initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass()
+ : FunctionPass(ID) {
+ initializeUnifyFunctionExitNodesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
-INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn",
"Unify function exit nodes", false, false)
Pass *llvm::createUnifyFunctionExitNodesPass() {
- return new UnifyFunctionExitNodes();
+ return new UnifyFunctionExitNodesLegacyPass();
}
-void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
AU.addPreservedID(LowerSwitchID);
}
-// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
-// BasicBlock, and converting all returns to unconditional branches to this
-// new basic block. The singular exit node is returned.
-//
-// If there are no return stmts in the Function, a null pointer is returned.
-//
-bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
- // Loop over all of the blocks in a function, tracking all of the blocks that
- // return.
- //
- std::vector<BasicBlock*> ReturningBlocks;
- std::vector<BasicBlock*> UnreachableBlocks;
+namespace {
+
+bool unifyUnreachableBlocks(Function &F) {
+ std::vector<BasicBlock *> UnreachableBlocks;
+
for (BasicBlock &I : F)
- if (isa<ReturnInst>(I.getTerminator()))
- ReturningBlocks.push_back(&I);
- else if (isa<UnreachableInst>(I.getTerminator()))
+ if (isa<UnreachableInst>(I.getTerminator()))
UnreachableBlocks.push_back(&I);
- // Then unreachable blocks.
- if (UnreachableBlocks.empty()) {
- UnreachableBlock = nullptr;
- } else if (UnreachableBlocks.size() == 1) {
- UnreachableBlock = UnreachableBlocks.front();
- } else {
- UnreachableBlock = BasicBlock::Create(F.getContext(),
- "UnifiedUnreachableBlock", &F);
- new UnreachableInst(F.getContext(), UnreachableBlock);
-
- for (BasicBlock *BB : UnreachableBlocks) {
- BB->getInstList().pop_back(); // Remove the unreachable inst.
- BranchInst::Create(UnreachableBlock, BB);
- }
+ if (UnreachableBlocks.size() <= 1)
+ return false;
+
+ BasicBlock *UnreachableBlock =
+ BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
}
- // Now handle return blocks.
- if (ReturningBlocks.empty()) {
- ReturnBlock = nullptr;
- return false; // No blocks return
- } else if (ReturningBlocks.size() == 1) {
- ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return true;
+}
+
+bool unifyReturnBlocks(Function &F) {
+ std::vector<BasicBlock *> ReturningBlocks;
+
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+
+ if (ReturningBlocks.size() <= 1)
return false;
- }
- // Otherwise, we need to insert a new basic block into the function, add a PHI
- // nodes (if the function returns values), and convert all of the return
- // instructions into unconditional branches.
- //
+ // Insert a new basic block into the function, add PHI nodes (if the function
+ // returns values), and convert all of the return instructions into
+ // unconditional branches.
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
"UnifiedReturnBlock", &F);
@@ -105,7 +96,6 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Loop over all of the blocks, replacing the return instruction with an
// unconditional branch.
- //
for (BasicBlock *BB : ReturningBlocks) {
// Add an incoming element to the PHI node for every return instruction that
// is merging into this new block...
@@ -115,6 +105,25 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
BB->getInstList().pop_back(); // Remove the return insn
BranchInst::Create(NewRetBlock, BB);
}
- ReturnBlock = NewRetBlock;
+
return true;
}
+} // namespace
+
+// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting
+// all returns to unconditional branches to this new basic block. Also, unify
+// all unreachable blocks.
+bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed;
+}
+
+PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ Changed |= unifyUnreachableBlocks(F);
+ Changed |= unifyReturnBlocks(F);
+ return Changed ? PreservedAnalyses() : PreservedAnalyses::all();
+}
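At the source level, the return-unification half of the pass corresponds to this rewrite (an illustrative example, not from the patch):

    // Before: two returning blocks.
    int sign(int X) {
      if (X < 0)
        return -1;
      return 1;
    }

    // After mergereturn: each return becomes a branch to one
    // UnifiedReturnBlock whose phi selects the return value.
    int signUnified(int X) {
      int Ret;
      if (X < 0)
        Ret = -1; // branch to the unified return block
      else
        Ret = 1;
      return Ret; // single return, fed by the phi
    }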
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index b10deee3907c..0b718ed6136e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -16,6 +16,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/InitializePasses.h"
@@ -27,10 +29,10 @@
using namespace llvm;
namespace {
-struct UnifyLoopExits : public FunctionPass {
+struct UnifyLoopExitsLegacyPass : public FunctionPass {
static char ID;
- UnifyLoopExits() : FunctionPass(ID) {
- initializeUnifyLoopExitsPass(*PassRegistry::getPassRegistry());
+ UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
+ initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -46,17 +48,19 @@ struct UnifyLoopExits : public FunctionPass {
};
} // namespace
-char UnifyLoopExits::ID = 0;
+char UnifyLoopExitsLegacyPass::ID = 0;
-FunctionPass *llvm::createUnifyLoopExitsPass() { return new UnifyLoopExits(); }
+FunctionPass *llvm::createUnifyLoopExitsPass() {
+ return new UnifyLoopExitsLegacyPass();
+}
-INITIALIZE_PASS_BEGIN(UnifyLoopExits, "unify-loop-exits",
+INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(UnifyLoopExits, "unify-loop-exits",
+INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
@@ -80,7 +84,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L,
const SetVector<BasicBlock *> &Incoming,
BasicBlock *LoopExitBlock) {
using InstVector = SmallVector<Instruction *, 8>;
- using IIMap = DenseMap<Instruction *, InstVector>;
+ using IIMap = MapVector<Instruction *, InstVector>;
IIMap ExternalUsers;
for (auto BB : L->blocks()) {
for (auto &I : *BB) {
@@ -203,11 +207,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
return true;
}
-bool UnifyLoopExits::runOnFunction(Function &F) {
- LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
- << "\n");
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
bool Changed = false;
auto Loops = LI.getLoopsInPreorder();
@@ -218,3 +218,28 @@ bool UnifyLoopExits::runOnFunction(Function &F) {
}
return Changed;
}
+
+bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
+ << "\n");
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ return runImpl(LI, DT);
+}
+
+namespace llvm {
+
+PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (!runImpl(LI, DT))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
index 5b58548e54dc..c57cec6be676 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
@@ -13,8 +13,11 @@
#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -27,13 +30,31 @@ static bool uniqueifyInternalLinkageNames(Module &M) {
Md5.final(R);
SmallString<32> Str;
llvm::MD5::stringifyResult(R, Str);
- std::string ModuleNameHash = (Twine(".") + Twine(Str)).str();
+  // Convert the MD5 hash to decimal: demangler suffixes can contain either
+  // numbers or characters, but not both.
+  APInt IntHash = APInt(128, Str.str(), 16);
+  // Prepend ".__uniq." before the hash so that tools such as profilers can
+  // tell that this symbol has internal linkage.
+  std::string ModuleNameHash =
+      (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str();
bool Changed = false;
+ MDBuilder MDB(M.getContext());
// Append the module hash to all internal linkage functions.
for (auto &F : M) {
if (F.hasInternalLinkage()) {
F.setName(F.getName() + ModuleNameHash);
+ F.addFnAttr("sample-profile-suffix-elision-policy", "selected");
+ // Replace linkage names in the debug metadata.
+ if (DISubprogram *SP = F.getSubprogram()) {
+ if (SP->getRawLinkageName()) {
+ auto *Name = MDB.createString(F.getName());
+ SP->replaceRawLinkageName(Name);
+ if (DISubprogram *SPDecl = SP->getDeclaration()) {
+ if (SPDecl->getRawLinkageName())
+ SPDecl->replaceRawLinkageName(Name);
+ }
+ }
+ }
Changed = true;
}
}
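The suffix construction reduces to parsing the hex digest into a 128-bit APInt and reprinting it in base 10; condensed (a sketch reusing the same APInt calls as the hunk above; the digest value is made up):

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>
    using namespace llvm;

    std::string hashToSuffix(StringRef HexDigest) {
      APInt IntHash(128, HexDigest, /*radix=*/16);
      return ".__uniq." + IntHash.toString(/*Radix=*/10, /*Signed=*/false);
    }

    // e.g. hashToSuffix("ff") == ".__uniq.255"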
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index ce98a739bea8..73c0532f3fd5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -34,17 +34,17 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
initializeLoopSimplifyPass(Registry);
initializeLowerInvokeLegacyPassPass(Registry);
- initializeLowerSwitchPass(Registry);
+ initializeLowerSwitchLegacyPassPass(Registry);
initializeNameAnonGlobalLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
- initializeStripNonLineTableDebugInfoPass(Registry);
- initializeUnifyFunctionExitNodesPass(Registry);
+ initializeStripNonLineTableDebugLegacyPassPass(Registry);
+ initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
initializeMetaRenamerPass(Registry);
- initializeStripGCRelocatesPass(Registry);
+ initializeStripGCRelocatesLegacyPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
initializeInjectTLIMappingsLegacyPass(Registry);
initializeFixIrreduciblePass(Registry);
- initializeUnifyLoopExitsPass(Registry);
+ initializeUnifyLoopExitsLegacyPassPass(Registry);
initializeUniqueInternalLinkageNamesLegacyPassPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 6ff08cd28712..61cd8595a73b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
Type *StoredTy = StoredVal->getType();
+
if (StoredTy == LoadTy)
return true;
@@ -36,17 +37,29 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
+ bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
// Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ if (StoredNI != LoadNI) {
// As a special case, allow coercion of memset used to initialize
// an array w/null. Despite non-integral pointers not generally having a
// specific bit pattern, we do assume null is zero.
if (auto *CI = dyn_cast<Constant>(StoredVal))
return CI->isNullValue();
return false;
+ } else if (StoredNI && LoadNI &&
+ StoredTy->getPointerAddressSpace() !=
+ LoadTy->getPointerAddressSpace()) {
+ return false;
}
-
+
+ // The implementation below uses inttoptr for vectors of unequal size; we
+  // can't allow this for non-integral pointers. We could teach it to extract
+ // exact subvectors if desired.
+ if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
+ return false;
+
return true;
}
@@ -223,14 +236,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- // Allow casts of zero values to null as a special case
- auto *CI = dyn_cast<Constant>(StoredVal);
- if (!CI || !CI->isNullValue())
- return -1;
- }
+ if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+ return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
@@ -333,9 +340,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
return -1;
Value *DepPtr = DepLI->getPointerOperand();
@@ -393,7 +398,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (!Src)
return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
index f1b3fe8e2fa9..930e0b7ee01a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -167,12 +167,9 @@ public:
void flush();
private:
- void mapGlobalInitializer(GlobalVariable &GV, Constant &Init);
void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers);
- void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target);
- void remapFunction(Function &F, ValueToValueMapTy &VM);
ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
@@ -822,11 +819,15 @@ void Mapper::flush() {
break;
case WorklistEntry::MapAppendingVar: {
unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
+      // The mapAppendingVariable call can change AppendingInits if the
+      // initializer for the variable depends on another appending global;
+      // because of that, the inits need to be extracted and updated before
+      // the call.
+ SmallVector<Constant *, 8> NewInits(
+ drop_begin(AppendingInits, PrefixSize));
+ AppendingInits.resize(PrefixSize);
mapAppendingVariable(*E.Data.AppendingGV.GV,
E.Data.AppendingGV.InitPrefix,
- E.AppendingGVIsOldCtorDtor,
- makeArrayRef(AppendingInits).slice(PrefixSize));
- AppendingInits.resize(PrefixSize);
+ E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
break;
}
case WorklistEntry::MapGlobalIndirectSymbol:
@@ -900,14 +901,13 @@ void Mapper::remapInstruction(Instruction *I) {
LLVMContext &C = CB->getContext();
AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- if (Attrs.hasAttribute(i, Attribute::ByVal)) {
- Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
- if (!Ty)
- continue;
-
- Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
- Attrs = Attrs.addAttribute(
- C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+ for (Attribute::AttrKind TypedAttr :
+ {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+ if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
+ TypeMapper->remapType(Ty));
+ break;
+ }
}
}
CB->setAttributes(Attrs);