Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--  lib/Transforms/Utils/ASanStackFrameLayout.cpp        |    2
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp           |   80
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp             |  169
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp          |   31
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp               |  964
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt                  |    5
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp               |  126
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp                 |   52
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp               |  113
-rw-r--r--  lib/Transforms/Utils/Evaluator.cpp                   |  596
-rw-r--r--  lib/Transforms/Utils/FunctionImportUtils.cpp         |  243
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp                |   14
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp              |  190
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp            |   14
-rw-r--r--  lib/Transforms/Utils/IntegerDivision.cpp             |    9
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp                       |  320
-rw-r--r--  lib/Transforms/Utils/Local.cpp                       |  368
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp                |   88
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp                  |  229
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp           |  497
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp                   |  310
-rw-r--r--  lib/Transforms/Utils/LoopVersioning.cpp              |  202
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp                 |   11
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp                 |   10
-rw-r--r--  lib/Transforms/Utils/Makefile                        |   15
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp                     |  108
-rw-r--r--  lib/Transforms/Utils/MemorySSA.cpp                   | 1361
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp                 |   57
-rw-r--r--  lib/Transforms/Utils/NameAnonFunctions.cpp           |  102
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp     |    5
-rw-r--r--  lib/Transforms/Utils/SanitizerStats.cpp              |  108
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                 | 1741
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp              |  230
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp        |  109
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp            |  799
-rw-r--r--  lib/Transforms/Utils/SplitModule.cpp                 |  202
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp              |    1
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp      |    9
-rw-r--r--  lib/Transforms/Utils/Utils.cpp                       |    9
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp                 | 1223
40 files changed, 7781 insertions(+), 2941 deletions(-)
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 409326eba401f..7e50d4bb447ef 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -44,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
- return RoundUpToAlignment(Res, Alignment);
+ return alignTo(Res, Alignment);
}
void
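[Editor's note on the hunk above: alignTo(Res, Alignment) rounds Res up to the nearest multiple of Alignment, exactly what RoundUpToAlignment did under its old name. A minimal sketch of the arithmetic follows; alignToSketch is an illustrative helper, not LLVM's implementation.]

    #include <cstddef>

    // Round Value up to the next multiple of Align (power-of-two variant).
    // llvm::alignTo also handles general alignments, computing
    // (Value + Align - 1) / Align * Align.
    static size_t alignToSketch(size_t Value, size_t Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    // Example: a 70-byte variable gets Res = 70 + 64 = 134, and
    // alignToSketch(134, 32) == 160, the next 32-byte shadow boundary.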
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 0262358fa3d57..d034905b6572b 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,7 +52,9 @@
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -72,20 +74,22 @@ using namespace llvm;
#define DEBUG_TYPE "add-discriminators"
namespace {
-struct AddDiscriminators : public FunctionPass {
+// The legacy pass of AddDiscriminators.
+struct AddDiscriminatorsLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- AddDiscriminators() : FunctionPass(ID) {
- initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+ AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
+ initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
};
-}
-char AddDiscriminators::ID = 0;
-INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators",
+} // end anonymous namespace
+
+char AddDiscriminatorsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
-INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
+INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
"Add DWARF path discriminators", false, false)
// Command line option to disable discriminator generation even in the
@@ -95,13 +99,9 @@ static cl::opt<bool> NoDiscriminators(
"no-discriminators", cl::init(false),
cl::desc("Disable generation of discriminator information."));
+// Create the legacy AddDiscriminatorsPass.
FunctionPass *llvm::createAddDiscriminatorsPass() {
- return new AddDiscriminators();
-}
-
-static bool hasDebugInfo(const Function &F) {
- DISubprogram *S = getDISubprogram(&F);
- return S != nullptr;
+ return new AddDiscriminatorsLegacyPass();
}
/// \brief Assign DWARF discriminators.
@@ -155,13 +155,13 @@ static bool hasDebugInfo(const Function &F) {
/// lexical block for I2 and all the instruction in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
-bool AddDiscriminators::runOnFunction(Function &F) {
+static bool addDiscriminators(Function &F) {
// If the function has debug information, but the user has disabled
// discriminators, do nothing.
// Similarly, if the function has no debug info, do nothing.
// Finally, if this module is built with dwarf versions earlier than 4,
// do nothing (discriminator support is a DWARF 4 feature).
- if (NoDiscriminators || !hasDebugInfo(F) ||
+ if (NoDiscriminators || !F.getSubprogram() ||
F.getParent()->getDwarfVersion() < 4)
return false;
@@ -173,8 +173,11 @@ bool AddDiscriminators::runOnFunction(Function &F) {
typedef std::pair<StringRef, unsigned> Location;
typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap;
typedef DenseMap<Location, BBScopeMap> LocationBBMap;
+ typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
+ typedef DenseSet<Location> LocationSet;
LocationBBMap LBM;
+ LocationDiscriminatorMap LDM;
// Traverse all instructions in the function. If the source line location
// of the instruction appears in other basic block, assign a new
@@ -199,8 +202,7 @@ bool AddDiscriminators::runOnFunction(Function &F) {
auto *Scope = DIL->getScope();
auto *File =
Builder.createFile(DIL->getFilename(), Scope->getDirectory());
- NewScope = Builder.createLexicalBlockFile(
- Scope, File, DIL->computeNewDiscriminator());
+ NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
}
I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
NewScope, DIL->getInlinedAt()));
@@ -217,32 +219,40 @@ bool AddDiscriminators::runOnFunction(Function &F) {
// Sample-based profiling needs to distinguish different function calls within
// the same source line for correct profile annotation.
for (BasicBlock &B : F) {
- const DILocation *FirstDIL = NULL;
+ LocationSet CallLocations;
for (auto &I : B.getInstList()) {
CallInst *Current = dyn_cast<CallInst>(&I);
if (!Current || isa<DbgInfoIntrinsic>(&I))
continue;
DILocation *CurrentDIL = Current->getDebugLoc();
- if (FirstDIL) {
- if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() &&
- CurrentDIL->getFilename() == FirstDIL->getFilename()) {
- auto *Scope = FirstDIL->getScope();
- auto *File = Builder.createFile(FirstDIL->getFilename(),
- Scope->getDirectory());
- auto *NewScope = Builder.createLexicalBlockFile(
- Scope, File, FirstDIL->computeNewDiscriminator());
- Current->setDebugLoc(DILocation::get(
- Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope,
- CurrentDIL->getInlinedAt()));
- Changed = true;
- } else {
- FirstDIL = CurrentDIL;
- }
- } else {
- FirstDIL = CurrentDIL;
+ if (!CurrentDIL)
+ continue;
+ Location L =
+ std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
+ if (!CallLocations.insert(L).second) {
+ auto *Scope = CurrentDIL->getScope();
+ auto *File = Builder.createFile(CurrentDIL->getFilename(),
+ Scope->getDirectory());
+ auto *NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
+ Current->setDebugLoc(DILocation::get(Ctx, CurrentDIL->getLine(),
+ CurrentDIL->getColumn(), NewScope,
+ CurrentDIL->getInlinedAt()));
+ Changed = true;
}
}
}
return Changed;
}
+
+bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
+ return addDiscriminators(F);
+}
+PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ if (!addDiscriminators(F))
+ return PreservedAnalyses::all();
+
+ // FIXME: should be all()
+ return PreservedAnalyses::none();
+}
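[Editor's note on the patch above: it replaces DILocation::computeNewDiscriminator() with a per-function map keyed on (filename, line); each time a location needs a fresh lexical block file, the counter for that location is bumped. A standalone sketch of that counter, with std::map standing in for DenseMap and all names illustrative:]

    #include <map>
    #include <string>
    #include <utility>

    using Location = std::pair<std::string, unsigned>; // (filename, line)

    struct DiscriminatorAllocator {
      std::map<Location, unsigned> Counters;
      // Mirrors ++LDM[L] above: a fresh key value-initializes to 0, so the
      // first discriminator handed out for a location is 1, then 2, ...
      unsigned next(const Location &L) { return ++Counters[L]; }
    };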
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 72db980cf572a..b90349d3cdad1 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -31,8 +31,6 @@
#include <algorithm>
using namespace llvm;
-/// DeleteDeadBlock - Delete the specified block, which must have no
-/// predecessors.
void llvm::DeleteDeadBlock(BasicBlock *BB) {
assert((pred_begin(BB) == pred_end(BB) ||
// Can delete self loop.
@@ -61,12 +59,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
BB->eraseFromParent();
}
-/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
-/// any single-entry PHI nodes in it, fold them away. This handles the case
-/// when all entries to the PHI nodes in a block are guaranteed equal, such as
-/// when the block has exactly one predecessor.
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
- MemoryDependenceAnalysis *MemDep) {
+ MemoryDependenceResults *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
@@ -82,11 +76,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
}
}
-
-/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
-/// is dead. Also recursively delete any operands that become dead as
-/// a result. This includes tracing the def-use list from the PHI to see if
-/// it is ultimately unused or if it reaches an unused cycle.
bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
@@ -103,11 +92,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
return Changed;
}
-/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
-/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
LoopInfo *LI,
- MemoryDependenceAnalysis *MemDep) {
+ MemoryDependenceResults *MemDep) {
// Don't merge away blocks whose address is taken.
if (BB->hasAddressTaken()) return false;
@@ -165,10 +152,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (DomTreeNode *DTN = DT->getNode(BB)) {
DomTreeNode *PredDTN = DT->getNode(PredBB);
SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
- DE = Children.end();
- DI != DE; ++DI)
- DT->changeImmediateDominator(*DI, PredDTN);
+ for (DomTreeNode *DI : Children)
+ DT->changeImmediateDominator(DI, PredDTN);
DT->eraseNode(BB);
}
@@ -183,9 +168,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
return true;
}
-/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
-/// with a value, then remove and delete the original instruction.
-///
void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
BasicBlock::iterator &BI, Value *V) {
Instruction &I = *BI;
@@ -200,11 +182,6 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
BI = BIL.erase(BI);
}
-
-/// ReplaceInstWithInst - Replace the instruction specified by BI with the
-/// instruction specified by I. The original instruction is deleted and BI is
-/// updated to point to the new instruction.
-///
void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BasicBlock::iterator &BI, Instruction *I) {
assert(I->getParent() == nullptr &&
@@ -225,16 +202,11 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BI = New;
}
-/// ReplaceInstWithInst - Replace the instruction specified by From with the
-/// instruction specified by To.
-///
void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
BasicBlock::iterator BI(From);
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
}
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
LoopInfo *LI) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
@@ -266,8 +238,8 @@ unsigned
llvm::SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
@@ -276,11 +248,6 @@ llvm::SplitAllCriticalEdges(Function &F,
return NumBroken;
}
-/// SplitBlock - Split the specified block at the specified instruction - every
-/// thing before SplitPt stays in Old and everything starting with SplitPt moves
-/// to a new block. The two blocks are joined by an unconditional branch and
-/// the loop info is updated.
-///
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DominatorTree *DT, LoopInfo *LI) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
@@ -297,22 +264,17 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
if (DomTreeNode *OldNode = DT->getNode(Old)) {
- std::vector<DomTreeNode *> Children;
- for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
- Children.push_back(*I);
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
DomTreeNode *NewNode = DT->addNewBlock(New, Old);
- for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
- DT->changeImmediateDominator(*I, NewNode);
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
}
return New;
}
-/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA
-/// analysis information.
+/// Update DominatorTree, LoopInfo, and LCSSA analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
DominatorTree *DT, LoopInfo *LI,
@@ -331,10 +293,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
// this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end();
- i != e; ++i) {
- BasicBlock *Pred = *i;
-
+ for (BasicBlock *Pred : Preds) {
// If we need to preserve LCSSA, determine if any of the preds is a loop
// exit.
if (PreserveLCSSA)
@@ -362,9 +321,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
// loops enclose them, and select the most-nested loop which contains the
// loop containing the block being split.
Loop *InnermostPredLoop = nullptr;
- for (ArrayRef<BasicBlock*>::iterator
- i = Preds.begin(), e = Preds.end(); i != e; ++i) {
- BasicBlock *Pred = *i;
+ for (BasicBlock *Pred : Preds) {
if (Loop *PredLoop = LI->getLoopFor(Pred)) {
// Seek a loop which actually contains the block being split (to avoid
// adjacent loops).
@@ -388,8 +345,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
}
-/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
-/// from NewBB. This also updates AliasAnalysis, if available.
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds, BranchInst *BI,
bool HasLoopExit) {
@@ -456,21 +413,6 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-/// SplitBlockPredecessors - This method introduces at least one new basic block
-/// into the function and moves some of the predecessors of BB to be
-/// predecessors of the new block. The new predecessors are indicated by the
-/// Preds array. The new block is given a suffix of 'Suffix'. Returns new basic
-/// block to which predecessors from Preds are now pointing.
-///
-/// If BB is a landingpad block then additional basicblock might be introduced.
-/// It will have suffix of 'Suffix'+".split_lp".
-/// See SplitLandingPadPredecessors for more details on this case.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
-/// preserve LoopSimplify (because it's complicated to handle the case where one
-/// of the edges being split is an exit of a loop with other exits).
-///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix, DominatorTree *DT,
@@ -529,19 +471,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
return NewBB;
}
-/// SplitLandingPadPredecessors - This method transforms the landing pad,
-/// OrigBB, by introducing two new basic blocks into the function. One of those
-/// new basic blocks gets the predecessors listed in Preds. The other basic
-/// block gets the remaining predecessors of OrigBB. The landingpad instruction
-/// OrigBB is clone into both of the new basic blocks. The new blocks are given
-/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
-/// it does not preserve LoopSimplify (because it's complicated to handle the
-/// case where one of the edges being split is an exit of a loop with other
-/// exits).
-///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
@@ -603,9 +532,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
// Move the remaining edges from OrigBB to point to NewBB2.
- for (SmallVectorImpl<BasicBlock*>::iterator
- i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
- (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+ for (BasicBlock *NewBB2Pred : NewBB2Preds)
+ NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
@@ -646,11 +574,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
}
-/// FoldReturnIntoUncondBranch - This method duplicates the specified return
-/// instruction into a predecessor which ends in an unconditional branch. If
-/// the return instruction returns a value defined by a PHI, propagate the
-/// right value into the return. It returns the new return instruction in the
-/// predecessor.
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred) {
Instruction *UncondBranch = Pred->getTerminator();
@@ -689,31 +612,10 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
-/// SplitBlockAndInsertIfThen - Split the containing block at the
-/// specified instruction - everything before and including SplitBefore stays
-/// in the old basic block, and everything after SplitBefore is moved to a
-/// new block. The two blocks are connected by a conditional branch
-/// (with value of Cmp being the condition).
-/// Before:
-/// Head
-/// SplitBefore
-/// Tail
-/// After:
-/// Head
-/// if (Cond)
-/// ThenBlock
-/// SplitBefore
-/// Tail
-///
-/// If Unreachable is true, then ThenBlock ends with
-/// UnreachableInst, otherwise it branches to Tail.
-/// Returns the NewBasicBlock's terminator.
-
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights,
- DominatorTree *DT) {
+TerminatorInst *
+llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
TerminatorInst *HeadOldTerm = Head->getTerminator();
@@ -735,7 +637,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
- for (auto Child : Children)
+ for (DomTreeNode *Child : Children)
DT->changeImmediateDominator(Child, NewNode);
// Head dominates ThenBlock.
@@ -743,23 +645,15 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
}
}
+ if (LI) {
+ Loop *L = LI->getLoopFor(Head);
+ L->addBasicBlockToLoop(ThenBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
+
return CheckTerm;
}
-/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
-/// but also creates the ElseBlock.
-/// Before:
-/// Head
-/// SplitBefore
-/// Tail
-/// After:
-/// Head
-/// if (Cond)
-/// ThenBlock
-/// else
-/// ElseBlock
-/// SplitBefore
-/// Tail
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
TerminatorInst **ThenTerm,
TerminatorInst **ElseTerm,
@@ -781,15 +675,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
}
-/// GetIfCondition - Given a basic block (BB) with two predecessors,
-/// check to see if the merge at this block is due
-/// to an "if condition". If so, return the boolean condition that determines
-/// which entry into BB will be taken. Also, return by references the block
-/// that will be entered from if the condition is true, and the block that will
-/// be entered if the condition is false.
-///
-/// This does no checking to see if the true/false blocks have large or unsavory
-/// instructions in them.
Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
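[Editor's note on the SplitBlockAndInsertIfThen hunk above: the function gains a LoopInfo parameter and registers both new blocks with the enclosing loop. A hypothetical call site under the new signature; Cond, InsertPt, DT and LI are assumed to exist in the surrounding pass:]

    // Split before InsertPt; the then-block branches back to the tail.
    // Passing LI keeps LoopInfo valid when InsertPt sits inside a loop.
    TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(
        Cond, InsertPt, /*Unreachable=*/false,
        /*BranchWeights=*/nullptr, DT, LI);
    IRBuilder<> Builder(ThenTerm); // emit the guarded code ahead of ThenTerm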
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 95825991cee96..49b646a041f50 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -76,11 +76,10 @@ FunctionPass *llvm::createBreakCriticalEdgesPass() {
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
-/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
-/// may require new PHIs in the new exit block. This function inserts the
-/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
-/// is the new loop exit block, and DestBB is the old loop exit, now the
-/// successor of SplitBB.
+/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
+/// exit block. This function inserts the new PHIs, as needed. Preds is a list
+/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
+/// the old loop exit, now the successor of SplitBB.
static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
BasicBlock *SplitBB,
BasicBlock *DestBB) {
@@ -112,25 +111,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
}
-/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge. This will update DominatorTree information if it
-/// is available, thus calling this pass will not invalidate either of them.
-/// This returns the new block if the edge was split, null otherwise.
-///
-/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
-/// specified successor will be merged into the same critical edge block.
-/// This is most commonly interesting with switch instructions, which may
-/// have many edges to any one destination. This ensures that all edges to that
-/// dest go to one block instead of each going to a different block, but isn't
-/// the standard definition of a "critical edge".
-///
-/// It is invalid to call this function on a critical edge that starts at an
-/// IndirectBrInst. Splitting these edges will almost always create an invalid
-/// program because the address of the new block won't be the one that is jumped
-/// to.
-///
-BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- const CriticalEdgeSplittingOptions &Options) {
+BasicBlock *
+llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
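[Editor's note: the doc comment removed above describes the MergeIdenticalEdges behavior, which routes all of a terminator's edges to one destination through a single new block. A hypothetical caller of the re-wrapped SplitCriticalEdge; TI, DT and LI are assumed to come from the surrounding pass:]

    // Split any critical edges out of TI, merging duplicate edges to the
    // same destination. SplitCriticalEdge returns the inserted block, or
    // null when the edge is not critical.
    CriticalEdgeSplittingOptions Options(DT, LI);
    Options.setMergeIdenticalEdges();
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      SplitCriticalEdge(TI, i, Options);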
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 64b44a6b79194..f4260a9ff9804 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -25,81 +26,742 @@
using namespace llvm;
-/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
-Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+#define DEBUG_TYPE "build-libcalls"
+
+//- Infer Attributes ---------------------------------------------------------//
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setOnlyAccessesArgMemory(Function &F) {
+ if (F.onlyAccessesArgMemory())
+ return false;
+ F.setOnlyAccessesArgMemory();
+ ++NumArgMemOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned n) {
+ if (F.doesNotCapture(n))
+ return false;
+ F.setDoesNotCapture(n);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+ if (F.onlyReadsMemory(n))
+ return false;
+ F.setOnlyReadsMemory(n);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned n) {
+ if (F.doesNotAlias(n))
+ return false;
+ F.setDoesNotAlias(n);
+ ++NumNoAlias;
+ return true;
+}
+
+static bool setNonNull(Function &F, unsigned n) {
+ assert((n != AttributeSet::ReturnIndex ||
+ F.getReturnType()->isPointerTy()) &&
+ "nonnull applies only to pointers");
+ if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
+ return false;
+ F.addAttribute(n, Attribute::NonNull);
+ ++NumNonNull;
+ return true;
+}
+
+bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
+ LibFunc::Func TheLibFunc;
+ if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strxfrm:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strcmp: // 0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: // 0,1
+ case LibFunc::strcoll: // 0,1
+ case LibFunc::strcasecmp: // 0,1
+ case LibFunc::strncasecmp: // 0,1
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::scanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat:
+ case LibFunc::statvfs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::sscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::sprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::snprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::setitimer:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::system:
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::malloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::memcmp:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::memcpy_chk:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::memalign:
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::mkdir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::mktime:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::realloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::read:
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::rewind:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::rmdir:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::rename:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::readlink:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::write:
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::bcopy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::bcmp:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::bzero:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::calloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::access:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fopen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fdopen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::ferror:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::fgets:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc::fread:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fwrite:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ // FIXME: readonly #1?
+ return Changed;
+ case LibFunc::fputs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fgetpos:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::getenv:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::getitimer:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getpwnam:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ungetc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::uname:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::unlink:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::unsetenv:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::pread:
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::pwrite:
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::popen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::pclose:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::vscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vsscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vfscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::valloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::vprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vsnprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::open:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::opendir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::tmpfile:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::times:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc::lstat:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::lchown:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::qsort:
+ // May throw; places call through function pointer.
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::under_IO_getc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::under_IO_putc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::dunder_isoc99_scanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_isoc99_sscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fopen64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::tmpfile64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::open64:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::gettimeofday:
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::Znwj: // new(unsigned int)
+ case LibFunc::Znwm: // new(unsigned long)
+ case LibFunc::Znaj: // new[](unsigned int)
+ case LibFunc::Znam: // new[](unsigned long)
+ case LibFunc::msvc_new_int: // new(unsigned int)
+ case LibFunc::msvc_new_longlong: // new(unsigned long long)
+ case LibFunc::msvc_new_array_int: // new[](unsigned int)
+ case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+ // Operator new always returns a nonnull noalias pointer
+ Changed |= setNonNull(F, AttributeSet::ReturnIndex);
+ Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+ return Changed;
+ // TODO: add LibFunc entries for:
+ //case LibFunc::memset_pattern4:
+ //case LibFunc::memset_pattern8:
+ case LibFunc::memset_pattern16:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ // int __nvvm_reflect(const char *)
+ case LibFunc::nvvm_reflect:
+ Changed |= setDoesNotAccessMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
+
+//- Emit LibCalls ------------------------------------------------------------//
+
+Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
unsigned AS = V->getType()->getPointerAddressSpace();
return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
-/// EmitStrLen - Emit a call to the strlen function to the builder, for the
-/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+ Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction(
- "strlen", AttributeSet::get(M->getContext(), AS),
- DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr);
- CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), nullptr);
+ inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
+ CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitStrChr - Emit a call to the strchr function to the builder, for the
-/// specified pointer and character. Ptr is required to be some pointer type,
-/// and the return value has 'i8*' type.
-Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AttributeSet AS =
- AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+ Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
- Constant *StrChr = M->getOrInsertFunction("strchr",
- AttributeSet::get(M->getContext(),
- AS),
- I8Ptr, I8Ptr, I32Ty, nullptr);
+ Constant *StrChr =
+ M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr);
+ inferLibFuncAttributes(*M->getFunction("strchr"), *TLI);
CallInst *CI = B.CreateCall(
- StrChr, {CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
+ StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[3];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+ Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction(
- "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), nullptr);
+ inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI);
CallInst *CI = B.CreateCall(
- StrNCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "strncmp");
+ StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp");
if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -107,64 +769,46 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
return CI;
}
-/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
-/// specified pointer arguments.
-Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name,
- AttributeSet::get(M->getContext(), AS),
- I8Ptr, I8Ptr, I8Ptr, nullptr);
+ Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr);
+ inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI =
- B.CreateCall(StrCpy, {CastToCStr(Dst, B), CastToCStr(Src, B)}, Name);
+ B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
-/// specified pointer arguments.
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction(Name,
- AttributeSet::get(M->getContext(),
- AS),
- I8Ptr, I8Ptr, I8Ptr,
+ Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
Len->getType(), nullptr);
+ inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI = B.CreateCall(
- StrNCpy, {CastToCStr(Dst, B), CastToCStr(Src, B), Len}, "strncpy");
+ StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy");
if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
-/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
-/// are pointers.
-Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Module *M = B.GetInsertBlock()->getModule();
AttributeSet AS;
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
Attribute::NoUnwind);
@@ -173,30 +817,26 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
"__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
DL.getIntPtrType(Context), nullptr);
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
+ Dst = castToCStr(Dst, B);
+ Src = castToCStr(Src, B);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
-/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS;
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
+ Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction(
- "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall(MemChr, {CastToCStr(Ptr, B), Val, Len}, "memchr");
+ Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(),
+ DL.getIntPtrType(Context), nullptr);
+ inferLibFuncAttributes(*M->getFunction("memchr"), *TLI);
+ CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr");
if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -204,25 +844,19 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
return CI;
}
-/// EmitMemCmp - Emit a call to the memcmp function.
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[3];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+ Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction(
- "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), nullptr);
+ inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI);
CallInst *CI = B.CreateCall(
- MemCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "memcmp");
+ MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp");
if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -231,7 +865,8 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
}
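
The hunks above all apply the same recipe: fetch the module via GetInsertBlock()->getModule(), declare the callee with a plain getOrInsertFunction (no hand-rolled AttributeSet arrays), and let inferLibFuncAttributes() attach the nounwind/nocapture/readonly facts derived from TargetLibraryInfo. A minimal sketch of that recipe, assuming this era's C++ API; emitStrLenLike is a hypothetical name used only to show the shape:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    static Value *emitStrLenLike(Value *Ptr, IRBuilder<> &B,
                                 const DataLayout &DL,
                                 const TargetLibraryInfo *TLI) {
      // Bail out if the target has no strlen at all.
      if (!TLI->has(LibFunc::strlen))
        return nullptr;
      Module *M = B.GetInsertBlock()->getModule();
      LLVMContext &Ctx = B.GetInsertBlock()->getContext();
      // Declare the callee without an explicit attribute set...
      Value *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Ctx),
                                             B.getInt8PtrTy(), nullptr);
      // ...and let the shared inference attach the attributes.
      inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
      CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
      if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
        CI->setCallingConv(F->getCallingConv());
      return CI;
    }
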
/// Append a suffix to the function name according to the type of 'Op'.
-static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) {
+static void appendTypeSuffix(Value *Op, StringRef &Name,
+ SmallString<20> &NameBuffer) {
if (!Op->getType()->isDoubleTy()) {
NameBuffer += Name;
@@ -242,19 +877,14 @@ static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBu
Name = NameBuffer;
}
- return;
}
-/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
-/// 'floor'). This function is known to take a single of type matching 'Op' and
-/// returns one value with the same type. If 'Op' is a long double, 'l' is
-/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttributeSet &Attrs) {
SmallString<20> NameBuffer;
- AppendTypeSuffix(Op, Name, NameBuffer);
+ appendTypeSuffix(Op, Name, NameBuffer);
- Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), nullptr);
CallInst *CI = B.CreateCall(Callee, Op, Name);
@@ -265,19 +895,14 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
-/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name'
-/// (e.g. 'fmin'). This function is known to take type matching 'Op1' and 'Op2'
-/// and return one value with the same type. If 'Op1/Op2' are long double, 'l'
-/// is added as the suffix of name, if 'Op1/Op2' is a float, we add a 'f'
-/// suffix.
-Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
IRBuilder<> &B, const AttributeSet &Attrs) {
SmallString<20> NameBuffer;
- AppendTypeSuffix(Op1, Name, NameBuffer);
+ appendTypeSuffix(Op1, Name, NameBuffer);
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
- Op1->getType(), Op2->getType(), nullptr);
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
+ Op2->getType(), nullptr);
CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -286,14 +911,12 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
return CI;
}
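
Both float emitters delegate naming to appendTypeSuffix: a double operand keeps the plain libm name, a float gets an 'f' suffix, and anything else (in practice long double) gets 'l'. The rule restated as a self-contained sketch, assuming only those three FP types reach it:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Mirrors appendTypeSuffix(): "floor" stays "floor" for double,
    // becomes "floorf" for float, and "floorl" otherwise.
    static StringRef suffixedLibmName(Type *Ty, StringRef Base,
                                      SmallString<20> &Buf) {
      if (Ty->isDoubleTy())
        return Base;
      Buf += Base;
      Buf += Ty->isFloatTy() ? 'f' : 'l';
      return Buf.str();
    }
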
-/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
-/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B,
+Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Module *M = B.GetInsertBlock()->getModule();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
B.getInt32Ty(), nullptr);
CallInst *CI = B.CreateCall(PutChar,
@@ -308,54 +931,31 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B,
return CI;
}
-/// EmitPutS - Emit a call to the puts function. This assumes that Str is
-/// some pointer.
-Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B,
+Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
-
- Value *PutS = M->getOrInsertFunction("puts",
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- nullptr);
- CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *PutS =
+ M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr);
+ inferLibFuncAttributes(*M->getFunction("puts"), *TLI);
+ CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
-/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
-/// an integer and File is a pointer to FILE.
-Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[2];
- AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- Constant *F;
+ Module *M = B.GetInsertBlock()->getModule();
+ Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(),
+ File->getType(), nullptr);
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputc",
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt32Ty(), File->getType(),
- nullptr);
- else
- F = M->getOrInsertFunction("fputc",
- B.getInt32Ty(),
- B.getInt32Ty(),
- File->getType(), nullptr);
+ inferLibFuncAttributes(*M->getFunction("fputc"), *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
CallInst *CI = B.CreateCall(F, {Char, File}, "fputc");
@@ -365,66 +965,40 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
return CI;
}
-/// EmitFPutS - Emit a call to the puts function. Str is required to be a
-/// pointer and File is a pointer to FILE.
-Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[3];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsName = TLI->getName(LibFunc::fputs);
- Constant *F;
+ Constant *F = M->getOrInsertFunction(
+ FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr);
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FPutsName,
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- File->getType(), nullptr);
- else
- F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
- B.getInt8PtrTy(),
- File->getType(), nullptr);
- CallInst *CI = B.CreateCall(F, {CastToCStr(Str, B), File}, "fputs");
+ inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
-/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
-/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
return nullptr;
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeSet AS[3];
- AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
- AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture);
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
- Constant *F;
+ Constant *F = M->getOrInsertFunction(
+ FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(),
+ nullptr);
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(
- FWriteName, AttributeSet::get(M->getContext(), AS),
- DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
- DL.getIntPtrType(Context), File->getType(), nullptr);
- else
- F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context),
- B.getInt8PtrTy(), DL.getIntPtrType(Context),
- DL.getIntPtrType(Context), File->getType(),
- nullptr);
+ inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI);
CallInst *CI =
- B.CreateCall(F, {CastToCStr(Ptr, B), Size,
+ B.CreateCall(F, {castToCStr(Ptr, B), Size,
ConstantInt::get(DL.getIntPtrType(Context), 1), File});
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
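
A hypothetical caller of the renamed emitter, of the kind SimplifyLibCalls produces when it lowers a constant-string printf to fwrite; the names and the lowering itself are illustrative, and a null return simply propagates emitFWrite's "no fwrite on this target" result:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    static Value *lowerToFWrite(StringRef Str, Value *File, IRBuilder<> &B,
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI) {
      // fwrite(str, size, 1, file): Size and the count have intptr_t type.
      Value *Ptr = B.CreateGlobalStringPtr(Str, "str");
      LLVMContext &Ctx = B.GetInsertBlock()->getContext();
      Value *Size = ConstantInt::get(DL.getIntPtrType(Ctx), Str.size());
      return emitFWrite(Ptr, Size, File, B, DL, TLI);
    }
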
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 8308a9b69149d..5aec0dce34db8 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -11,7 +11,9 @@ add_llvm_library(LLVMTransformUtils
CodeExtractor.cpp
CtorUtils.cpp
DemoteRegToStack.cpp
+ Evaluator.cpp
FlattenCFG.cpp
+ FunctionImportUtils.cpp
GlobalStatus.cpp
InlineFunction.cpp
InstructionNamer.cpp
@@ -26,10 +28,13 @@ add_llvm_library(LLVMTransformUtils
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
+ MemorySSA.cpp
MetaRenamer.cpp
ModuleUtils.cpp
+ NameAnonFunctions.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
+ SanitizerStats.cpp
SimplifyCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 6454afb8bc42d..c5ca56360fc88 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -119,6 +119,15 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
.addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
OldAttrs.getFnAttributes()));
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ OldFunc->getAllMetadata(MDs);
+ for (auto MD : MDs)
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
@@ -163,65 +172,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
-// Find the MDNode which corresponds to the subprogram data that described F.
-static DISubprogram *FindSubprogram(const Function *F,
- DebugInfoFinder &Finder) {
- for (DISubprogram *Subprogram : Finder.subprograms()) {
- if (Subprogram->describes(F))
- return Subprogram;
- }
- return nullptr;
-}
-
-// Add an operand to an existing MDNode. The new operand will be added at the
-// back of the operand list.
-static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs,
- Metadata *NewSP) {
- SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs.size() + 1);
- for (auto *SP : SPs)
- NewSPs.push_back(SP);
- NewSPs.push_back(NewSP);
- CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
-}
-
-// Clone the module-level debug info associated with OldFunc. The cloned data
-// will point to NewFunc instead.
-static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
- ValueToValueMapTy &VMap) {
- DebugInfoFinder Finder;
- Finder.processModule(*OldFunc->getParent());
-
- const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
- if (!OldSubprogramMDNode) return;
-
- auto *NewSubprogram =
- cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
- NewFunc->setSubprogram(NewSubprogram);
-
- for (auto *CU : Finder.compile_units()) {
- auto Subprograms = CU->getSubprograms();
- // If the compile unit's function list contains the old function, it should
- // also contain the new one.
- for (auto *SP : Subprograms) {
- if (SP == OldSubprogramMDNode) {
- AddOperand(CU, Subprograms, NewSubprogram);
- break;
- }
- }
- }
-}
-
-/// Return a copy of the specified function, but without
-/// embedding the function into another module. Also, any references specified
-/// in the VMap are changed to refer to their mapped value instead of the
-/// original one. If any of the arguments to the function are in the VMap,
-/// the arguments are deleted from the resultant function. The VMap is
-/// updated to include mappings from all of the instructions and basicblocks in
-/// the function from their old to new values.
+/// Return a copy of the specified function and add it to that function's
+/// module. Also, any references specified in the VMap are changed to refer to
+/// their mapped value instead of the original one. If any of the arguments to
+/// the function are in the VMap, the arguments are deleted from the resultant
+/// function. The VMap is updated to include mappings from all of the
+/// instructions and basicblocks in the function from their old to new values.
///
-Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
+Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
std::vector<Type*> ArgTypes;
@@ -237,7 +195,8 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
- Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName());
+ Function *NewF =
+ Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -247,11 +206,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
VMap[&I] = &*DestI++; // Add mapping to VMap
}
- if (ModuleLevelChanges)
- CloneDebugInfoMetadata(NewF, F, VMap);
-
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "",
+ CodeInfo);
+
return NewF;
}
@@ -338,9 +296,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (Value *MappedV = VMap.lookup(V))
V = MappedV;
- VMap[&*II] = V;
- delete NewInst;
- continue;
+ if (!NewInst->mayHaveSideEffects()) {
+ VMap[&*II] = V;
+ delete NewInst;
+ continue;
+ }
}
}
@@ -372,7 +332,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
if (!Cond) {
- Value *V = VMap[BI->getCondition()];
+ Value *V = VMap.lookup(BI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
@@ -388,7 +348,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
if (!Cond) { // Or known constant after constant prop in the callee...
- Value *V = VMap[SI->getCondition()];
+ Value *V = VMap.lookup(SI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
@@ -475,7 +435,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Defer PHI resolution until rest of function is resolved.
SmallVector<const PHINode*, 16> PHIToResolve;
for (const BasicBlock &BI : *OldFunc) {
- Value *V = VMap[&BI];
+ Value *V = VMap.lookup(&BI);
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (!NewBB) continue; // Dead block.
@@ -519,7 +479,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
- Value *V = VMap[PN->getIncomingBlock(pred)];
+ Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
VMap,
@@ -529,7 +489,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
PN->setIncomingBlock(pred, MappedBlock);
} else {
PN->removeIncomingValue(pred, false);
- --pred, --e; // Revisit the next entry.
+ --pred; // Revisit the next entry.
+ --e;
}
}
}
@@ -558,10 +519,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// entries.
BasicBlock::iterator I = NewBB->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
- E = PredCount.end(); PCI != E; ++PCI) {
- BasicBlock *Pred = PCI->first;
- for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ for (const auto &PCI : PredCount) {
+ BasicBlock *Pred = PCI.first;
+ for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
PN->removeIncomingValue(Pred, false);
}
}
@@ -684,7 +644,7 @@ void llvm::remapInstructionsInBlocks(
for (auto *BB : Blocks)
for (auto &Inst : *BB)
RemapInstruction(&Inst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
@@ -697,6 +657,8 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
+ assert(OrigLoop->getSubLoops().empty() &&
+ "Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
@@ -727,13 +689,19 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
- // Update DominatorTree.
- BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
- DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+ // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+ DT->addNewBlock(NewBB, NewPH);
Blocks.push_back(NewBB);
}
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
+ cast<BasicBlock>(VMap[IDomBB]));
+ }
+
// Move them physically from the end of the block list.
F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
NewPH);
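
Two independent changes land in this file: CloneFunction loses its ModuleLevelChanges flag and now inserts the clone into F's own module, and cloneLoopWithPreheader builds the dominator tree in two passes (add every new block with the preheader as a placeholder IDom, then rewrite each IDom once all blocks exist, since a block's IDom may be cloned after the block itself). A minimal sketch of the new CloneFunction call site, assuming the header keeps CodeInfo defaulted to nullptr:

    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static Function *cloneForSpecialization(Function &F) {
      ValueToValueMapTy VMap;
      // The clone is created directly in F's module; intra-function
      // references are remapped through VMap.
      Function *Clone = CloneFunction(&F, VMap);
      Clone->setName(F.getName() + ".spec"); // hypothetical suffix
      return Clone;
    }
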
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index ab083353ece6b..17e34c4ffa0f5 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::CloneModule(const Module *M,
std::unique_ptr<Module> llvm::CloneModule(
const Module *M, ValueToValueMapTy &VMap,
- std::function<bool(const GlobalValue *)> ShouldCloneDefinition) {
+ function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
std::unique_ptr<Module> New =
llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
@@ -53,7 +53,7 @@ std::unique_ptr<Module> llvm::CloneModule(
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
GlobalVariable *GV = new GlobalVariable(*New,
- I->getType()->getElementType(),
+ I->getValueType(),
I->isConstant(), I->getLinkage(),
(Constant*) nullptr, I->getName(),
(GlobalVariable*) nullptr,
@@ -64,12 +64,11 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// Loop over the functions in the module, making external functions as before
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *NF =
- Function::Create(cast<FunctionType>(I->getType()->getElementType()),
- I->getLinkage(), I->getName(), New.get());
- NF->copyAttributesFrom(&*I);
- VMap[&*I] = NF;
+ for (const Function &I : *M) {
+ Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
+ I.getLinkage(), I.getName(), New.get());
+ NF->copyAttributesFrom(&I);
+ VMap[&I] = NF;
}
// Loop over the aliases in the module
@@ -109,6 +108,9 @@ std::unique_ptr<Module> llvm::CloneModule(
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
+ if (I->isDeclaration())
+ continue;
+
GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
if (!ShouldCloneDefinition(&*I)) {
// Skip after setting the correct linkage for an external reference.
@@ -121,27 +123,31 @@ std::unique_ptr<Module> llvm::CloneModule(
// Similarly, copy over function bodies now...
//
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *F = cast<Function>(VMap[&*I]);
- if (!ShouldCloneDefinition(&*I)) {
+ for (const Function &I : *M) {
+ if (I.isDeclaration())
+ continue;
+
+ Function *F = cast<Function>(VMap[&I]);
+ if (!ShouldCloneDefinition(&I)) {
// Skip after setting the correct linkage for an external reference.
F->setLinkage(GlobalValue::ExternalLinkage);
+ // Personality function is not valid on a declaration.
+ F->setPersonalityFn(nullptr);
continue;
}
- if (!I->isDeclaration()) {
- Function::arg_iterator DestI = F->arg_begin();
- for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
- ++J) {
- DestI->setName(J->getName());
- VMap[&*J] = &*DestI++;
- }
-
- SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns);
+
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[&*J] = &*DestI++;
}
- if (I->hasPersonalityFn())
- F->setPersonalityFn(MapValue(I->getPersonalityFn(), VMap));
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns);
+
+ if (I.hasPersonalityFn())
+ F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
}
// And aliases
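
Switching the predicate to function_ref lets callers hand CloneModule a bare lambda with no std::function allocation, at the usual cost that the callee must not retain it past the call. A sketch with a purely illustrative name filter:

    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Clone only definitions whose name carries the (hypothetical)
    // "keep_" prefix; everything else becomes an external declaration,
    // with personality functions cleared as the new code requires.
    static std::unique_ptr<Module> cloneSelected(const Module &M) {
      ValueToValueMapTy VMap;
      return CloneModule(&M, VMap, [](const GlobalValue *GV) {
        return GV->getName().startswith("keep_");
      });
    }
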
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 823696d88e652..9f2181f87cee1 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -77,15 +77,15 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
// Loop over the blocks, adding them to our set-vector, and aborting with an
// empty set if we encounter invalid blocks.
- for (IteratorT I = BBBegin, E = BBEnd; I != E; ++I) {
- if (!Result.insert(*I))
+ do {
+ if (!Result.insert(*BBBegin))
llvm_unreachable("Repeated basic blocks in extraction input");
- if (!isBlockValidForExtraction(**I)) {
+ if (!isBlockValidForExtraction(**BBBegin)) {
Result.clear();
return Result;
}
- }
+ } while (++BBBegin != BBEnd);
#ifndef NDEBUG
for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
@@ -159,23 +159,18 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
ValueSet &Outputs) const {
- for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(),
- E = Blocks.end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
+ for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
- for (User::op_iterator OI = II->op_begin(), OE = II->op_end();
- OI != OE; ++OI)
+ for (Instruction &II : *BB) {
+ for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
+ ++OI)
if (definedInCaller(Blocks, *OI))
Inputs.insert(*OI);
- for (User *U : II->users())
+ for (User *U : II.users())
if (!definedInRegion(Blocks, U)) {
- Outputs.insert(&*II);
+ Outputs.insert(&II);
break;
}
}
@@ -263,25 +258,21 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
}
void CodeExtractor::splitReturnBlocks() {
- for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I)
- if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+ for (BasicBlock *Block : Blocks)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
BasicBlock *New =
- (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret");
+ Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
if (DT) {
// Old dominates New. New node dominates all other nodes dominated
// by Old.
- DomTreeNode *OldNode = DT->getNode(*I);
- SmallVector<DomTreeNode*, 8> Children;
- for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
- DI != DE; ++DI)
- Children.push_back(*DI);
+ DomTreeNode *OldNode = DT->getNode(Block);
+ SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
+ OldNode->end());
- DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+ DomTreeNode *NewNode = DT->addNewBlock(New, Block);
- for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
- DT->changeImmediateDominator(*I, NewNode);
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
}
}
}
@@ -310,28 +301,26 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
std::vector<Type*> paramTy;
// Add the types of the input values to the function's argument list
- for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end();
- i != e; ++i) {
- const Value *value = *i;
+ for (Value *value : inputs) {
DEBUG(dbgs() << "value used in func: " << *value << "\n");
paramTy.push_back(value->getType());
}
// Add the types of the output values to the function's argument list.
- for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end();
- I != E; ++I) {
- DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+ for (Value *output : outputs) {
+ DEBUG(dbgs() << "instr used in func: " << *output << "\n");
if (AggregateArgs)
- paramTy.push_back((*I)->getType());
+ paramTy.push_back(output->getType());
else
- paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+ paramTy.push_back(PointerType::getUnqual(output->getType()));
}
- DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
- for (std::vector<Type*>::iterator i = paramTy.begin(),
- e = paramTy.end(); i != e; ++i)
- DEBUG(dbgs() << **i << ", ");
- DEBUG(dbgs() << ")\n");
+ DEBUG({
+ dbgs() << "Function type: " << *RetTy << " f(";
+ for (Type *i : paramTy)
+ dbgs() << *i << ", ";
+ dbgs() << ")\n";
+ });
StructType *StructTy;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
@@ -372,9 +361,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
RewriteVal = &*AI++;
std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
- for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
- use != useE; ++use)
- if (Instruction* inst = dyn_cast<Instruction>(*use))
+ for (User *use : Users)
+ if (Instruction *inst = dyn_cast<Instruction>(use))
if (Blocks.count(inst->getParent()))
inst->replaceUsesOfWith(inputs[i], RewriteVal);
}
@@ -429,19 +417,19 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
LLVMContext &Context = newFunction->getContext();
// Add inputs as params, or to be filled into the struct
- for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+ for (Value *input : inputs)
if (AggregateArgs)
- StructValues.push_back(*i);
+ StructValues.push_back(input);
else
- params.push_back(*i);
+ params.push_back(input);
// Create allocas for the outputs
- for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+ for (Value *output : outputs) {
if (AggregateArgs) {
- StructValues.push_back(*i);
+ StructValues.push_back(output);
} else {
AllocaInst *alloca =
- new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc",
+ new AllocaInst(output->getType(), nullptr, output->getName() + ".loc",
&codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
@@ -522,9 +510,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
unsigned switchVal = 0;
- for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(),
- e = Blocks.end(); i != e; ++i) {
- TerminatorInst *TI = (*i)->getTerminator();
+ for (BasicBlock *Block : Blocks) {
+ TerminatorInst *TI = Block->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!Blocks.count(TI->getSuccessor(i))) {
BasicBlock *OldTarget = TI->getSuccessor(i);
@@ -576,10 +563,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Make sure we are looking at the original successor block, not
// at a newly inserted exit block, which won't be in the dominator
// info.
- for (std::map<BasicBlock*, BasicBlock*>::iterator I =
- ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
- if (DefBlock == I->second) {
- DefBlock = I->first;
+ for (const auto &I : ExitBlockMap)
+ if (DefBlock == I.second) {
+ DefBlock = I.first;
break;
}
@@ -677,13 +663,12 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
- for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(),
- e = Blocks.end(); i != e; ++i) {
+ for (BasicBlock *Block : Blocks) {
// Delete the basic block from the old function, and the list of blocks
- oldBlocks.remove(*i);
+ oldBlocks.remove(Block);
// Insert this basic block into the new function
- newBlocks.push_back(*i);
+ newBlocks.push_back(Block);
}
}
@@ -721,9 +706,9 @@ Function *CodeExtractor::extractCodeRegion() {
findInputsOutputs(inputs, outputs);
SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I)
- for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ for (BasicBlock *Block : Blocks)
+ for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
+ ++SI)
if (!Blocks.count(*SI))
ExitBlocks.insert(*SI);
NumExitBlocks = ExitBlocks.size();
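
Beyond the range-for modernization, the extraction pipeline is unchanged: validate and collect the blocks, classify values as inputs or outputs by where their defs and uses sit relative to the region, then build and call the outlined function. A hedged usage sketch, assuming the constructor shape of this era (blocks plus an optional DominatorTree):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/CodeExtractor.h"
    using namespace llvm;

    // Outline a single-entry region into its own function;
    // extractCodeRegion() yields null when the blocks are not extractable.
    static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                                   DominatorTree &DT) {
      CodeExtractor CE(Region, &DT);
      return CE.extractCodeRegion();
    }
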
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
new file mode 100644
index 0000000000000..cd130abf45192
--- /dev/null
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -0,0 +1,596 @@
+//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Function evaluator for LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "evaluator"
+
+using namespace llvm;
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
+
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // Simple global addresses are supported, do not allow dllimport or
+ // thread-local globals.
+ if (auto *GV = dyn_cast<GlobalValue>(C))
+ return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
+
+ // Simple integer, undef, constant aggregate zero, etc are all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantAggregate>(C)) {
+ for (Value *Op : C->operands())
+ if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
+ return false;
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C).second)
+ return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
+}
+
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast to anything other than from one pointer type to
+/// another pointer type, we punt. We basically just support direct accesses to
+/// globals and GEP's of globals. This should be kept up to date with
+/// CommitValueTo.
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce linkage or external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'MutatedMemory' have been performed. If we can't decide, return null.
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return nullptr;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return nullptr; // don't know how to evaluate.
+}
+
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NextBB returns the next BB that control flows into,
+/// or null upon return.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = nullptr;
+
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
+ // If this is too complex for us to commit, reject it.
+ DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
+ return false;
+ }
+
+ Constant *Val = getVal(SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
+ return false;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
+ }
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(CI->getOperand(0)),
+ CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getExtractValue(
+ getVal(EVI->getAggregateOperand()), EVI->getIndices());
+ DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getInsertValue(
+ getVal(IVI->getAggregateOperand()),
+ getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
+ DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(*i));
+ InstResult =
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
+ InstResult = ComputeLoadResult(Ptr);
+ if (!InstResult) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
+ Type *Ty = AI->getAllocatedType();
+ AllocaTmps.push_back(
+ make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+ UndefValue::get(Ty), AI->getName()));
+ InstResult = AllocaTmps.back().get();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(&*CurInst);
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty()) {
+ DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
+ return false;
+ }
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = GV->getValueType();
+ if (!Size->isAllOnesValue() &&
+ Size->getValue().getLimitedValue() >=
+ DL.getTypeStoreSize(ElemTy)) {
+ Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
+ }
+ // Continue even if we do nothing.
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->isInterposable()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
+
+ SmallVector<Constant*, 8> Formals;
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
+ InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
+ } else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
+ return false;
+ }
+
+ Constant *RetVal = nullptr;
+ // Execute the call, if successful, use the return value.
+ ValueStack.emplace_back();
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ ValueStack.pop_back();
+ InstResult = RetVal;
+
+ if (InstResult) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: "
+ << *InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NextBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NextBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = nullptr;
+ } else {
+ // invoke, unwind, resume, unreachable.
+ DEBUG(dbgs() << "Can not handle terminator.");
+ return false; // Cannot handle this terminator.
+ }
+
+ // We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
+ return true;
+ } else {
+ // Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
+
+ setVal(&*CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ return true;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the actual argument values for the
+/// call.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(&*AI, ActualArgs[ArgNo]);
+
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = &F->front();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (!NextBB) {
+ // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB).second)
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = nullptr;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
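
The evaluator above is GlobalOpt's constructor-evaluation engine split out for reuse: it interprets straight-line, non-looping IR over constants, queueing stores in MutatedMemory instead of touching the module. A minimal sketch of a caller, assuming Evaluator.h exposes the constructor and EvaluateFunction signature used here:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/Evaluator.h"
    using namespace llvm;

    static bool tryEvaluate(Function *F, const DataLayout &DL,
                            const TargetLibraryInfo *TLI) {
      Evaluator Eval(DL, TLI);
      Constant *RetVal = nullptr;
      SmallVector<Constant *, 0> Args; // no-argument function
      // True iff every executed instruction folded; on success the
      // pending stores remain queued in the evaluator's state.
      return Eval.EvaluateFunction(F, RetVal, Args);
    }
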
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
new file mode 100644
index 0000000000000..fcb25baf32167
--- /dev/null
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -0,0 +1,243 @@
+//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionImportGlobalProcessing class, used
+// to perform the necessary global value handling for function importing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+using namespace llvm;
+
+/// Checks if we should import SGV as a definition, otherwise import as a
+/// declaration.
+bool FunctionImportGlobalProcessing::doImportAsDefinition(
+ const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) {
+
+  // For aliases, we tie the definition to the base object. Extract it and recurse.
+ if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
+ if (GA->hasWeakAnyLinkage())
+ return false;
+ const GlobalObject *GO = GA->getBaseObject();
+ if (!GO->hasLinkOnceODRLinkage())
+ return false;
+ return FunctionImportGlobalProcessing::doImportAsDefinition(
+ GO, GlobalsToImport);
+ }
+ // Only import the globals requested for importing.
+ if (GlobalsToImport->count(SGV))
+ return true;
+ // Otherwise no.
+ return false;
+}
+
+bool FunctionImportGlobalProcessing::doImportAsDefinition(
+ const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
+ GlobalsToImport);
+}
+
+bool FunctionImportGlobalProcessing::doPromoteLocalToGlobal(
+ const GlobalValue *SGV) {
+ assert(SGV->hasLocalLinkage());
+ // Both the imported references and the original local variable must
+ // be promoted.
+ if (!isPerformingImport() && !isModuleExporting())
+ return false;
+
+ // Local const variables never need to be promoted unless they are address
+ // taken. The imported uses can simply use the clone created in this module.
+ // For now we are conservative in determining which variables are not
+ // address taken by checking the unnamed addr flag. To be more aggressive,
+ // the address taken information must be checked earlier during parsing
+ // of the module and recorded in the summary index for use when importing
+ // from that module.
+ auto *GVar = dyn_cast<GlobalVariable>(SGV);
+ if (GVar && GVar->isConstant() && GVar->hasGlobalUnnamedAddr())
+ return false;
+
+ if (GVar && GVar->hasSection())
+ // Some sections like "__DATA,__cfstring" are "magic" and promotion is not
+ // allowed. Just disable promotion on any GVar with sections right now.
+ return false;
+
+ // Eventually we only need to promote functions in the exporting module that
+ // are referenced by a potentially exported function (i.e. one that is in the
+ // summary index).
+ return true;
+}
+
+std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV) {
+ // For locals that must be promoted to global scope, ensure that
+ // the promoted name uniquely identifies the copy in the original module,
+ // using the ID assigned during combined index creation. When importing,
+ // we rename all locals (not just those that are promoted) in order to
+ // avoid naming conflicts between locals imported from different modules.
+ if (SGV->hasLocalLinkage() &&
+ (doPromoteLocalToGlobal(SGV) || isPerformingImport()))
+ return ModuleSummaryIndex::getGlobalNameForLocal(
+ SGV->getName(),
+ ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
+ return SGV->getName();
+}
+
+GlobalValue::LinkageTypes
+FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV) {
+ // Any local variable that is referenced by an exported function needs
+ // to be promoted to global scope. Since we don't currently know which
+ // functions reference which local variables/functions, we must treat
+ // all as potentially exported if this module is exporting anything.
+ if (isModuleExporting()) {
+ if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV))
+ return GlobalValue::ExternalLinkage;
+ return SGV->getLinkage();
+ }
+
+ // Otherwise, if we aren't importing, no linkage change is needed.
+ if (!isPerformingImport())
+ return SGV->getLinkage();
+
+ switch (SGV->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ // External definitions are converted to available_externally
+ // definitions upon import, so that they are available for inlining
+ // and/or optimization, but are turned into declarations later
+ // during the EliminateAvailableExternally pass.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ // An imported external declaration stays external.
+ return SGV->getLinkage();
+
+ case GlobalValue::AvailableExternallyLinkage:
+ // An imported available_externally definition converts
+ // to external if imported as a declaration.
+ if (!doImportAsDefinition(SGV))
+ return GlobalValue::ExternalLinkage;
+ // An imported available_externally declaration stays that way.
+ return SGV->getLinkage();
+
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ // These both stay the same when importing the definition.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakAnyLinkage:
+ // Can't import weak_any definitions correctly, or we might change the
+ // program semantics, since the linker will pick the first weak_any
+ // definition and importing would change the order they are seen by the
+ // linker. The module linking caller needs to enforce this.
+ assert(!doImportAsDefinition(SGV));
+ // If imported as a declaration, it becomes external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakODRLinkage:
+ // For weak_odr linkage, there is a guarantee that all copies will be
+ // equivalent, so the issue described above for weak_any does not exist,
+ // and the definition can be imported. It can be treated similarly
+ // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+
+ case GlobalValue::AppendingLinkage:
+ // It would be incorrect to import an appending linkage variable,
+ // since it would cause global constructors/destructors to be
+ // executed multiple times. This should have already been handled
+ // by linkIfNeeded, and we will assert in shouldLinkFromSource
+ // if we try to import, so we simply return AppendingLinkage.
+ return GlobalValue::AppendingLinkage;
+
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ // If we are promoting the local to global scope, it is handled
+ // similarly to a normal externally visible global.
+ if (doPromoteLocalToGlobal(SGV)) {
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+ }
+ // A non-promoted imported local definition stays local.
+ // The ThinLTO pass will eventually force-import its definition.
+ return SGV->getLinkage();
+
+ case GlobalValue::ExternalWeakLinkage:
+ // External weak doesn't apply to definitions, must be a declaration.
+ assert(!doImportAsDefinition(SGV));
+ // Linkage stays external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::CommonLinkage:
+ // Linkage stays common on definitions.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+ }
+
+ llvm_unreachable("unknown linkage type");
+}
+
+void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
+ if (GV.hasLocalLinkage() &&
+ (doPromoteLocalToGlobal(&GV) || isPerformingImport())) {
+ GV.setName(getName(&GV));
+ GV.setLinkage(getLinkage(&GV));
+ if (!GV.hasLocalLinkage())
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ } else
+ GV.setLinkage(getLinkage(&GV));
+
+ // Remove functions imported as available_externally defs from comdats,
+ // as they are declarations for the linker and will be dropped eventually.
+ // It is illegal for comdats to contain declarations.
+ auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
+ // The IRMover should not have placed any imported declarations in
+ // a comdat, so the only declaration that should be in a comdat
+ // at this point would be a definition imported as available_externally.
+ assert(GO->hasAvailableExternallyLinkage() &&
+ "Expected comdat on definition (possibly available external)");
+ GO->setComdat(nullptr);
+ }
+}
+
+void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
+ if (!moduleCanBeRenamedForThinLTO(M)) {
+ // We would have blocked importing from this module by suppressing index
+ // generation. We still may be able to import into this module though.
+ assert(!isPerformingImport() &&
+ "Should have blocked importing from module with local used in ASM");
+ return;
+ }
+
+ for (GlobalVariable &GV : M.globals())
+ processGlobalForThinLTO(GV);
+ for (Function &SF : M)
+ processGlobalForThinLTO(SF);
+ for (GlobalAlias &GA : M.aliases())
+ processGlobalForThinLTO(GA);
+}
+
+bool FunctionImportGlobalProcessing::run() {
+ processGlobalsForThinLTO();
+ return false;
+}
+
+bool llvm::renameModuleForThinLTO(
+ Module &M, const ModuleSummaryIndex &Index,
+ DenseSet<const GlobalValue *> *GlobalsToImport) {
+ FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
+ return ThinLTOProcessing.run();
+}
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 3893a752503b6..266be41fbeadd 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -20,11 +20,11 @@ using namespace llvm;
/// and release, then return AcquireRelease.
///
static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if (X == Acquire && Y == Release)
- return AcquireRelease;
- if (Y == Acquire && X == Release)
- return AcquireRelease;
- return (AtomicOrdering)std::max(X, Y);
+ if (X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release)
+ return AtomicOrdering::AcquireRelease;
+ if (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release)
+ return AtomicOrdering::AcquireRelease;
+ return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
}
/// It is safe to destroy a constant iff it is only used by constants itself.
@@ -105,7 +105,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
}
}
- if (StoredVal == GV->getInitializer()) {
+ if (GV->hasInitializer() && StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::InitializerStored)
GS.StoredType = GlobalStatus::InitializerStored;
} else if (isa<LoadInst>(StoredVal) &&
@@ -185,4 +185,4 @@ GlobalStatus::GlobalStatus()
: IsCompared(false), IsLoaded(false), StoredType(NotStored),
StoredOnceValue(nullptr), AccessingFunction(nullptr),
HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
- Ordering(NotAtomic) {}
+ Ordering(AtomicOrdering::NotAtomic) {}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 79282a2a703b3..1fbb19d2b8add 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -427,6 +427,17 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
+ // We do not need to (and in fact, cannot) convert possibly throwing calls
+ // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
+ // invokes. The caller's "segment" of the deoptimization continuation
+ // attached to the newly inlined @llvm.experimental.deoptimize
+ // (resp. @llvm.experimental.guard) call should contain the exception
+ // handling logic, if any.
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
+ F->getIntrinsicID() == Intrinsic::experimental_guard)
+ continue;
+
if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
// This call is nested inside a funclet. If that funclet has an unwind
// destination within the inlinee, then unwinding out of this call would
@@ -677,6 +688,34 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
+/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
+/// that metadata should be propagated to all memory-accessing cloned
+/// instructions.
+static void PropagateParallelLoopAccessMetadata(CallSite CS,
+ ValueToValueMapTy &VMap) {
+ MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ if (!M)
+ return;
+
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ M = MDNode::concatenate(PM, M);
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+}
+
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
@@ -693,12 +732,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// inter-procedural alias analysis passes. We can revisit this if it becomes
// an efficiency or overhead problem.
- for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end();
- I != IE; ++I)
- for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) {
- if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope))
+ for (const BasicBlock &I : *CalledFunc)
+ for (const Instruction &J : I) {
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
MD.insert(M);
- if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias))
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
MD.insert(M);
}
@@ -720,20 +758,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// the noalias scopes and the lists of those scopes.
SmallVector<TempMDTuple, 16> DummyNodes;
DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
- for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
- I != IE; ++I) {
+ for (const MDNode *I : MD) {
DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
- MDMap[*I].reset(DummyNodes.back().get());
+ MDMap[I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
// metadata references with either a dummy node or an already-created new
// node.
- for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
- I != IE; ++I) {
+ for (const MDNode *I : MD) {
SmallVector<Metadata *, 4> NewOps;
- for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) {
- const Metadata *V = (*I)->getOperand(i);
+ for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
+ const Metadata *V = I->getOperand(i);
if (const MDNode *M = dyn_cast<MDNode>(V))
NewOps.push_back(MDMap[M]);
else
@@ -741,7 +777,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
}
MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
- MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+ MDTuple *TempM = cast<MDTuple>(MDMap[I]);
assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
@@ -801,10 +837,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
const Function *CalledFunc = CS.getCalledFunction();
SmallVector<const Argument *, 4> NoAliasArgs;
- for (const Argument &I : CalledFunc->args()) {
- if (I.hasNoAliasAttr() && !I.hasNUses(0))
- NoAliasArgs.push_back(&I);
- }
+ for (const Argument &Arg : CalledFunc->args())
+ if (Arg.hasNoAliasAttr() && !Arg.use_empty())
+ NoAliasArgs.push_back(&Arg);
if (NoAliasArgs.empty())
return;
@@ -885,17 +920,16 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
IsArgMemOnlyCall = true;
}
- for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(),
- AE = ICS.arg_end(); AI != AE; ++AI) {
+ for (Value *Arg : ICS.args()) {
// We need to check the underlying objects of all arguments, not just
// the pointer arguments, because we might be passing pointers as
// integers, etc.
// However, if we know that the call only accesses pointer arguments,
// then we only need to check the pointer arguments.
- if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy())
+ if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
continue;
- PtrArgs.push_back(*AI);
+ PtrArgs.push_back(Arg);
}
}
@@ -913,9 +947,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
SmallVector<Metadata *, 4> Scopes, NoAliases;
SmallSetVector<const Argument *, 4> NAPtrArgs;
- for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
+ for (const Value *V : PtrArgs) {
SmallVector<Value *, 4> Objects;
- GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
+ GetUnderlyingObjects(const_cast<Value*>(V),
Objects, DL, /* LI = */ nullptr);
for (Value *O : Objects)
@@ -1228,7 +1262,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
/// Rebuild the entire inlined-at chain for this instruction so that the top of
/// the chain now is inlined-at the new call site.
static DebugLoc
-updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
+updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
+ LLVMContext &Ctx,
DenseMap<const DILocation *, DILocation *> &IANodes) {
SmallVector<DILocation *, 3> InlinedAtLocations;
DILocation *Last = InlinedAtNode;
@@ -1249,8 +1284,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
// Starting from the top, rebuild the nodes to point to the new inlined-at
// location (then rebuilding the rest of the chain behind it) and update the
// map of already-constructed inlined-at nodes.
- for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(),
- InlinedAtLocations.rend())) {
+ for (const DILocation *MD : reverse(InlinedAtLocations)) {
Last = IANodes[MD] = DILocation::getDistinct(
Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
}
@@ -1264,7 +1298,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
- DebugLoc TheCallDL = TheCall->getDebugLoc();
+ const DebugLoc &TheCallDL = TheCall->getDebugLoc();
if (!TheCallDL)
return;
@@ -1422,6 +1456,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ // Determine if we are dealing with a call in an EHPad which does not unwind
+ // to caller.
+ bool EHPadForCallUnwindsLocally = false;
+ if (CallSiteEHPad && CS.isCall()) {
+ UnwindDestMemoTy FuncletUnwindMap;
+ Value *CallSiteUnwindDestToken =
+ getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
+
+ EHPadForCallUnwindsLocally =
+ CallSiteUnwindDestToken &&
+ !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
Function::iterator LastBlock = --Caller->end();
@@ -1552,6 +1599,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
+ // Propagate llvm.mem.parallel_loop_access if necessary.
+ PropagateParallelLoopAccessMetadata(CS, VMap);
+
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
if (IFI.ACT)
@@ -1602,7 +1652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
}
- bool InlinedMustTailCalls = false;
+ bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
if (InlinedFunctionInfo.ContainsCalls) {
CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
if (CallInst *CI = dyn_cast<CallInst>(TheCall))
@@ -1615,6 +1665,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!CI)
continue;
+ if (Function *F = CI->getCalledFunction())
+ InlinedDeoptimizeCalls |=
+ F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
// We need to reduce the strength of any inlined tail calls. For
// musttail, we have to avoid introducing potential unbounded stack
// growth. For example, if functions 'f' and 'g' are mutually recursive
@@ -1677,11 +1731,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
builder.CreateLifetimeStart(AI, AllocaSize);
for (ReturnInst *RI : Returns) {
- // Don't insert llvm.lifetime.end calls between a musttail call and a
- // return. The return kills all local allocas.
+ // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
+ // call and a return. The return kills all local allocas.
if (InlinedMustTailCalls &&
RI->getParent()->getTerminatingMustTailCall())
continue;
+ if (InlinedDeoptimizeCalls &&
+ RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
}
@@ -1702,10 +1759,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
for (ReturnInst *RI : Returns) {
- // Don't insert llvm.stackrestore calls between a musttail call and a
- // return. The return will restore the stack pointer.
+ // Don't insert llvm.stackrestore calls between a musttail or deoptimize
+ // call and a return. The return will restore the stack pointer.
if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
continue;
+ if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
}
@@ -1758,7 +1817,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
else
NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
- NewInst->setDebugLoc(I->getDebugLoc());
NewInst->takeName(I);
I->replaceAllUsesWith(NewInst);
I->eraseFromParent();
@@ -1766,6 +1824,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
OpBundles.clear();
}
+ // It is problematic if the inlinee has a cleanupret which unwinds to
+ // caller and we inline it into a call site which doesn't unwind but into
+ // an EH pad that does. Such an edge must be dynamically unreachable.
+ // As such, we replace the cleanupret with unreachable.
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
+ if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
+ changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+
Instruction *I = BB->getFirstNonPHI();
if (!I->isEHPad())
continue;
@@ -1781,6 +1847,64 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ if (InlinedDeoptimizeCalls) {
+ // We need to at least remove the deoptimizing returns from the Return set,
+ // so that the control flow from those returns does not get merged into the
+ // caller (but terminate it instead). If the caller's return type does not
+ // match the callee's return type, we also need to change the return type of
+ // the intrinsic.
+ if (Caller->getReturnType() == TheCall->getType()) {
+ auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+ return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+ });
+ Returns.erase(NewEnd, Returns.end());
+ } else {
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+ Caller->getParent(), Intrinsic::experimental_deoptimize,
+ {Caller->getReturnType()});
+
+ for (ReturnInst *RI : Returns) {
+ CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+ if (!DeoptCall) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+
+ // The calling convention on the deoptimize call itself may be bogus,
+ // since the code we're inlining may have undefined behavior (and may
+ // never actually execute at runtime); but all
+ // @llvm.experimental.deoptimize declarations have to have the same
+ // calling convention in a well-formed module.
+ auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
+ NewDeoptIntrinsic->setCallingConv(CallingConv);
+ auto *CurBB = RI->getParent();
+ RI->eraseFromParent();
+
+ SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
+ DeoptCall->arg_end());
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ DeoptCall->getOperandBundlesAsDefs(OpBundles);
+ DeoptCall->eraseFromParent();
+ assert(!OpBundles.empty() &&
+ "Expected at least the deopt operand bundle");
+
+ IRBuilder<> Builder(CurBB);
+ CallInst *NewDeoptCall =
+ Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+ NewDeoptCall->setCallingConv(CallingConv);
+ if (NewDeoptCall->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(NewDeoptCall);
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+ }
+
// Handle any inlined musttail call sites. In order for a new call site to be
// musttail, the source of the clone and the inlined call site must have been
// musttail. Therefore it's safe to return without merging control into the
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index da890a2970051..8a1973d1db051 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -37,13 +37,13 @@ namespace {
if (!AI->hasName() && !AI->getType()->isVoidTy())
AI->setName("arg");
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BB->hasName())
- BB->setName("bb");
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->hasName() && !I->getType()->isVoidTy())
- I->setName("tmp");
+ for (BasicBlock &BB : F) {
+ if (!BB.hasName())
+ BB.setName("bb");
+
+ for (Instruction &I : BB)
+ if (!I.hasName() && !I.getType()->isVoidTy())
+ I.setName("tmp");
}
return true;
}
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 5687afa61e2a6..5a90dcb033b2a 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -390,6 +390,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
Rem->getOperand(1), Builder);
+ // Check whether this is the insert point while Rem is still valid.
+ bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint();
Rem->replaceAllUsesWith(Remainder);
Rem->dropAllReferences();
Rem->eraseFromParent();
@@ -397,7 +399,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
// If we didn't actually generate an urem instruction, we're done
// This happens for example if the input were constant. In this case the
// Builder insertion point was unchanged
- if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked())
+ if (IsInsertPoint)
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -446,6 +448,9 @@ bool llvm::expandDivision(BinaryOperator *Div) {
// Lower the code to unsigned division, and reset Div to point to the udiv.
Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
Div->getOperand(1), Builder);
+
+ // Check whether this is the insert point while Div is still valid.
+ bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint();
Div->replaceAllUsesWith(Quotient);
Div->dropAllReferences();
Div->eraseFromParent();
@@ -453,7 +458,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
// If we didn't actually generate an udiv instruction, we're done
// This happens for example if the input were constant. In this case the
// Builder insertion point was unchanged
- if (Div == Builder.GetInsertPoint().getNodePtrUnchecked())
+ if (IsInsertPoint)
return true;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index b4b2e148dfbb1..9658966779b9a 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -27,10 +27,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -41,6 +42,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -52,154 +54,156 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
/// Return true if the specified block is in the list.
static bool isExitBlock(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] == BB)
- return true;
- return false;
+ return find(ExitBlocks, BB) != ExitBlocks.end();
}
-/// Given an instruction in the loop, check to see if it has any uses that are
-/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the
-/// uses.
-static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks,
- PredIteratorCache &PredCache, LoopInfo *LI) {
+/// For every instruction from the worklist, check to see if it has any uses
+/// that are outside the current loop. If so, insert LCSSA PHI nodes and
+/// rewrite the uses.
+bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
+ DominatorTree &DT, LoopInfo &LI) {
SmallVector<Use *, 16> UsesToRewrite;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ PredIteratorCache PredCache;
+ bool Changed = false;
- // Tokens cannot be used in PHI nodes, so we skip over them.
- // We can run into tokens which are live out of a loop with catchswitch
- // instructions in Windows EH if the catchswitch has one catchpad which
- // is inside the loop and another which is not.
- if (Inst.getType()->isTokenTy())
- return false;
+ while (!Worklist.empty()) {
+ UsesToRewrite.clear();
+ ExitBlocks.clear();
- BasicBlock *InstBB = Inst.getParent();
+ Instruction *I = Worklist.pop_back_val();
+ BasicBlock *InstBB = I->getParent();
+ Loop *L = LI.getLoopFor(InstBB);
+ L->getExitBlocks(ExitBlocks);
- for (Use &U : Inst.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(U);
+ if (ExitBlocks.empty())
+ continue;
- if (InstBB != UserBB && !L.contains(UserBB))
- UsesToRewrite.push_back(&U);
- }
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (I->getType()->isTokenTy())
+ continue;
- // If there are no uses outside the loop, exit with no change.
- if (UsesToRewrite.empty())
- return false;
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(U);
- ++NumLCSSA; // We are applying the transformation
+ if (InstBB != UserBB && !L->contains(UserBB))
+ UsesToRewrite.push_back(&U);
+ }
- // Invoke instructions are special in that their result value is not available
- // along their unwind edge. The code below tests to see whether DomBB
- // dominates the value, so adjust DomBB to the normal destination block,
- // which is effectively where the value is first usable.
- BasicBlock *DomBB = Inst.getParent();
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
- DomBB = Inv->getNormalDest();
+ // If there are no uses outside the loop, exit with no change.
+ if (UsesToRewrite.empty())
+ continue;
- DomTreeNode *DomNode = DT.getNode(DomBB);
+ ++NumLCSSA; // We are applying the transformation
- SmallVector<PHINode *, 16> AddedPHIs;
- SmallVector<PHINode *, 8> PostProcessPHIs;
+ // Invoke instructions are special in that their result value is not
+ // available along their unwind edge. The code below tests to see whether
+ // DomBB dominates the value, so adjust DomBB to the normal destination
+ // block, which is effectively where the value is first usable.
+ BasicBlock *DomBB = InstBB;
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
+ DomBB = Inv->getNormalDest();
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst.getType(), Inst.getName());
+ DomTreeNode *DomNode = DT.getNode(DomBB);
- // Insert the LCSSA phi's into all of the exit blocks dominated by the
- // value, and add them to the Phi's map.
- for (BasicBlock *ExitBB : ExitBlocks) {
- if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
- continue;
+ SmallVector<PHINode *, 16> AddedPHIs;
+ SmallVector<PHINode *, 8> PostProcessPHIs;
- // If we already inserted something for this BB, don't reprocess it.
- if (SSAUpdate.HasValueForBlock(ExitBB))
- continue;
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(I->getType(), I->getName());
- PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
- Inst.getName() + ".lcssa", &ExitBB->front());
+ // Insert the LCSSA phi's into all of the exit blocks dominated by the
+ // value, and add them to the Phi's map.
+ for (BasicBlock *ExitBB : ExitBlocks) {
+ if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+ continue;
- // Add inputs from inside the loop for this PHI.
- for (BasicBlock *Pred : PredCache.get(ExitBB)) {
- PN->addIncoming(&Inst, Pred);
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB))
+ continue;
- // If the exit block has a predecessor not within the loop, arrange for
- // the incoming value use corresponding to that predecessor to be
- // rewritten in terms of a different LCSSA PHI.
- if (!L.contains(Pred))
- UsesToRewrite.push_back(
- &PN->getOperandUse(PN->getOperandNumForIncomingValue(
- PN->getNumIncomingValues() - 1)));
+ PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa", &ExitBB->front());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+ PN->addIncoming(I, Pred);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!L->contains(Pred))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+ PN->getNumIncomingValues() - 1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+
+ // LoopSimplify might fail to simplify some loops (e.g. when indirect
+ // branches are involved). In such situations, it might happen that an
+ // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
+ // create PHIs in such an exit block, we are also inserting PHIs into L2's
+ // header. This could break LCSSA form for L2 because these inserted PHIs
+ // can also have uses outside of L2. Remember all PHIs in such a
+ // situation so we can revisit them later on. FIXME: Remove this once
+ // indirectbr support in LoopSimplify gets improved.
+ if (auto *OtherLoop = LI.getLoopFor(ExitBB))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(PN);
}
- AddedPHIs.push_back(PN);
-
- // Remember that this phi makes the value alive in this block.
- SSAUpdate.AddAvailableValue(ExitBB, PN);
-
- // LoopSimplify might fail to simplify some loops (e.g. when indirect
- // branches are involved). In such situations, it might happen that an exit
- // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
- // PHIs in such an exit block, we are also inserting PHIs into L2's header.
- // This could break LCSSA form for L2 because these inserted PHIs can also
- // have uses outside of L2. Remember all PHIs in such situation as to
- // revisit than later on. FIXME: Remove this if indirectbr support into
- // LoopSimplify gets improved.
- if (auto *OtherLoop = LI->getLoopFor(ExitBB))
- if (!L.contains(OtherLoop))
- PostProcessPHIs.push_back(PN);
- }
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (Use *UseToRewrite : UsesToRewrite) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses
+ // in the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UseToRewrite);
+
+ if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
+ UseToRewrite->set(&UserBB->front());
+ continue;
+ }
- // Rewrite all uses outside the loop in terms of the new PHIs we just
- // inserted.
- for (Use *UseToRewrite : UsesToRewrite) {
- // If this use is in an exit block, rewrite to use the newly inserted PHI.
- // This is required for correctness because SSAUpdate doesn't handle uses in
- // the same block. It assumes the PHI we inserted is at the end of the
- // block.
- Instruction *User = cast<Instruction>(UseToRewrite->getUser());
- BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(*UseToRewrite);
-
- if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
- // Tell the VHs that the uses changed. This updates SCEV's caches.
- if (UseToRewrite->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
- UseToRewrite->set(&UserBB->front());
- continue;
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UseToRewrite);
}
- // Otherwise, do full PHI insertion.
- SSAUpdate.RewriteUse(*UseToRewrite);
- }
+ // Post process PHI instructions that were inserted into another disjoint
+ // loop and update their exits properly.
+ for (auto *PostProcessPN : PostProcessPHIs) {
+ if (PostProcessPN->use_empty())
+ continue;
- // Post process PHI instructions that were inserted into another disjoint loop
- // and update their exits properly.
- for (auto *I : PostProcessPHIs) {
- if (I->use_empty())
- continue;
+ // Reprocess each PHI instruction.
+ Worklist.push_back(PostProcessPN);
+ }
- BasicBlock *PHIBB = I->getParent();
- Loop *OtherLoop = LI->getLoopFor(PHIBB);
- SmallVector<BasicBlock *, 8> EBs;
- OtherLoop->getExitBlocks(EBs);
- if (EBs.empty())
- continue;
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (PHINode *PN : AddedPHIs)
+ if (PN->use_empty())
+ PN->eraseFromParent();
- // Recurse and re-process each PHI instruction. FIXME: we should really
- // convert this entire thing to a worklist approach where we process a
- // vector of instructions...
- processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI);
+ Changed = true;
}
-
- // Remove PHI nodes that did not have any uses rewritten.
- for (PHINode *PN : AddedPHIs)
- if (PN->use_empty())
- PN->eraseFromParent();
-
- return true;
+ return Changed;
}
/// Return true if the specified block dominates at least
@@ -209,11 +213,9 @@ blockDominatesAnExit(BasicBlock *BB,
DominatorTree &DT,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
DomTreeNode *DomNode = DT.getNode(BB);
- for (BasicBlock *ExitBB : ExitBlocks)
- if (DT.dominates(DomNode, DT.getNode(ExitBB)))
- return true;
-
- return false;
+ return llvm::any_of(ExitBlocks, [&](BasicBlock *EB) {
+ return DT.dominates(DomNode, DT.getNode(EB));
+ });
}
bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
@@ -227,10 +229,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
if (ExitBlocks.empty())
return false;
- PredIteratorCache PredCache;
+ SmallVector<Instruction *, 8> Worklist;
// Look at all the instructions in the loop, checking to see if they have uses
- // outside the loop. If so, rewrite those uses.
+ // outside the loop. If so, put them into the worklist to rewrite those uses.
for (BasicBlock *BB : L.blocks()) {
// For large loops, avoid use-scanning by using dominance information: In
// particular, if a block does not dominate any of the loop exits, then none
@@ -246,9 +248,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
!isa<PHINode>(I.user_back())))
continue;
- Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI);
+ Worklist.push_back(&I);
}
}
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI);
// If we modified the code, remove any caches about the loop from SCEV to
// avoid dangling entries.
@@ -274,11 +277,20 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
return Changed;
}
+/// Process all loops in the function, inner-most out.
+static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+ for (auto &L : *LI)
+ Changed |= formLCSSARecursively(*L, DT, LI, SE);
+ return Changed;
+}
+
namespace {
-struct LCSSA : public FunctionPass {
+struct LCSSAWrapperPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : FunctionPass(ID) {
- initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ LCSSAWrapperPass() : FunctionPass(ID) {
+ initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
}
// Cached analysis information for the current function.
@@ -298,6 +310,7 @@ struct LCSSA : public FunctionPass {
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
@@ -305,30 +318,39 @@ struct LCSSA : public FunctionPass {
};
}
-char LCSSA::ID = 0;
-INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+char LCSSAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
-INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
-
-Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-char &llvm::LCSSAID = LCSSA::ID;
+INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
+Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
+char &llvm::LCSSAID = LCSSAWrapperPass::ID;
-/// Process all loops in the function, inner-most out.
-bool LCSSA::runOnFunction(Function &F) {
- bool Changed = false;
+/// Transform \p F into loop-closed SSA form.
+bool LCSSAWrapperPass::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
SE = SEWP ? &SEWP->getSE() : nullptr;
- // Simplify each loop nest in the function.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= formLCSSARecursively(**I, *DT, LI, SE);
-
- return Changed;
+ return formLCSSAOnAllLoops(LI, *DT, SE);
}
+PreservedAnalyses LCSSAPass::run(Function &F, AnalysisManager<Function> &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ if (!formLCSSAOnAllLoops(&LI, DT, SE))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ PreservedAnalyses PA;
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ return PA;
+}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index abc9b65f7a394..f1838d891466e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -42,11 +42,13 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "local"
@@ -148,9 +150,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SmallVector<uint32_t, 8> Weights;
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
++MD_i) {
- ConstantInt *CI =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(MD_i));
- assert(CI);
+ auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
Weights.push_back(CI->getValue().getZExtValue());
}
// Merge weight of this case to the default weight.
@@ -321,8 +321,12 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
II->getIntrinsicID() == Intrinsic::lifetime_end)
return isa<UndefValue>(II->getArgOperand(1));
- // Assumptions are dead if their condition is trivially true.
- if (II->getIntrinsicID() == Intrinsic::assume) {
+ // Assumptions are dead if their condition is trivially true. Guards on
+ // true are operationally no-ops. In the future we can consider more
+ // sophisticated tradeoffs for guards considering potential for check
+ // widening, but for now we keep things simple.
+ if (II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::experimental_guard) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
return !Cond->isZero();
@@ -452,14 +456,23 @@ simplifyAndDCEInstruction(Instruction *I,
if (Value *SimpleV = SimplifyInstruction(I, DL)) {
// Add the users to the worklist. CAREFUL: an instruction can use itself,
// in the case of a phi node.
- for (User *U : I->users())
- if (U != I)
+ for (User *U : I->users()) {
+ if (U != I) {
WorkList.insert(cast<Instruction>(U));
+ }
+ }
// Replace the instruction with its simplified value.
- I->replaceAllUsesWith(SimpleV);
- I->eraseFromParent();
- return true;
+ bool Changed = false;
+ if (!I->use_empty()) {
+ I->replaceAllUsesWith(SimpleV);
+ Changed = true;
+ }
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
}
return false;
}
@@ -486,7 +499,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
// Iterate over the original function, only adding insts to the worklist
// if they actually need to be revisited. This avoids having to pre-init
// the worklist with the entire function's worth of instructions.
- for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) {
+ for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+ BI != E;) {
assert(!BI->isTerminator());
Instruction *I = &*BI;
++BI;
@@ -1025,7 +1039,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
///
/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) {
+static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
+ Instruction *I) {
// Since we can't guarantee that the original dbg.declare intrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
@@ -1035,7 +1050,8 @@ static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) {
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
if (DVI->getValue() == I->getOperand(0) &&
DVI->getOffset() == 0 &&
- DVI->getVariable() == DIVar)
+ DVI->getVariable() == DIVar &&
+ DVI->getExpression() == DIExpr)
return true;
}
return false;
@@ -1049,9 +1065,6 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- if (LdStHasDebugValue(DIVar, SI))
- return true;
-
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
Argument *ExtendedArg = nullptr;
@@ -1066,25 +1079,25 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// to the alloca described by DDI, if its first operand is an extend,
// we're guaranteed that before extension, the value was narrower than
// the size of the alloca, hence the size of the described variable.
- SmallVector<uint64_t, 3> NewDIExpr;
+ SmallVector<uint64_t, 3> Ops;
unsigned PieceOffset = 0;
// If this already is a bit piece, we drop the bit piece from the expression
// and record the offset.
if (DIExpr->isBitPiece()) {
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
PieceOffset = DIExpr->getBitPieceOffset();
} else {
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
}
- NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
- NewDIExpr.push_back(PieceOffset); //Offset
+ Ops.push_back(dwarf::DW_OP_bit_piece);
+ Ops.push_back(PieceOffset); // Offset
const DataLayout &DL = DDI->getModule()->getDataLayout();
- NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
- Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
- Builder.createExpression(NewDIExpr),
- DDI->getDebugLoc(), SI);
- }
- else
+ Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+ auto NewDIExpr = Builder.createExpression(Ops);
+ if (!LdStHasDebugValue(DIVar, NewDIExpr, SI))
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr,
+ DDI->getDebugLoc(), SI);
+ } else if (!LdStHasDebugValue(DIVar, DIExpr, SI))
Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
DDI->getDebugLoc(), SI);
return true;
@@ -1098,7 +1111,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- if (LdStHasDebugValue(DIVar, LI))
+ if (LdStHasDebugValue(DIVar, DIExpr, LI))
return true;
// We are now tracking the loaded value instead of the address. In the
@@ -1140,12 +1153,14 @@ bool llvm::LowerDbgDeclare(Function &F) {
// the stack slot (and at a lexical-scope granularity). Later
// passes will attempt to elide the stack slot.
if (AI && !isArray(AI)) {
- for (User *U : AI->users())
- if (StoreInst *SI = dyn_cast<StoreInst>(U))
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ for (auto &AIUse : AI->uses()) {
+ User *U = AIUse.getUser();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AIUse.getOperandNo() == 1)
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
@@ -1157,6 +1172,7 @@ bool llvm::LowerDbgDeclare(Function &F) {
DIB.createExpression(NewDIExpr),
DDI->getDebugLoc(), CI);
}
+ }
DDI->eraseFromParent();
}
}
@@ -1175,6 +1191,38 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
+static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
+ Expr.push_back(dwarf::DW_OP_deref);
+}
+
+static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) {
+ if (Offset > 0) {
+ Expr.push_back(dwarf::DW_OP_plus);
+ Expr.push_back(Offset);
+ } else if (Offset < 0) {
+ Expr.push_back(dwarf::DW_OP_minus);
+ Expr.push_back(-Offset);
+ }
+}
+
+static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder,
+ DIExpression *DIExpr, bool Deref,
+ int Offset) {
+ if (!Deref && !Offset)
+ return DIExpr;
+ // Create a copy of the original DIDescriptor for user variable, prepending
+ // "deref" operation to a list of address elements, as new llvm.dbg.declare
+ // will take a value storing address of the memory for variable, not
+ // alloca itself.
+ SmallVector<uint64_t, 4> NewDIExpr;
+ if (Deref)
+ DIExprAddDeref(NewDIExpr);
+ DIExprAddOffset(NewDIExpr, Offset);
+ if (DIExpr)
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ return Builder.createExpression(NewDIExpr);
+}
+
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
bool Deref, int Offset) {
@@ -1186,25 +1234,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- if (Deref || Offset) {
- // Create a copy of the original DIDescriptor for user variable, prepending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<uint64_t, 4> NewDIExpr;
- if (Deref)
- NewDIExpr.push_back(dwarf::DW_OP_deref);
- if (Offset > 0) {
- NewDIExpr.push_back(dwarf::DW_OP_plus);
- NewDIExpr.push_back(Offset);
- } else if (Offset < 0) {
- NewDIExpr.push_back(dwarf::DW_OP_minus);
- NewDIExpr.push_back(-Offset);
- }
- if (DIExpr)
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
- DIExpr = Builder.createExpression(NewDIExpr);
- }
+ DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset);
// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
@@ -1219,12 +1249,73 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
Deref, Offset);
}
-void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
+ DIBuilder &Builder, int Offset) {
+ DebugLoc Loc = DVI->getDebugLoc();
+ auto *DIVar = DVI->getVariable();
+ auto *DIExpr = DVI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ // This is an alloca-based llvm.dbg.value. The first thing it should do with
+ // the alloca pointer is dereference it. Otherwise we don't know how to handle
+ // it and give up.
+ if (!DIExpr || DIExpr->getNumElements() < 1 ||
+ DIExpr->getElement(0) != dwarf::DW_OP_deref)
+ return;
+
+ // Insert the offset immediately after the first deref.
+ // We could just change the offset argument of dbg.value, but it's unsigned...
+ if (Offset) {
+ SmallVector<uint64_t, 4> NewDIExpr;
+ DIExprAddDeref(NewDIExpr);
+ DIExprAddOffset(NewDIExpr, Offset);
+ NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
+ DIExpr = Builder.createExpression(NewDIExpr);
+ }
+
+ Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
+ Loc, DVI);
+ DVI->eraseFromParent();
+}
+
+void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, int Offset) {
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) {
+ Use &U = *UI++;
+ if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
+ replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
+ }
+}
+
+unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
+ unsigned NumDeadInst = 0;
+ // Delete the instructions backwards; this reduces the number of
+ // def-use and use-def chain updates that are needed.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != &BB->front()) {
+ // Delete the next to last instruction.
+ Instruction *Inst = &*--EndInst->getIterator();
+ if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst))
+ ++NumDeadInst;
+ Inst->eraseFromParent();
+ }
+ return NumDeadInst;
+}
+
+unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- (*SI)->removePredecessor(BB);
+ for (BasicBlock *Successor : successors(BB))
+ Successor->removePredecessor(BB);
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
@@ -1237,12 +1328,15 @@ void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
new UnreachableInst(I->getContext(), I);
// All instructions after this are dead.
+ unsigned NumInstrsRemoved = 0;
BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
while (BBI != BBE) {
if (!BBI->use_empty())
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
BB->getInstList().erase(BBI++);
+ ++NumInstrsRemoved;
}
+ return NumInstrsRemoved;
}
/// changeToCall - Convert the specified invoke into a normal call.
@@ -1280,36 +1374,52 @@ static bool markAliveBlocks(Function &F,
// Do a quick scan of the basic block, turning any obviously unreachable
// instructions into LLVM unreachable insts. The instruction combining pass
// canonicalizes unreachable insts into stores to null or undef.
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ for (Instruction &I : *BB) {
// Assumptions that are known to be false are equivalent to unreachable.
// Also, if the condition is undefined, then we make the choice most
// beneficial to the optimizer, and choose that to also be unreachable.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::assume) {
- bool MakeUnreachable = false;
- if (isa<UndefValue>(II->getArgOperand(0)))
- MakeUnreachable = true;
- else if (ConstantInt *Cond =
- dyn_cast<ConstantInt>(II->getArgOperand(0)))
- MakeUnreachable = Cond->isZero();
-
- if (MakeUnreachable) {
+ if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(&*BBI, false);
+ changeToUnreachable(II, false);
Changed = true;
break;
}
}
- if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ // A call to the guard intrinsic bails out of the current compilation
+ // unit if the predicate passed to it is false. If the predicate is a
+ // constant false, then we know the guard will bail out of the current
+ // compile unconditionally, so all code following it is dead.
+ //
+ // Note: unlike in llvm.assume, it is not "obviously profitable" for
+ // guards to treat `undef` as `false` since a guard on `undef` can
+ // still be useful for widening.
+ if (match(II->getArgOperand(0), m_Zero()))
+ if (!isa<UnreachableInst>(II->getNextNode())) {
+ changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ Value *Callee = CI->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ Changed = true;
+ break;
+ }
if (CI->doesNotReturn()) {
// If we found a call to a no-return function, insert an unreachable
// instruction after it. Make sure there isn't *already* one there
// though.
- ++BBI;
- if (!isa<UnreachableInst>(BBI)) {
+ if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(&*BBI, false);
+ changeToUnreachable(CI->getNextNode(), false);
Changed = true;
}
break;
@@ -1319,7 +1429,7 @@ static bool markAliveBlocks(Function &F,
// Store to undef and store to null are undefined and used to signal that
// they should be changed to unreachable by passes that can't modify the
// CFG.
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
// Don't touch volatile stores.
if (SI->isVolatile()) continue;
@@ -1393,9 +1503,9 @@ static bool markAliveBlocks(Function &F,
}
Changed |= ConstantFoldTerminator(BB, true);
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.insert(*SI).second)
- Worklist.push_back(*SI);
+ for (BasicBlock *Successor : successors(BB))
+ if (Reachable.insert(Successor).second)
+ Worklist.push_back(Successor);
} while (!Worklist.empty());
return Changed;
}
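For reference, the sweep above reduces to the following worklist skeleton; a minimal sketch in which the helper name is invented and the per-block rewriting is elided:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static void collectReachable(Function &F,
                             SmallPtrSetImpl<BasicBlock *> &Reachable) {
  SmallVector<BasicBlock *, 16> Worklist;
  BasicBlock *Entry = &F.front();
  Reachable.insert(Entry);
  Worklist.push_back(Entry);
  do {
    BasicBlock *BB = Worklist.pop_back_val();
    // ... rewrite obviously-unreachable instructions in BB here ...
    for (BasicBlock *Successor : successors(BB))
      if (Reachable.insert(Successor).second)
        Worklist.push_back(Successor);
  } while (!Worklist.empty());
}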
@@ -1438,7 +1548,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
- SmallPtrSet<BasicBlock*, 128> Reachable;
+ SmallPtrSet<BasicBlock*, 16> Reachable;
bool Changed = markAliveBlocks(F, Reachable);
// If there are unreachable blocks in the CFG...
@@ -1454,10 +1564,9 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
if (Reachable.count(&*BB))
continue;
- for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE;
- ++SI)
- if (Reachable.count(*SI))
- (*SI)->removePredecessor(&*BB);
+ for (BasicBlock *Successor : successors(&*BB))
+ if (Reachable.count(Successor))
+ Successor->removePredecessor(&*BB);
if (LVI)
LVI->eraseBlock(&*BB);
BB->dropAllReferences();
@@ -1495,6 +1604,7 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
break;
case LLVMContext::MD_noalias:
+ case LLVMContext::MD_mem_parallel_loop_access:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;
case LLVMContext::MD_range:
@@ -1566,7 +1676,7 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
UI != UE;) {
Use &U = *UI++;
auto *I = cast<Instruction>(U.getUser());
- if (DT.dominates(BB, I->getParent())) {
+ if (DT.properlyDominates(BB, I->getParent())) {
U.set(To);
DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
<< *To << " in " << *U << "\n");
@@ -1577,18 +1687,18 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
}
bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
- if (isa<IntrinsicInst>(CS.getInstruction()))
- // Most LLVM intrinsics are things which can never take a safepoint.
- // As a result, we don't need to have the stack parsable at the
- // callsite. This is a highly useful optimization since intrinsic
- // calls are fairly prevalent, particularly in debug builds.
- return true;
-
// Check if the function is specifically marked as a gc leaf function.
if (CS.hasFnAttr("gc-leaf-function"))
return true;
- if (const Function *F = CS.getCalledFunction())
- return F->hasFnAttribute("gc-leaf-function");
+ if (const Function *F = CS.getCalledFunction()) {
+ if (F->hasFnAttribute("gc-leaf-function"))
+ return true;
+
+ if (auto IID = F->getIntrinsicID())
+ // Most LLVM intrinsics do not take safepoints.
+ return IID != Intrinsic::experimental_gc_statepoint &&
+ IID != Intrinsic::experimental_deoptimize;
+ }
return false;
}
@@ -1723,7 +1833,23 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If the AndMask is zero for this bit, clear the bit.
if ((AndMask & Bit) == 0)
Result->Provenance[i] = BitPart::Unset;
+ return Result;
+ }
+ // If this is a zext instruction zero extend the result.
+ if (I->getOpcode() == Instruction::ZExt) {
+ auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ auto NarrowBitWidth =
+ cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth();
+ for (unsigned i = 0; i < NarrowBitWidth; ++i)
+ Result->Provenance[i] = Res->Provenance[i];
+ for (unsigned i = NarrowBitWidth; i < BitWidth; ++i)
+ Result->Provenance[i] = BitPart::Unset;
return Result;
}
}
@@ -1754,7 +1880,7 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
-bool llvm::recognizeBitReverseOrBSwapIdiom(
+bool llvm::recognizeBSwapOrBitReverseIdiom(
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
SmallVectorImpl<Instruction *> &InsertedInsts) {
if (Operator::getOpcode(I) != Instruction::Or)
@@ -1766,6 +1892,15 @@ bool llvm::recognizeBitReverseOrBSwapIdiom(
return false; // Can't do vectors or integers > 128 bits.
unsigned BW = ITy->getBitWidth();
+ unsigned DemandedBW = BW;
+ IntegerType *DemandedTy = ITy;
+ if (I->hasOneUse()) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) {
+ DemandedTy = cast<IntegerType>(Trunc->getType());
+ DemandedBW = DemandedTy->getBitWidth();
+ }
+ }
+
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
@@ -1775,11 +1910,12 @@ bool llvm::recognizeBitReverseOrBSwapIdiom(
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
- bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
- for (unsigned i = 0; i < BW; ++i) {
- OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+ bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true;
+ for (unsigned i = 0; i < DemandedBW; ++i) {
+ OKForBSwap &=
+ bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW);
OKForBitReverse &=
- bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+ bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW);
}
Intrinsic::ID Intrin;
@@ -1790,7 +1926,51 @@ bool llvm::recognizeBitReverseOrBSwapIdiom(
else
return false;
+ if (ITy != DemandedTy) {
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
+ Value *Provider = Res->Provider;
+ IntegerType *ProviderTy = cast<IntegerType>(Provider->getType());
+ // We may need to truncate the provider.
+ if (DemandedTy != ProviderTy) {
+ auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy,
+ "trunc", I);
+ InsertedInsts.push_back(Trunc);
+ Provider = Trunc;
+ }
+ auto *CI = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(CI);
+ auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
+ InsertedInsts.push_back(ExtInst);
+ return true;
+ }
+
Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
return true;
}
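As a worked illustration of the demanded-width path (hypothetical IR, not taken from the patch): given an i32 OR whose low 16 bits byte-swap %src and whose only user is a trunc to i16,

    %or  = or i32 %hi, %lo          ; low 16 bits equal bswap16 of %src
    %res = trunc i32 %or to i16     ; sole use, so DemandedTy is i16

the code above inserts, just before the OR,

    %trunc = trunc i32 %src to i16
    %rev   = call i16 @llvm.bswap.i16(i16 %trunc)
    %zext  = zext i16 %rev to i32

and records all three new instructions in InsertedInsts.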
+
+// CodeGen has special handling for some string functions that may replace
+// them with target-specific intrinsics. Since that'd skip our interceptors
+// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses,
+// we mark affected calls as NoBuiltin, which will disable optimization
+// in CodeGen.
+void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ Function *F = CI->getCalledFunction();
+ LibFunc::Func Func;
+ if (!F || F->hasLocalLinkage() || !F->hasName() ||
+ !TLI->getLibFunc(F->getName(), Func))
+ return;
+ switch (Func) {
+ default: break;
+ case LibFunc::memcmp:
+ case LibFunc::memchr:
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcmp:
+ case LibFunc::strlen:
+ case LibFunc::strnlen:
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
+ break;
+ }
+}
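A minimal usage sketch (the wrapper below is invented for illustration; only the helper itself comes from this patch):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Mark every intercepted string/memory libcall in F as nobuiltin so that
// CodeGen keeps the call visible to the sanitizer runtime.
static void markInterceptedCalls(Function &F, const TargetLibraryInfo &TLI) {
  for (Instruction &I : instructions(F))
    if (auto *CI = dyn_cast<CallInst>(&I))
      maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
}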
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 1fa469595d168..b3a928bf77531 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,6 +37,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -489,14 +490,9 @@ ReprocessLoop:
DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
<< P->getName() << "\n");
- // Inform each successor of each dead pred.
- for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
- (*SI)->removePredecessor(P);
// Zap the dead pred's terminator and replace it with unreachable.
TerminatorInst *TI = P->getTerminator();
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- P->getTerminator()->eraseFromParent();
- new UnreachableInst(P->getContext(), P);
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false);
Changed = true;
}
}
@@ -506,14 +502,13 @@ ReprocessLoop:
// trip count computations.
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- E = ExitingBlocks.end(); I != E; ++I)
- if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ for (BasicBlock *ExitingBlock : ExitingBlocks)
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
if (BI->isConditional()) {
if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << (*I)->getName() << "\n");
+ << ExitingBlock->getName() << "\n");
BI->setCondition(ConstantInt::get(Cond->getType(),
!L->contains(BI->getSuccessor(0))));
@@ -545,9 +540,7 @@ ReprocessLoop:
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
- for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
- E = ExitBlockSet.end(); I != E; ++I) {
- BasicBlock *ExitBlock = *I;
+ for (BasicBlock *ExitBlock : ExitBlockSet) {
for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
PI != PE; ++PI)
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
@@ -691,8 +684,10 @@ ReprocessLoop:
}
DT->eraseNode(ExitingBlock);
- BI->getSuccessor(0)->removePredecessor(ExitingBlock);
- BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(0)->removePredecessor(
+ ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ BI->getSuccessor(1)->removePredecessor(
+ ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
ExitingBlock->eraseFromParent();
}
}
@@ -731,11 +726,6 @@ namespace {
initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
}
- DominatorTree *DT;
- LoopInfo *LI;
- ScalarEvolution *SE;
- AssumptionCache *AC;
-
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -753,7 +743,8 @@ namespace {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
- AU.addPreserved<DependenceAnalysis>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
@@ -768,9 +759,6 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -783,20 +771,64 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
///
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- SE = SEWP ? &SEWP->getSE() : nullptr;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+#ifndef NDEBUG
+ if (PreserveLCSSA) {
+ assert(DT && "DT not available.");
+ assert(LI && "LI not available.");
+ bool InLCSSA =
+ all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); });
+ assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken.");
+ }
+#endif
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
+#ifndef NDEBUG
+ if (PreserveLCSSA) {
+ bool InLCSSA =
+ all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); });
+ assert(InLCSSA && "LCSSA is broken after loop-simplify.");
+ }
+#endif
return Changed;
}
+PreservedAnalyses LoopSimplifyPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ bool Changed = false;
+ LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+
+ // FIXME: This pass should verify that the loops on which it's operating
+ // are in canonical SSA form, and that the pass itself preserves this form.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DependenceAnalysis>();
+ return PA;
+}
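A hedged sketch of driving the new pass-manager entry point (the driver function is invented; analysis registration follows the standard PassBuilder setup):

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
using namespace llvm;

static void runLoopSimplify(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM); // Provides LoopAnalysis, DT, AC, SCEV.
  FunctionPassManager FPM;
  FPM.addPass(LoopSimplifyPass());
  FPM.run(F, FAM);
}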
+
// FIXME: Restore this code when we re-enable verification in verifyAnalysis
// below.
#if 0
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index eea9237ba80c6..7f1f78fa8b411 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -34,6 +34,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -44,9 +45,14 @@ using namespace llvm;
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
-/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by VMap.
-static inline void RemapInstruction(Instruction *I,
+static cl::opt<bool>
+UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden,
+ cl::desc("Allow runtime unrolled loops to be unrolled "
+ "with epilog instead of prolog."));
+
+/// Convert the instruction operands from referencing the current values into
+/// those specified by VMap.
+static inline void remapInstruction(Instruction *I,
ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
@@ -64,8 +70,8 @@ static inline void RemapInstruction(Instruction *I,
}
}
-/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
-/// only has one predecessor, and that predecessor only has one successor.
+/// Folds a basic block into its predecessor if it only has one predecessor, and
+/// that predecessor only has one successor.
/// The LoopInfo Analysis that is passed will be kept consistent. If folding is
/// successful references to the containing loop must be removed from
/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have
@@ -73,8 +79,9 @@ static inline void RemapInstruction(Instruction *I,
/// of loops that have already been forgotten to prevent redundant, expensive
/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
static BasicBlock *
-FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
- SmallPtrSetImpl<Loop *> &ForgottenLoops) {
+foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
+ SmallPtrSetImpl<Loop *> &ForgottenLoops,
+ DominatorTree *DT) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -106,7 +113,16 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
// OldName will be valid until erased.
StringRef OldName = BB->getName();
- // Erase basic block from the function...
+ // Erase the old block and update dominator info.
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(OnlyPred);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (auto *DI : Children)
+ DT->changeImmediateDominator(DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
// ScalarEvolution holds references to loop exit blocks.
if (SE) {
@@ -126,6 +142,35 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
return OnlyPred;
}
+/// Check if unrolling created a situation where we need to insert phi nodes to
+/// preserve LCSSA form.
+/// \param Blocks is a vector of basic blocks representing unrolled loop.
+/// \param L is the outer loop.
+/// It's possible that some of the blocks are in L, and some are not. In this
+/// case, if there is a use outside L and the definition is inside L, we need
+/// to insert a phi-node, otherwise LCSSA will be broken.
+/// This is a helper for llvm::UnrollLoop; it returns true if this situation
+/// occurs, indicating that LCSSA needs to be fixed.
+static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
+ LoopInfo *LI) {
+ for (BasicBlock *BB : Blocks) {
+ if (LI->getLoopFor(BB) == L)
+ continue;
+ for (Instruction &I : *BB) {
+ for (Use &U : I.operands()) {
+ if (auto Def = dyn_cast<Instruction>(U)) {
+ Loop *DefLoop = LI->getLoopFor(Def->getParent());
+ if (!DefLoop)
+ continue;
+ if (DefLoop->contains(L))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
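To see what this helper guards against (hypothetical IR): after complete unrolling, a copied block may sit outside L while still using a value defined inside L,

    ; inside L
    %v = add i32 %a, 1
    ...
    ; copied block that ended up outside L
    %u = mul i32 %v, 2

and LCSSA then requires a PHI for %v at L's exit, so the helper returns true and the caller recomputes LCSSA for the outer loop.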
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
@@ -155,7 +200,7 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
@@ -218,20 +263,48 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool CompletelyUnroll = Count == TripCount;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
- Loop *ParentL = L->getParentLoop();
- bool AllExitsAreInsideParentLoop = !ParentL ||
- std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
- [&](BasicBlock *BB) { return ParentL->contains(BB); });
+ std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
+
+ // Go through all exits of L and see if there are any phi-nodes there. We just
+ // conservatively assume that they're inserted to preserve LCSSA form, which
+ // means that complete unrolling might break this form. We need to either fix
+ // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+ // now we just recompute LCSSA for the outer loop, but it should be possible
+ // to fix it in-place.
+ bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
+ std::any_of(ExitBlocks.begin(), ExitBlocks.end(),
+ [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
- if (RuntimeTripCount &&
- !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
- PreserveLCSSA))
- return false;
+ // Loops containing convergent instructions must have a count that divides
+ // their TripMultiple.
+ DEBUG(
+ {
+ bool HasConvergent = false;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto CS = CallSite(&I))
+ HasConvergent |= CS.isConvergent();
+ assert((!HasConvergent || TripMultiple % Count == 0) &&
+ "Unroll count must divide trip multiple if loop contains a "
+ "convergent operation.");
+ });
+  // Don't output the runtime loop remainder if TripMultiple is a multiple of
+  // Count (i.e. Count divides TripMultiple). Such a remainder is never
+  // needed, and is unsafe if the loop contains a convergent instruction.
+ if (RuntimeTripCount && TripMultiple % Count != 0 &&
+ !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
+ UnrollRuntimeEpilog, LI, SE, DT,
+ PreserveLCSSA)) {
+ if (Force)
+ RuntimeTripCount = false;
+ else
+ return false;
+ }
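  // Worked example (illustrative): with Count == 2 and TripMultiple == 4,
  // TripMultiple % Count == 0, so the trip count is always a multiple of
  // Count and no remainder loop is needed; skipping it is also what keeps
  // the transformation legal when the loop contains convergent calls.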
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
@@ -308,6 +381,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+ std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -349,13 +423,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (*BB == Header)
// Loop over all of the PHI nodes in the block, changing them to use
// the incoming values from the previous block.
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+ for (PHINode *OrigPHI : OrigPHINode) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
if (It > 1 && L->contains(InValI))
InVal = LastValueMap[InValI];
- VMap[OrigPHINode[i]] = InVal;
+ VMap[OrigPHI] = InVal;
New->getInstList().erase(NewPHI);
}
@@ -366,11 +440,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
LastValueMap[VI->first] = VI->second;
// Add phi entries for newly created values to all exit blocks.
- for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
- SI != SE; ++SI) {
- if (L->contains(*SI))
+ for (BasicBlock *Succ : successors(*BB)) {
+ if (L->contains(Succ))
continue;
- for (BasicBlock::iterator BBI = (*SI)->begin();
+ for (BasicBlock::iterator BBI = Succ->begin();
PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
Value *Incoming = phi->getIncomingValueForBlock(*BB);
ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
@@ -387,18 +460,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Latches.push_back(New);
NewBlocks.push_back(New);
+ UnrolledLoopBlocks.push_back(New);
+
+ // Update DomTree: since we just copy the loop body, and each copy has a
+ // dedicated entry block (copy of the header block), this header's copy
+ // dominates all copied blocks. That means, dominance relations in the
+ // copied body are the same as in the original body.
+ if (DT) {
+ if (*BB == Header)
+ DT->addNewBlock(New, Latches[It - 1]);
+ else {
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+ }
+ }
}
// Remap all instructions in the most recent iteration
- for (unsigned i = 0; i < NewBlocks.size(); ++i)
- for (BasicBlock::iterator I = NewBlocks[i]->begin(),
- E = NewBlocks[i]->end(); I != E; ++I)
- ::RemapInstruction(&*I, LastValueMap);
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ ::remapInstruction(&I, LastValueMap);
}
// Loop over the PHI nodes in the original block, setting incoming values.
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *PN = OrigPHINode[i];
+ for (PHINode *PN : OrigPHINode) {
if (CompletelyUnroll) {
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
@@ -453,11 +541,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Remove phi operands at this loop exit
if (Dest != LoopExit) {
BasicBlock *BB = Latches[i];
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
- SI != SE; ++SI) {
- if (*SI == Headers[i])
+ for (BasicBlock *Succ: successors(BB)) {
+ if (Succ == Headers[i])
continue;
- for (BasicBlock::iterator BBI = (*SI)->begin();
+ for (BasicBlock::iterator BBI = Succ->begin();
PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
Phi->removeIncomingValue(BB, false);
}
@@ -468,16 +555,43 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Term->eraseFromParent();
}
}
+  // Update dominators of blocks we might reach through exits.
+  // The immediate dominator of such a block might change, because we add more
+  // routes which can lead to the exit: we can now reach it from the copied
+  // iterations too. Thus, the new idom of the block will be the nearest common
+  // dominator of the previous idom and the common dominator of all copies of
+  // the previous idom. This is equivalent to the nearest common dominator of
+  // the previous idom and the first latch, which dominates all copies of the
+  // previous idom.
+ if (DT && Count > 1) {
+ for (auto *BB : OriginalLoopBlocks) {
+ auto *BBDomNode = DT->getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->getChildren()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
+ for (auto *ChildBB : ChildrenToUpdate)
+ DT->changeImmediateDominator(ChildBB, NewIDom);
+ }
+ }
// Merge adjacent basic blocks, if possible.
SmallPtrSet<Loop *, 4> ForgottenLoops;
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ for (BasicBlock *Latch : Latches) {
+ BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
- ForgottenLoops))
+ if (BasicBlock *Fold =
+ foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) {
+ // Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
+ UnrolledLoopBlocks.end(), Dest),
+ UnrolledLoopBlocks.end());
+ }
}
}
@@ -485,10 +599,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// whole function's cache.
AC->clear();
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- // Incrementally updating domtree after loop unrolling would be easy.
- if (DT)
+ // FIXME: We only preserve DT info for complete unrolling now. Incrementally
+ // updating domtree after partial loop unrolling should also be easy.
+ if (DT && !CompletelyUnroll)
DT->recalculate(*L->getHeader()->getParent());
+ else if (DT)
+ DEBUG(DT->verifyDomTree());
// Simplify any new induction variables in the partially unrolled loop.
if (SE && !CompletelyUnroll) {
@@ -508,19 +624,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// go.
const DataLayout &DL = Header->getModule()->getDataLayout();
const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
- for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
- BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+ for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = &*I++;
- if (isInstructionTriviallyDead(Inst))
- (*BB)->getInstList().erase(Inst);
- else if (Value *V = SimplifyInstruction(Inst, DL))
- if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ if (Value *V = SimplifyInstruction(Inst, DL))
+ if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
- (*BB)->getInstList().erase(Inst);
- }
+ if (isInstructionTriviallyDead(Inst))
+ BB->getInstList().erase(Inst);
}
+ }
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -530,6 +644,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (CompletelyUnroll)
LI->markAsRemoved(L);
+ // After complete unrolling most of the blocks should be contained in OuterL.
+ // However, some of them might happen to be out of OuterL (e.g. if they
+ // precede a loop exit). In this case we might need to insert PHI nodes in
+ // order to preserve LCSSA form.
+ // We don't need to check this if we already know that we need to fix LCSSA
+ // form.
+ // TODO: For now we just recompute LCSSA for the outer loop in this case, but
+ // it should be possible to fix it in-place.
+ if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
+ NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
+
// If we have a pass and a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
// at least one layer outside of the loop that was unrolled so that any
@@ -538,7 +663,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
+ simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
@@ -548,7 +673,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
while (OuterL->getParentLoop() != LatchLoop)
OuterL = OuterL->getParentLoop();
- if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
+ if (NeedToFixLCSSA)
formLCSSARecursively(*OuterL, *DT, LI, SE);
else
assert(OuterL->isLCSSAForm(*DT) &&
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0d68f18ad0e5e..861a50cf354d8 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -16,8 +16,8 @@
// case, we need to generate code to execute these 'left over' iterations.
//
// The current strategy generates an if-then-else sequence prior to the
-// unrolled loop to execute the 'left over' iterations. Other strategies
-// include generate a loop before or after the unrolled loop.
+// unrolled loop, under which the 'left over' iterations execute either
+// before (prolog) or after (epilog) the unrolled loop.
//
//===----------------------------------------------------------------------===//
@@ -60,91 +60,220 @@ STATISTIC(NumRuntimeUnrolled,
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
- BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *PrologExit, BasicBlock *PreHeader,
+ BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
+ DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
// The new PHI node is inserted in the prolog end basic block.
- // The new PHI name is added as an operand of a PHI node in either
+ // The new PHI node value is added as an operand of a PHI node in either
// the loop header or the loop exit block.
- for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
- SBI != SBE; ++SBI) {
- for (BasicBlock::iterator BBI = (*SBI)->begin();
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
-
+ for (BasicBlock *Succ : successors(Latch)) {
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+      // Stop once we have passed all PHI nodes.
+ if (!PN)
+ break;
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
- PrologEnd->getTerminator());
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(PN)) {
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+ PreHeader);
} else {
- NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
}
Value *V = PN->getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (L->contains(I)) {
- V = VMap[I];
+ V = VMap.lookup(I);
}
}
// Adding a value to the new PHI node from the last prolog block
// that was created.
- NewPN->addIncoming(V, LastPrologBB);
+ NewPN->addIncoming(V, PrologLatch);
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
if (L->contains(PN)) {
- PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
} else {
- PN->addIncoming(NewPN, PrologEnd);
+ PN->addIncoming(NewPN, PrologExit);
}
}
}
- // Create a branch around the orignal loop, which is taken if there are no
+ // Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
- Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *InsertPt = PrologExit->getTerminator();
IRBuilder<> B(InsertPt);
assert(Count != 0 && "nonsensical Count!");
- // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
- // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
- // and all of the iterations of this loop were executed by the prologue. Note
- // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+ // This means %xtraiter is (BECount + 1) and all of the iterations of this
+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)
+ // then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit =
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, Exit, NewPH);
+ B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
+ InsertPt->eraseFromParent();
+}
+
+/// Connect the unrolling epilog code to the original loop.
+/// The unrolling epilog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit to combine
+/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI operands in the epilog loop by the new PHI nodes
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+///
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
+ BasicBlock *Exit, BasicBlock *PreHeader,
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Loop structure should be the following:
+ //
+ // PreHeader
+ // NewPreHeader
+ // Header
+ // ...
+ // Latch
+ // NewExit (PN)
+ // EpilogPreHeader
+ // EpilogHeader
+ // ...
+ // EpilogLatch
+ // Exit (EpilogPN)
+
+ // Update PHI nodes at NewExit and Exit.
+ for (Instruction &BBI : *NewExit) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+    // Stop once we have passed all PHI nodes.
+ if (!PN)
+ break;
+    // PN should be used in another PHI located in the Exit block, as
+    // Exit was split by SplitBlockPredecessors into Exit and NewExit.
+    // Basically it should look like:
+ // NewExit:
+ // PN = PHI [I, Latch]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, EpilogPreHeader]
+ //
+    // The incoming block is EpilogPreHeader instead of NewExit, as
+    // NewExit was split one more time to get EpilogPreHeader.
+ assert(PN->hasOneUse() && "The phi should have 1 use");
+    PHINode *EpilogPN = cast<PHINode>(PN->use_begin()->getUser());
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+ // Add incoming PreHeader from branch around the Loop
+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I && L->contains(I))
+ // If value comes from an instruction in the loop add VMap value.
+ V = VMap.lookup(I);
+ // For the instruction out of the loop, constant or undefined value
+ // insert value itself.
+ EpilogPN->addIncoming(V, EpilogLatch);
+
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+ "EpilogPN should have EpilogPreHeader incoming block");
+ // Change EpilogPreHeader incoming block to NewExit.
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+ NewExit);
+ // Now PHIs should look like:
+ // NewExit:
+ // PN = PHI [I, Latch], [undef, PreHeader]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+ }
+
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+ // Update corresponding PHI nodes in epilog loop.
+ for (BasicBlock *Succ : successors(Latch)) {
+ // Skip this as we already updated phis in exit blocks.
+ if (!L->contains(Succ))
+ continue;
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
      // Stop once we have passed all PHI nodes.
+ if (!PN)
+ break;
+ // Add new PHI nodes to the loop exit block and update epilog
+ // PHIs with the new PHI values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ NewExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the unrolling loop preheader.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Adding a value to the new PHI node from the unrolling loop latch.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+
+ // Update the existing PHI node operand with the value from the new PHI
+ // node. Corresponding instruction in epilog loop should be PHI.
+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ }
+ }
+
+ Instruction *InsertPt = NewExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ assert(Exit && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolling loop)
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
}
/// Create a clone of the blocks in a loop and connect them together.
-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
-/// loop will be created including all cloned blocks, and the iterator of it
-/// switches to count NewIter down to 0.
+/// If CreateRemainderLoop is false, the loop structure will not be cloned;
+/// otherwise a new loop is created that includes all cloned blocks, and its
+/// induction variable counts NewIter down to 0.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// If the loop structure is cloned, InsertTop should be the new preheader and
+/// InsertBot the new loop exit.
///
-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
+static void CloneLoopBlocks(Loop *L, Value *NewIter,
+ const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder,
BasicBlock *InsertTop, BasicBlock *InsertBot,
+ BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
- BasicBlock *Preheader = L->getLoopPreheader();
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
- if (!UnrollProlog) {
+ if (CreateRemainderLoop) {
NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
if (NewLoop)
@@ -176,19 +305,20 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// For the first block, add a CFG connection to this newly
// created block.
InsertTop->getTerminator()->setSuccessor(0, NewBB);
-
}
+
if (Latch == *BB) {
- // For the last block, if UnrollProlog is true, create a direct jump to
- // InsertBot. If not, create a loop back to cloned head.
+ // For the last block, if CreateRemainderLoop is false, create a direct
+ // jump to InsertBot. If not, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (UnrollProlog) {
+ if (!CreateRemainderLoop) {
Builder.CreateBr(InsertBot);
} else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
FirstLoopBB->getFirstNonPHI());
Value *IdxSub =
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (UnrollProlog) {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ if (!CreateRemainderLoop) {
+ if (UseEpilogRemainder) {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ NewPHI->removeIncomingValue(Latch, false);
+ } else {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
NewPHI->setIncomingBlock(idx, InsertTop);
@@ -217,8 +353,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
idx = NewPHI->getBasicBlockIndex(Latch);
Value *InVal = NewPHI->getIncomingValue(idx);
NewPHI->setIncomingBlock(idx, NewLatch);
- if (VMap[InVal])
- NewPHI->setIncomingValue(idx, VMap[InVal]);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
}
}
if (NewLoop) {
@@ -254,11 +390,11 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
}
}
-/// Insert code in the prolog code when unrolling a loop with a
+/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
-/// of loop bodes in the loop after unrolling. (Some folks refer
+/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
@@ -266,37 +402,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
+/// ***Prolog case***
/// extraiters = tripcount % loopfactor
/// if (extraiters == 0) jump Loop:
-/// else jump Prol
+/// else jump Prol:
/// Prol: LoopBody;
/// extraiters -= 1 // Omitted if unroll factor is 2.
/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
-/// if (tripcount < loopfactor) jump End
+/// if (tripcount < loopfactor) jump End:
/// Loop:
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA) {
+/// ***Epilog case***
+/// extraiters = tripcount % loopfactor
+/// if (tripcount < loopfactor) jump LoopExit:
+/// unroll_iters = tripcount - extraiters
+/// Loop: LoopBody; (executes unroll_iter times);
+/// unroll_iter -= 1
+/// if (unroll_iter != 0) jump Loop:
+/// LoopExit:
+/// if (extraiters == 0) jump EpilExit:
+/// Epil: LoopBody; (executes extraiters times)
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
+/// EpilExit:
+
+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
+ if (!L->isLoopSimplifyForm())
+ return false;
+ BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
+ if (!Exit)
return false;
- // Use Scalar Evolution to compute the trip count. This allows more
- // loops to be unrolled than relying on induction var simplification
+ // Use Scalar Evolution to compute the trip count. This allows more loops to
+ // be unrolled than relying on induction var simplification.
if (!SE)
return false;
- // Only unroll loops with a computable trip count and the trip count needs
- // to be an int value (allowing a pointer type is a TODO item)
+ // Only unroll loops with a computable trip count, and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item).
const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy())
@@ -304,21 +459,19 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
- // Add 1 since the backedge count doesn't include the first loop iteration
+ // Add 1 since the backedge count doesn't include the first loop iteration.
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
BasicBlock *Header = L->getHeader();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
- if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
- return false;
-
- // We only handle cases when the unroll factor is a power of 2.
- // Count is the loop unroll factor, the number of extra copies added + 1.
- if (!isPowerOf2_32(Count))
+ if (!AllowExpensiveTripCount &&
+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
@@ -326,51 +479,115 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
if (Log2_32(Count) > BEWidth)
return false;
- // If this loop is nested, then the loop unroller changes the code in
- // parent loop, so the Scalar Evolution pass needs to be run again
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
- BasicBlock *PH = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
- // It helps to splits the original preheader twice, one for the end of the
- // prolog code and one for a new loop preheader
- BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
- BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ // Loop structure is the following:
+ //
+ // PreHeader
+ // Header
+ // ...
+ // Latch
+ // Exit
+
+ BasicBlock *NewPreHeader;
+ BasicBlock *NewExit = nullptr;
+ BasicBlock *PrologExit = nullptr;
+ BasicBlock *EpilogPreHeader = nullptr;
+ BasicBlock *PrologPreHeader = nullptr;
+
+ if (UseEpilogRemainder) {
+ // If epilog remainder
+ // Split PreHeader to insert a branch around loop for unrolling.
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ // Split Exit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+ DT, LI, PreserveLCSSA);
+ // Split NewExit to insert epilog remainder loop.
+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+ } else {
+ // If prolog remainder
+ // Split the original preheader twice to insert prolog remainder loop
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+ DT, LI);
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
+ // Split PrologExit to get NewPreHeader.
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ }
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // *NewPreHeader *PrologPreHeader
+ // Header *PrologExit
+ // ... *NewPreHeader
+ // Latch Header
+ // *NewExit ...
+ // *EpilogPreHeader Latch
+ // Exit Exit
+
+ // Calculate conditions for branch around loop for unrolling
+ // in epilog case and around prolog remainder loop in prolog case.
// Compute the number of extra iterations required, which is:
- // extra iterations = run-time trip count % (loop unroll factor + 1)
+ // extra iterations = run-time trip count % loop unroll factor
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
-
IRBuilder<> B(PreHeaderBR);
- Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
-
- // If ModVal is zero, we know that either
- // 1. there are no iteration to be run in the prologue loop
- // OR
- // 2. the addition computing TripCount overflowed
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
- // number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
-
- Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
-
- // Branch to either the extra iterations or the cloned/unrolled loop
- // We will fix up the true branch label when adding loop body copies
- B.CreateCondBr(BranchVal, PEnd, PEnd);
- assert(PreHeaderBR->isUnconditional() &&
- PreHeaderBR->getSuccessor(0) == PEnd &&
- "CFG edges in Preheader are not correct");
+ Value *ModVal;
+ // Calculate ModVal = (BECount + 1) % Count.
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count)) {
+    // When Count is a power of 2 we don't need BECount to compute ModVal in
+    // the epilog case; however, we'll still need it for the branch around the
+    // unrolled loop in the prolog case.
+ ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+ // 1. There are no iterations to be run in the prolog/epilog loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+ // explicitly check this above).
+ } else {
+ // As (BECount + 1) can potentially unsigned overflow we count
+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+ Value *ModValTmp = B.CreateURem(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count));
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ ModVal = B.CreateURem(ModValAdd,
+ ConstantInt::get(BECount->getType(), Count),
+ "xtraiter");
+ }
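  // Worked example (illustrative): for Count == 3 and BECount == 6, i.e.
  // TripCount == 7: ModValTmp = 6 urem 3 = 0, ModValAdd = 1, and ModVal =
  // 1 urem 3 = 1 == 7 % 3, computed without the potentially overflowing
  // BECount + 1. For BECount == 5 the second urem matters: (5 urem 3) + 1
  // == 3, and 3 urem 3 == 0 == 6 % 3.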
+ Value *BranchVal =
+ UseEpilogRemainder ? B.CreateICmpULT(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count - 1)) :
+ B.CreateIsNotNull(ModVal, "lcmp.mod");
+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
- // cloned blocks in the prolog code
+ // cloned blocks in the prolog/epilog code
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -382,34 +599,80 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- bool UnrollPrologue = Count == 2;
+  // For unroll factor 2 the remainder loop will have exactly 1 iteration.
+  // Do not create a 1-iteration loop in that case.
+ bool CreateRemainderLoop = (Count != 2);
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
- CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
- VMap, LI);
-
- // Insert the cloned blocks into function just before the original loop
- F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
-
- // Rewrite the cloned instruction operands to use the values
- // created when the clone is created.
- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
- for (BasicBlock::iterator I = NewBlocks[i]->begin(),
- E = NewBlocks[i]->end();
- I != E; ++I) {
- RemapInstruction(&*I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+ CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+
+ // Insert the cloned blocks into the function.
+ F->getBasicBlockList().splice(InsertBot->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(),
+ F->end());
+
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // NewPreHeader PrologPreHeader
+ // Header PrologHeader
+ // ... ...
+ // Latch PrologLatch
+ // NewExit PrologExit
+ // EpilogPreHeader NewPreHeader
+ // EpilogHeader Header
+ // ... ...
+ // EpilogLatch Latch
+ // Exit Exit
+
+ // Rewrite the cloned instruction operands to use the values created when the
+ // clone is created.
+ for (BasicBlock *BB : NewBlocks) {
+ for (Instruction &I : *BB) {
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
}
- // Connect the prolog code to the original loop and update the
- // PHI functions.
- BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
- PreserveLCSSA);
+ if (UseEpilogRemainder) {
+ // Connect the epilog code to the original loop and update the
+ // PHI functions.
+ ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,
+ PreserveLCSSA);
+
+    // Update the iteration counter used by the unrolled loop.
+    // It must be a multiple of Count.
+ IRBuilder<> B2(NewPreHeader->getTerminator());
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ B2.SetInsertPoint(LatchBR);
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+ Header->getFirstNonPHI());
+ Value *IdxSub =
+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".nsub");
+ Value *IdxCmp;
+ if (LatchBR->getSuccessor(0) == Header)
+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
+ else
+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(TestVal, NewPreHeader);
+ NewIdx->addIncoming(IdxSub, Latch);
+ LatchBR->setCondition(IdxCmp);
+ } else {
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
+ VMap, DT, LI, PreserveLCSSA);
+ }
NumRuntimeUnrolled++;
return true;
}
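
In scalar terms, the epilog arrangement produced above behaves like the sketch below for Count == 4; the variable names mirror the IR values (xtraiter, unroll_iter/niter), but this is only an illustration, not the emitted IR.

    void unrolledByFour(unsigned TripCount, int *A) {
      const unsigned Count = 4;
      unsigned XtraIter = TripCount % Count;     // ModVal / xtraiter
      unsigned I = 0;
      if (TripCount >= Count) {                  // guard branch in the preheader
        unsigned NIter = TripCount - XtraIter;   // unroll_iter, a multiple of 4
        do {                                     // unrolled main loop
          A[I] = 0; A[I + 1] = 0; A[I + 2] = 0; A[I + 3] = 0;
          I += Count;
          NIter -= Count;                        // conceptually niter.nsub
        } while (NIter != 0);                    // conceptually niter.ncmp
      }
      for (unsigned J = 0; J < XtraIter; ++J)    // epilog remainder loop
        A[I + J] = 0;
    }
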
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index fa958e913b7bd..3902c67c6a013 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -11,13 +11,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -423,7 +430,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
default:
return InstDesc(false, I);
case Instruction::PHI:
- return InstDesc(I, Prev.getMinMaxKind());
+ return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
case Instruction::Sub:
case Instruction::Add:
return InstDesc(Kind == RK_IntegerAdd, I);
@@ -466,12 +473,10 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes) {
- bool HasFunNoNaNAttr = false;
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
- if (F.hasFnAttribute("no-nans-fp-math"))
- HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+ bool HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
@@ -514,6 +519,43 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
+bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+  // Get the previous value. The previous value comes from the latch edge while
+  // the initial value comes from the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value. The
+ // dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ if (!DT->dominates(Previous, I))
+ return false;
+
+ return true;
+}
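
In source form, the recurrences admitted by this predicate look like the following sketch (illustrative C++, not from the patch); the comments map each piece back to the checks above.

    int sumWithPrev(const int *A, int N) {
      int Prev = 0;                // initial value from the preheader edge
      int Sum = 0;
      for (int I = 0; I < N; ++I) {
        int Cur = A[I];            // the 'Previous' value: non-PHI, in-loop
        Sum += Prev * Cur;         // user of the PHI, dominated by 'Cur'
        Prev = Cur;                // latch edge feeding the header PHI
      }
      return Sum;
    }
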
+
/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
@@ -612,61 +654,120 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- ConstantInt *Step)
- : StartValue(Start), IK(K), StepValue(Step) {
+ const SCEV *Step)
+ : StartValue(Start), IK(K), Step(Step) {
assert(IK != IK_NoInduction && "Not an induction");
+
+ // Start value type should match the induction kind and the value
+ // itself should not be null.
assert(StartValue && "StartValue is null");
- assert(StepValue && !StepValue->isZero() && "StepValue is zero");
assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
"StartValue is not a pointer for pointer induction");
assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
"StartValue is not an integer for integer induction");
- assert(StepValue->getType()->isIntegerTy() &&
- "StepValue is not an integer");
+
+  // Check the Step value. It should be a non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
}
int InductionDescriptor::getConsecutiveDirection() const {
- if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
- return StepValue->getSExtValue();
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
return 0;
}
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
+ ScalarEvolution *SE,
+ const DataLayout& DL) const {
+
+ SCEVExpander Exp(*SE, DL, "induction");
switch (IK) {
- case IK_IntInduction:
+ case IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
"Index type does not match StartValue type");
- if (StepValue->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateAdd(StartValue, Index);
- case IK_PtrInduction:
- assert(Index->getType() == StepValue->getType() &&
+ // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
+ // and calculate (Start + Index * Step) for all cases, without
+ // special handling for "isOne" and "isMinusOne".
+    // But in real life the resulting code gets worse. We would mix SCEV
+    // expressions and ADD/SUB operations and end up with redundant
+    // intermediate values computed in different ways, which InstCombine
+    // is unable to reduce.
+
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isOne())
+ return B.CreateAdd(StartValue, Index);
+ const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
+ SE->getMulExpr(Step, SE->getSCEV(Index)));
+ return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+ }
+ case IK_PtrInduction: {
+ assert(Index->getType() == Step->getType() &&
"Index type does not match StepValue type");
- if (StepValue->isMinusOne())
- Index = B.CreateNeg(Index);
- else if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
+ assert(isa<SCEVConstant>(Step) &&
+ "Expected constant step for pointer induction");
+ const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
+ Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
return B.CreateGEP(nullptr, StartValue, Index);
-
+ }
case IK_NoInduction:
return nullptr;
}
llvm_unreachable("invalid enum");
}
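
Stripped of the SCEV machinery, the value transform() materializes is just Start + Index * Step; a scalar model of the two induction kinds (assumed semantics, for illustration only):

    long transformInt(long Start, long Index, long Step) {
      return Start + Index * Step;   // IK_IntInduction
    }
    int *transformPtr(int *Start, long Index, long Step) {
      return Start + Index * Step;   // IK_PtrInduction: a GEP in elements
    }
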
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
- InductionDescriptor &D) {
+bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D,
+ bool Assume) {
+ Type *PhiTy = Phi->getType();
+  // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ return isInductionPHI(Phi, PSE.getSE(), D, AR);
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+ ScalarEvolution *SE,
+ InductionDescriptor &D,
+ const SCEV *Expr) {
Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
return false;
// Check that the PHI is consecutive.
- const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
if (!AR) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
@@ -678,17 +779,22 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
return false;
- ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, CV);
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step);
return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+  // The step of a pointer induction must be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
Type *PointerElementType = PhiTy->getPointerElementType();
// The pointer stride cannot be determined if the pointer element type is not
// sized.
@@ -703,8 +809,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
- auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
-
+ auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
+ true /* signed */);
D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
return true;
}
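
A worked instance of the byte-to-element normalization just above (the numbers are illustrative): a pointer PHI over i32 whose SCEV step is -8 bytes records a stride of -2 elements.

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t CVSize = -8;  // byte step taken from the SCEV (assumed)
      uint64_t Size = 4;    // allocation size of the i32 element type
      assert(CVSize % (int64_t)Size == 0 && "stride not a whole element");
      int64_t StepValue = CVSize / (int64_t)Size;  // -2 elements per iteration
      return StepValue == -2 ? 0 : 1;
    }
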
@@ -727,3 +833,137 @@ SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
return UsedOutside;
}
+
+void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
+ // By definition, all loop passes need the LoopInfo analysis and the
+ // Dominator tree it depends on. Because they all participate in the loop
+ // pass manager, they must also preserve these.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
+ // here because users shouldn't directly get them from this header.
+ extern char &LoopSimplifyID;
+ extern char &LCSSAID;
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+
+ // Loop passes are designed to run inside of a loop pass manager which means
+ // that any function analyses they require must be required by the first loop
+ // pass in the manager (so that it is computed before the loop pass manager
+  // runs) and preserved by all loop passes in the manager. To make this
+ // reasonably robust, the set needed for most loop passes is maintained here.
+ // If your loop pass requires an analysis not listed here, you will need to
+ // carefully audit the loop pass manager nesting structure that results.
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+}
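
A hypothetical legacy loop pass built on this helper would look roughly as follows; the pass name and body are assumptions, only getLoopAnalysisUsage() comes from the patch.

    #include "llvm/Analysis/LoopPass.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    namespace {
    struct CountLoopsPass : public LoopPass {  // hypothetical example pass
      static char ID;
      CountLoopsPass() : LoopPass(ID) {}
      bool runOnLoop(Loop *L, LPPassManager &) override {
        return false;  // analysis only; makes no changes
      }
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        getLoopAnalysisUsage(AU);  // DT, LI, LoopSimplify, LCSSA, AA, SCEV
      }
    };
    }
    char CountLoopsPass::ID = 0;
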
+
+/// Manually defined generic "LoopPass" dependency initialization. This is used
+/// to initialize the exact set of passes from above in \c
+/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
+/// with:
+///
+/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
+///
+/// As-if "LoopPass" were a pass.
+void llvm::initializeLoopPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+ INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+}
+
+/// \brief Find string metadata for loop
+///
+/// If the metadata has a value (e.g. {"llvm.distribute", 1}), return the value
+/// as an operand. If it has no value, return nullptr. If the string metadata
+/// is not found at all, return Optional's not-a-value.
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+ StringRef Name) {
+ MDNode *LoopID = TheLoop->getLoopID();
+  // Return none if LoopID is not set.
+ if (!LoopID)
+ return None;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over LoopID operands and look for MDString Metadata
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+    // If the name matches, return the requested operand (or nullptr when
+    // the hint has no value operand).
+ if (Name.equals(S->getString()))
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+ }
+ return None;
+}
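
A hypothetical caller distinguishing the three possible outcomes (metadata absent, name-only, name plus value); the metadata name here is an assumption for illustration.

    void inspectLoopHint(Loop *L) {
      Optional<const MDOperand *> Op =
          findStringMetadataForLoop(L, "llvm.loop.unroll.count");
      if (!Op) {
        // No string metadata with that name on the loop.
      } else if (!*Op) {
        // Name-only form, e.g. !{!"llvm.loop.unroll.count"}.
      } else {
        // Name-plus-value form; (*Op)->get() yields the value Metadata.
      }
    }
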
+
+/// Returns true if the instruction in a loop is guaranteed to execute at least
+/// once.
+bool llvm::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
+ // We have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ // If there's a throw in the header block, we can't guarantee we'll reach
+ // Inst.
+ return !SafetyInfo->HeaderMayThrow;
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (SafetyInfo->MayThrow)
+ return false;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
+ return false;
+
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
+ // FIXME: In general, we have to prove that the loop isn't an infinite loop.
+  // See http://llvm.org/PR24078. (The "ExitBlocks.empty()" check above is
+ // just a special case of this.)
+ return true;
+}
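
A counterexample for the dominance requirement (illustrative C++): the division below sits on a path that an early exit can bypass, so it does not dominate the loop's exit block and is not guaranteed to execute; hoisting it out of the loop could introduce a trap.

    int sumQuotients(const int *A, int N, int D) {
      int Sum = 0;
      for (int I = 0; I < N; ++I) {
        if (A[I] == 0)
          break;          // exit taken before the division is reached
        Sum += A[I] / D;  // does not dominate the exit; D may be zero
      }
      return Sum;
    }
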
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index 9a2a06cf68915..b3c61691da30a 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,11 +18,18 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
+static cl::opt<bool>
+ AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+ cl::Hidden,
+ cl::desc("Add no-alias annotation for instructions that "
+ "are disambiguated by memchecks"));
+
LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
DominatorTree *DT, ScalarEvolution *SE,
bool UseLAIChecks)
@@ -32,12 +39,12 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
assert(L->getLoopPreheader() && "No preheader");
if (UseLAIChecks) {
setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
- setSCEVChecks(LAI.PSE.getUnionPredicate());
+ setSCEVChecks(LAI.getPSE().getUnionPredicate());
}
}
void LoopVersioning::setAliasChecks(
- const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
AliasChecks = std::move(Checks);
}
@@ -56,9 +63,8 @@ void LoopVersioning::versionLoop(
BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
- assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
- const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
SCEVRuntimeCheck =
@@ -71,7 +77,7 @@ void LoopVersioning::versionLoop(
if (MemRuntimeCheck && SCEVRuntimeCheck) {
RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
- SCEVRuntimeCheck, "ldist.safe");
+ SCEVRuntimeCheck, "lver.safe");
if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
I->insertBefore(RuntimeCheckBB->getTerminator());
} else
@@ -119,16 +125,14 @@ void LoopVersioning::addPHINodes(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
assert(PHIBlock && "No single successor to loop exit block");
+ PHINode *PN;
+  // First add a single-operand PHI for each value in DefsUsedOutside if one
+  // does not exist yet.
for (auto *Inst : DefsUsedOutside) {
- auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
- PHINode *PN;
-
- // First see if we have a single-operand PHI with the value defined by the
+ // See if we have a single-operand PHI with the value defined by the
// original loop.
for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have on predecessor");
if (PN->getIncomingValue(0) == Inst)
break;
}
@@ -141,7 +145,179 @@ void LoopVersioning::addPHINodes(
User->replaceUsesOfWith(Inst, PN);
PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
}
- // Add the new incoming value from the non-versioned loop.
- PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock());
}
+
+ // Then for each PHI add the operand for the edge from the cloned loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+    assert(PN->getNumOperands() == 1 &&
+           "Exit block should only have one predecessor");
+
+    // If the definition was cloned, use that; otherwise use the same value.
+ Value *ClonedValue = PN->getIncomingValue(0);
+ auto Mapped = VMap.find(ClonedValue);
+ if (Mapped != VMap.end())
+ ClonedValue = Mapped->second;
+
+ PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
+ }
+}
+
+void LoopVersioning::prepareNoAliasMetadata() {
+ // We need to turn the no-alias relation between pointer checking groups into
+ // no-aliasing annotations between instructions.
+ //
+ // We accomplish this by mapping each pointer checking group (a set of
+ // pointers memchecked together) to an alias scope and then also mapping each
+ // group to the list of scopes it can't alias.
+
+ const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+
+ // First allocate an aliasing scope for each pointer checking group.
+ //
+ // While traversing through the checking groups in the loop, also create a
+ // reverse map from pointers to the pointer checking group they were assigned
+ // to.
+ MDBuilder MDB(Context);
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+ for (const auto &Group : RtPtrChecking->CheckingGroups) {
+ GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+ for (unsigned PtrIdx : Group.Members)
+ PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+ }
+
+ // Go through the checks and for each pointer group, collect the scopes for
+ // each non-aliasing pointer group.
+ DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
+ SmallVector<Metadata *, 4>>
+ GroupToNonAliasingScopes;
+
+ for (const auto &Check : AliasChecks)
+ GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+  // Finally, transform the above into the scope-list form that the metadata
+  // actually uses.
+
+ for (auto Pair : GroupToNonAliasingScopes)
+ GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
+}
+
+void LoopVersioning::annotateLoopWithNoAlias() {
+ if (!AnnotateNoAlias)
+ return;
+
+ // First prepare the maps.
+ prepareNoAliasMetadata();
+
+ // Add the scope and no-alias metadata to the instructions.
+ for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+ annotateInstWithNoAlias(I);
+ }
+}
+
+void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst) {
+ if (!AnnotateNoAlias)
+ return;
+
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+ const Value *Ptr = isa<LoadInst>(OrigInst)
+ ? cast<LoadInst>(OrigInst)->getPointerOperand()
+ : cast<StoreInst>(OrigInst)->getPointerOperand();
+
+ // Find the group for the pointer and then add the scope metadata.
+ auto Group = PtrToGroup.find(Ptr);
+ if (Group != PtrToGroup.end()) {
+ VersionedInst->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Context, GroupToScope[Group->second])));
+
+ // Add the no-alias metadata.
+ auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
+ if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+ VersionedInst->setMetadata(
+ LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_noalias),
+ NonAliasingScopeList->second));
+ }
+}
+
+namespace {
+/// \brief Also expose this as a pass. Currently this is only used for
+/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
+/// array accesses from the loop.
+class LoopVersioningPass : public FunctionPass {
+public:
+ LoopVersioningPass() : FunctionPass(ID) {
+ initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L);
+ if (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ LoopVersioning LVer(LAI, L, LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+#define LVER_OPTION "loop-versioning"
+#define DEBUG_TYPE LVER_OPTION
+
+char LoopVersioningPass::ID;
+static const char LVer_name[] = "Loop Versioning";
+
+INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopVersioningPass() {
+ return new LoopVersioningPass();
+}
}
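
In source-level terms, versionLoop() followed by annotateLoopWithNoAlias() amounts to the sketch below; the bounds check stands in for the emitted memchecks and is not the actual lver.safe condition.

    void copyVersioned(int *A, int *B, int N) {
      if (A + N <= B || B + N <= A) {   // stand-in for the runtime memcheck
        for (int I = 0; I < N; ++I)     // versioned loop: accesses would
          A[I] = B[I];                  // carry !alias.scope / !noalias
      } else {
        for (int I = 0; I < N; ++I)     // original loop, left unannotated
          A[I] = B[I];
      }
    }
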
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index b0ad4d5e84a1b..1b31c5ae580a1 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,14 +14,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "lowerinvoke"
@@ -53,8 +52,8 @@ FunctionPass *llvm::createLowerInvokePass() {
bool LowerInvoke::runOnFunction(Function &F) {
bool Changed = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ for (BasicBlock &BB : F)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
@@ -69,10 +68,10 @@ bool LowerInvoke::runOnFunction(Function &F) {
BranchInst::Create(II->getNormalDest(), II);
// Remove any PHI node entries from the exception destination.
- II->getUnwindDest()->removePredecessor(&*BB);
+ II->getUnwindDest()->removePredecessor(&BB);
// Remove the invoke instruction now.
- BB->getInstList().erase(II);
+ BB.getInstList().erase(II);
++NumInvokes; Changed = true;
}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 52beb1542497e..5c07469869ff7 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -59,12 +59,6 @@ namespace {
bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // This is a cluster of orthogonal Transforms
- AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreservedID(LowerInvokePassID);
- }
-
struct CaseRange {
ConstantInt* Low;
ConstantInt* High;
@@ -192,8 +186,8 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
}
// Remove incoming values in the reverse order to prevent invalidating
  // *successive* indices.
- for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III)
- PN->removeIncomingValue(*III);
+ for (unsigned III : reverse(Indices))
+ PN->removeIncomingValue(III);
}
}
diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile
deleted file mode 100644
index d1e9336d67f02..0000000000000
--- a/lib/Transforms/Utils/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMTransformUtils
-BUILD_ARCHIVE = 1
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index aa1e35ddba024..1419254bcb4f4 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -12,12 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
@@ -26,51 +27,11 @@ using namespace llvm;
STATISTIC(NumPromoted, "Number of alloca's promoted");
-namespace {
- struct PromotePass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(ID) {
- initializePromotePassPass(*PassRegistry::getPassRegistry());
- }
-
- // runOnFunction - To run this pass, first we calculate the alloca
- // instructions that are safe for promotion, then we promote each one.
- //
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
- // This is a cluster of orthogonal Transforms
- AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreservedID(LowerSwitchID);
- AU.addPreservedID(LowerInvokePassID);
- }
- };
-} // end of anonymous namespace
-
-char PromotePass::ID = 0;
-INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
- false, false)
-
-bool PromotePass::runOnFunction(Function &F) {
- std::vector<AllocaInst*> Allocas;
-
- BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
-
- if (F.hasFnAttribute(Attribute::OptimizeNone))
- return false;
-
- bool Changed = false;
-
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
+ AssumptionCache &AC) {
+ std::vector<AllocaInst *> Allocas;
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ bool Changed = false;
while (1) {
Allocas.clear();
@@ -78,22 +39,69 @@ bool PromotePass::runOnFunction(Function &F) {
// Find allocas that are safe to promote, by looking at all instructions in
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
if (isAllocaPromotable(AI))
Allocas.push_back(AI);
- if (Allocas.empty()) break;
+ if (Allocas.empty())
+ break;
PromoteMemToReg(Allocas, DT, nullptr, &AC);
NumPromoted += Allocas.size();
Changed = true;
}
-
return Changed;
}
+PreservedAnalyses PromotePass::run(Function &F, AnalysisManager<Function> &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ if (!promoteMemoryToRegister(F, DT, AC))
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct PromoteLegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromoteLegacyPass() : FunctionPass(ID) {
+ initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ return promoteMemoryToRegister(F, DT, AC);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ }
+ };
+} // end of anonymous namespace
+
+char PromoteLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
+ "Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
//
FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
- return new PromotePass();
+ return new PromoteLegacyPass();
}
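
Client code keeps using this same entry point; a minimal sketch of scheduling the pass through the legacy pass manager (assuming an already-populated Module M):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    void runMem2Reg(llvm::Module &M) {
      llvm::legacy::FunctionPassManager FPM(&M);
      FPM.add(llvm::createPromoteMemoryToRegisterPass()); // PromoteLegacyPass
      FPM.doInitialization();
      for (llvm::Function &F : M)
        if (!F.isDeclaration())
          FPM.run(F);
      FPM.doFinalization();
    }
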
diff --git a/lib/Transforms/Utils/MemorySSA.cpp b/lib/Transforms/Utils/MemorySSA.cpp
new file mode 100644
index 0000000000000..8ba3cae43b188
--- /dev/null
+++ b/lib/Transforms/Utils/MemorySSA.cpp
@@ -0,0 +1,1361 @@
+//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the MemorySSA class.
+//
+//===----------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "memoryssa"
+using namespace llvm;
+STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups");
+STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits");
+STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts");
+
+INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
+ true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
+ true)
+
+INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
+ "Memory SSA Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
+ "Memory SSA Printer", false, false)
+
+static cl::opt<bool>
+ VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
+ cl::desc("Verify MemorySSA in legacy printer pass."));
+
+namespace llvm {
+/// \brief An assembly annotator class to print Memory SSA information in
+/// comments.
+class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class MemorySSA;
+ const MemorySSA *MSSA;
+
+public:
+ MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
+ OS << "; " << *MA << "\n";
+ }
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+ OS << "; " << *MA << "\n";
+ }
+};
+
+/// \brief A MemorySSAWalker that does AA walks and caching of lookups to
+/// disambiguate accesses.
+///
+/// FIXME: The current implementation of this can take quadratic space in rare
+/// cases. This can be fixed, but it is something to note until it is fixed.
+///
+/// In order to trigger this behavior, you need to store to N distinct locations
+/// (that AA can prove don't alias), perform M stores to other memory
+/// locations that AA can prove don't alias any of the initial N locations, and
+/// then load from all of the N locations. In this case, we insert M cache
+/// entries for each of the N loads.
+///
+/// For example:
+/// define i32 @foo() {
+/// %a = alloca i32, align 4
+/// %b = alloca i32, align 4
+/// store i32 0, i32* %a, align 4
+/// store i32 0, i32* %b, align 4
+///
+/// ; Insert M stores to other memory that doesn't alias %a or %b here
+///
+/// %c = load i32, i32* %a, align 4 ; Caches M entries in
+/// ; CachedUpwardsClobberingAccess for the
+/// ; MemoryLocation %a
+/// %d = load i32, i32* %b, align 4 ; Caches M entries in
+/// ; CachedUpwardsClobberingAccess for the
+/// ; MemoryLocation %b
+///
+/// ; For completeness' sake, loading %a or %b again would not cache *another*
+/// ; M entries.
+/// %r = add i32 %c, %d
+/// ret i32 %r
+/// }
+class MemorySSA::CachingWalker final : public MemorySSAWalker {
+public:
+ CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
+ ~CachingWalker() override;
+
+ MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+ MemoryLocation &) override;
+ void invalidateInfo(MemoryAccess *) override;
+
+protected:
+ struct UpwardsMemoryQuery;
+ MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &,
+ const MemoryLocation &);
+
+ void doCacheInsert(const MemoryAccess *, MemoryAccess *,
+ const UpwardsMemoryQuery &, const MemoryLocation &);
+
+ void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &,
+ const MemoryLocation &);
+
+private:
+ MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
+ UpwardsMemoryQuery &, bool);
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+ bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &,
+ const MemoryLocation &Loc) const;
+ void verifyRemoved(MemoryAccess *);
+ SmallDenseMap<ConstMemoryAccessPair, MemoryAccess *>
+ CachedUpwardsClobberingAccess;
+ DenseMap<const MemoryAccess *, MemoryAccess *> CachedUpwardsClobberingCall;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+};
+}
+
+namespace {
+struct RenamePassData {
+ DomTreeNode *DTN;
+ DomTreeNode::const_iterator ChildIt;
+ MemoryAccess *IncomingVal;
+
+ RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
+ MemoryAccess *M)
+ : DTN(D), ChildIt(It), IncomingVal(M) {}
+ void swap(RenamePassData &RHS) {
+ std::swap(DTN, RHS.DTN);
+ std::swap(ChildIt, RHS.ChildIt);
+ std::swap(IncomingVal, RHS.IncomingVal);
+ }
+};
+}
+
+namespace llvm {
+/// \brief Rename a single basic block into MemorySSA form.
+/// Uses the standard SSA renaming algorithm.
+/// \returns The new incoming value.
+MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
+ MemoryAccess *IncomingVal) {
+ auto It = PerBlockAccesses.find(BB);
+ // Skip most processing if the list is empty.
+ if (It != PerBlockAccesses.end()) {
+ AccessList *Accesses = It->second.get();
+ for (MemoryAccess &L : *Accesses) {
+ switch (L.getValueID()) {
+ case Value::MemoryUseVal:
+ cast<MemoryUse>(&L)->setDefiningAccess(IncomingVal);
+ break;
+ case Value::MemoryDefVal:
+ // We can't legally optimize defs, because we only allow single
+ // memory phis/uses on operations, and if we optimize these, we can
+ // end up with multiple reaching defs. Uses do not have this
+ // problem, since they do not produce a value
+ cast<MemoryDef>(&L)->setDefiningAccess(IncomingVal);
+ IncomingVal = &L;
+ break;
+ case Value::MemoryPhiVal:
+ IncomingVal = &L;
+ break;
+ }
+ }
+ }
+
+ // Pass through values to our successors
+ for (const BasicBlock *S : successors(BB)) {
+ auto It = PerBlockAccesses.find(S);
+ // Rename the phi nodes in our successor block
+ if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
+ continue;
+ AccessList *Accesses = It->second.get();
+ auto *Phi = cast<MemoryPhi>(&Accesses->front());
+ Phi->addIncoming(IncomingVal, BB);
+ }
+
+ return IncomingVal;
+}
+
+/// \brief This is the standard SSA renaming algorithm.
+///
+/// We walk the dominator tree in preorder, renaming accesses, and then filling
+/// in phi nodes in our successors.
+void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
+ SmallPtrSet<BasicBlock *, 16> &Visited) {
+ SmallVector<RenamePassData, 32> WorkStack;
+ IncomingVal = renameBlock(Root->getBlock(), IncomingVal);
+ WorkStack.push_back({Root, Root->begin(), IncomingVal});
+ Visited.insert(Root->getBlock());
+
+ while (!WorkStack.empty()) {
+ DomTreeNode *Node = WorkStack.back().DTN;
+ DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
+ IncomingVal = WorkStack.back().IncomingVal;
+
+ if (ChildIt == Node->end()) {
+ WorkStack.pop_back();
+ } else {
+ DomTreeNode *Child = *ChildIt;
+ ++WorkStack.back().ChildIt;
+ BasicBlock *BB = Child->getBlock();
+ Visited.insert(BB);
+ IncomingVal = renameBlock(BB, IncomingVal);
+ WorkStack.push_back({Child, Child->begin(), IncomingVal});
+ }
+ }
+}
+
+/// \brief Compute dominator levels, used by the phi insertion algorithm above.
+void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) {
+ for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode());
+ DFI != DFE; ++DFI)
+ DomLevels[*DFI] = DFI.getPathLength() - 1;
+}
+
+/// \brief This handles unreachable block accesses by deleting phi nodes in
+/// unreachable blocks, and marking all other unreachable MemoryAccess's as
+/// being uses of the live on entry definition.
+void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
+ assert(!DT->isReachableFromEntry(BB) &&
+ "Reachable block found while handling unreachable blocks");
+
+ // Make sure phi nodes in our reachable successors end up with a
+ // LiveOnEntryDef for our incoming edge, even though our block is forward
+ // unreachable. We could just disconnect these blocks from the CFG fully,
+ // but we do not right now.
+ for (const BasicBlock *S : successors(BB)) {
+ if (!DT->isReachableFromEntry(S))
+ continue;
+ auto It = PerBlockAccesses.find(S);
+ // Rename the phi nodes in our successor block
+ if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
+ continue;
+ AccessList *Accesses = It->second.get();
+ auto *Phi = cast<MemoryPhi>(&Accesses->front());
+ Phi->addIncoming(LiveOnEntryDef.get(), BB);
+ }
+
+ auto It = PerBlockAccesses.find(BB);
+ if (It == PerBlockAccesses.end())
+ return;
+
+ auto &Accesses = It->second;
+ for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
+ auto Next = std::next(AI);
+ // If we have a phi, just remove it. We are going to replace all
+ // users with live on entry.
+ if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
+ UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
+ else
+ Accesses->erase(AI);
+ AI = Next;
+ }
+}
+
+MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
+ : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+ NextID(0) {
+ buildMemorySSA();
+}
+
+MemorySSA::MemorySSA(MemorySSA &&MSSA)
+ : AA(MSSA.AA), DT(MSSA.DT), F(MSSA.F),
+ ValueToMemoryAccess(std::move(MSSA.ValueToMemoryAccess)),
+ PerBlockAccesses(std::move(MSSA.PerBlockAccesses)),
+ LiveOnEntryDef(std::move(MSSA.LiveOnEntryDef)),
+ Walker(std::move(MSSA.Walker)), NextID(MSSA.NextID) {
+ // Update the Walker MSSA pointer so it doesn't point to the moved-from MSSA
+ // object any more.
+ Walker->MSSA = this;
+}
+
+MemorySSA::~MemorySSA() {
+ // Drop all our references
+ for (const auto &Pair : PerBlockAccesses)
+ for (MemoryAccess &MA : *Pair.second)
+ MA.dropAllReferences();
+}
+
+MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
+ auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
+
+ if (Res.second)
+ Res.first->second = make_unique<AccessList>();
+ return Res.first->second.get();
+}
+
+void MemorySSA::buildMemorySSA() {
+ // We create an access to represent "live on entry", for things like
+ // arguments or users of globals, where the memory they use is defined before
+ // the beginning of the function. We do not actually insert it into the IR.
+ // We do not define a live on exit for the immediate uses, and thus our
+ // semantics do *not* imply that something with no immediate uses can simply
+ // be removed.
+ BasicBlock &StartingPoint = F.getEntryBlock();
+ LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
+ &StartingPoint, NextID++);
+
+ // We maintain lists of memory accesses per-block, trading memory for time. We
+ // could just look up the memory access for every possible instruction in the
+ // stream.
+ SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
+ SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
+ // Go through each block, figure out where defs occur, and chain together all
+ // the accesses.
+ for (BasicBlock &B : F) {
+ bool InsertIntoDef = false;
+ AccessList *Accesses = nullptr;
+ for (Instruction &I : B) {
+ MemoryUseOrDef *MUD = createNewAccess(&I);
+ if (!MUD)
+ continue;
+ InsertIntoDef |= isa<MemoryDef>(MUD);
+
+ if (!Accesses)
+ Accesses = getOrCreateAccessList(&B);
+ Accesses->push_back(MUD);
+ }
+ if (InsertIntoDef)
+ DefiningBlocks.insert(&B);
+ if (Accesses)
+ DefUseBlocks.insert(&B);
+ }
+
+ // Compute live-in.
+  // Live in is normally defined as "all the blocks on the path from each def
+  // to each of its uses".
+ // MemoryDef's are implicit uses of previous state, so they are also uses.
+ // This means we don't really have def-only instructions. The only
+ // MemoryDef's that are not really uses are those that are of the LiveOnEntry
+ // variable (because LiveOnEntry can reach anywhere, and every def is a
+ // must-kill of LiveOnEntry).
+ // In theory, you could precisely compute live-in by using alias-analysis to
+ // disambiguate defs and uses to see which really pair up with which.
+ // In practice, this would be really expensive and difficult. So we simply
+ // assume all defs are also uses that need to be kept live.
+ // Because of this, the end result of this live-in computation will be "the
+ // entire set of basic blocks that reach any use".
+
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(DefUseBlocks.begin(),
+ DefUseBlocks.end());
+ // Now that we have a set of blocks where a value is live-in, recursively add
+ // predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor
+    // or live into it too.
+ LiveInBlockWorklist.append(pred_begin(BB), pred_end(BB));
+ }
+
+ // Determine where our MemoryPhi's should go
+ ForwardIDFCalculator IDFs(*DT);
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.setLiveInBlocks(LiveInBlocks);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.calculate(IDFBlocks);
+
+ // Now place MemoryPhi nodes.
+ for (auto &BB : IDFBlocks) {
+ // Insert phi node
+ AccessList *Accesses = getOrCreateAccessList(BB);
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+    // Phis are always placed at the front of the block.
+ Accesses->push_front(Phi);
+ }
+
+ // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
+ // filled in with all blocks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
+
+ MemorySSAWalker *Walker = getWalker();
+
+ // Now optimize the MemoryUse's defining access to point to the nearest
+ // dominating clobbering def.
+ // This ensures that MemoryUse's that are killed by the same store are
+ // immediate users of that store, one of the invariants we guarantee.
+ for (auto DomNode : depth_first(DT)) {
+ BasicBlock *BB = DomNode->getBlock();
+ auto AI = PerBlockAccesses.find(BB);
+ if (AI == PerBlockAccesses.end())
+ continue;
+ AccessList *Accesses = AI->second.get();
+ for (auto &MA : *Accesses) {
+ if (auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ Instruction *Inst = MU->getMemoryInst();
+ MU->setDefiningAccess(Walker->getClobberingMemoryAccess(Inst));
+ }
+ }
+ }
+
+ // Mark the uses in unreachable blocks as live on entry, so that they go
+ // somewhere.
+ for (auto &BB : F)
+ if (!Visited.count(&BB))
+ markUnreachableAsLiveOnEntry(&BB);
+}
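
Once built, the analysis is queried through the walker; a minimal sketch (assumes the MemorySSA header is included and that AA and DT were computed for F elsewhere):

    void printClobbers(llvm::Function &F, llvm::AliasAnalysis &AA,
                       llvm::DominatorTree &DT) {
      llvm::MemorySSA MSSA(F, &AA, &DT);  // invokes buildMemorySSA()
      llvm::MemorySSAWalker *Walker = MSSA.getWalker();
      for (llvm::BasicBlock &BB : F)
        for (llvm::Instruction &I : BB)
          if (llvm::MemoryAccess *MA = MSSA.getMemoryAccess(&I))
            llvm::errs() << *MA << " -> "
                         << *Walker->getClobberingMemoryAccess(&I) << "\n";
    }
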
+
+MemorySSAWalker *MemorySSA::getWalker() {
+ if (Walker)
+ return Walker.get();
+
+ Walker = make_unique<CachingWalker>(this, AA, DT);
+ return Walker.get();
+}
+
+MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
+ assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
+ AccessList *Accesses = getOrCreateAccessList(BB);
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+  // Phis are always placed at the front of the block.
+ Accesses->push_front(Phi);
+ return Phi;
+}
+
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+ MemoryAccess *Definition) {
+ assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+ MemoryUseOrDef *NewAccess = createNewAccess(I);
+ assert(
+ NewAccess != nullptr &&
+ "Tried to create a memory access for a non-memory touching instruction");
+ NewAccess->setDefiningAccess(Definition);
+ return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
+ MemoryAccess *Definition,
+ const BasicBlock *BB,
+ InsertionPlace Point) {
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(BB);
+ if (Point == Beginning) {
+ // It goes after any phi nodes
+ auto AI = std::find_if(
+ Accesses->begin(), Accesses->end(),
+ [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+
+ Accesses->insert(AI, NewAccess);
+ } else {
+ Accesses->push_back(NewAccess);
+ }
+
+ return NewAccess;
+}
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+ Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
+ return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+ Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
+ return NewAccess;
+}
+
+/// \brief Helper function to create new memory accesses
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+ // The assume intrinsic has a control dependency which we model by claiming
+ // that it writes arbitrarily. Ignore that fake memory dependency here.
+ // FIXME: Replace this special casing with a more accurate modelling of
+ // assume's control dependency.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ return nullptr;
+
+  // Find out what effect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I);
+ bool Def = bool(ModRef & MRI_Mod);
+ bool Use = bool(ModRef & MRI_Ref);
+
+  // It's possible for an instruction to not touch memory at all; during
+  // construction, we ignore such instructions.
+ if (!Def && !Use)
+ return nullptr;
+
+ assert((Def || Use) &&
+ "Trying to create a memory access with a non-memory instruction");
+
+ MemoryUseOrDef *MUD;
+ if (Def)
+ MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
+ else
+ MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
+ ValueToMemoryAccess.insert(std::make_pair(I, MUD));
+ return MUD;
+}
+
+MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
+ enum InsertionPlace Where) {
+ // Handle the initial case
+ if (Where == Beginning)
+ // The only thing that could define us at the beginning is a phi node
+ if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
+ return Phi;
+
+ DomTreeNode *CurrNode = DT->getNode(UseBlock);
+ // Need to be defined by our dominator
+ if (Where == Beginning)
+ CurrNode = CurrNode->getIDom();
+ Where = End;
+ while (CurrNode) {
+ auto It = PerBlockAccesses.find(CurrNode->getBlock());
+ if (It != PerBlockAccesses.end()) {
+ auto &Accesses = It->second;
+ for (MemoryAccess &RA : reverse(*Accesses)) {
+ if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
+ return &RA;
+ }
+ }
+ CurrNode = CurrNode->getIDom();
+ }
+ return LiveOnEntryDef.get();
+}
+
+/// \brief Returns true if \p Replacer dominates \p Replacee.
+bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
+ const MemoryAccess *Replacee) const {
+ if (isa<MemoryUseOrDef>(Replacee))
+ return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
+ const auto *MP = cast<MemoryPhi>(Replacee);
+  // For a phi node, the use occurs in the predecessor block of the phi node.
+  // Since Replacee may appear multiple times in the phi node, we have to check
+  // each operand to ensure Replacer dominates every operand where Replacee
+  // occurs.
+ for (const Use &Arg : MP->operands()) {
+ if (Arg.get() != Replacee &&
+ !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
+ return false;
+ }
+ return true;
+}
+
+/// \brief If all arguments of a MemoryPHI are defined by the same incoming
+/// argument, return that argument.
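+/// For illustration (hypothetical accesses, written in the textual form the
+/// printers below emit): `3 = MemoryPhi({bb1,1},{bb2,1})` yields the access
+/// numbered 1, while `3 = MemoryPhi({bb1,1},{bb2,2})` yields nullptr.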
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
+ MemoryAccess *MA = nullptr;
+
+ for (auto &Arg : MP->operands()) {
+ if (!MA)
+ MA = cast<MemoryAccess>(Arg);
+ else if (MA != Arg)
+ return nullptr;
+ }
+ return MA;
+}
+
+/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
+///
+/// Because of the way the intrusive list and use lists work, it is important to
+/// do removal in the right order.
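+///
+/// Concretely, the order below is: clear the defining access, invalidate the
+/// walker cache (for non-uses), erase the ValueToMemoryAccess entry, and only
+/// then erase MA from its block's access list, which destroys it.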
+void MemorySSA::removeFromLookups(MemoryAccess *MA) {
+ assert(MA->use_empty() &&
+ "Trying to remove memory access that still has uses");
+ if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->setDefiningAccess(nullptr);
+ // Invalidate our walker's cache if necessary
+ if (!isa<MemoryUse>(MA))
+ Walker->invalidateInfo(MA);
+  // The call to erase below will destroy MA, so the order of operations here
+  // matters.
+ Value *MemoryInst;
+ if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ MemoryInst = MUD->getMemoryInst();
+ } else {
+ MemoryInst = MA->getBlock();
+ }
+ ValueToMemoryAccess.erase(MemoryInst);
+
+ auto AccessIt = PerBlockAccesses.find(MA->getBlock());
+ std::unique_ptr<AccessList> &Accesses = AccessIt->second;
+ Accesses->erase(MA);
+ if (Accesses->empty())
+ PerBlockAccesses.erase(AccessIt);
+}
+
+void MemorySSA::removeMemoryAccess(MemoryAccess *MA) {
+ assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def");
+ // We can only delete phi nodes if they have no uses, or we can replace all
+ // uses with a single definition.
+ MemoryAccess *NewDefTarget = nullptr;
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
+ // Note that it is sufficient to know that all edges of the phi node have
+ // the same argument. If they do, by the definition of dominance frontiers
+ // (which we used to place this phi), that argument must dominate this phi,
+ // and thus, must dominate the phi's uses, and so we will not hit the assert
+ // below.
+ NewDefTarget = onlySingleValue(MP);
+ assert((NewDefTarget || MP->use_empty()) &&
+ "We can't delete this memory phi");
+ } else {
+ NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
+ }
+
+ // Re-point the uses at our defining access
+ if (!MA->use_empty())
+ MA->replaceAllUsesWith(NewDefTarget);
+
+  // The call to erase below will destroy MA, so the order of operations here
+  // matters.
+ removeFromLookups(MA);
+}
+
+void MemorySSA::print(raw_ostream &OS) const {
+ MemorySSAAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
+
+void MemorySSA::dump() const {
+ MemorySSAAnnotatedWriter Writer(this);
+ F.print(dbgs(), &Writer);
+}
+
+void MemorySSA::verifyMemorySSA() const {
+ verifyDefUses(F);
+ verifyDomination(F);
+ verifyOrdering(F);
+}
+
+/// \brief Verify that the order and existence of MemoryAccesses matches the
+/// order and existence of memory affecting instructions.
+void MemorySSA::verifyOrdering(Function &F) const {
+ // Walk all the blocks, comparing what the lookups think and what the access
+ // lists think, as well as the order in the blocks vs the order in the access
+ // lists.
+ SmallVector<MemoryAccess *, 32> ActualAccesses;
+ for (BasicBlock &B : F) {
+ const AccessList *AL = getBlockAccesses(&B);
+ MemoryAccess *Phi = getMemoryAccess(&B);
+ if (Phi)
+ ActualAccesses.push_back(Phi);
+ for (Instruction &I : B) {
+ MemoryAccess *MA = getMemoryAccess(&I);
+ assert((!MA || AL) && "We have memory affecting instructions "
+ "in this block but they are not in the "
+ "access list");
+ if (MA)
+ ActualAccesses.push_back(MA);
+ }
+    // Either we hit the assert above, genuinely have no accesses, or we have
+    // both accesses and an access list.
+ if (!AL)
+ continue;
+ assert(AL->size() == ActualAccesses.size() &&
+ "We don't have the same number of accesses in the block as on the "
+ "access list");
+ auto ALI = AL->begin();
+ auto AAI = ActualAccesses.begin();
+ while (ALI != AL->end() && AAI != ActualAccesses.end()) {
+ assert(&*ALI == *AAI && "Not the same accesses in the same order");
+ ++ALI;
+ ++AAI;
+ }
+ ActualAccesses.clear();
+ }
+}
+
+/// \brief Verify the domination properties of MemorySSA by checking that each
+/// definition dominates all of its uses.
+void MemorySSA::verifyDomination(Function &F) const {
+ for (BasicBlock &B : F) {
+ // Phi nodes are attached to basic blocks
+ if (MemoryPhi *MP = getMemoryAccess(&B)) {
+ for (User *U : MP->users()) {
+ BasicBlock *UseBlock;
+        // Phi operands are used on edges; we simulate correct domination by
+        // acting as if the use occurred at the end of the predecessor block.
+ if (MemoryPhi *P = dyn_cast<MemoryPhi>(U)) {
+ for (const auto &Arg : P->operands()) {
+ if (Arg == MP) {
+ UseBlock = P->getIncomingBlock(Arg);
+ break;
+ }
+ }
+ } else {
+ UseBlock = cast<MemoryAccess>(U)->getBlock();
+ }
+ (void)UseBlock;
+ assert(DT->dominates(MP->getBlock(), UseBlock) &&
+ "Memory PHI does not dominate it's uses");
+ }
+ }
+
+ for (Instruction &I : B) {
+ MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
+ if (!MD)
+ continue;
+
+ for (User *U : MD->users()) {
+ BasicBlock *UseBlock;
+ (void)UseBlock;
+ // Things are allowed to flow to phi nodes over their predecessor edge.
+ if (auto *P = dyn_cast<MemoryPhi>(U)) {
+ for (const auto &Arg : P->operands()) {
+ if (Arg == MD) {
+ UseBlock = P->getIncomingBlock(Arg);
+ break;
+ }
+ }
+ } else {
+ UseBlock = cast<MemoryAccess>(U)->getBlock();
+ }
+ assert(DT->dominates(MD->getBlock(), UseBlock) &&
+ "Memory Def does not dominate it's uses");
+ }
+ }
+ }
+}
+
+/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
+/// appears in the use list of \p Def.
+///
+/// llvm_unreachable is used instead of asserts because this may be called in
+/// a build without asserts. In that case, we don't want this to turn into a
+/// nop.
+void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
+ // The live on entry use may cause us to get a NULL def here
+ if (!Def) {
+ if (!isLiveOnEntryDef(Use))
+ llvm_unreachable("Null def but use not point to live on entry def");
+ } else if (std::find(Def->user_begin(), Def->user_end(), Use) ==
+ Def->user_end()) {
+ llvm_unreachable("Did not find use in def's use list");
+ }
+}
+
+/// \brief Verify the immediate use information, by walking all the memory
+/// accesses and verifying that, for each use, it appears in the
+/// appropriate def's use list
+void MemorySSA::verifyDefUses(Function &F) const {
+ for (BasicBlock &B : F) {
+ // Phi nodes are attached to basic blocks
+ if (MemoryPhi *Phi = getMemoryAccess(&B)) {
+ assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+ pred_begin(&B), pred_end(&B))) &&
+ "Incomplete MemoryPhi Node");
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ }
+
+ for (Instruction &I : B) {
+ if (MemoryAccess *MA = getMemoryAccess(&I)) {
+ assert(isa<MemoryUseOrDef>(MA) &&
+ "Found a phi node not attached to a bb");
+ verifyUseInDefs(cast<MemoryUseOrDef>(MA)->getDefiningAccess(), MA);
+ }
+ }
+ }
+}
+
+MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const {
+ return ValueToMemoryAccess.lookup(I);
+}
+
+MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
+ return cast_or_null<MemoryPhi>(getMemoryAccess((const Value *)BB));
+}
+
+/// \brief Determine, for two memory accesses in the same block,
+/// whether \p Dominator dominates \p Dominatee.
+/// \returns True if \p Dominator dominates \p Dominatee.
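+///
+/// Equivalently: within a single block, an access locally dominates every
+/// access that appears after it in that block's access list.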
+bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
+ const MemoryAccess *Dominatee) const {
+
+ assert((Dominator->getBlock() == Dominatee->getBlock()) &&
+ "Asking for local domination when accesses are in different blocks!");
+
+ // A node dominates itself.
+ if (Dominatee == Dominator)
+ return true;
+
+ // When Dominatee is defined on function entry, it is not dominated by another
+ // memory access.
+ if (isLiveOnEntryDef(Dominatee))
+ return false;
+
+ // When Dominator is defined on function entry, it dominates the other memory
+ // access.
+ if (isLiveOnEntryDef(Dominator))
+ return true;
+
+ // Get the access list for the block
+ const AccessList *AccessList = getBlockAccesses(Dominator->getBlock());
+ AccessList::const_reverse_iterator It(Dominator->getIterator());
+
+ // If we hit the beginning of the access list before we hit dominatee, we must
+ // dominate it
+ return std::none_of(It, AccessList->rend(),
+ [&](const MemoryAccess &MA) { return &MA == Dominatee; });
+}
+
+const static char LiveOnEntryStr[] = "liveOnEntry";
+
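+// The printers below emit the annotated-IR syntax; as a sketch with
+// hypothetical IDs: "1 = MemoryDef(liveOnEntry)" for a store with no earlier
+// clobber, "MemoryUse(1)" for a load of the stored location, and
+// "2 = MemoryPhi({if.then,1},{if.else,liveOnEntry})" at a CFG merge point.
+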
+void MemoryDef::print(raw_ostream &OS) const {
+ MemoryAccess *UO = getDefiningAccess();
+
+ OS << getID() << " = MemoryDef(";
+ if (UO && UO->getID())
+ OS << UO->getID();
+ else
+ OS << LiveOnEntryStr;
+ OS << ')';
+}
+
+void MemoryPhi::print(raw_ostream &OS) const {
+ bool First = true;
+ OS << getID() << " = MemoryPhi(";
+ for (const auto &Op : operands()) {
+ BasicBlock *BB = getIncomingBlock(Op);
+ MemoryAccess *MA = cast<MemoryAccess>(Op);
+ if (!First)
+ OS << ',';
+ else
+ First = false;
+
+ OS << '{';
+ if (BB->hasName())
+ OS << BB->getName();
+ else
+ BB->printAsOperand(OS, false);
+ OS << ',';
+ if (unsigned ID = MA->getID())
+ OS << ID;
+ else
+ OS << LiveOnEntryStr;
+ OS << '}';
+ }
+ OS << ')';
+}
+
+MemoryAccess::~MemoryAccess() {}
+
+void MemoryUse::print(raw_ostream &OS) const {
+ MemoryAccess *UO = getDefiningAccess();
+ OS << "MemoryUse(";
+ if (UO && UO->getID())
+ OS << UO->getID();
+ else
+ OS << LiveOnEntryStr;
+ OS << ')';
+}
+
+void MemoryAccess::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+char MemorySSAPrinterLegacyPass::ID = 0;
+
+MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
+ initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
+}
+
+void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+}
+
+bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
+ auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSA.print(dbgs());
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
+ return false;
+}
+
+char MemorySSAAnalysis::PassID;
+
+MemorySSA MemorySSAAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ return MemorySSA(F, &AA, &DT);
+}
+
+PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "MemorySSA for function: " << F.getName() << "\n";
+ AM.getResult<MemorySSAAnalysis>(F).print(OS);
+
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ AM.getResult<MemorySSAAnalysis>(F).verifyMemorySSA();
+
+ return PreservedAnalyses::all();
+}
+
+char MemorySSAWrapperPass::ID = 0;
+
+MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
+ initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
+
+void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+}
+
+bool MemorySSAWrapperPass::runOnFunction(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MSSA.reset(new MemorySSA(F, &AA, &DT));
+ return false;
+}
+
+void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
+
+void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
+ MSSA->print(OS);
+}
+
+MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
+
+MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
+ DominatorTree *D)
+ : MemorySSAWalker(M), AA(A), DT(D) {}
+
+MemorySSA::CachingWalker::~CachingWalker() {}
+
+struct MemorySSA::CachingWalker::UpwardsMemoryQuery {
+ // True if we saw a phi whose predecessor was a backedge
+ bool SawBackedgePhi;
+ // True if our original query started off as a call
+ bool IsCall;
+ // The pointer location we started the query with. This will be empty if
+ // IsCall is true.
+ MemoryLocation StartingLoc;
+ // This is the instruction we were querying about.
+ const Instruction *Inst;
+ // Set of visited Instructions for this query.
+ DenseSet<MemoryAccessPair> Visited;
+  // Vector of visited call accesses for this query. This is separated out
+  // because you can always cache and look up the result of call queries (i.e.
+  // when IsCall == true) for every call in the chain. The calls have no AA
+  // location associated with them, and thus, no context dependence.
+ SmallVector<const MemoryAccess *, 32> VisitedCalls;
+ // The MemoryAccess we actually got called with, used to test local domination
+ const MemoryAccess *OriginalAccess;
+
+ UpwardsMemoryQuery()
+ : SawBackedgePhi(false), IsCall(false), Inst(nullptr),
+ OriginalAccess(nullptr) {}
+
+ UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+ : SawBackedgePhi(false), IsCall(ImmutableCallSite(Inst)), Inst(Inst),
+ OriginalAccess(Access) {}
+};
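+
+// A query is typically seeded from an instruction: for a load or store,
+// IsCall is false and StartingLoc is MemoryLocation::get(I); for a call,
+// IsCall is true and StartingLoc is left empty.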
+
+void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
+
+ // TODO: We can do much better cache invalidation with differently stored
+ // caches. For now, for MemoryUses, we simply remove them
+ // from the cache, and kill the entire call/non-call cache for everything
+ // else. The problem is for phis or defs, currently we'd need to follow use
+ // chains down and invalidate anything below us in the chain that currently
+ // terminates at this access.
+
+  // See if this is a MemoryUse; if so, just remove the cached info. A
+  // MemoryUse is by definition never a barrier, so nothing in the cache could
+  // point to this use. In that case, we only need to invalidate the info for
+  // the use itself.
+
+ if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
+ UpwardsMemoryQuery Q;
+ Instruction *I = MU->getMemoryInst();
+ Q.IsCall = bool(ImmutableCallSite(I));
+ Q.Inst = I;
+ if (!Q.IsCall)
+ Q.StartingLoc = MemoryLocation::get(I);
+ doCacheRemove(MA, Q, Q.StartingLoc);
+ } else {
+ // If it is not a use, the best we can do right now is destroy the cache.
+ CachedUpwardsClobberingCall.clear();
+ CachedUpwardsClobberingAccess.clear();
+ }
+
+#ifdef EXPENSIVE_CHECKS
+ // Run this only when expensive checks are enabled.
+ verifyRemoved(MA);
+#endif
+}
+
+void MemorySSA::CachingWalker::doCacheRemove(const MemoryAccess *M,
+ const UpwardsMemoryQuery &Q,
+ const MemoryLocation &Loc) {
+ if (Q.IsCall)
+ CachedUpwardsClobberingCall.erase(M);
+ else
+ CachedUpwardsClobberingAccess.erase({M, Loc});
+}
+
+void MemorySSA::CachingWalker::doCacheInsert(const MemoryAccess *M,
+ MemoryAccess *Result,
+ const UpwardsMemoryQuery &Q,
+ const MemoryLocation &Loc) {
+ // This is fine for Phis, since there are times where we can't optimize them.
+ // Making a def its own clobber is never correct, though.
+ assert((Result != M || isa<MemoryPhi>(M)) &&
+ "Something can't clobber itself!");
+ ++NumClobberCacheInserts;
+ if (Q.IsCall)
+ CachedUpwardsClobberingCall[M] = Result;
+ else
+ CachedUpwardsClobberingAccess[{M, Loc}] = Result;
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::doCacheLookup(const MemoryAccess *M,
+ const UpwardsMemoryQuery &Q,
+ const MemoryLocation &Loc) {
+ ++NumClobberCacheLookups;
+ MemoryAccess *Result;
+
+ if (Q.IsCall)
+ Result = CachedUpwardsClobberingCall.lookup(M);
+ else
+ Result = CachedUpwardsClobberingAccess.lookup({M, Loc});
+
+ if (Result)
+ ++NumClobberCacheHits;
+ return Result;
+}
+
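+// Answers whether MD clobbers the current query: for a non-call query, it
+// does when MD's instruction may modify (MRI_Mod) the queried location; for a
+// call query, when MD's instruction is anything other than NoModRef with
+// respect to the call.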
+bool MemorySSA::CachingWalker::instructionClobbersQuery(
+ const MemoryDef *MD, UpwardsMemoryQuery &Q,
+ const MemoryLocation &Loc) const {
+ Instruction *DefMemoryInst = MD->getMemoryInst();
+ assert(DefMemoryInst && "Defining instruction not actually an instruction");
+
+ if (!Q.IsCall)
+ return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
+
+ // If this is a call, mark it for caching
+ if (ImmutableCallSite(DefMemoryInst))
+ Q.VisitedCalls.push_back(MD);
+ ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst));
+ return I != MRI_NoModRef;
+}
+
+MemoryAccessPair MemorySSA::CachingWalker::UpwardsDFSWalk(
+ MemoryAccess *StartingAccess, const MemoryLocation &Loc,
+ UpwardsMemoryQuery &Q, bool FollowingBackedge) {
+ MemoryAccess *ModifyingAccess = nullptr;
+
+ auto DFI = df_begin(StartingAccess);
+ for (auto DFE = df_end(StartingAccess); DFI != DFE;) {
+ MemoryAccess *CurrAccess = *DFI;
+ if (MSSA->isLiveOnEntryDef(CurrAccess))
+ return {CurrAccess, Loc};
+ // If this is a MemoryDef, check whether it clobbers our current query. This
+ // needs to be done before consulting the cache, because the cache reports
+ // the clobber for CurrAccess. If CurrAccess is a clobber for this query,
+ // and we ask the cache for information first, then we might skip this
+ // clobber, which is bad.
+ if (auto *MD = dyn_cast<MemoryDef>(CurrAccess)) {
+ // If we hit the top, stop following this path.
+ // While we can do lookups, we can't sanely do inserts here unless we were
+ // to track everything we saw along the way, since we don't know where we
+ // will stop.
+ if (instructionClobbersQuery(MD, Q, Loc)) {
+ ModifyingAccess = CurrAccess;
+ break;
+ }
+ }
+ if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc))
+ return {CacheResult, Loc};
+
+ // We need to know whether it is a phi so we can track backedges.
+ // Otherwise, walk all upward defs.
+ if (!isa<MemoryPhi>(CurrAccess)) {
+ ++DFI;
+ continue;
+ }
+
+#ifndef NDEBUG
+    // The loop below visits the phi's children for us. Because phis are the
+    // only things with multiple edges, skipping the children should always
+    // lead us to the end of the DFS.
+ //
+ // Use a copy of DFI because skipChildren would kill our search stack, which
+ // would make caching anything on the way back impossible.
+ auto DFICopy = DFI;
+ assert(DFICopy.skipChildren() == DFE &&
+ "Skipping phi's children doesn't end the DFS?");
+#endif
+
+ const MemoryAccessPair PHIPair(CurrAccess, Loc);
+
+ // Don't try to optimize this phi again if we've already tried to do so.
+ if (!Q.Visited.insert(PHIPair).second) {
+ ModifyingAccess = CurrAccess;
+ break;
+ }
+
+ std::size_t InitialVisitedCallSize = Q.VisitedCalls.size();
+
+ // Recurse on PHI nodes, since we need to change locations.
+ // TODO: Allow graphtraits on pairs, which would turn this whole function
+ // into a normal single depth first walk.
+ MemoryAccess *FirstDef = nullptr;
+ for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
+ MPI != MPE; ++MPI) {
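+      // The walk treats an incoming edge as a backedge when the phi's block
+      // dominates the predecessor block the argument comes from.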
+ bool Backedge =
+ !FollowingBackedge &&
+ DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
+
+ MemoryAccessPair CurrentPair =
+ UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge);
+ // All the phi arguments should reach the same point if we can bypass
+ // this phi. The alternative is that they hit this phi node, which
+ // means we can skip this argument.
+ if (FirstDef && CurrentPair.first != PHIPair.first &&
+ CurrentPair.first != FirstDef) {
+ ModifyingAccess = CurrAccess;
+ break;
+ }
+
+ if (!FirstDef)
+ FirstDef = CurrentPair.first;
+ }
+
+ // If we exited the loop early, go with the result it gave us.
+ if (!ModifyingAccess) {
+ assert(FirstDef && "Found a Phi with no upward defs?");
+ ModifyingAccess = FirstDef;
+ } else {
+ // If we can't optimize this Phi, then we can't safely cache any of the
+ // calls we visited when trying to optimize it. Wipe them out now.
+ Q.VisitedCalls.resize(InitialVisitedCallSize);
+ }
+ break;
+ }
+
+ if (!ModifyingAccess)
+ return {MSSA->getLiveOnEntryDef(), Q.StartingLoc};
+
+ const BasicBlock *OriginalBlock = StartingAccess->getBlock();
+ assert(DFI.getPathLength() > 0 && "We dropped our path?");
+ unsigned N = DFI.getPathLength();
+ // If we found a clobbering def, the last element in the path will be our
+ // clobber, so we don't want to cache that to itself. OTOH, if we optimized a
+ // phi, we can add the last thing in the path to the cache, since that won't
+ // be the result.
+ if (DFI.getPath(N - 1) == ModifyingAccess)
+ --N;
+ for (; N > 1; --N) {
+ MemoryAccess *CacheAccess = DFI.getPath(N - 1);
+ BasicBlock *CurrBlock = CacheAccess->getBlock();
+ if (!FollowingBackedge)
+ doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+ if (DT->dominates(CurrBlock, OriginalBlock) &&
+ (CurrBlock != OriginalBlock || !FollowingBackedge ||
+ MSSA->locallyDominates(CacheAccess, StartingAccess)))
+ break;
+ }
+
+ // Cache everything else on the way back. The caller should cache
+ // StartingAccess for us.
+ for (; N > 1; --N) {
+ MemoryAccess *CacheAccess = DFI.getPath(N - 1);
+ doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+ }
+
+ return {ModifyingAccess, Loc};
+}
+
+/// \brief Walk the use-def chains starting at \p MA and find
+/// the MemoryAccess that actually clobbers Loc.
+///
+/// \returns our clobbering memory access
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+ MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
+ return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first;
+}
+
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+ MemoryAccess *StartingAccess, MemoryLocation &Loc) {
+ if (isa<MemoryPhi>(StartingAccess))
+ return StartingAccess;
+
+ auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
+ if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
+ return StartingUseOrDef;
+
+ Instruction *I = StartingUseOrDef->getMemoryInst();
+
+ // Conservatively, fences are always clobbers, so don't perform the walk if we
+ // hit a fence.
+ if (!ImmutableCallSite(I) && I->isFenceLike())
+ return StartingUseOrDef;
+
+ UpwardsMemoryQuery Q;
+ Q.OriginalAccess = StartingUseOrDef;
+ Q.StartingLoc = Loc;
+ Q.Inst = StartingUseOrDef->getMemoryInst();
+ Q.IsCall = false;
+
+ if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc))
+ return CacheResult;
+
+ // Unlike the other function, do not walk to the def of a def, because we are
+ // handed something we already believe is the clobbering access.
+ MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
+ ? StartingUseOrDef->getDefiningAccess()
+ : StartingUseOrDef;
+
+ MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+ // Only cache this if it wouldn't make Clobber point to itself.
+ if (Clobber != StartingAccess)
+ doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc);
+ DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+ DEBUG(dbgs() << *StartingUseOrDef << "\n");
+ DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+ DEBUG(dbgs() << *Clobber << "\n");
+ return Clobber;
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(const Instruction *I) {
+  // There should be no way to look up an instruction and get a phi as the
+  // access, since we only map BBs to PHIs. So, this must be a use or def.
+ auto *StartingAccess = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(I));
+
+ bool IsCall = bool(ImmutableCallSite(I));
+
+  // We can't sanely do anything with fences; they conservatively clobber
+  // all memory, and have no locations to get pointers from to try to
+  // disambiguate.
+ if (!IsCall && I->isFenceLike())
+ return StartingAccess;
+
+ UpwardsMemoryQuery Q;
+ Q.OriginalAccess = StartingAccess;
+ Q.IsCall = IsCall;
+ if (!Q.IsCall)
+ Q.StartingLoc = MemoryLocation::get(I);
+ Q.Inst = I;
+ if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
+ return CacheResult;
+
+ // Start with the thing we already think clobbers this location
+ MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+ // At this point, DefiningAccess may be the live on entry def.
+ // If it is, we will not get a better result.
+ if (MSSA->isLiveOnEntryDef(DefiningAccess))
+ return DefiningAccess;
+
+ MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
+ // DFS won't cache a result for DefiningAccess. So, if DefiningAccess isn't
+ // our clobber, be sure that it gets a cache entry, too.
+ if (Result != DefiningAccess)
+ doCacheInsert(DefiningAccess, Result, Q, Q.StartingLoc);
+ doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc);
+ // TODO: When this implementation is more mature, we may want to figure out
+ // what this additional caching buys us. It's most likely A Good Thing.
+ if (Q.IsCall)
+ for (const MemoryAccess *MA : Q.VisitedCalls)
+ if (MA != Result)
+ doCacheInsert(MA, Result, Q, Q.StartingLoc);
+
+ DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+ DEBUG(dbgs() << *DefiningAccess << "\n");
+ DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+ DEBUG(dbgs() << *Result << "\n");
+
+ return Result;
+}
+
+// Verify that MA doesn't exist in any of the caches.
+void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
+#ifndef NDEBUG
+ for (auto &P : CachedUpwardsClobberingAccess)
+ assert(P.first.first != MA && P.second != MA &&
+ "Found removed MemoryAccess in cache.");
+ for (auto &P : CachedUpwardsClobberingCall)
+ assert(P.first != MA && P.second != MA &&
+ "Found removed MemoryAccess in cache.");
+#endif // !NDEBUG
+}
+
+MemoryAccess *
+DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) {
+ MemoryAccess *MA = MSSA->getMemoryAccess(I);
+ if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
+ return Use->getDefiningAccess();
+ return MA;
+}
+
+MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
+ MemoryAccess *StartingAccess, MemoryLocation &) {
+ if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
+ return Use->getDefiningAccess();
+ return StartingAccess;
+}
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 9ec28a3f3d47c..eb91885186244 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -21,8 +20,8 @@
using namespace llvm;
-static void appendToGlobalArray(const char *Array,
- Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array, Module &M, Function *F,
+ int Priority, Constant *Data) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
@@ -31,15 +30,26 @@ static void appendToGlobalArray(const char *Array,
SmallVector<Constant *, 16> CurrentCtors;
StructType *EltTy;
if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
- // If there is a global_ctors array, use the existing struct type, which can
- // have 2 or 3 fields.
- ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType());
- EltTy = cast<StructType>(ATy->getElementType());
+ ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
+ StructType *OldEltTy = cast<StructType>(ATy->getElementType());
+ // Upgrade a 2-field global array type to the new 3-field format if needed.
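+    // That is, existing { i32, void ()* } entries are rebuilt as
+    // { i32, void ()*, i8* } with a null data pointer.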
+ if (Data && OldEltTy->getNumElements() < 3)
+ EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+ IRB.getInt8PtrTy(), nullptr);
+ else
+ EltTy = OldEltTy;
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
- for (unsigned i = 0; i != n; ++i)
- CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ for (unsigned i = 0; i != n; ++i) {
+ auto Ctor = cast<Constant>(Init->getOperand(i));
+ if (EltTy != OldEltTy)
+ Ctor = ConstantStruct::get(
+ EltTy, Ctor->getAggregateElement((unsigned)0),
+ Ctor->getAggregateElement(1),
+ Constant::getNullValue(IRB.getInt8PtrTy()), nullptr);
+ CurrentCtors.push_back(Ctor);
+ }
}
GVCtor->eraseFromParent();
} else {
@@ -54,7 +64,8 @@ static void appendToGlobalArray(const char *Array,
CSVals[1] = F;
// FIXME: Drop support for the two element form in LLVM 4.0.
if (EltTy->getNumElements() >= 3)
- CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy());
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+ : Constant::getNullValue(IRB.getInt8PtrTy());
Constant *RuntimeCtorInit =
ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
@@ -70,29 +81,12 @@ static void appendToGlobalArray(const char *Array,
GlobalValue::AppendingLinkage, NewInit, Array);
}
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
- appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
}
-void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
- appendToGlobalArray("llvm.global_dtors", M, F, Priority);
-}
-
-GlobalVariable *
-llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
- bool CompilerUsed) {
- const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
- GlobalVariable *GV = M.getGlobalVariable(Name);
- if (!GV || !GV->hasInitializer())
- return GV;
-
- const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
- for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
- Value *Op = Init->getOperand(I);
- GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
- Set.insert(G);
- }
- return GV;
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
}
Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
@@ -132,4 +126,3 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
}
return std::make_pair(Ctor, InitFunction);
}
-
diff --git a/lib/Transforms/Utils/NameAnonFunctions.cpp b/lib/Transforms/Utils/NameAnonFunctions.cpp
new file mode 100644
index 0000000000000..c4f3839d8482a
--- /dev/null
+++ b/lib/Transforms/Utils/NameAnonFunctions.cpp
@@ -0,0 +1,102 @@
+//===- NameAnonFunctions.cpp - Provide names to anonymous functions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements naming anonymous functions to make sure they can be
+// referred to by ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+// Compute a "unique" hash for the module based on the name of the public
+// functions.
+class ModuleHasher {
+ Module &TheModule;
+ std::string TheHash;
+
+public:
+ ModuleHasher(Module &M) : TheModule(M) {}
+
+ /// Return the lazily computed hash.
+ std::string &get() {
+ if (!TheHash.empty())
+ // Cache hit :)
+ return TheHash;
+
+ MD5 Hasher;
+ for (auto &F : TheModule) {
+ if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName())
+ continue;
+ auto Name = F.getName();
+ Hasher.update(Name);
+ }
+ for (auto &GV : TheModule.globals()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName())
+ continue;
+ auto Name = GV.getName();
+ Hasher.update(Name);
+ }
+
+ // Now return the result.
+ MD5::MD5Result Hash;
+ Hasher.final(Hash);
+ SmallString<32> Result;
+ MD5::stringifyResult(Hash, Result);
+ TheHash = Result.str();
+ return TheHash;
+ }
+};
+
+// Rename all the anon functions in the module
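+// Each one receives a name of the form "anon.<module hash>.<counter>", where
+// the hash is the MD5 hex string of the public names computed above.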
+bool llvm::nameUnamedFunctions(Module &M) {
+ bool Changed = false;
+ ModuleHasher ModuleHash(M);
+ int count = 0;
+ for (auto &F : M) {
+ if (F.hasName())
+ continue;
+ F.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
+ Changed = true;
+ }
+ return Changed;
+}
+
+namespace {
+
+// Simple pass that provides a name to every anon function.
+class NameAnonFunction : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ const char *getPassName() const override { return "Name Anon Functions"; }
+
+ explicit NameAnonFunction() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return nameUnamedFunctions(M); }
+};
+char NameAnonFunction::ID = 0;
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(NameAnonFunction, "name-anon-functions",
+ "Provide a name to nameless functions", false, false)
+INITIALIZE_PASS_END(NameAnonFunction, "name-anon-functions",
+ "Provide a name to nameless functions", false, false)
+
+namespace llvm {
+ModulePass *createNameAnonFunctionPass() { return new NameAnonFunction(); }
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index c4f9b9f614078..cbf385d563399 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -523,7 +523,7 @@ void PromoteMem2Reg::run() {
AllocaInfo Info;
LargeBlockInfo LBI;
- IDFCalculator IDF(DT);
+ ForwardIDFCalculator IDF(DT);
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
@@ -802,7 +802,8 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
// actually live-in here.
LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
LiveInBlockWorklist.pop_back();
- --i, --e;
+ --i;
+ --e;
break;
}
diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp
new file mode 100644
index 0000000000000..9afd175c10ed5
--- /dev/null
+++ b/lib/Transforms/Utils/SanitizerStats.cpp
@@ -0,0 +1,108 @@
+//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements code generation for sanitizer statistics gathering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
+ StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
+ EmptyModuleStatsTy = makeModuleStatsTy();
+
+ ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
+ GlobalValue::InternalLinkage, nullptr);
+}
+
+ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
+ return ArrayType::get(StatTy, Inits.size());
+}
+
+StructType *SanitizerStatReport::makeModuleStatsTy() {
+ return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
+ makeModuleStatsArrayTy()});
+}
+
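+// create() appends one statistics entry and emits a call to
+// __sanitizer_stat_report with that entry's address. Each entry is a
+// [2 x i8*]: a null pointer slot (presumably filled in by the runtime), plus
+// the stat kind packed into the top kSanitizerStatKindBits bits of a
+// pointer-sized integer.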
+void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
+ Function *F = B.GetInsertBlock()->getParent();
+ Module *M = F->getParent();
+ PointerType *Int8PtrTy = B.getInt8PtrTy();
+ IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
+ ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
+
+ Inits.push_back(ConstantArray::get(
+ StatTy,
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
+ kSanitizerStatKindBits)),
+ Int8PtrTy)}));
+
+ FunctionType *StatReportTy =
+ FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
+ Constant *StatReport = M->getOrInsertFunction(
+ "__sanitizer_stat_report", StatReportTy);
+
+ auto InitAddr = ConstantExpr::getGetElementPtr(
+ EmptyModuleStatsTy, ModuleStatsGV,
+ ArrayRef<Constant *>{
+ ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
+ ConstantInt::get(IntPtrTy, Inits.size() - 1),
+ });
+ B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
+}
+
+void SanitizerStatReport::finish() {
+ if (Inits.empty()) {
+ ModuleStatsGV->eraseFromParent();
+ return;
+ }
+
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+
+ // Create a new ModuleStatsGV to replace the old one. We can't just set the
+ // old one's initializer because its type is different.
+ auto NewModuleStatsGV = new GlobalVariable(
+ *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage,
+ ConstantStruct::getAnon(
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantInt::get(Int32Ty, Inits.size()),
+ ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
+ ModuleStatsGV->replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
+ ModuleStatsGV->eraseFromParent();
+
+ // Create a global constructor to register NewModuleStatsGV.
+ auto F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage, "", M);
+ auto BB = BasicBlock::Create(M->getContext(), "", F);
+ IRBuilder<> B(BB);
+
+ FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
+ Constant *StatInit = M->getOrInsertFunction(
+ "__sanitizer_stat_init", StatInitTy);
+
+ B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
+ B.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index e484b690597e9..0504646c304ef 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -45,6 +44,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <map>
@@ -58,17 +58,18 @@ using namespace PatternMatch;
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
// minimum reasonable default.
-static cl::opt<unsigned>
-PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2),
- cl::desc("Control the amount of phi node folding to perform (default = 2)"));
+static cl::opt<unsigned> PHINodeFoldingThreshold(
+ "phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc(
+ "Control the amount of phi node folding to perform (default = 2)"));
-static cl::opt<bool>
-DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
- cl::desc("Duplicate return instructions into unconditional branches"));
+static cl::opt<bool> DupRet(
+ "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
static cl::opt<bool>
-SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
- cl::desc("Sink common instructions down to the end block"));
+ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
static cl::opt<bool> HoistCondStores(
"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
@@ -96,48 +97,54 @@ static cl::opt<unsigned> MaxSpeculationDepth(
"speculatively executed instructions"));
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
-STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
-STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
-STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
+STATISTIC(NumLinearMaps,
+ "Number of switch instructions turned into linear mapping");
+STATISTIC(NumLookupTables,
+ "Number of switch instructions turned into lookup tables");
+STATISTIC(
+ NumLookupTablesHoles,
+ "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
-STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
+STATISTIC(NumSinkCommons,
+ "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
- // The first field contains the value that the switch produces when a certain
- // case group is selected, and the second field is a vector containing the
- // cases composing the case group.
- typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
+// The first field contains the value that the switch produces when a certain
+// case group is selected, and the second field is a vector containing the
+// cases composing the case group.
+typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
SwitchCaseResultVectorTy;
- // The first field contains the phi node that generates a result of the switch
- // and the second field contains the value generated for a certain case in the
- // switch for that PHI.
- typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
+// The first field contains the phi node that generates a result of the switch
+// and the second field contains the value generated for a certain case in the
+// switch for that PHI.
+typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
- /// ValueEqualityComparisonCase - Represents a case of a switch.
- struct ValueEqualityComparisonCase {
- ConstantInt *Value;
- BasicBlock *Dest;
+/// ValueEqualityComparisonCase - Represents a case of a switch.
+struct ValueEqualityComparisonCase {
+ ConstantInt *Value;
+ BasicBlock *Dest;
- ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
+ ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
: Value(Value), Dest(Dest) {}
- bool operator<(ValueEqualityComparisonCase RHS) const {
- // Comparing pointers is ok as we only rely on the order for uniquing.
- return Value < RHS.Value;
- }
+ bool operator<(ValueEqualityComparisonCase RHS) const {
+ // Comparing pointers is ok as we only rely on the order for uniquing.
+ return Value < RHS.Value;
+ }
- bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
- };
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
+};
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
const DataLayout &DL;
unsigned BonusInstThreshold;
AssumptionCache *AC;
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
Value *isValueEqualityComparison(TerminatorInst *TI);
- BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
- std::vector<ValueEqualityComparisonCase> &Cases);
+ BasicBlock *GetValueEqualityComparisonCases(
+ TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
@@ -152,13 +159,15 @@ class SimplifyCFGOpt {
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool SimplifyIndirectBr(IndirectBrInst *IBI);
- bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
- bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
- unsigned BonusInstThreshold, AssumptionCache *AC)
- : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
+ unsigned BonusInstThreshold, AssumptionCache *AC,
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
+ : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
+ LoopHeaders(LoopHeaders) {}
bool run(BasicBlock *BB);
};
}
@@ -166,19 +175,19 @@ public:
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
- if (SI1 == SI2) return false; // Can't merge with self!
+ if (SI1 == SI2)
+ return false; // Can't merge with self!
// It is not safe to merge these two switch instructions if they have a common
// successor, and if that successor has a PHI node, and if *that* PHI node has
// conflicting incoming values from the two switch blocks.
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
- if (SI1Succs.count(*I))
- for (BasicBlock::iterator BBI = (*I)->begin();
- isa<PHINode>(BBI); ++BBI) {
+ for (BasicBlock *Succ : successors(SI2BB))
+ if (SI1Succs.count(Succ))
+ for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
PHINode *PN = cast<PHINode>(BBI);
if (PN->getIncomingValueForBlock(SI1BB) !=
PN->getIncomingValueForBlock(SI2BB))
@@ -191,11 +200,12 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
/// Return true if it is safe and profitable to merge these two terminator
/// instructions together, where SI1 is an unconditional branch. PhiNodes will
/// store all PHI nodes in common successors.
-static bool isProfitableToFoldUnconditional(BranchInst *SI1,
- BranchInst *SI2,
- Instruction *Cond,
- SmallVectorImpl<PHINode*> &PhiNodes) {
- if (SI1 == SI2) return false; // Can't merge with self!
+static bool
+isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
+ Instruction *Cond,
+ SmallVectorImpl<PHINode *> &PhiNodes) {
+ if (SI1 == SI2)
+ return false; // Can't merge with self!
assert(SI1->isUnconditional() && SI2->isConditional());
// We fold the unconditional branch if we can easily update all PHI nodes in
@@ -204,7 +214,8 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
// 2> We have "Cond" as the incoming value for the unconditional branch;
// 3> SI2->getCondition() and Cond have same operands.
CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
- if (!Ci2) return false;
+ if (!Ci2)
+ return false;
if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
Cond->getOperand(1) == Ci2->getOperand(1)) &&
!(Cond->getOperand(0) == Ci2->getOperand(1) &&
@@ -213,11 +224,10 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
- if (SI1Succs.count(*I))
- for (BasicBlock::iterator BBI = (*I)->begin();
- isa<PHINode>(BBI); ++BBI) {
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ for (BasicBlock *Succ : successors(SI2BB))
+ if (SI1Succs.count(Succ))
+ for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
PHINode *PN = cast<PHINode>(BBI);
if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
!isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
@@ -233,11 +243,11 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
- if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+ if (!isa<PHINode>(Succ->begin()))
+ return; // Quick exit if nothing to do
PHINode *PN;
- for (BasicBlock::iterator I = Succ->begin();
- (PN = dyn_cast<PHINode>(I)); ++I)
+ for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
@@ -270,7 +280,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// V plus its non-dominating operands. If that cost is greater than
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSetImpl<Instruction*> *AggressiveInsts,
+ SmallPtrSetImpl<Instruction *> *AggressiveInsts,
unsigned &CostRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
@@ -294,7 +304,8 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// We don't want to allow weird loops that might have the "if condition" in
// the bottom of this block.
- if (PBB == BB) return false;
+ if (PBB == BB)
+ return false;
// If this instruction is defined in a block that contains an unconditional
// branch to BB, then it must be in the 'conditional' part of the "if
@@ -305,10 +316,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
- if (!AggressiveInsts) return false;
+ if (!AggressiveInsts)
+ return false;
// If we have seen this instruction before, don't count it again.
- if (AggressiveInsts->count(I)) return true;
+ if (AggressiveInsts->count(I))
+ return true;
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
@@ -366,8 +379,8 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
if (CI->getType() == PtrTy)
return CI;
else
- return cast<ConstantInt>
- (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ return cast<ConstantInt>(
+ ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
}
return nullptr;
}
@@ -403,11 +416,11 @@ struct ConstantComparesGatherer {
operator=(const ConstantComparesGatherer &) = delete;
private:
-
/// Try to set the current value used for the comparison, it succeeds only if
/// it wasn't set before or if the new value is the same as the old one
bool setValueOnce(Value *NewVal) {
- if(CompValue && CompValue != NewVal) return false;
+ if (CompValue && CompValue != NewVal)
+ return false;
CompValue = NewVal;
return (CompValue != nullptr);
}
@@ -424,35 +437,99 @@ private:
ICmpInst *ICI;
ConstantInt *C;
if (!((ICI = dyn_cast<ICmpInst>(I)) &&
- (C = GetConstantInt(I->getOperand(1), DL)))) {
+ (C = GetConstantInt(I->getOperand(1), DL)))) {
return false;
}
Value *RHSVal;
- ConstantInt *RHSC;
+ const APInt *RHSC;
// Pattern match a special case
- // (x & ~2^x) == y --> x == y || x == y|2^x
+ // (x & ~2^z) == y --> x == y || x == y|2^z
// This undoes a transformation done by instcombine to fuse 2 compares.
- if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
+
+ // It's a little bit hard to see why the following transformations are
+ // correct. Here is a CVC3 program to verify them for 64-bit values:
+
+ /*
+ ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
+ x : BITVECTOR(64);
+ y : BITVECTOR(64);
+ z : BITVECTOR(64);
+ mask : BITVECTOR(64) = BVSHL(ONE, z);
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+
+ // Please note that each pattern must be a dual implication (<--> or
+ // iff). One directional implication can create spurious matches. If the
+ // implication is only one-way, an unsatisfiable condition on the left
+ // side can imply a satisfiable condition on the right side. Dual
+ // implication ensures that satisfiable conditions are transformed to
+ // other satisfiable conditions and unsatisfiable conditions are
+ // transformed to other unsatisfiable conditions.
+
+      // Here is a concrete example of an unsatisfiable condition on the left
+ // implying a satisfiable condition on the right:
+ //
+ // mask = (1 << z)
+ // (x & ~mask) == y --> (x == y || x == (y | mask))
+ //
+ // Substituting y = 3, z = 0 yields:
+ // (x & -2) == 3 --> (x == 3 || x == 2)
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ */
if (match(ICI->getOperand(0),
- m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
- APInt Not = ~RHSC->getValue();
- if (Not.isPowerOf2()) {
+ m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = ~*RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
// If we already have a value for the switch, it has to match!
- if(!setValueOnce(RHSVal))
+ if (!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(),
+ C->getValue() | Mask));
+ UsedICmps++;
+ return true;
+ }
+ }
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+ if (match(ICI->getOperand(0),
+ m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = *RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(RHSVal))
return false;
Vals.push_back(C);
Vals.push_back(ConstantInt::get(C->getContext(),
- C->getValue() | Not));
+ C->getValue() & ~Mask));
UsedICmps++;
return true;
}
}
// If we already have a value for the switch, it has to match!
- if(!setValueOnce(ICI->getOperand(0)))
+ if (!setValueOnce(ICI->getOperand(0)))
return false;
UsedICmps++;
@@ -467,8 +544,8 @@ private:
// Shift the range if the compare is fed by an add. This is the range
// compare idiom as emitted by instcombine.
Value *CandidateVal = I->getOperand(0);
- if(match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
- Span = Span.subtract(RHSC->getValue());
+ if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
+ Span = Span.subtract(*RHSC);
CandidateVal = RHSVal;
}
@@ -484,7 +561,7 @@ private:
}
// If we already have a value for the switch, it has to match!
- if(!setValueOnce(CandidateVal))
+ if (!setValueOnce(CandidateVal))
return false;
// Add all values from the range to the set
@@ -493,7 +570,6 @@ private:
UsedICmps++;
return true;
-
}
/// Given a potentially 'or'd or 'and'd together collection of icmp
@@ -507,18 +583,22 @@ private:
// Keep a stack (SmallVector for efficiency) for depth-first traversal
SmallVector<Value *, 8> DFT;
+ SmallPtrSet<Value *, 8> Visited;
// Initialize
+ Visited.insert(V);
DFT.push_back(V);
- while(!DFT.empty()) {
+ while (!DFT.empty()) {
V = DFT.pop_back_val();
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If it is a || (or && depending on isEQ), process the operands.
if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
- DFT.push_back(I->getOperand(1));
- DFT.push_back(I->getOperand(0));
+ if (Visited.insert(I->getOperand(1)).second)
+ DFT.push_back(I->getOperand(1));
+ if (Visited.insert(I->getOperand(0)).second)
+ DFT.push_back(I->getOperand(0));
continue;
}
@@ -541,7 +621,6 @@ private:
}
}
};
-
}
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
@@ -556,7 +635,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
}
TI->eraseFromParent();
- if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ if (Cond)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
}
/// Return true if the specified terminator checks
@@ -566,8 +646,9 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
- if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
- pred_end(SI->getParent())) <= 128)
+ if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <=
+ 128)
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
@@ -589,46 +670,44 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
-BasicBlock *SimplifyCFGOpt::
-GetValueEqualityComparisonCases(TerminatorInst *TI,
- std::vector<ValueEqualityComparisonCase>
- &Cases) {
+BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
+ TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
- Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(),
- i.getCaseSuccessor()));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
+ ++i)
+ Cases.push_back(
+ ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor()));
return SI->getDefaultDest();
}
BranchInst *BI = cast<BranchInst>(TI);
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
- Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1),
- DL),
- Succ));
+ Cases.push_back(ValueEqualityComparisonCase(
+ GetConstantInt(ICI->getOperand(1), DL), Succ));
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
}
-
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
-static void EliminateBlockCases(BasicBlock *BB,
- std::vector<ValueEqualityComparisonCase> &Cases) {
+static void
+EliminateBlockCases(BasicBlock *BB,
+ std::vector<ValueEqualityComparisonCase> &Cases) {
Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
/// Return true if there are any keys in C1 that exist in C2 as well.
-static bool
-ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
- std::vector<ValueEqualityComparisonCase > &C2) {
+static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
+ std::vector<ValueEqualityComparisonCase> &C2) {
std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
// Make V1 be smaller than V2.
if (V1->size() > V2->size())
std::swap(V1, V2);
- if (V1->size() == 0) return false;
+ if (V1->size() == 0)
+ return false;
if (V1->size() == 1) {
// Just scan V2.
ConstantInt *TheVal = (*V1)[0].Value;
@@ -657,30 +736,30 @@ ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
-bool SimplifyCFGOpt::
-SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
- BasicBlock *Pred,
- IRBuilder<> &Builder) {
+bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
+ TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
- if (!PredVal) return false; // Not a value comparison in predecessor.
+ if (!PredVal)
+ return false; // Not a value comparison in predecessor.
Value *ThisVal = isValueEqualityComparison(TI);
assert(ThisVal && "This isn't a value comparison!!");
- if (ThisVal != PredVal) return false; // Different predicates.
+ if (ThisVal != PredVal)
+ return false; // Different predicates.
// TODO: Preserve branch weight metadata, similarly to how
// FoldValueComparisonIntoPredecessors preserves it.
// Find out information about when control will move from Pred to TI's block.
std::vector<ValueEqualityComparisonCase> PredCases;
- BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
- PredCases);
- EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+ BasicBlock *PredDef =
+ GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
// Find information about how control leaves this block.
std::vector<ValueEqualityComparisonCase> ThisCases;
BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
- EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
// If TI's block is the default block from Pred's comparison, potentially
// simplify TI based on this knowledge.
@@ -697,13 +776,14 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
assert(ThisCases.size() == 1 && "Branch can only have one case!");
// Insert the new branch.
Instruction *NI = Builder.CreateBr(ThisDef);
- (void) NI;
+ (void)NI;
// Remove PHI node entries for the dead edge.
ThisCases[0].Dest->removePredecessor(TI->getParent());
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
@@ -711,7 +791,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
SwitchInst *SI = cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
- SmallPtrSet<Constant*, 16> DeadCases;
+ SmallPtrSet<Constant *, 16> DeadCases;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
DeadCases.insert(PredCases[i].Value);
@@ -732,7 +812,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
--i;
if (DeadCases.count(i.getCaseValue())) {
if (HasWeight) {
- std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ std::swap(Weights[i.getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
i.getCaseSuccessor()->removePredecessor(TI->getParent());
@@ -741,8 +821,8 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
}
if (HasWeight && Weights.size() >= 2)
SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getParent()->getContext()).
- createBranchWeights(Weights));
+ MDBuilder(SI->getParent()->getContext())
+ .createBranchWeights(Weights));
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -755,7 +835,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == TIBB) {
if (TIV)
- return false; // Cannot handle multiple values coming to this block.
+ return false; // Cannot handle multiple values coming to this block.
TIV = PredCases[i].Value;
}
assert(TIV && "No edge from pred to succ?");
@@ -770,53 +850,53 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
}
// If not handled by any explicit cases, it is handled by the default case.
- if (!TheRealDest) TheRealDest = ThisDef;
+ if (!TheRealDest)
+ TheRealDest = ThisDef;
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
- for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
- if (*SI != CheckEdge)
- (*SI)->removePredecessor(TIBB);
+ for (BasicBlock *Succ : successors(TIBB))
+ if (Succ != CheckEdge)
+ Succ->removePredecessor(TIBB);
else
CheckEdge = nullptr;
// Insert the new branch.
Instruction *NI = Builder.CreateBr(TheRealDest);
- (void) NI;
+ (void)NI;
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
EraseTerminatorInstAndDCECond(TI);
return true;
}
namespace {
- /// This class implements a stable ordering of constant
- /// integers that does not depend on their address. This is important for
- /// applications that sort ConstantInt's to ensure uniqueness.
- struct ConstantIntOrdering {
- bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
- return LHS->getValue().ult(RHS->getValue());
- }
- };
+/// This class implements a stable ordering of constant
+/// integers that does not depend on their address. This is important for
+/// applications that sort ConstantInt's to ensure uniqueness.
+struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+};
}
static int ConstantIntSortPredicate(ConstantInt *const *P1,
ConstantInt *const *P2) {
const ConstantInt *LHS = *P1;
const ConstantInt *RHS = *P2;
- if (LHS->getValue().ult(RHS->getValue()))
- return 1;
- if (LHS->getValue() == RHS->getValue())
+ if (LHS == RHS)
return 0;
- return -1;
+ return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
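
The rewritten comparator returns 0 only for pointer-identical inputs, which is sound because ConstantInts are uniqued per context, so pointer equality implies value equality. A sketch of the same shape over plain integers (hypothetical helper; note that without uniquing, equal values at distinct addresses would make this an invalid ordering):

    // Identity first, then a strict value order (descending, matching the
    // original's 1/-1 choice).
    int cmpDesc(const unsigned *P1, const unsigned *P2) {
      if (P1 == P2)
        return 0;
      return *P1 < *P2 ? 1 : -1;
    }
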
-static inline bool HasBranchWeights(const Instruction* I) {
+static inline bool HasBranchWeights(const Instruction *I) {
MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
if (ProfMD && ProfMD->getOperand(0))
- if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
return MDS->getString().equals("branch_weights");
return false;
@@ -837,7 +917,7 @@ static void GetBranchWeights(TerminatorInst *TI,
// If TI is a conditional eq, the default case is the false case,
// and the corresponding branch-weight data is at index 2. We swap the
// default weight to be the first entry.
- if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
assert(Weights.size() == 2);
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
@@ -862,17 +942,17 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
IRBuilder<> &Builder) {
BasicBlock *BB = TI->getParent();
- Value *CV = isValueEqualityComparison(TI); // CondVal
+ Value *CV = isValueEqualityComparison(TI); // CondVal
assert(CV && "Not a comparison?");
bool Changed = false;
- SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
while (!Preds.empty()) {
BasicBlock *Pred = Preds.pop_back_val();
// See if the predecessor is a comparison with the same value.
TerminatorInst *PTI = Pred->getTerminator();
- Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
// Figure out which 'cases' to copy from SI to PSI.
@@ -885,7 +965,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Based on whether the default edge from PTI goes to BB or not, fill in
// PredCases and PredDefault with the new switch cases we would like to
// build.
- SmallVector<BasicBlock*, 8> NewSuccessors;
+ SmallVector<BasicBlock *, 8> NewSuccessors;
// Update the branch weight metadata along the way
SmallVector<uint64_t, 8> Weights;
@@ -915,7 +995,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
if (PredDefault == BB) {
// If this is the default destination from PTI, only the edges in TI
// that don't occur in PTI, or that branch to BB will be activated.
- std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest != BB)
PTIHandled.insert(PredCases[i].Value);
@@ -925,13 +1005,14 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
if (PredHasWeights || SuccHasWeights) {
// Increase weight for the default case.
- Weights[0] += Weights[i+1];
- std::swap(Weights[i+1], Weights.back());
+ Weights[0] += Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
Weights.pop_back();
}
PredCases.pop_back();
- --i; --e;
+ --i;
+ --e;
}
// Reconstruct the new switch statement we will be building.
@@ -952,8 +1033,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// The default weight is at index 0, so weight for the ith case
// should be at index i+1. Scale the cases from successor by
// PredDefaultWeight (Weights[0]).
- Weights.push_back(Weights[0] * SuccWeights[i+1]);
- ValidTotalSuccWeight += SuccWeights[i+1];
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]);
+ ValidTotalSuccWeight += SuccWeights[i + 1];
}
}
@@ -969,21 +1050,22 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
- std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
- std::map<ConstantInt*, uint64_t> WeightsForHandled;
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt *, uint64_t> WeightsForHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == BB) {
PTIHandled.insert(PredCases[i].Value);
if (PredHasWeights || SuccHasWeights) {
- WeightsForHandled[PredCases[i].Value] = Weights[i+1];
- std::swap(Weights[i+1], Weights.back());
+ WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
Weights.pop_back();
}
std::swap(PredCases[i], PredCases.back());
PredCases.pop_back();
- --i; --e;
+ --i;
+ --e;
}
// Okay, now we know which constants were sent to BB from the
@@ -995,17 +1077,16 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Weights.push_back(WeightsForHandled[BBCases[i].Value]);
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
- PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
+ PTIHandled.erase(
+ BBCases[i].Value); // This constant is taken care of
}
// If there are any constants vectored to BB that TI doesn't handle,
// they must go to the default destination of TI.
- for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
- PTIHandled.begin(),
- E = PTIHandled.end(); I != E; ++I) {
+ for (ConstantInt *I : PTIHandled) {
if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[*I]);
- PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
+ Weights.push_back(WeightsForHandled[I]);
+ PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
}
@@ -1024,8 +1105,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
}
// Now that the successors are updated, create the new Switch instruction.
- SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
- PredCases.size());
+ SwitchInst *NewSI =
+ Builder.CreateSwitch(CV, PredDefault, PredCases.size());
NewSI->setDebugLoc(PTI->getDebugLoc());
for (ValueEqualityComparisonCase &V : PredCases)
NewSI->addCase(V.Value, V.Dest);
@@ -1036,9 +1117,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- NewSI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(MDWeights));
+ NewSI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).createBranchWeights(MDWeights));
}
EraseTerminatorInstAndDCECond(PTI);
@@ -1052,8 +1133,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
if (!InfLoopBlock) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
- InfLoopBlock = BasicBlock::Create(BB->getContext(),
- "infloop", BB->getParent());
+ InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
+ BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
}
NewSI->setSuccessor(i, InfLoopBlock);
@@ -1070,13 +1151,13 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// can't hoist the invoke, as there is nowhere to put the select in this case.
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
Instruction *I1, Instruction *I2) {
- for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ for (BasicBlock *Succ : successors(BB1)) {
PHINode *PN;
- for (BasicBlock::iterator BBI = SI->begin();
+ for (BasicBlock::iterator BBI = Succ->begin();
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
- if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
return false;
}
}
@@ -1096,8 +1177,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
// such, we currently just scan for obviously identical instructions in an
// identical order.
- BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
- BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
BasicBlock::iterator BB1_Itr = BB1->begin();
BasicBlock::iterator BB2_Itr = BB2->begin();
@@ -1135,12 +1216,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group,
- LLVMContext::MD_align, LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null};
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_mem_parallel_loop_access};
combineMetadata(I1, I2, KnownIDs);
I2->eraseFromParent();
Changed = true;
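
combineMetadata reconciles the listed metadata kinds on the merged instruction; the patch adds mem_parallel_loop_access to that list. Below is a deliberately simplified model of the idea: drop a known kind when the two instructions disagree. The real routine merges several kinds more precisely, e.g. by unioning range metadata (toy types, illustration only):

    #include <initializer_list>
    #include <map>

    using MDMap = std::map<unsigned, int>; // metadata kind -> attachment id

    void combineKnown(MDMap &Keep, const MDMap &Other,
                      std::initializer_list<unsigned> KnownIDs) {
      for (unsigned ID : KnownIDs) {
        auto A = Keep.find(ID);
        if (A == Keep.end())
          continue;
        auto B = Other.find(ID);
        if (B == Other.end() || B->second != A->second)
          Keep.erase(A); // the instructions disagree: drop the attachment
      }
    }
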
@@ -1165,9 +1250,9 @@ HoistTerminator:
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
return Changed;
- for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ for (BasicBlock *Succ : successors(BB1)) {
PHINode *PN;
- for (BasicBlock::iterator BBI = SI->begin();
+ for (BasicBlock::iterator BBI = Succ->begin();
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
@@ -1178,7 +1263,7 @@ HoistTerminator:
// eliminate undefined control flow then converting it to a select.
if (passingValueIsAlwaysUndefined(BB1V, PN) ||
passingValueIsAlwaysUndefined(BB2V, PN))
- return Changed;
+ return Changed;
if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
return Changed;
@@ -1196,27 +1281,28 @@ HoistTerminator:
NT->takeName(I1);
}
- IRBuilder<true, NoFolder> Builder(NT);
+ IRBuilder<NoFolder> Builder(NT);
// Hoisting one of the terminators from our successor is a great thing.
// Unfortunately, the successors of the if/else blocks may have PHI nodes in
// them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
// nodes, so we insert select instructions to compute the final result.
- std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
- for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
+ for (BasicBlock *Succ : successors(BB1)) {
PHINode *PN;
- for (BasicBlock::iterator BBI = SI->begin();
+ for (BasicBlock::iterator BBI = Succ->begin();
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
- if (BB1V == BB2V) continue;
+ if (BB1V == BB2V)
+ continue;
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
if (!SI)
- SI = cast<SelectInst>
- (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
- BB1V->getName()+"."+BB2V->getName()));
+ SI = cast<SelectInst>(
+ Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName() + "." + BB2V->getName(), BI));
// Make the PHI node use the select for all incoming values for BB1/BB2
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -1226,8 +1312,8 @@ HoistTerminator:
}
// Update any PHI nodes in our new successors.
- for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
- AddPredecessorToBlock(*SI, BIParent, BB1);
+ for (BasicBlock *Succ : successors(BB1))
+ AddPredecessorToBlock(Succ, BIParent, BB1);
EraseTerminatorInstAndDCECond(BI);
return true;
@@ -1280,10 +1366,12 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
RI2 = BB2->getInstList().rbegin(),
RE2 = BB2->getInstList().rend();
// Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
+ ++RI1;
if (RI1 == RE1)
return false;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
+ ++RI2;
if (RI2 == RE2)
return false;
// Skip the unconditional branches.
@@ -1293,10 +1381,12 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
bool Changed = false;
while (RI1 != RE1 && RI2 != RE2) {
// Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
+ ++RI1;
if (RI1 == RE1)
return Changed;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
+ ++RI2;
if (RI2 == RE2)
return Changed;
@@ -1305,22 +1395,19 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// I1 and I2 should have a single use in the same PHI node, and they
// perform the same operation.
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
- isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
- I1->isEHPad() || I2->isEHPad() ||
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) || isa<TerminatorInst>(I1) ||
+ isa<TerminatorInst>(I2) || I1->isEHPad() || I2->isEHPad() ||
isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
- !I1->hasOneUse() || !I2->hasOneUse() ||
- !JointValueMap.count(InstPair))
+ !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair))
return Changed;
// Check whether we should swap the operands of ICmpInst.
// TODO: Add support for commutativity.
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
bool SwapOpnds = false;
- if (ICmp1 && ICmp2 &&
- ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
(ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
@@ -1343,8 +1430,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
continue;
// Early exit if we have more-than one pair of different operands or if
// we need a PHI node to replace a constant.
- if (Op1Idx != ~0U ||
- isa<Constant>(I1->getOperand(I)) ||
+ if (Op1Idx != ~0U || isa<Constant>(I1->getOperand(I)) ||
isa<Constant>(I2->getOperand(I))) {
// If we can't sink the instructions, undo the swapping.
if (SwapOpnds)
@@ -1379,7 +1465,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// We need to update RE1 and RE2 if we are going to sink the first
// instruction in the basic block down.
- bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front());
// Sink the instruction.
BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
BB1->getInstList(), I1);
@@ -1444,22 +1530,26 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
Value *StorePtr = StoreToHoist->getPointerOperand();
// Look for a store to the same pointer in BrBB.
- unsigned MaxNumInstToLookAt = 10;
- for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
- RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
- Instruction *CurI = &*RI;
+ unsigned MaxNumInstToLookAt = 9;
+ for (Instruction &CurI : reverse(*BrBB)) {
+ if (!MaxNumInstToLookAt)
+ break;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(CurI))
+ continue;
+ --MaxNumInstToLookAt;
// Could be calling an instruction that affects memory like free().
- if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
return nullptr;
- StoreInst *SI = dyn_cast<StoreInst>(CurI);
- // Found the previous store make sure it stores to the same location.
- if (SI && SI->getPointerOperand() == StorePtr)
- // Found the previous store, return its value operand.
- return SI->getValueOperand();
- else if (SI)
+ if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
+ // Found the previous store; make sure it stores to the same location.
+ if (SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
return nullptr; // Unknown store.
+ }
}
return nullptr;
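
The rewritten scan above walks the block in reverse under a fixed instruction budget and, newly, stops charging debug intrinsics against that budget. A standalone sketch of the shape (toy instruction records; "Other" is treated as side-effect free purely to keep the sketch short):

    #include <vector>

    enum class Kind { Debug, Store, Other };
    struct Inst { Kind K; int Ptr = 0; int Val = 0; };

    // Return the value stored to Ptr by the nearest prior store, or -1 if an
    // unknown store intervenes or the budget runs out first.
    int findDominatingStoreValue(const std::vector<Inst> &BB, int Ptr) {
      unsigned Budget = 9;
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        if (!Budget)
          break;
        if (I->K == Kind::Debug)
          continue; // debug info no longer consumes the budget
        --Budget;
        if (I->K == Kind::Store)
          return I->Ptr == Ptr ? I->Val : -1; // unknown store ends the scan
        // Kind::Other: treated as side-effect free to keep the sketch short.
      }
      return -1;
    }
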
@@ -1562,11 +1652,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Do not hoist the instruction if any of its operands are defined but not
// used in BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = I->op_begin(), e = I->op_end();
- i != e; ++i) {
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
- if (!OpI || OpI->getParent() != BB ||
- OpI->mayHaveSideEffects())
+ if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
continue; // Not a candidate for sinking.
++SinkCandidateUseCounts[OpI];
@@ -1576,8 +1664,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Consider any sink candidates which are only used in CondBB as costs for
// speculation. Note, while we iterate over a DenseMap here, we are summing
// and so iteration order isn't significant.
- for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
- SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+ for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
+ I = SinkCandidateUseCounts.begin(),
+ E = SinkCandidateUseCounts.end();
I != E; ++I)
if (I->first->getNumUses() == I->second) {
++SpeculationCost;
@@ -1613,8 +1702,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return false;
unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
- unsigned MaxCost = 2 * PHINodeFoldingThreshold *
- TargetTransformInfo::TCC_Basic;
+ unsigned MaxCost =
+ 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
if (OrigCost + ThenCost > MaxCost)
return false;
@@ -1637,19 +1726,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Insert a select of the value of the speculated store.
if (SpeculatedStoreValue) {
- IRBuilder<true, NoFolder> Builder(BI);
+ IRBuilder<NoFolder> Builder(BI);
Value *TrueV = SpeculatedStore->getValueOperand();
Value *FalseV = SpeculatedStoreValue;
if (Invert)
std::swap(TrueV, FalseV);
- Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
- "." + FalseV->getName());
+ Value *S = Builder.CreateSelect(
+ BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
SpeculatedStore->setOperand(0, S);
}
// Metadata can be dependent on the condition we are hoisting above.
// Conservatively strip all metadata on the instruction.
- for (auto &I: *ThenBB)
+ for (auto &I : *ThenBB)
I.dropUnknownNonDebugMetadata();
// Hoist the instructions.
@@ -1657,7 +1746,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
ThenBB->begin(), std::prev(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
- IRBuilder<true, NoFolder> Builder(BI);
+ IRBuilder<NoFolder> Builder(BI);
for (BasicBlock::iterator I = EndBB->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
unsigned OrigI = PN->getBasicBlockIndex(BB);
@@ -1675,8 +1764,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
Value *TrueV = ThenV, *FalseV = OrigV;
if (Invert)
std::swap(TrueV, FalseV);
- Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
- TrueV->getName() + "." + FalseV->getName());
+ Value *V = Builder.CreateSelect(
+ BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
PN->setIncomingValue(OrigI, V);
PN->setIncomingValue(ThenI, V);
}
@@ -1685,19 +1774,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return true;
}
-/// \returns True if this block contains a CallInst with the NoDuplicate
-/// attribute.
-static bool HasNoDuplicateCall(const BasicBlock *BB) {
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- const CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI)
- continue;
- if (CI->cannotDuplicate())
- return true;
- }
- return false;
-}
-
/// Return true if we can thread a branch across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
@@ -1706,14 +1782,16 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
- if (Size > 10) return false; // Don't clone large BB's.
+ if (Size > 10)
+ return false; // Don't clone large BB's.
++Size;
// We can only support instructions that do not define values that are
// live outside of the current basic block.
for (User *U : BBI->users()) {
Instruction *UI = cast<Instruction>(U);
- if (UI->getParent() != BB || isa<PHINode>(UI)) return false;
+ if (UI->getParent() != BB || isa<PHINode>(UI))
+ return false;
}
// Looks ok, continue checking.
@@ -1740,32 +1818,41 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
}
// Now we know that this block has multiple preds and two succs.
- if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+ if (!BlockIsSimpleEnoughToThreadThrough(BB))
+ return false;
- if (HasNoDuplicateCall(BB)) return false;
+ // Can't fold blocks that contain noduplicate or convergent calls.
+ if (llvm::any_of(*BB, [](const Instruction &I) {
+ const CallInst *CI = dyn_cast<CallInst>(&I);
+ return CI && (CI->cannotDuplicate() || CI->isConvergent());
+ }))
+ return false;
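
The deleted HasNoDuplicateCall helper is folded into the any_of predicate above, which now also rejects convergent calls. The same shape with std::any_of over a toy record (illustration only):

    #include <algorithm>
    #include <vector>

    struct Call {
      bool NoDuplicate = false;
      bool Convergent = false;
    };

    bool blockHasUnclonableCall(const std::vector<Call> &BB) {
      return std::any_of(BB.begin(), BB.end(), [](const Call &C) {
        return C.NoDuplicate || C.Convergent;
      });
    }
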
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (!CB || !CB->getType()->isIntegerTy(1)) continue;
+ if (!CB || !CB->getType()->isIntegerTy(1))
+ continue;
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
- if (RealDest == BB) continue; // Skip self loops.
+ if (RealDest == BB)
+ continue; // Skip self loops.
// Skip if the predecessor's terminator is an indirect branch.
- if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ continue;
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
// the edge we are about to create.
- BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
- RealDest->getName()+".critedge",
- RealDest->getParent(), RealDest);
+ BasicBlock *EdgeBB =
+ BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
+ RealDest->getParent(), RealDest);
BranchInst::Create(RealDest, EdgeBB);
// Update PHI nodes.
@@ -1775,7 +1862,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
// instructions into EdgeBB. We know that there will be no uses of the
// cloned instructions outside of EdgeBB.
BasicBlock::iterator InsertPt = EdgeBB->begin();
- DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ DenseMap<Value *, Value *> TranslateMap; // Track translated values.
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -1783,26 +1870,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
}
// Clone the instruction.
Instruction *N = BBI->clone();
- if (BBI->hasName()) N->setName(BBI->getName()+".c");
+ if (BBI->hasName())
+ N->setName(BBI->getName() + ".c");
// Update operands due to translation.
- for (User::op_iterator i = N->op_begin(), e = N->op_end();
- i != e; ++i) {
- DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) {
+ DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
if (PI != TranslateMap.end())
*i = PI->second;
}
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, DL)) {
- TranslateMap[&*BBI] = V;
- delete N; // Instruction folded away, don't need actual inst
+ if (!BBI->use_empty())
+ TranslateMap[&*BBI] = V;
+ if (!N->mayHaveSideEffects()) {
+ delete N; // Instruction folded away, don't need actual inst
+ N = nullptr;
+ }
} else {
- // Insert the new instruction into its new home.
- EdgeBB->getInstList().insert(InsertPt, N);
if (!BBI->use_empty())
TranslateMap[&*BBI] = N;
}
+ // Insert the new instruction into its new home.
+ if (N)
+ EdgeBB->getInstList().insert(InsertPt, N);
}
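
The cloning loop above remaps each operand through TranslateMap so clones reference earlier clones rather than the originals; the fix also keeps a simplified-away clone when it may have side effects. A sketch of the remapping step over toy nodes (assumed types, for illustration):

    #include <map>
    #include <vector>

    struct Inst {
      std::vector<Inst *> Ops;
    };

    // Clone I, redirecting any operand that already has a clone in the map.
    Inst *cloneAndRemap(const Inst *I,
                        const std::map<const Inst *, Inst *> &TranslateMap) {
      Inst *N = new Inst(*I);
      for (Inst *&Op : N->Ops) {
        auto It = TranslateMap.find(Op);
        if (It != TranslateMap.end())
          Op = It->second; // use the translated value, not the original
      }
      return N;
    }
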
// Loop over all of the edges from PredBB to BB, changing them to branch
@@ -1852,7 +1944,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
- SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ SmallPtrSet<Instruction *, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
@@ -1876,7 +1968,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
- if (!PN) return true;
+ if (!PN)
+ return true;
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
@@ -1886,10 +1979,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
isa<BinaryOperator>(IfCond)))
return false;
- // If we all PHI nodes are promotable, check to make sure that all
- // instructions in the predecessor blocks can be promoted as well. If
- // not, we won't be able to get rid of the control flow, so it's not
- // worth promoting to select instructions.
+ // If all PHI nodes are promotable, check to make sure that all instructions
+ // in the predecessor blocks can be promoted as well. If not, we won't be able
+ // to get rid of the control flow, so it's not worth promoting to select
+ // instructions.
BasicBlock *DomBlock = nullptr;
BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
@@ -1897,11 +1990,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
+ for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I);
+ ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
- // Because of this, we won't be able to get rid of the control
- // flow, so the xform is not worth it.
+ // Because of this, we won't be able to get rid of the control flow, so
+ // the xform is not worth it.
return false;
}
}
@@ -1910,11 +2004,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
+ for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I);
+ ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
- // Because of this, we won't be able to get rid of the control
- // flow, so the xform is not worth it.
+ // Because of this, we won't be able to get rid of the control flow, so
+ // the xform is not worth it.
return false;
}
}
@@ -1925,7 +2020,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
Instruction *InsertPt = DomBlock->getTerminator();
- IRBuilder<true, NoFolder> Builder(InsertPt);
+ IRBuilder<NoFolder> Builder(InsertPt);
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
@@ -1940,13 +2035,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
- Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
- SelectInst *NV =
- cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
- PN->replaceAllUsesWith(NV);
- NV->takeName(PN);
+ Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
+ PN->replaceAllUsesWith(Sel);
+ Sel->takeName(PN);
PN->eraseFromParent();
}
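
Each two-entry PHI in the diamond is rewritten above as select(IfCond, TrueVal, FalseVal), ordering the incoming values by which block feeds the false arm. A minimal sketch of that selection over plain ints (hypothetical Phi record):

    struct Phi {
      int InVal0, InVal1;    // incoming values from the two arms
      bool Block0IsFalseArm; // does incoming block 0 feed the false edge?
    };

    // select(IfCond, TrueVal, FalseVal), with the operands ordered the same
    // way the loop above orders them.
    int foldPhiToSelect(bool IfCond, const Phi &P) {
      int TrueVal = P.Block0IsFalseArm ? P.InVal1 : P.InVal0;
      int FalseVal = P.Block0IsFalseArm ? P.InVal0 : P.InVal1;
      return IfCond ? TrueVal : FalseVal;
    }
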
@@ -2029,51 +2123,32 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
} else if (isa<UndefValue>(TrueValue)) {
TrueValue = FalseValue;
} else {
- TrueValue = Builder.CreateSelect(BrCond, TrueValue,
- FalseValue, "retval");
+ TrueValue =
+ Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
}
}
- Value *RI = !TrueValue ?
- Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+ Value *RI =
+ !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
- (void) RI;
+ (void)RI;
DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
<< "\n " << *BI << "NewRet = " << *RI
- << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
EraseTerminatorInstAndDCECond(BI);
return true;
}
-/// Given a conditional BranchInstruction, retrieve the probabilities of the
-/// branch taking each edge. Fills in the two APInt parameters and returns true,
-/// or returns false if no or invalid metadata was found.
-static bool ExtractBranchMetadata(BranchInst *BI,
- uint64_t &ProbTrue, uint64_t &ProbFalse) {
- assert(BI->isConditional() &&
- "Looking for probabilities on unconditional branch?");
- MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
- if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
- ConstantInt *CITrue =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
- ConstantInt *CIFalse =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
- if (!CITrue || !CIFalse) return false;
- ProbTrue = CITrue->getValue().getZExtValue();
- ProbFalse = CIFalse->getValue().getZExtValue();
- return true;
-}
-
/// Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
return false;
- for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) {
- Instruction *PBI = &*I;
+ for (Instruction &I : *PB) {
+ Instruction *PBI = &I;
// Check whether Inst and PBI generate the same value.
if (Inst->isIdenticalTo(PBI)) {
Inst->replaceAllUsesWith(PBI);
@@ -2084,6 +2159,29 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
return false;
}
+/// Return true if either PBI or BI has branch weights available, and store
+/// them in {Pred|Succ}{True|False}Weight. If one of PBI and BI does not have
+/// branch weights, 1:1 is used for it.
+static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
+ uint64_t &PredTrueWeight,
+ uint64_t &PredFalseWeight,
+ uint64_t &SuccTrueWeight,
+ uint64_t &SuccFalseWeight) {
+ bool PredHasWeights =
+ PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
+ bool SuccHasWeights =
+ BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
+ if (PredHasWeights || SuccHasWeights) {
+ if (!PredHasWeights)
+ PredTrueWeight = PredFalseWeight = 1;
+ if (!SuccHasWeights)
+ SuccTrueWeight = SuccFalseWeight = 1;
+ return true;
+ } else {
+ return false;
+ }
+}
+
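
For the folding further down, the merged weights follow from path counting: the combined branch is taken only when both PBI and BI are taken, and not taken when PBI falls through, or when PBI is taken but BI is not. A worked sketch with small example weights (values invented for illustration; the real code uses 64-bit arithmetic because each branch's total fits in 32 bits):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // PBI: br i1 %x, BB, FalseDest        (weights PredTrue:PredFalse)
      // BI:  br i1 %y, TrueDest, FalseDest  (weights SuccTrue:SuccFalse)
      uint64_t PredTrue = 3, PredFalse = 1;
      uint64_t SuccTrue = 2, SuccFalse = 5;
      uint64_t NewTrue = PredTrue * SuccTrue;                        // 3*2 = 6
      uint64_t NewFalse =
          PredFalse * (SuccTrue + SuccFalse) + PredTrue * SuccFalse; // 7+15 = 22
      std::printf("merged weights %llu:%llu\n", (unsigned long long)NewTrue,
                  (unsigned long long)NewFalse);
      return 0;
    }
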
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
@@ -2103,8 +2201,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
if (PBI->isConditional() &&
(BI->getSuccessor(0) == PBI->getSuccessor(0) ||
BI->getSuccessor(0) == PBI->getSuccessor(1))) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Curr = &*I++;
if (isa<CmpInst>(Curr)) {
Cond = Curr;
@@ -2122,13 +2219,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
- return false;
+ return false;
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = ++Cond->getIterator();
// Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+ while (isa<DbgInfoIntrinsic>(CondIt))
+ ++CondIt;
if (&*CondIt != BI)
return false;
@@ -2139,7 +2237,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
- for (auto I = BB->begin(); Cond != I; ++I) {
+ for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
@@ -2168,7 +2266,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
return false;
// Finally, don't infinitely unroll conditional loops.
- BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
if (TrueDest == BB || FalseDest == BB)
return false;
@@ -2180,10 +2278,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
- SmallVector<PHINode*, 4> PHIs;
+ SmallVector<PHINode *, 4> PHIs;
if (!PBI || PBI->isUnconditional() ||
- (BI->isConditional() &&
- !SafeToMergeTerminators(BI, PBI)) ||
+ (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
continue;
@@ -2193,16 +2290,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
bool InvertPredCond = false;
if (BI->isConditional()) {
- if (PBI->getSuccessor(0) == TrueDest)
+ if (PBI->getSuccessor(0) == TrueDest) {
Opc = Instruction::Or;
- else if (PBI->getSuccessor(1) == FalseDest)
+ } else if (PBI->getSuccessor(1) == FalseDest) {
Opc = Instruction::And;
- else if (PBI->getSuccessor(0) == FalseDest)
- Opc = Instruction::And, InvertPredCond = true;
- else if (PBI->getSuccessor(1) == TrueDest)
- Opc = Instruction::Or, InvertPredCond = true;
- else
+ } else if (PBI->getSuccessor(0) == FalseDest) {
+ Opc = Instruction::And;
+ InvertPredCond = true;
+ } else if (PBI->getSuccessor(1) == TrueDest) {
+ Opc = Instruction::Or;
+ InvertPredCond = true;
+ } else {
continue;
+ }
} else {
if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
continue;
@@ -2219,8 +2319,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
CmpInst *CI = cast<CmpInst>(NewCond);
CI->setPredicate(CI->getInversePredicate());
} else {
- NewCond = Builder.CreateNot(NewCond,
- PBI->getCondition()->getName()+".not");
+ NewCond =
+ Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
}
PBI->setCondition(NewCond);
@@ -2234,12 +2334,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// We already make sure Cond is the last instruction before BI. Therefore,
// all instructions before Cond other than DbgInfoIntrinsic are bonus
// instructions.
- for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) {
+ for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
if (isa<DbgInfoIntrinsic>(BonusInst))
continue;
Instruction *NewBonusInst = BonusInst->clone();
RemapInstruction(NewBonusInst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
VMap[&*BonusInst] = NewBonusInst;
// If we moved a load, we can no longer claim any knowledge about
@@ -2258,49 +2358,49 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// two conditions together.
Instruction *New = Cond->clone();
RemapInstruction(New, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
PredBlock->getInstList().insert(PBI->getIterator(), New);
New->takeName(Cond);
Cond->setName(New->getName() + ".old");
if (BI->isConditional()) {
- Instruction *NewCond =
- cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
- New, "or.cond"));
+ Instruction *NewCond = cast<Instruction>(
+ Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond"));
PBI->setCondition(NewCond);
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
- PredFalseWeight);
- bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
- SuccFalseWeight);
+ bool HasWeights =
+ extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight);
SmallVector<uint64_t, 8> NewWeights;
if (PBI->getSuccessor(0) == BB) {
- if (PredHasWeights && SuccHasWeights) {
+ if (HasWeights) {
// PBI: br i1 %x, BB, FalseDest
// BI: br i1 %y, TrueDest, FalseDest
- //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
- //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
// TrueWeight for PBI * FalseWeight for BI.
// We assume that total weights of a BranchInst can fit into 32 bits.
// Therefore, we will not have overflow using 64-bit arithmetic.
- NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
- SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ NewWeights.push_back(PredFalseWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
}
AddPredecessorToBlock(TrueDest, PredBlock, BB);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
- if (PredHasWeights && SuccHasWeights) {
+ if (HasWeights) {
// PBI: br i1 %x, TrueDest, BB
// BI: br i1 %y, TrueDest, FalseDest
- //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
// FalseWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
- SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
- //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredTrueWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
}
AddPredecessorToBlock(FalseDest, PredBlock, BB);
@@ -2310,51 +2410,42 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Halve the weights if any of them cannot fit in an uint32_t
FitWeights(NewWeights);
- SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
- PBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext()).
- createBranchWeights(MDWeights));
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
+ NewWeights.end());
+ PBI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).createBranchWeights(MDWeights));
} else
PBI->setMetadata(LLVMContext::MD_prof, nullptr);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
ConstantInt *PBI_C = cast<ConstantInt>(
- PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
+ PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
assert(PBI_C->getType()->isIntegerTy(1));
Instruction *MergedCond = nullptr;
if (PBI->getSuccessor(0) == TrueDest) {
// Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
// PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
// is false: !PBI_Cond and BI_Value
- Instruction *NotCond =
- cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
- "not.cond"));
- MergedCond =
- cast<Instruction>(Builder.CreateBinOp(Instruction::And,
- NotCond, New,
- "and.cond"));
+ Instruction *NotCond = cast<Instruction>(
+ Builder.CreateNot(PBI->getCondition(), "not.cond"));
+ MergedCond = cast<Instruction>(
+ Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond"));
if (PBI_C->isOne())
- MergedCond =
- cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
- PBI->getCondition(), MergedCond,
- "or.cond"));
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
} else {
// Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
- MergedCond =
- cast<Instruction>(Builder.CreateBinOp(Instruction::And,
- PBI->getCondition(), New,
- "and.cond"));
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::And, PBI->getCondition(), New, "and.cond"));
if (PBI_C->isOne()) {
- Instruction *NotCond =
- cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
- "not.cond"));
- MergedCond =
- cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
- NotCond, MergedCond,
- "or.cond"));
+ Instruction *NotCond = cast<Instruction>(
+ Builder.CreateNot(PBI->getCondition(), "not.cond"));
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::Or, NotCond, MergedCond, "or.cond"));
}
}
// Update PHI Node.
@@ -2371,9 +2462,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// could replace PBI's branch probabilities with BI's.
// Copy any debug value intrinsics into the end of PredBlock.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (isa<DbgInfoIntrinsic>(*I))
- I->clone()->insertBefore(PBI);
+ for (Instruction &I : *BB)
+ if (isa<DbgInfoIntrinsic>(I))
+ I.clone()->insertBefore(PBI);
return true;
}
@@ -2417,7 +2508,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
// where OtherBB is the single other predecessor of BB's only successor.
PHINode *PHI = nullptr;
BasicBlock *Succ = BB->getSingleSuccessor();
-
+
for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
PHI = cast<PHINode>(I);
@@ -2443,8 +2534,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
PHI->addIncoming(V, BB);
for (BasicBlock *PredBB : predecessors(Succ))
if (PredBB != BB)
- PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()),
- PredBB);
+ PHI->addIncoming(
+ AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
return PHI;
}
@@ -2481,10 +2572,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
return N <= PHINodeFoldingThreshold;
};
- if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) ||
- !IsWorthwhile(PFB) ||
- !IsWorthwhile(QTB) ||
- !IsWorthwhile(QFB)))
+ if (!MergeCondStoresAggressively &&
+ (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) ||
+ !IsWorthwhile(QFB)))
return false;
// For every pointer, there must be exactly two stores, one coming from
@@ -2561,7 +2651,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
QStore->eraseFromParent();
PStore->eraseFromParent();
-
+
return true;
}
@@ -2593,7 +2683,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
// We model triangles as a type of diamond with a nullptr "true" block.
// Triangles are canonicalized so that the fallthrough edge is represented by
// a true condition, as in the diagram above.
- //
+ //
BasicBlock *PTB = PBI->getSuccessor(0);
BasicBlock *PFB = PBI->getSuccessor(1);
BasicBlock *QTB = QBI->getSuccessor(0);
@@ -2622,8 +2712,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
// the post-dominating block, and the non-fallthroughs must only have one
// predecessor.
auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
- return BB->getSinglePredecessor() == P &&
- BB->getSingleSuccessor() == S;
+ return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
};
if (!PostBB ||
!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
@@ -2637,7 +2726,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
// OK, this is a sequence of two diamonds or triangles.
// Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
- SmallPtrSet<Value *,4> PStoreAddresses, QStoreAddresses;
+ SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
for (auto *BB : {PTB, PFB}) {
if (!BB)
continue;
@@ -2652,7 +2741,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
if (StoreInst *SI = dyn_cast<StoreInst>(&I))
QStoreAddresses.insert(SI->getPointerOperand());
}
-
+
set_intersect(PStoreAddresses, QStoreAddresses);
// set_intersect mutates PStoreAddresses in place. Rename it here to make it
// clear what it contains.
@@ -2684,9 +2773,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- CondIsTrue));
- return true; // Nuke the branch on constant.
+ BI->setCondition(
+ ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
+ return true; // Nuke the branch on constant.
}
// Otherwise, if there are multiple predecessors, insert a PHI that merges
@@ -2702,13 +2791,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Any predecessor where the condition is not computable we keep symbolic.
for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
- if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
- PBI != BI && PBI->isConditional() &&
- PBI->getCondition() == BI->getCondition() &&
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
+ PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- CondIsTrue), P);
+ NewPN->addIncoming(
+ ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
+ P);
} else {
NewPN->addIncoming(BI->getCondition(), P);
}
@@ -2723,19 +2812,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (CE->canTrap())
return false;
- // If BI is reached from the true path of PBI and PBI's condition implies
- // BI's condition, we know the direction of the BI branch.
- if (PBI->getSuccessor(0) == BI->getParent() &&
- isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
- BB->getSinglePredecessor()) {
- // Turn this into a branch on constant.
- auto *OldCond = BI->getCondition();
- BI->setCondition(ConstantInt::getTrue(BB->getContext()));
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return true; // Nuke the branch on constant.
- }
-
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
@@ -2753,16 +2829,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
return false;
int PBIOp, BIOp;
- if (PBI->getSuccessor(0) == BI->getSuccessor(0))
- PBIOp = BIOp = 0;
- else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
- PBIOp = 0, BIOp = 1;
- else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
- PBIOp = 1, BIOp = 0;
- else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
- PBIOp = BIOp = 1;
- else
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+ PBIOp = 0;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+ PBIOp = 0;
+ BIOp = 1;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+ PBIOp = 1;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+ PBIOp = 1;
+ BIOp = 1;
+ } else {
return false;
+ }
// Check to make sure that the other destination of this branch
// isn't BB itself. If so, this is an infinite loop that will
@@ -2780,8 +2861,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
unsigned NumPhis = 0;
- for (BasicBlock::iterator II = CommonDest->begin();
- isa<PHINode>(II); ++II, ++NumPhis) {
+ for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
+ ++II, ++NumPhis) {
if (NumPhis > 2) // Disable this xform.
return false;
@@ -2804,7 +2885,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
-
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -2815,8 +2895,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (OtherDest == BB) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
- BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
- "infloop", BB->getParent());
+ BasicBlock *InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
OtherDest = InfLoopBlock;
}
@@ -2828,13 +2908,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Make sure we get to CommonDest on True&True directions.
Value *PBICond = PBI->getCondition();
- IRBuilder<true, NoFolder> Builder(PBI);
+ IRBuilder<NoFolder> Builder(PBI);
if (PBIOp)
- PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
Value *BICond = BI->getCondition();
if (BIOp)
- BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+ BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
// Merge the conditions.
Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
@@ -2846,15 +2926,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Update branch weight for PBI.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
- PredFalseWeight);
- bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
- SuccFalseWeight);
- if (PredHasWeights && SuccHasWeights) {
- uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
- uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
- uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
- uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
+ bool HasWeights =
+ extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight);
+ if (HasWeights) {
+ PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
// The weight to CommonDest should be PredCommon * SuccTotal +
// PredOther * SuccCommon.
// The weight to OtherDest should be PredOther * SuccOther.
@@ -2885,9 +2965,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
Value *PBIV = PN->getIncomingValue(PBBIdx);
if (BIV != PBIV) {
// Insert a select in PBI to pick the right value.
- Value *NV = cast<SelectInst>
- (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ SelectInst *NV = cast<SelectInst>(
+ Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
PN->setIncomingValue(PBBIdx, NV);
+ // Although the select has the same condition as PBI, the original branch
+ // weights for PBI do not apply to the new select because the select's
+ // 'logical' edges are incoming edges of the phi that is eliminated, not
+ // the outgoing edges of PBI.
+ if (HasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to PredCommonDest should be PredCommon * SuccTotal.
+ // The weight to PredOtherDest should be PredOther * SuccCommon.
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
+ PredOther * SuccCommon};
+
+ FitWeights(NewWeights);
+
+ NV->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
+ }
}
}
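
The weight algebra above can be sanity-checked in isolation. The following standalone C++ sketch models it with plain integers; fitWeights is an invented stand-in for SimplifyCFG's FitWeights (assumed, for illustration, to halve both weights until they fit in 32 bits), and the numbers are made up:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Stand-in for FitWeights: scale both weights down until they fit in 32 bits.
static void fitWeights(uint64_t W[2]) {
  while (std::max(W[0], W[1]) > UINT32_MAX) {
    W[0] >>= 1;
    W[1] >>= 1;
  }
}

int main() {
  // Example: PBI takes its common edge 3x as often as the other edge and
  // BI splits 1:1, with PBIOp == BIOp == 0.
  uint64_t PredCommon = 3, PredOther = 1, SuccCommon = 1, SuccOther = 1;

  // Merged branch: CommonDest gets PredCommon * SuccTotal +
  // PredOther * SuccCommon; OtherDest gets PredOther * SuccOther.
  uint64_t NewBr[2] = {
      PredCommon * (SuccCommon + SuccOther) + PredOther * SuccCommon,
      PredOther * SuccOther};
  fitWeights(NewBr);
  std::printf("merged br %llu:%llu\n", (unsigned long long)NewBr[0],
              (unsigned long long)NewBr[1]); // prints 7:1

  // Select feeding the surviving PHI entry: its "edges" are the eliminated
  // phi's incoming edges, so the PredOther * SuccOther term drops out.
  uint64_t NewSel[2] = {PredCommon * (SuccCommon + SuccOther),
                        PredOther * SuccCommon};
  fitWeights(NewSel);
  std::printf("select    %llu:%llu\n", (unsigned long long)NewSel[0],
              (unsigned long long)NewSel[1]); // prints 6:1
  return 0;
}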
@@ -2907,7 +3007,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
uint32_t TrueWeight,
- uint32_t FalseWeight){
+ uint32_t FalseWeight) {
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
@@ -2942,8 +3042,8 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
if (TrueWeight != FalseWeight)
NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(OldTerm->getContext()).
- createBranchWeights(TrueWeight, FalseWeight));
+ MDBuilder(OldTerm->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
}
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
@@ -2988,16 +3088,16 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
if (HasWeights) {
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
- TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
- getSuccessorIndex()];
- FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
- getSuccessorIndex()];
+ TrueWeight =
+ (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()];
+ FalseWeight =
+ (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()];
}
}
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
- TrueWeight, FalseWeight);
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
+ FalseWeight);
}
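
For orientation, the effect of SimplifySwitchOnSelect at source level looks like the following. This is a C++ illustration of the shape being matched, not IR and not the pass's API:

// A switch whose condition is a two-way select...
int before(bool C) {
  switch (C ? 4 : 7) { // SwitchInst on a SelectInst
  case 4:
    return 1;
  case 7:
    return 2;
  default:
    return 3; // dead: the select only produces 4 or 7
  }
}
// ...reduces to a conditional branch on the select's own condition, with the
// weights of the two reachable cases carried over as true/false weights:
int after(bool C) { return C ? 1 : 2; }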
// Replaces
@@ -3017,8 +3117,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
BasicBlock *FalseBB = FBA->getBasicBlock();
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
- 0, 0);
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
+ 0);
}
/// This is called when we find an icmp instruction
@@ -3046,7 +3146,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// If the block has any PHIs in it or the icmp has multiple uses, it is too
// complex.
- if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
+ return false;
Value *V = ICI->getOperand(0);
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
@@ -3055,7 +3156,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
- if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false;
+ if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
+ return false;
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
@@ -3104,7 +3206,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// If the icmp is a SETEQ, then the default dest gets false, the new edge gets
// true in the PHI.
Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
- Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
std::swap(DefaultCst, NewCst);
@@ -3116,21 +3218,21 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// Okay, the switch goes to this block on a default value. Add an edge from
// the switch to the merge point on the compared value.
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
- BB->getParent(), BB);
+ BasicBlock *NewBB =
+ BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
SmallVector<uint64_t, 8> Weights;
bool HasWeights = HasBranchWeights(SI);
if (HasWeights) {
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
// Split weight for default case to case for "Cst".
- Weights[0] = (Weights[0]+1) >> 1;
+ Weights[0] = (Weights[0] + 1) >> 1;
Weights.push_back(Weights[0]);
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).
- createBranchWeights(MDWeights));
+ SI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).createBranchWeights(MDWeights));
}
}
SI->addCase(Cst, NewBB);
@@ -3149,7 +3251,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
const DataLayout &DL) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (!Cond) return false;
+ if (!Cond)
+ return false;
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
@@ -3158,13 +3261,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// Try to gather values from a chain of and/or to be turned into a switch
ConstantComparesGatherer ConstantCompare(Cond, DL);
// Unpack the result
- SmallVectorImpl<ConstantInt*> &Values = ConstantCompare.Vals;
+ SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
Value *CompVal = ConstantCompare.CompValue;
unsigned UsedICmps = ConstantCompare.UsedICmps;
Value *ExtraCase = ConstantCompare.Extra;
// If we didn't have a multiply compared value, fail.
- if (!CompVal) return false;
+ if (!CompVal)
+ return false;
// Avoid turning single icmps into a switch.
if (UsedICmps <= 1)
@@ -3179,20 +3283,23 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// If Extra was used, we require at least two switch values to do the
// transformation. A switch with one value is just a conditional branch.
- if (ExtraCase && Values.size() < 2) return false;
+ if (ExtraCase && Values.size() < 2)
+ return false;
// TODO: Preserve branch weight metadata, similarly to how
// FoldValueComparisonIntoPredecessors preserves it.
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
- BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual)
+ std::swap(DefaultBB, EdgeBB);
BasicBlock *BB = BI->getParent();
DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
- << " cases into SWITCH. BB is:\n" << *BB);
+ << " cases into SWITCH. BB is:\n"
+ << *BB);
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
@@ -3216,7 +3323,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
AddPredecessorToBlock(EdgeBB, BB, NewBB);
DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
- << "\nEXTRABB = " << *BB);
+ << "\nEXTRABB = " << *BB);
BB = NewBB;
}
@@ -3237,11 +3344,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
// We added edges from PI to the EdgeBB. As such, if there were any
// PHI nodes in EdgeBB, they need entries to be added corresponding to
// the number of edges added.
- for (BasicBlock::iterator BBI = EdgeBB->begin();
- isa<PHINode>(BBI); ++BBI) {
+ for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
PHINode *PN = cast<PHINode>(BBI);
Value *InVal = PN->getIncomingValueForBlock(BB);
- for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
PN->addIncoming(InVal, BB);
}
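
To make the shape of this rewrite concrete, here is a source-level illustration in C++ (hypothetical functions, not IR) of an or'd icmp chain with one unfoldable ExtraCase:

// Before: an or'd chain of equality tests against constants on one value,
// plus a condition that cannot be folded into a switch.
bool before(int X, bool Extra) {
  if (X == 0 || X == 10 || X == 20 || Extra) // 'Extra' is the ExtraCase
    return true;
  return false;
}
// After: the ExtraCase is evaluated with an explicit branch first, then the
// gathered constants become switch cases.
bool after(int X, bool Extra) {
  if (Extra)
    return true;
  switch (X) {
  case 0:
  case 10:
  case 20:
    return true;
  default:
    return false;
  }
}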
@@ -3270,7 +3376,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
// Check that there are no other instructions except for debug intrinsics
// between the phi of landing pads (RI->getValue()) and resume instruction.
BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
- E = RI->getIterator();
+ E = RI->getIterator();
while (++I != E)
if (!isa<DbgInfoIntrinsic>(I))
return false;
@@ -3279,8 +3385,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
auto *PhiLPInst = cast<PHINode>(RI->getValue());
// Check incoming blocks to see if any of them are trivial.
- for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues();
- Idx != End; Idx++) {
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
+ Idx++) {
auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
@@ -3289,8 +3395,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
if (IncomingBB->getUniqueSuccessor() != BB)
continue;
- auto *LandingPad =
- dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
// Not the landing pad that caused the control to branch here.
if (IncomingValue != LandingPad)
continue;
@@ -3310,7 +3415,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
}
// If no trivial unwind blocks, don't do any simplifications.
- if (TrivialUnwindBlocks.empty()) return false;
+ if (TrivialUnwindBlocks.empty())
+ return false;
// Turn all invokes that unwind here into calls.
for (auto *TrivialBB : TrivialUnwindBlocks) {
@@ -3346,8 +3452,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
- assert (RI->getValue() == LPInst &&
- "Resume must unwind the exception that caused control to here");
+ assert(RI->getValue() == LPInst &&
+ "Resume must unwind the exception that caused control to here");
// Check that there are no other instructions except for debug intrinsics.
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
@@ -3363,10 +3469,12 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
// The landingpad is now unreachable. Zap it.
BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
return true;
}
-bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+static bool removeEmptyCleanup(CleanupReturnInst *RI) {
// If this is a trivial cleanup pad that executes no instructions, it can be
// eliminated. If the cleanup pad continues to the caller, any predecessor
// that is an EH pad will be updated to continue to the caller and any
@@ -3381,12 +3489,29 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
// This isn't an empty cleanup.
return false;
- // Check that there are no other instructions except for debug intrinsics.
+ // We cannot kill the pad if it has multiple uses. This typically arises
+ // from unreachable basic blocks.
+ if (!CPInst->hasOneUse())
+ return false;
+
+ // Check that there are no other instructions except for benign intrinsics.
BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I))
+ while (++I != E) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
return false;
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+
// If the cleanup return we are simplifying unwinds to the caller, this will
// set UnwindDest to nullptr.
BasicBlock *UnwindDest = RI->getUnwindDest();
@@ -3430,7 +3555,7 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
// removing, we need to merge that PHI node's incoming values into
// DestPN.
for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
- SrcIdx != SrcE; ++SrcIdx) {
+ SrcIdx != SrcE; ++SrcIdx) {
DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
SrcPN->getIncomingBlock(SrcIdx));
}
@@ -3484,13 +3609,63 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
return true;
}
+// Try to merge two cleanuppads together.
+static bool mergeCleanupPad(CleanupReturnInst *RI) {
+  // Skip any cleanuprets which unwind to caller; there is nothing to merge
+  // with.

+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ if (!UnwindDest)
+ return false;
+
+  // If this cleanupret isn't the only predecessor of this cleanuppad, it
+  // wouldn't be safe to merge without code duplication.
+ if (UnwindDest->getSinglePredecessor() != RI->getParent())
+ return false;
+
+ // Verify that our cleanuppad's unwind destination is another cleanuppad.
+ auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
+ if (!SuccessorCleanupPad)
+ return false;
+
+ CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
+  // Replace any uses of the successor cleanuppad with the predecessor pad.
+  // The only uses of the successor cleanuppad should be its cleanupret and
+  // funclet bundle operands.
+ SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
+ // Remove the old cleanuppad.
+ SuccessorCleanupPad->eraseFromParent();
+ // Now, we simply replace the cleanupret with a branch to the unwind
+ // destination.
+ BranchInst::Create(UnwindDest, RI->getParent());
+ RI->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+  // It is possible to transiently have an undef cleanuppad operand because we
+ // have deleted some, but not all, dead blocks.
+ // Eventually, this block will be deleted.
+ if (isa<UndefValue>(RI->getOperand(0)))
+ return false;
+
+ if (mergeCleanupPad(RI))
+ return true;
+
+ if (removeEmptyCleanup(RI))
+ return true;
+
+ return false;
+}
+
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
- if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
// Find predecessors that end with branches.
- SmallVector<BasicBlock*, 8> UncondBranchPreds;
- SmallVector<BranchInst*, 8> CondBranchPreds;
+ SmallVector<BasicBlock *, 8> UncondBranchPreds;
+ SmallVector<BranchInst *, 8> CondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
TerminatorInst *PTI = P->getTerminator();
@@ -3507,14 +3682,17 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
while (!UncondBranchPreds.empty()) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
(void)FoldReturnIntoUncondBranch(RI, BB, Pred);
}
// If we eliminated all predecessors of the block, delete the block now.
- if (pred_empty(BB))
+ if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
+ }
return true;
}
@@ -3547,7 +3725,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// Do not delete instructions that can have side effects which might cause
// the unreachable to not be reachable; specifically, calls and volatile
// operations may have this effect.
- if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))
+ break;
if (BBI->mayHaveSideEffects()) {
if (auto *SI = dyn_cast<StoreInst>(BBI)) {
@@ -3589,9 +3768,10 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If the unreachable instruction is the first in the block, take a gander
// at all of the predecessors of this instruction, and simplify them.
- if (&BB->front() != UI) return Changed;
+ if (&BB->front() != UI)
+ return Changed;
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
@@ -3613,12 +3793,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
+ ++i)
if (i.getCaseSuccessor() == BB) {
BB->removePredecessor(SI->getParent());
SI->removeCase(i);
- --i; --e;
+ --i;
+ --e;
Changed = true;
}
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
@@ -3667,10 +3848,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
// If this block is now dead, remove it.
- if (pred_empty(BB) &&
- BB != &BB->getParent()->getEntryBlock()) {
+ if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
return true;
}
@@ -3699,25 +3881,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
// Partition the cases into two sets with different destinations.
BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
BasicBlock *DestB = nullptr;
- SmallVector <ConstantInt *, 16> CasesA;
- SmallVector <ConstantInt *, 16> CasesB;
+ SmallVector<ConstantInt *, 16> CasesA;
+ SmallVector<ConstantInt *, 16> CasesB;
for (SwitchInst::CaseIt I : SI->cases()) {
BasicBlock *Dest = I.getCaseSuccessor();
- if (!DestA) DestA = Dest;
+ if (!DestA)
+ DestA = Dest;
if (Dest == DestA) {
CasesA.push_back(I.getCaseValue());
continue;
}
- if (!DestB) DestB = Dest;
+ if (!DestB)
+ DestB = Dest;
if (Dest == DestB) {
CasesB.push_back(I.getCaseValue());
continue;
}
- return false; // More than two destinations.
+ return false; // More than two destinations.
}
- assert(DestA && DestB && "Single-destination switch should have been folded.");
+ assert(DestA && DestB &&
+ "Single-destination switch should have been folded.");
assert(DestA != DestB);
assert(DestB != SI->getDefaultDest());
assert(!CasesB.empty() && "There must be non-default cases.");
@@ -3741,7 +3926,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
// Start building the compare and branch.
Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size());
+ Constant *NumCases =
+ ConstantInt::get(Offset->getType(), ContiguousCases->size());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
@@ -3773,21 +3959,24 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
FalseWeight /= 2;
}
NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).createBranchWeights(
- (uint32_t)TrueWeight, (uint32_t)FalseWeight));
+ MDBuilder(SI->getContext())
+ .createBranchWeights((uint32_t)TrueWeight,
+ (uint32_t)FalseWeight));
}
}
// Prune obsolete incoming values off the successors' PHI nodes.
for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
unsigned PreviousEdges = ContiguousCases->size();
- if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges;
+ if (ContiguousDest == SI->getDefaultDest())
+ ++PreviousEdges;
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
- if (OtherDest == SI->getDefaultDest()) ++PreviousEdges;
+ if (OtherDest == SI->getDefaultDest())
+ ++PreviousEdges;
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
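
The correctness of the sub + icmp ult trick rests on unsigned wraparound: values below the range underflow to large unsigned numbers and fail the compare. A minimal standalone check, with invented helper names and assuming 32-bit int:

#include <cassert>
#include <cstdint>

// Contiguous cases {10, 11, 12} with one destination, everything else with
// another.
static bool viaSwitch(int32_t X) {
  switch (X) {
  case 10:
  case 11:
  case 12:
    return true;
  default:
    return false;
  }
}
static bool viaICmp(int32_t X) {
  return (uint32_t)(X - 10) < 3u; // sub + icmp ult, as built above
}

int main() {
  for (int32_t X : {-1, 9, 10, 11, 12, 13, 1000})
    assert(viaSwitch(X) == viaICmp(X));
  return 0;
}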
@@ -3807,32 +3996,38 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
+ // We can also eliminate cases by determining that their values are outside of
+ // the limited range of the condition based on how many significant (non-sign)
+ // bits are in the condition value.
+ unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
+ unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
+
// Gather dead cases.
- SmallVector<ConstantInt*, 8> DeadCases;
- for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
- if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
- (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
- DeadCases.push_back(I.getCaseValue());
- DEBUG(dbgs() << "SimplifyCFG: switch case '"
- << I.getCaseValue() << "' is dead.\n");
+ SmallVector<ConstantInt *, 8> DeadCases;
+ for (auto &Case : SI->cases()) {
+ APInt CaseVal = Case.getCaseValue()->getValue();
+ if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
+ (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
+ DeadCases.push_back(Case.getCaseValue());
+ DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
}
}
- // If we can prove that the cases must cover all possible values, the
- // default destination becomes dead and we can remove it. If we know some
+ // If we can prove that the cases must cover all possible values, the
+ // default destination becomes dead and we can remove it. If we know some
// of the bits in the value, we can use that to more precisely compute the
// number of possible unique case values.
bool HasDefault =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const unsigned NumUnknownBits = Bits -
- (KnownZero.Or(KnownOne)).countPopulation();
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const unsigned NumUnknownBits =
+ Bits - (KnownZero.Or(KnownOne)).countPopulation();
assert(NumUnknownBits <= Bits);
if (HasDefault && DeadCases.empty() &&
- NumUnknownBits < 64 /* avoid overflow */ &&
+ NumUnknownBits < 64 /* avoid overflow */ &&
SI->getNumCases() == (1ULL << NumUnknownBits)) {
DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(),
- SI->getParent(), "");
+ BasicBlock *NewDefault =
+ SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
SI->setDefaultDest(&*NewDefault);
SplitBlock(&*NewDefault, &NewDefault->front());
auto *OldTI = NewDefault->getTerminator();
@@ -3849,12 +4044,12 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
}
// Remove dead cases from the switch.
- for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
- SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+ for (ConstantInt *DeadCase : DeadCases) {
+ SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase);
assert(Case != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
if (HasWeight) {
- std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+ std::swap(Weights[Case.getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
@@ -3865,8 +4060,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getParent()->getContext()).
- createBranchWeights(MDWeights));
+ MDBuilder(SI->getParent()->getContext())
+ .createBranchWeights(MDWeights));
}
return !DeadCases.empty();
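
Both dead-case filters reduce to mask-and-width arithmetic. The sketch below models them with fixed 64-bit integers instead of APInt; minSignedBits is an invented stand-in for APInt::getMinSignedBits, and it relies on arithmetic right shift of negative values, which mainstream compilers provide:

#include <cstdint>
#include <cstdio>

// Minimum number of signed bits needed to represent V.
static unsigned minSignedBits(int64_t V) {
  unsigned Bits = 64;
  while (Bits > 1) {
    unsigned Sh = 64 - (Bits - 1);
    int64_t Trunc = (int64_t)((uint64_t)V << Sh) >> Sh; // sign-extend low bits
    if (Trunc != V)
      break;
    --Bits;
  }
  return Bits;
}

// A case is dead if it sets a known-zero bit, misses a known-one bit, or
// needs more signed bits than the condition can carry.
static bool isDeadCase(int64_t CaseVal, uint64_t KnownZero, uint64_t KnownOne,
                       unsigned MaxSignificantBitsInCond) {
  if (((uint64_t)CaseVal & KnownZero) != 0)
    return true;
  if (((uint64_t)CaseVal & KnownOne) != KnownOne)
    return true;
  return minSignedBits(CaseVal) > MaxSignificantBitsInCond;
}

int main() {
  // Condition is, say, a sign-extended i8 with a known-zero low bit:
  // MaxSignificantBitsInCond == 8, KnownZero == 1.
  std::printf("%d\n", isDeadCase(300, 1, 0, 8)); // 1: needs 10 signed bits
  std::printf("%d\n", isDeadCase(5, 1, 0, 8));   // 1: sets the known-zero bit
  std::printf("%d\n", isDeadCase(4, 1, 0, 8));   // 0: still feasible
  return 0;
}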
@@ -3878,8 +4073,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
- BasicBlock *BB,
- int *PhiIndex) {
+ BasicBlock *BB, int *PhiIndex) {
if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
return nullptr; // BB must be empty to be a candidate for simplification.
if (!BB->getSinglePredecessor())
@@ -3897,7 +4091,8 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
assert(Idx >= 0 && "PHI has no entry for predecessor?");
Value *InValue = PHI->getIncomingValue(Idx);
- if (InValue != CaseValue) continue;
+ if (InValue != CaseValue)
+ continue;
*PhiIndex = Idx;
return PHI;
@@ -3911,17 +4106,19 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
/// blocks of the switch can be folded away.
/// Returns true if a change is made.
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
- typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E;
+ ++I) {
ConstantInt *CaseValue = I.getCaseValue();
BasicBlock *CaseDest = I.getCaseSuccessor();
int PhiIndex;
- PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
- &PhiIndex);
- if (!PHI) continue;
+ PHINode *PHI =
+ FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex);
+ if (!PHI)
+ continue;
ForwardingNodes[PHI].push_back(PhiIndex);
}
@@ -3929,11 +4126,13 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
bool Changed = false;
for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
- E = ForwardingNodes.end(); I != E; ++I) {
+ E = ForwardingNodes.end();
+ I != E; ++I) {
PHINode *Phi = I->first;
SmallVectorImpl<int> &Indexes = I->second;
- if (Indexes.size() < 2) continue;
+ if (Indexes.size() < 2)
+ continue;
for (size_t I = 0, E = Indexes.size(); I != E; ++I)
Phi->setIncomingValue(Indexes[I], SI->getCondition());
@@ -3954,17 +4153,16 @@ static bool ValidLookupTableConstant(Constant *C) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
return CE->isGEPWithNoNotionalOverIndexing();
- return isa<ConstantFP>(C) ||
- isa<ConstantInt>(C) ||
- isa<ConstantPointerNull>(C) ||
- isa<GlobalValue>(C) ||
- isa<UndefValue>(C);
+ return isa<ConstantFP>(C) || isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) || isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
}
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
-static Constant *LookupConstant(Value *V,
- const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+static Constant *
+LookupConstant(Value *V,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
if (Constant *C = dyn_cast<Constant>(V))
return C;
return ConstantPool.lookup(V);
@@ -4001,7 +4199,7 @@ ConstantFold(Instruction *I, const DataLayout &DL,
COps[1], DL);
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
+ return ConstantFoldInstOperands(I, COps, DL);
}
/// Try to determine the resulting constant values in phi nodes
@@ -4018,7 +4216,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// If CaseDest is empty except for some side-effect free instructions through
// which we can constant-propagate the CaseVal, continue to its successor.
- SmallDenseMap<Value*, Constant*> ConstantPool;
+ SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
++I) {
@@ -4068,8 +4266,8 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
if (Idx == -1)
continue;
- Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
- ConstantPool);
+ Constant *ConstVal =
+ LookupConstant(PHI->getIncomingValue(Idx), ConstantPool);
if (!ConstVal)
return false;
@@ -4086,16 +4284,16 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// Helper function used to add CaseVal to the list of cases that generate
// Result.
static void MapCaseToResult(ConstantInt *CaseVal,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *Result) {
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
for (auto &I : UniqueResults) {
if (I.first == Result) {
I.second.push_back(CaseVal);
return;
}
}
- UniqueResults.push_back(std::make_pair(Result,
- SmallVector<ConstantInt*, 4>(1, CaseVal)));
+ UniqueResults.push_back(
+ std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
}
// Helper function that initializes a map containing
@@ -4137,7 +4335,7 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
DefaultResult =
DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
if ((!DefaultResult &&
- !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
+ !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
return false;
return true;
@@ -4154,12 +4352,11 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
// default:
// return 4;
// }
-static Value *
-ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
- Constant *DefaultResult, Value *Condition,
- IRBuilder<> &Builder) {
+static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
assert(ResultVector.size() == 2 &&
- "We should have exactly two unique results at this point");
+ "We should have exactly two unique results at this point");
// If we are selecting between only two cases transform into a simple
// select or a two-way select if default is possible.
if (ResultVector[0].second.size() == 1 &&
@@ -4177,8 +4374,8 @@ ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
}
Value *const ValueCompare =
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
- return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue,
- "switch.select");
+ return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+ SelectValue, "switch.select");
}
return nullptr;
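
ConvertTwoCaseSwitch's output shape, seen at source level (a C++ illustration only; the case values and results are made up):

// Two unique results: one compare feeds a select, or two chained selects
// when a default result is also live.
int before(int X) {
  switch (X) {
  case 10: return 42;
  case 20: return 7;
  default: return 0;
  }
}
int after(int X) {
  // switch.selectcmp / switch.select in the emitted IR:
  int Sel = (X == 20) ? 7 : 0;
  return (X == 10) ? 42 : Sel;
}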
@@ -4227,9 +4424,8 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
assert(PHI != nullptr && "PHI for value select not found");
Builder.SetInsertPoint(SI);
- Value *SelectValue = ConvertTwoCaseSwitch(
- UniqueResults,
- DefaultResult, Cond, Builder);
+ Value *SelectValue =
+ ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
if (SelectValue) {
RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
return true;
@@ -4239,62 +4435,62 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
}
namespace {
- /// This class represents a lookup table that can be used to replace a switch.
- class SwitchLookupTable {
- public:
- /// Create a lookup table to use as a switch replacement with the contents
- /// of Values, using DefaultValue to fill any holes in the table.
- SwitchLookupTable(
- Module &M, uint64_t TableSize, ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL);
-
- /// Build instructions with Builder to retrieve the value at
- /// the position given by Index in the lookup table.
- Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
-
- /// Return true if a table with TableSize elements of
- /// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
- Type *ElementType);
-
- private:
- // Depending on the contents of the table, it can be represented in
- // different ways.
- enum {
- // For tables where each element contains the same value, we just have to
- // store that single value and return it for each lookup.
- SingleValueKind,
-
- // For tables where there is a linear relationship between table index
- // and values. We calculate the result with a simple multiplication
- // and addition instead of a table lookup.
- LinearMapKind,
-
- // For small tables with integer elements, we can pack them into a bitmap
- // that fits into a target-legal register. Values are retrieved by
- // shift and mask operations.
- BitMapKind,
-
- // The table is stored as an array of values. Values are retrieved by load
- // instructions from the table.
- ArrayKind
- } Kind;
-
- // For SingleValueKind, this is the single value.
- Constant *SingleValue;
-
- // For BitMapKind, this is the bitmap.
- ConstantInt *BitMap;
- IntegerType *BitMapElementTy;
-
- // For LinearMapKind, these are the constants used to derive the value.
- ConstantInt *LinearOffset;
- ConstantInt *LinearMultiplier;
-
- // For ArrayKind, this is the array.
- GlobalVariable *Array;
- };
+/// This class represents a lookup table that can be used to replace a switch.
+class SwitchLookupTable {
+public:
+ /// Create a lookup table to use as a switch replacement with the contents
+ /// of Values, using DefaultValue to fill any holes in the table.
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL);
+
+ /// Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
+ Type *ElementType);
+
+private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For tables where there is a linear relationship between table index
+ // and values. We calculate the result with a simple multiplication
+ // and addition instead of a table lookup.
+ LinearMapKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For LinearMapKind, these are the constants used to derive the value.
+ ConstantInt *LinearOffset;
+ ConstantInt *LinearMultiplier;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+};
}
SwitchLookupTable::SwitchLookupTable(
@@ -4312,14 +4508,13 @@ SwitchLookupTable::SwitchLookupTable(
Type *ValueType = Values.begin()->second->getType();
// Build up the table contents.
- SmallVector<Constant*, 64> TableContents(TableSize);
+ SmallVector<Constant *, 64> TableContents(TableSize);
for (size_t I = 0, E = Values.size(); I != E; ++I) {
ConstantInt *CaseVal = Values[I].first;
Constant *CaseRes = Values[I].second;
assert(CaseRes->getType() == ValueType);
- uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
- .getLimitedValue();
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
TableContents[Idx] = CaseRes;
if (CaseRes != SingleValue)
@@ -4407,65 +4602,62 @@ SwitchLookupTable::SwitchLookupTable(
ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
- Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
- GlobalVariable::PrivateLinkage,
- Initializer,
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+ GlobalVariable::PrivateLinkage, Initializer,
"switch.table");
- Array->setUnnamedAddr(true);
+ Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Kind = ArrayKind;
}
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
switch (Kind) {
- case SingleValueKind:
- return SingleValue;
- case LinearMapKind: {
- // Derive the result value from the input value.
- Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
- false, "switch.idx.cast");
- if (!LinearMultiplier->isOne())
- Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
- if (!LinearOffset->isZero())
- Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
- return Result;
- }
- case BitMapKind: {
- // Type of the bitmap (e.g. i59).
- IntegerType *MapTy = BitMap->getType();
-
- // Cast Index to the same type as the bitmap.
- // Note: The Index is <= the number of elements in the table, so
- // truncating it to the width of the bitmask is safe.
- Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
-
- // Multiply the shift amount by the element width.
- ShiftAmt = Builder.CreateMul(ShiftAmt,
- ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
- "switch.shiftamt");
-
- // Shift down.
- Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
- "switch.downshift");
- // Mask off.
- return Builder.CreateTrunc(DownShifted, BitMapElementTy,
- "switch.masked");
- }
- case ArrayKind: {
- // Make sure the table index will not overflow when treated as signed.
- IntegerType *IT = cast<IntegerType>(Index->getType());
- uint64_t TableSize = Array->getInitializer()->getType()
- ->getArrayNumElements();
- if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
- Index = Builder.CreateZExt(Index,
- IntegerType::get(IT->getContext(),
- IT->getBitWidth() + 1),
- "switch.tableidx.zext");
-
- Value *GEPIndices[] = { Builder.getInt32(0), Index };
- Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
- GEPIndices, "switch.gep");
- return Builder.CreateLoad(GEP, "switch.load");
- }
+ case SingleValueKind:
+ return SingleValue;
+ case LinearMapKind: {
+ // Derive the result value from the input value.
+ Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
+ false, "switch.idx.cast");
+ if (!LinearMultiplier->isOne())
+ Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
+ if (!LinearOffset->isZero())
+ Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
+ return Result;
+ }
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(
+ ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted =
+ Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
+ }
+ case ArrayKind: {
+ // Make sure the table index will not overflow when treated as signed.
+ IntegerType *IT = cast<IntegerType>(Index->getType());
+ uint64_t TableSize =
+ Array->getInitializer()->getType()->getArrayNumElements();
+ if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
+ Index = Builder.CreateZExt(
+ Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
+ "switch.tableidx.zext");
+
+ Value *GEPIndices[] = {Builder.getInt32(0), Index};
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
}
llvm_unreachable("Unknown lookup table kind!");
}
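
The BitMapKind path is pure shift-and-mask arithmetic. A self-contained model using plain uint64_t in place of APInt and IRBuilder; the comments mirror the value names emitted above:

#include <cassert>
#include <cstdint>

int main() {
  // Pack a 3-entry table of 8-bit values into one integer.
  const unsigned ElemBits = 8;
  const uint8_t Table[] = {0x11, 0x22, 0x33};
  uint64_t BitMap = 0;
  for (unsigned I = 0; I != 3; ++I)
    BitMap |= (uint64_t)Table[I] << (I * ElemBits);

  // BuildLookup: shift by Index * element width, then truncate.
  for (uint64_t Index = 0; Index != 3; ++Index) {
    uint64_t ShiftAmt = Index * ElemBits;      // switch.shiftamt
    uint64_t DownShifted = BitMap >> ShiftAmt; // switch.downshift
    uint8_t Masked = (uint8_t)DownShifted;     // switch.masked (trunc)
    assert(Masked == Table[Index]);
  }
  return 0;
}

Three 8-bit elements occupy only 24 bits, which is the quantity WouldFitInRegister compares against the target's legal integer width.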
@@ -4480,7 +4672,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
// are <= 15, we could try to narrow the type.
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
- if (TableSize >= UINT_MAX/IT->getBitWidth())
+ if (TableSize >= UINT_MAX / IT->getBitWidth())
return false;
return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
@@ -4503,8 +4695,9 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
// Saturate this flag to false.
- AllTablesFitInRegister = AllTablesFitInRegister &&
- SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
+ AllTablesFitInRegister =
+ AllTablesFitInRegister &&
+ SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
// If both flags saturate, we're done. NOTE: This *only* works with
// saturating flags, and all flags have to saturate first due to the
@@ -4547,9 +4740,10 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
/// ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
-static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
- BranchInst *RangeCheckBranch, Constant *DefaultValue,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values) {
+static void reuseTableCompare(
+ User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
+ Constant *DefaultValue,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
if (!CmpInst)
@@ -4578,13 +4772,13 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
// compare result.
for (auto ValuePair : Values) {
Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
- ValuePair.second, CmpOp1, true);
+ ValuePair.second, CmpOp1, true);
if (!CaseConst || CaseConst == DefaultConst)
return;
assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
"Expect true or false as compare result.");
}
-
+
// Check if the branch instruction dominates the phi node. It's a simple
// dominance check, but sufficient for our needs.
// Although this check is invariant in the calling loops, it's better to do it
@@ -4602,9 +4796,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
++NumTableCmpReuses;
} else {
// The compare yields the same result, just inverted. We can replace it.
- Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp,
- ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
- RangeCheckBranch);
+ Value *InvertedTableCmp = BinaryOperator::CreateXor(
+ RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
+ RangeCheckBranch);
CmpInst->replaceAllUsesWith(InvertedTableCmp);
++NumTableCmpReuses;
}
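
The inverted-compare replacement at the end is sound because xor with 1 is logical negation on an i1; a trivial standalone check:

#include <cassert>

int main() {
  // "inverted.cmp": xor i1 %cmp, 1 is exactly the logical not of %cmp.
  for (int B = 0; B != 2; ++B)
    assert((B ^ 1) == (B == 0 ? 1 : 0));
  return 0;
}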
@@ -4629,7 +4823,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
- // Ignore switches with less than three cases. Lookup tables will not make them
+  // Ignore switches with less than three cases. Lookup tables will not make
+  // them faster, so we don't analyze them.
if (SI->getNumCases() < 3)
return false;
@@ -4642,11 +4837,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
ConstantInt *MaxCaseVal = CI.getCaseValue();
BasicBlock *CommonDest = nullptr;
- typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
- SmallDenseMap<PHINode*, ResultListTy> ResultLists;
- SmallDenseMap<PHINode*, Constant*> DefaultResults;
- SmallDenseMap<PHINode*, Type*> ResultTypes;
- SmallVector<PHINode*, 4> PHIs;
+ typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy;
+ SmallDenseMap<PHINode *, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode *, Constant *> DefaultResults;
+ SmallDenseMap<PHINode *, Type *> ResultTypes;
+ SmallVector<PHINode *, 4> PHIs;
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
ConstantInt *CaseVal = CI.getCaseValue();
@@ -4656,7 +4851,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
MaxCaseVal = CaseVal;
// Resulting value at phi nodes for this case value.
- typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
Results, DL))
@@ -4684,14 +4879,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
- SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
&CommonDest, DefaultResultsList, DL);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
- if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+ if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
return false;
if (!DL.fitsInLegalInteger(TableSize))
return false;
@@ -4708,15 +4903,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
- BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
- "switch.lookup",
- CommonDest->getParent(),
- CommonDest);
+ BasicBlock *LookupBB = BasicBlock::Create(
+ Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
// Compute the table index value.
Builder.SetInsertPoint(SI);
- Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
- "switch.tableidx");
+ Value *TableIndex =
+ Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
// Compute the maximum table size representable by the integer type we are
// switching upon.
@@ -4739,9 +4932,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Note: We call removePredecessor later since we need to be able to get the
// PHI value for the default case in case we're using a bit mask.
} else {
- Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
- MinCaseVal->getType(), TableSize));
- RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ Value *Cmp = Builder.CreateICmpULT(
+ TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+ RangeCheckBranch =
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
}
// Populate the BB that does the lookups.
@@ -4753,10 +4947,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// and we create a new LookupBB.
BasicBlock *MaskBB = LookupBB;
MaskBB->setName("switch.hole_check");
- LookupBB = BasicBlock::Create(Mod.getContext(),
- "switch.lookup",
- CommonDest->getParent(),
- CommonDest);
+ LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
+ CommonDest->getParent(), CommonDest);
// Make the mask's bitwidth at least 8bit and a power-of-2 to avoid
// unnecessary illegal types.
@@ -4766,8 +4958,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Build bitmask; fill in a 1 bit for every case.
const ResultListTy &ResultList = ResultLists[PHIs[0]];
for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
- uint64_t Idx = (ResultList[I].first->getValue() -
- MinCaseVal->getValue()).getLimitedValue();
+ uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
+ .getLimitedValue();
MaskInt |= One << Idx;
}
ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
@@ -4776,13 +4968,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If this bit is 0 (meaning hole) jump to the default destination,
// else continue with table lookup.
IntegerType *MapTy = TableMask->getType();
- Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy,
- "switch.maskindex");
- Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex,
- "switch.shifted");
- Value *LoBit = Builder.CreateTrunc(Shifted,
- Type::getInt1Ty(Mod.getContext()),
- "switch.lobit");
+ Value *MaskIndex =
+ Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
+ Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
+ Value *LoBit = Builder.CreateTrunc(
+ Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
Builder.SetInsertPoint(LookupBB);
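
The hole mask sets one bit per populated table slot, so the membership test is a shift plus a truncation to one bit. A standalone model, assuming the mask fits in a uint64_t (the real code only requires it to fit a target-legal integer):

#include <cassert>
#include <cstdint>

int main() {
  // Cases {0, 2, 3} out of a 5-entry table: holes at indices 1 and 4.
  uint64_t MaskInt = 0, One = 1;
  for (uint64_t Idx : {0, 2, 3})
    MaskInt |= One << Idx; // build bitmask: 0b01101

  for (uint64_t TableIndex = 0; TableIndex != 5; ++TableIndex) {
    uint64_t Shifted = MaskInt >> TableIndex; // switch.shifted
    bool LoBit = Shifted & 1;                 // switch.lobit (trunc to i1)
    bool IsHole = (TableIndex == 1 || TableIndex == 4);
    assert(LoBit == !IsHole); // a 0 bit jumps to the default destination
  }
  return 0;
}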
@@ -4905,7 +5095,8 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
Dest->removePredecessor(BB);
IBI->removeDestination(i);
- --i; --e;
+ --i;
+ --e;
Changed = true;
}
}
@@ -4968,27 +5159,28 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
if (!LPad2 || !LPad2->isIdenticalTo(LPad))
continue;
- for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
+ }
BranchInst *BI2 = dyn_cast<BranchInst>(I);
if (!BI2 || !BI2->isIdenticalTo(BI))
continue;
- // We've found an identical block. Update our predeccessors to take that
+ // We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
SmallSet<BasicBlock *, 16> Preds;
Preds.insert(pred_begin(BB), pred_end(BB));
for (BasicBlock *Pred : Preds) {
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
- assert(II->getNormalDest() != BB &&
- II->getUnwindDest() == BB && "unexpected successor");
+ assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
+ "unexpected successor");
II->setUnwindDest(OtherPred);
}
// The debug info in OtherPred doesn't cover the merged control flow that
// used to go through BB. We need to delete it or update it.
- for (auto I = OtherPred->begin(), E = OtherPred->end();
- I != E;) {
- Instruction &Inst = *I; I++;
+ for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
+ Instruction &Inst = *I;
+ I++;
if (isa<DbgInfoIntrinsic>(Inst))
Inst.eraseFromParent();
}
@@ -5007,15 +5199,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
return false;
}
-bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
+ IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
if (SinkCommon && SinkThenElseCodeToEnd(BI))
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
+  // If LoopHeaders is provided, check whether the block is a loop header.
+  // (This is for early invocations before loop simplify and vectorization,
+  // to keep canonical loop forms for nested loops. These blocks can be
+  // eliminated when the pass is invoked later in the back-end.)
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ (!LoopHeaders || !LoopHeaders->count(BB)) &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
@@ -5034,9 +5233,9 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// See if we can merge an empty landing pad block with another which is
// equivalent.
if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
- for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
- if (I->isTerminator() &&
- TryToMergeLandingPad(LPad, BI, BB))
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
+ }
+ if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
return true;
}
@@ -5081,7 +5280,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- } else if (&*I == cast<Instruction>(BI->getCondition())){
+ } else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
@@ -5095,6 +5294,30 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
+ // If this basic block has a single dominating predecessor block and the
+ // dominating block's condition implies BI's condition, we know the direction
+ // of the BI branch.
+ if (BasicBlock *Dom = BB->getSinglePredecessor()) {
+ auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
+ if (PBI && PBI->isConditional() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
+ (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) {
+ bool CondIsFalse = PBI->getSuccessor(1) == BB;
+ Optional<bool> Implication = isImpliedCondition(
+ PBI->getCondition(), BI->getCondition(), DL, CondIsFalse);
+ if (Implication) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *CI = *Implication
+ ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(CI);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ }
+ }
+
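A hedged source-level example of the new fold (not taken from the patch): when a block's single predecessor branches on a condition that implies this block's condition, isImpliedCondition proves the direction and the branch collapses to a constant.
    // The inner test is dominated by an identical outer test, so the inner
    // branch becomes 'br true' and the dead arm is cleaned up afterwards.
    int clampPositive(int X) {
      if (X > 0) {
        if (X > 0)   // implied by the dominating branch
          return X;
        return -1;   // unreachable once the branch is folded
      }
      return 0;
    }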
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
@@ -5149,7 +5372,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PBI != BI && PBI->isConditional())
if (mergeConditionalStores(PBI, BI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
-
+
return false;
}
@@ -5162,7 +5385,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
if (I->use_empty())
return false;
- if (C->isNullValue()) {
+ if (C->isNullValue() || isa<UndefValue>(C)) {
// Only look at the first use to avoid hurting compile time with long uselists
User *Use = *I->user_begin();
@@ -5189,7 +5412,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
if (!SI->isVolatile())
- return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ return SI->getPointerAddressSpace() == 0 &&
+ SI->getPointerOperand() == I;
+
+ // A call to null is undefined.
+ if (auto CS = CallSite(Use))
+ return CS.getCalledValue() == I;
}
return false;
}
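A hedged illustration of the new case (illustrative code, not from the patch): an incoming edge that makes the called pointer null makes the call undefined, so removeUndefIntroducingPredecessor can rewrite that predecessor's branch.
    void dispatch(void (*Handler)(), bool UseDefault) {
      if (UseDefault)
        Handler = nullptr; // this edge feeds null into the call below
      Handler();           // a call through null is UB, so the edge can go
    }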
@@ -5210,8 +5438,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
if (BI->isUnconditional())
Builder.CreateUnreachable();
else
- Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
- BI->getSuccessor(0));
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
+ : BI->getSuccessor(0));
BI->eraseFromParent();
return true;
}
@@ -5229,8 +5457,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// Remove basic blocks that have no predecessors (except the entry block)...
// or that just have themselves as a predecessor. These are unreachable.
- if ((pred_empty(BB) &&
- BB != &BB->getParent()->getEntryBlock()) ||
+ if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
BB->getSinglePredecessor() == BB) {
DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
@@ -5265,25 +5492,33 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
if (BI->isUnconditional()) {
- if (SimplifyUncondBranch(BI, Builder)) return true;
+ if (SimplifyUncondBranch(BI, Builder))
+ return true;
} else {
- if (SimplifyCondBranch(BI, Builder)) return true;
+ if (SimplifyCondBranch(BI, Builder))
+ return true;
}
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (SimplifyReturn(RI, Builder)) return true;
+ if (SimplifyReturn(RI, Builder))
+ return true;
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
- if (SimplifyResume(RI, Builder)) return true;
+ if (SimplifyResume(RI, Builder))
+ return true;
} else if (CleanupReturnInst *RI =
- dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
- if (SimplifyCleanupReturn(RI)) return true;
+ dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (SimplifyCleanupReturn(RI))
+ return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (SimplifySwitch(SI, Builder)) return true;
+ if (SimplifySwitch(SI, Builder))
+ return true;
} else if (UnreachableInst *UI =
- dyn_cast<UnreachableInst>(BB->getTerminator())) {
- if (SimplifyUnreachable(UI)) return true;
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI))
+ return true;
} else if (IndirectBrInst *IBI =
- dyn_cast<IndirectBrInst>(BB->getTerminator())) {
- if (SimplifyIndirectBr(IBI)) return true;
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI))
+ return true;
}
return Changed;
@@ -5295,7 +5530,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, AssumptionCache *AC) {
+ unsigned BonusInstThreshold, AssumptionCache *AC,
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
- BonusInstThreshold, AC).run(BB);
+ BonusInstThreshold, AC, LoopHeaders)
+ .run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index ddd8775a8431d..6b1d3dc413305 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -25,7 +25,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -71,14 +70,12 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+ bool eliminateOverflowIntrinsic(CallInst *CI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
-
- Instruction *splitOverflowIntrinsic(Instruction *IVUser,
- const DominatorTree *DT);
};
}
@@ -183,9 +180,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
DeadInsts.emplace_back(ICmp);
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (isa<PHINode>(IVOperand) &&
- SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop,
- InvariantPredicate, InvariantLHS,
- InvariantRHS)) {
+ SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
+ InvariantLHS, InvariantRHS)) {
// Rewrite the comparison to a loop invariant comparison if it can be done
// cheaply, where cheaply means "we don't need to emit any new
@@ -201,9 +197,48 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
NewRHS =
ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx));
- for (Value *Incoming : cast<PHINode>(IVOperand)->incoming_values()) {
- if (NewLHS && NewRHS)
- break;
+ auto *PN = cast<PHINode>(IVOperand);
+ for (unsigned i = 0, e = PN->getNumIncomingValues();
+ i != e && (!NewLHS || !NewRHS);
+ ++i) {
+
+ // If this is a value incoming from the backedge, then it cannot be a loop
+ // invariant value (since we know that IVOperand is an induction variable).
+ if (L->contains(PN->getIncomingBlock(i)))
+ continue;
+
+ // NB! The following assert does not fundamentally have to be true, but
+ // it is true today given how SCEV analyzes induction variables.
+ // Specifically, today SCEV will *not* recognize %iv as an induction
+ // variable in the following case:
+ //
+ // define void @f(i32 %k) {
+ // entry:
+ // br i1 undef, label %r, label %l
+ //
+ // l:
+ // %k.inc.l = add i32 %k, 1
+ // br label %loop
+ //
+ // r:
+ // %k.inc.r = add i32 %k, 1
+ // br label %loop
+ //
+ // loop:
+ // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ]
+ // %iv.inc = add i32 %iv, 1
+ // br label %loop
+ // }
+ //
+ // but if it starts to, at some point, then the assertion below will have
+ // to be changed to a runtime check.
+
+ Value *Incoming = PN->getIncomingValue(i);
+
+#ifndef NDEBUG
+ if (auto *I = dyn_cast<Instruction>(Incoming))
+ assert(DT->dominates(I, ICmp) &&
+ "Should be a unique loop dominating value!");
+#endif
const SCEV *IncomingS = SE->getSCEV(Incoming);
@@ -280,6 +315,108 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DeadInsts.emplace_back(Rem);
}
+bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
+ auto *F = CI->getCalledFunction();
+ if (!F)
+ return false;
+
+ typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
+ const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+ typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
+ const SCEV *, Type *);
+
+ OperationFunctionTy Operation;
+ ExtensionFunctionTy Extension;
+
+ Instruction::BinaryOps RawOp;
+
+ // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
+ // have nuw.
+ bool NoSignedOverflow;
+
+ switch (F->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::sadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::uadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = false;
+ break;
+
+ case Intrinsic::ssub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::usub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = false;
+ break;
+ }
+
+ const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
+ const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+
+ auto *NarrowTy = cast<IntegerType>(LHS->getType());
+ auto *WideTy =
+ IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
+
+ const SCEV *A =
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+ const SCEV *B =
+ (SE->*Operation)((SE->*Extension)(LHS, WideTy),
+ (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+
+ if (A != B)
+ return false;
+
+ // Proved no overflow, nuke the overflow check and, if possible, the overflow
+ // intrinsic as well.
+
+ BinaryOperator *NewResult = BinaryOperator::Create(
+ RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+
+ if (NoSignedOverflow)
+ NewResult->setHasNoSignedWrap(true);
+ else
+ NewResult->setHasNoUnsignedWrap(true);
+
+ SmallVector<ExtractValueInst *, 4> ToDelete;
+
+ for (auto *U : CI->users()) {
+ if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ if (EVI->getIndices()[0] == 1)
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+ else {
+ assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+ EVI->replaceAllUsesWith(NewResult);
+ }
+ ToDelete.push_back(EVI);
+ }
+ }
+
+ for (auto *EVI : ToDelete)
+ EVI->eraseFromParent();
+
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return true;
+}
+
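A minimal, self-contained analogue of the widening test above (an illustration of the idea, not the SCEV-based implementation): an i8 addition cannot overflow exactly when sign-extending first and adding agrees with adding first and then sign-extending.
    #include <cassert>
    #include <cstdint>

    bool addIsNSW(int8_t A, int8_t B) {
      int16_t WideOp = int16_t(A) + int16_t(B); // op(ext(a), ext(b))
      int8_t Narrow =
          int8_t(uint8_t(uint8_t(A) + uint8_t(B))); // wrapping i8 add
      return WideOp == int16_t(Narrow);             // ext(op(a, b))
    }

    int main() {
      assert(addIsNSW(100, 27));  // 127 still fits in i8: 'A == B' holds
      assert(!addIsNSW(100, 28)); // 128 wraps to -128: overflow detected
    }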
/// Eliminate an operation that consumes a simple IV and has no observable
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
/// but UseInst may not be.
@@ -297,6 +434,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
}
}
+ if (auto *CI = dyn_cast<CallInst>(UseInst))
+ if (eliminateOverflowIntrinsic(CI))
+ return true;
+
if (eliminateIdentitySCEV(UseInst, IVOperand))
return true;
@@ -408,69 +549,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
return Changed;
}
-/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
-/// analysis and optimization.
-///
-/// \return A new value representing the non-overflowing add if possible,
-/// otherwise return the original value.
-Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
- const DominatorTree *DT) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
- if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
- return IVUser;
-
- // Find a branch guarded by the overflow check.
- BranchInst *Branch = nullptr;
- Instruction *AddVal = nullptr;
- for (User *U : II->users()) {
- if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) {
- if (ExtractInst->getNumIndices() != 1)
- continue;
- if (ExtractInst->getIndices()[0] == 0)
- AddVal = ExtractInst;
- else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
- Branch = dyn_cast<BranchInst>(ExtractInst->user_back());
- }
- }
- if (!AddVal || !Branch)
- return IVUser;
-
- BasicBlock *ContinueBB = Branch->getSuccessor(1);
- if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
- return IVUser;
-
- // Check if all users of the add are provably NSW.
- bool AllNSW = true;
- for (Use &U : AddVal->uses()) {
- if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) {
- BasicBlock *UseBB = UseInst->getParent();
- if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
- UseBB = PHI->getIncomingBlock(U);
- if (!DT->dominates(ContinueBB, UseBB)) {
- AllNSW = false;
- break;
- }
- }
- }
- if (!AllNSW)
- return IVUser;
-
- // Go for it...
- IRBuilder<> Builder(IVUser);
- Instruction *AddInst = dyn_cast<Instruction>(
- Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
-
- // The caller expects the new add to have the same form as the intrinsic. The
- // IV operand position must be the same.
- assert((AddInst->getOpcode() == Instruction::Add &&
- AddInst->getOperand(0) == II->getOperand(0)) &&
- "Bad add instruction created from overflow intrinsic.");
-
- AddVal->replaceAllUsesWith(AddInst);
- DeadInsts.emplace_back(AddVal);
- return AddInst;
-}
-
/// Add all uses of Def to the current IV's worklist.
static void pushIVUsers(
Instruction *Def,
@@ -545,12 +623,6 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
// Bypass back edges to avoid extra work.
if (UseInst == CurrIV) continue;
- if (V && V->shouldSplitOverflowInstrinsics()) {
- UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
- if (!UseInst)
- continue;
- }
-
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
assert(N <= Simplified.size() && "runaway iteration");
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index d5377f9a4c1ff..df299067094f6 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -14,7 +14,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SimplifyInstructions.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -27,12 +27,60 @@
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "instsimplify"
STATISTIC(NumSimplified, "Number of redundant instructions removed");
+static bool runImpl(Function &F, const DominatorTree *DT,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
+ // Here be subtlety: the iterator must be incremented before the loop
+ // body (not sure why), so a range-for loop won't work here.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = &*BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstruction can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted
+ // re-iterate instead.
+ BI = BB->begin(); BE = BB->end();
+ Changed |= res;
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+}
+
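A hedged sketch of the two-set worklist pattern runImpl uses (generic names; the real code walks blocks depth-first and handles deletion): the first round visits everything, and later rounds only revisit users of values that were actually replaced.
    #include <set>
    #include <utility>

    template <typename T, typename Fn>
    void runToFixpoint(const std::set<T> &AllItems, Fn SimplifyOne) {
      std::set<T> S1, S2, *ToSimplify = &S1, *Next = &S2;
      do {
        for (const T &I : AllItems) {
          if (!ToSimplify->empty() && !ToSimplify->count(I))
            continue;            // later rounds skip unaffected items
          SimplifyOne(I, *Next); // queues users of I for the next round
        }
        std::swap(ToSimplify, Next); // what was queued becomes the worklist
        Next->clear();
      } while (!ToSimplify->empty());
    }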
namespace {
struct InstSimplifier : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -48,56 +96,17 @@ namespace {
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
- bool Changed = false;
-
- do {
- for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
- // Here be subtlety: the iterator must be incremented before the loop
- // body (not sure why), so a range-for loop won't work here.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = &*BI++;
- // The first time through the loop ToSimplify is empty and we try to
- // simplify all instructions. On later iterations ToSimplify is not
- // empty and we only bother simplifying instructions that are in it.
- if (!ToSimplify->empty() && !ToSimplify->count(I))
- continue;
- // Don't waste time simplifying unused instructions.
- if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
- // Mark all uses for resimplification next time round the loop.
- for (User *U : I->users())
- Next->insert(cast<Instruction>(U));
- I->replaceAllUsesWith(V);
- ++NumSimplified;
- Changed = true;
- }
- bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- if (res) {
- // RecursivelyDeleteTriviallyDeadInstruction can remove
- // more than one instruction, so simply incrementing the
- // iterator does not work. When instructions get deleted
- // re-iterate instead.
- BI = BB->begin(); BE = BB->end();
- Changed |= res;
- }
- }
-
- // Place the list of instructions to simplify on the next loop iteration
- // into ToSimplify.
- std::swap(ToSimplify, Next);
- Next->clear();
- } while (!ToSimplify->empty());
-
- return Changed;
+ return runImpl(F, DT, TLI, AC);
}
};
}
@@ -115,3 +124,15 @@ char &llvm::InstructionSimplifierID = InstSimplifier::ID;
FunctionPass *llvm::createInstructionSimplifierPass() {
return new InstSimplifier();
}
+
+PreservedAnalyses InstSimplifierPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ bool Changed = runImpl(F, DT, &TLI, &AC);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ // FIXME: This should also 'preserve the CFG'.
+ return PreservedAnalyses::none();
+}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 2f3c31128cf03..c2986951e48fd 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -29,7 +29,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -104,101 +103,11 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
-/// \brief Check whether we can use unsafe floating point math for
-/// the function passed as input.
-static bool canUseUnsafeFPMath(Function *F) {
-
- // FIXME: For finer-grain optimization, we need intrinsics to have the same
- // fast-math flag decorations that are applied to FP instructions. For now,
- // we have to rely on the function-level unsafe-fp-math attribute to do this
- // optimization because there's no other way to express that the call can be
- // relaxed.
- if (F->hasFnAttribute("unsafe-fp-math")) {
- Attribute Attr = F->getFnAttribute("unsafe-fp-math");
- if (Attr.getValueAsString() == "true")
- return true;
- }
- return false;
-}
-
-/// \brief Returns whether \p F matches the signature expected for the
-/// string/memory copying library function \p Func.
-/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
-/// Their fortified (_chk) counterparts are also accepted.
-static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) {
- const DataLayout &DL = F->getParent()->getDataLayout();
- FunctionType *FT = F->getFunctionType();
- LLVMContext &Context = F->getContext();
- Type *PCharTy = Type::getInt8PtrTy(Context);
- Type *SizeTTy = DL.getIntPtrType(Context);
- unsigned NumParams = FT->getNumParams();
-
- // All string libfuncs return the same type as the first parameter.
- if (FT->getReturnType() != FT->getParamType(0))
- return false;
-
- switch (Func) {
- default:
- llvm_unreachable("Can't check signature for non-string-copy libfunc.");
- case LibFunc::stpncpy_chk:
- case LibFunc::strncpy_chk:
- --NumParams; // fallthrough
- case LibFunc::stpncpy:
- case LibFunc::strncpy: {
- if (NumParams != 3 || FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != PCharTy || !FT->getParamType(2)->isIntegerTy())
- return false;
- break;
- }
- case LibFunc::strcpy_chk:
- case LibFunc::stpcpy_chk:
- --NumParams; // fallthrough
- case LibFunc::stpcpy:
- case LibFunc::strcpy: {
- if (NumParams != 2 || FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != PCharTy)
- return false;
- break;
- }
- case LibFunc::memmove_chk:
- case LibFunc::memcpy_chk:
- --NumParams; // fallthrough
- case LibFunc::memmove:
- case LibFunc::memcpy: {
- if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != SizeTTy)
- return false;
- break;
- }
- case LibFunc::memset_chk:
- --NumParams; // fallthrough
- case LibFunc::memset: {
- if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != SizeTTy)
- return false;
- break;
- }
- }
- // If this is a fortified libcall, the last parameter is a size_t.
- if (NumParams == FT->getNumParams() - 1)
- return FT->getParamType(FT->getNumParams() - 1) == SizeTTy;
- return true;
-}
-
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Verify the "strcat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType())
- return nullptr;
-
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -220,7 +129,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
+ Value *DstLen = emitStrLen(Dst, B, DL, TLI);
if (!DstLen)
return nullptr;
@@ -238,15 +147,6 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
}
Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Verify the "strncat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType() ||
- !FT->getParamType(2)->isIntegerTy())
- return nullptr;
-
// Extract some information from the instruction.
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -281,13 +181,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Verify the "strchr" function prototype.
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return nullptr;
-
Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
@@ -298,7 +192,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
B, DL, TLI);
}
@@ -308,7 +202,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI),
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
"strchr");
return nullptr;
}
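A hedged before/after for the strchr(p, 0) rewrite in this hunk: both forms return a pointer to the terminating NUL.
    #include <string.h>
    char *findEndBefore(char *P) { return strchr(P, 0); }
    char *findEndAfter(char *P) { return P + strlen(P); }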
@@ -326,14 +220,6 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Verify the "strrchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return nullptr;
-
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
@@ -345,7 +231,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TLI);
+ return emitStrChr(SrcStr, '\0', B, TLI);
return nullptr;
}
@@ -361,14 +247,6 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Verify the "strcmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return nullptr;
-
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -392,7 +270,7 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
- return EmitMemCmp(Str1P, Str2P,
+ return emitMemCmp(Str1P, Str2P,
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
std::min(Len1, Len2)),
B, DL, TLI);
@@ -402,15 +280,6 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Verify the "strncmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return nullptr;
-
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -426,7 +295,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
return ConstantInt::get(CI->getType(), 0);
if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+ return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -450,11 +319,6 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy))
- return nullptr;
-
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
@@ -473,12 +337,9 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
- return nullptr;
-
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
@@ -500,9 +361,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
- return nullptr;
-
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
Value *LenOp = CI->getArgOperand(2);
@@ -540,18 +398,63 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
if (uint64_t Len = GetStringLength(Src))
return ConstantInt::get(CI->getType(), Len - 1);
+ // If s is a constant pointer pointing to a string literal, we can fold
+ // strlen(s + x) to strlen(s) - x, when x is known to be in the range
+ // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
+ // We only try to simplify strlen when the pointer s points to an array
+ // of i8. Otherwise, we would need to scale the offset x before doing the
+ // subtraction. That would make the optimization more complex, and it is
+ // rarely worthwhile because calling strlen on a pointer of any other type
+ // is very uncommon.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
+ if (!isGEPBasedOnPointerToString(GEP))
+ return nullptr;
+
+ StringRef Str;
+ if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) {
+ size_t NullTermIdx = Str.find('\0');
+
+ // If the string does not have '\0', leave it to strlen to compute
+ // its length.
+ if (NullTermIdx == StringRef::npos)
+ return nullptr;
+
+ Value *Offset = GEP->getOperand(2);
+ unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
+ nullptr);
+ KnownZero.flipAllBits();
+ size_t ArrSize =
+ cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
+
+ // KnownZero's bits are flipped, so zeros in KnownZero now represent
+ // bits known to be zeros in Offset, and ones in KnownZero represent
+ // bits unknown in Offset. Therefore, Offset is known to be in range
+ // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+ // unsigned-less-than NullTermIdx.
+ //
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // is a pointer to an object whose memory extent is NullTermIdx+1.
+ if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+ (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
+ NullTermIdx == ArrSize - 1))
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ Offset);
+ }
+
+ return nullptr;
+ }
+
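A hedged example of the new strlen(s + x) fold (illustrative names, not from the patch): once known bits bound the offset within the literal, the call folds to a subtraction.
    #include <string.h>
    static const char Msg[] = "hello"; // '\0' at index 5, so strlen is 5

    size_t tailLen(unsigned X) {
      X &= 3;                 // known bits prove X is in [0, 3], within [0, 5]
      return strlen(Msg + X); // folds to 5 - X, no libcall left
    }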
// strlen(x?"foo":"bars") --> x ? 3 : 4
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
uint64_t LenTrue = GetStringLength(SI->getTrueValue());
@@ -576,13 +479,6 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- FT->getReturnType() != FT->getParamType(0))
- return nullptr;
-
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -604,19 +500,12 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
// strpbrk(s, "a") -> strchr(s, 'a')
if (HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
+ return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
return nullptr;
}
Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy())
- return nullptr;
-
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
@@ -628,13 +517,6 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -656,13 +538,6 @@ Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
@@ -681,29 +556,22 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
// strcspn(s, "") -> strlen(s)
if (HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
+ return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
return nullptr;
}
Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isPointerTy())
- return nullptr;
-
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
+ Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return nullptr;
- Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
StrLen, B, DL, TLI);
if (!StrNCmp)
return nullptr;
@@ -734,28 +602,20 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Value *Result = castToCStr(CI->getArgOperand(0), B);
Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
+ Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
return nullptr;
}
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy(32) ||
- !FT->getParamType(2)->isIntegerTy() ||
- !FT->getReturnType()->isPointerTy())
- return nullptr;
-
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
@@ -834,13 +694,6 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy(32))
- return nullptr;
-
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
if (LHS == RHS) // memcmp(s,s,x) -> 0
@@ -857,9 +710,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
// memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
if (Len == 1) {
- Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"),
CI->getType(), "lhsv");
- Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"),
CI->getType(), "rhsv");
return B.CreateSub(LHSV, RHSV, "chardiff");
}
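A hedged illustration of the length-1 memcmp fold shown above: the call becomes two byte loads, zero-extension, and a subtraction.
    #include <string.h>
    int cmpOneBefore(const char *A, const char *B) { return memcmp(A, B, 1); }
    int cmpOneAfter(const char *A, const char *B) {
      return (unsigned char)*A - (unsigned char)*B;
    }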
@@ -909,11 +762,6 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy))
- return nullptr;
-
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
@@ -921,23 +769,81 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove))
- return nullptr;
-
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
-Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
+// TODO: Does this belong in BuildLibCalls or should all of those similar
+// functions be moved here?
+static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs,
+ IRBuilder<> &B, const TargetLibraryInfo &TLI) {
+ LibFunc::Func Func;
+ if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IntegerType *PtrType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
+ Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
+ PtrType, PtrType, nullptr);
+ CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc");
+
+ if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset))
+/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
+static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
+ const TargetLibraryInfo &TLI) {
+ // This has to be a memset of zeros (bzero).
+ auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
+ if (!FillValue || FillValue->getZExtValue() != 0)
return nullptr;
+ // TODO: We should handle the case where the malloc has more than one use.
+ // This is necessary to optimize common patterns such as when the result of
+ // the malloc is checked against null or when a memset intrinsic is used in
+ // place of a memset library call.
+ auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
+ if (!Malloc || !Malloc->hasOneUse())
+ return nullptr;
+
+ // Is the inner call really malloc()?
+ Function *InnerCallee = Malloc->getCalledFunction();
+ LibFunc::Func Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc::malloc)
+ return nullptr;
+
+ // The memset must cover the same number of bytes that are malloc'd.
+ if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
+ return nullptr;
+
+ // Replace the malloc with a calloc. We need the data layout to know what the
+ // actual size of a 'size_t' parameter is.
+ B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
+ const DataLayout &DL = Malloc->getModule()->getDataLayout();
+ IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
+ Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
+ Malloc->getArgOperand(0), Malloc->getAttributes(),
+ B, TLI);
+ if (!Calloc)
+ return nullptr;
+
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+
+ return Calloc;
+}
+
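A hedged before/after for foldMallocMemset (illustrative only): the fill value must be zero, the memset length must be the malloc'd size, and the memset must be the malloc's single use, as in the chained form below.
    #include <stdlib.h>
    #include <string.h>

    void *makeZeroedBefore(size_t N) {
      return memset(malloc(N), 0, N); // malloc's one use is the memset
    }

    void *makeZeroedAfter(size_t N) {
      return calloc(1, N); // what the simplifier emits instead
    }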
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+ if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
+ return Calloc;
+
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
@@ -970,34 +876,12 @@ static Value *valueHasFloatPrecision(Value *Val) {
return nullptr;
}
-/// Any floating-point library function that we're trying to simplify will have
-/// a signature of the form: fptype foo(fptype param1, fptype param2, ...).
-/// CheckDoubleTy indicates that 'fptype' must be 'double'.
-static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams,
- bool CheckDoubleTy) {
- FunctionType *FT = F->getFunctionType();
- if (FT->getNumParams() != NumParams)
- return false;
-
- // The return type must match what we're looking for.
- Type *RetTy = FT->getReturnType();
- if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy())
- return false;
-
- // Each parameter must match the return type, and therefore, match every other
- // parameter too.
- for (const Type *ParamTy : FT->params())
- if (ParamTy != RetTy)
- return false;
-
- return true;
-}
-
/// Shrink double -> float for unary functions like 'floor'.
static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
bool CheckRetType) {
Function *Callee = CI->getCalledFunction();
- if (!matchesFPLibFunctionSignature(Callee, 1, true))
+ // We know this libcall has a valid prototype, but we don't know which.
+ if (!CI->getType()->isDoubleTy())
return nullptr;
if (CheckRetType) {
@@ -1026,7 +910,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
V = B.CreateCall(F, V);
} else {
// The call is a library call rather than an intrinsic.
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
}
return B.CreateFPExt(V, B.getDoubleTy());
@@ -1035,7 +919,8 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
/// Shrink double -> float for binary functions like 'fmin/fmax'.
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!matchesFPLibFunctionSignature(Callee, 2, true))
+ // We know this libcall has a valid prototype, but we don't know which.
+ if (!CI->getType()->isDoubleTy())
return nullptr;
// If this is something like 'fmin((double)floatval1, (double)floatval2)',
@@ -1054,7 +939,7 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
// TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
- Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+ Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
Callee->getAttributes());
return B.CreateFPExt(V, B.getDoubleTy());
}
@@ -1066,13 +951,6 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
// cos(-x) -> cos(x)
Value *Op1 = CI->getArgOperand(0);
if (BinaryOperator::isFNeg(Op1)) {
@@ -1114,14 +992,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
// pow(1.0, x) -> 1.0
@@ -1131,19 +1001,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (Op1C->isExactlyValue(2.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
LibFunc::exp2l))
- return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B,
+ return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B,
Callee->getAttributes());
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
LibFunc::exp10l))
- return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
Callee->getAttributes());
}
- // FIXME: Use instruction-level FMF.
- bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
-
// pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
// We enable these only with fast-math. Besides rounding differences, the
@@ -1159,7 +1026,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
- return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
+ return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
OpCCallee->getAttributes());
}
}
@@ -1181,7 +1048,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
}
@@ -1191,9 +1058,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
- Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
Value *FAbs =
- EmitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes());
+ emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes());
Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
return Sel;
@@ -1207,7 +1074,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
// In -ffast-math, generate repeated fmul instead of generating pow(x, n).
- if (UnsafeFPMath) {
+ if (CI->hasUnsafeAlgebra()) {
APFloat V = abs(Op2C->getValueAPF());
// We limit to a max of 7 fmul(s). Thus max exponent is 32.
// This transformation applies to integer exponents only.
@@ -1224,6 +1091,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// So we first convert V to something which could be converted to double.
bool ignored;
V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+
+ // TODO: Should the new instructions propagate the 'fast' flag of the pow()?
Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
// For negative exponents simply compute the reciprocal.
if (Op2C->isNegative())
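A hedged expansion of what this fast-math path produces for pow(x, 11) (the real code builds an InnerChain cache of squarings): a short square-and-multiply chain instead of a libcall.
    double pow11(double X) {
      double X2 = X * X;   // x^2
      double X4 = X2 * X2; // x^4
      double X8 = X4 * X4; // x^8
      return X8 * X2 * X;  // x^11 in 5 fmuls; exponents up to 32 need at most 7
    }
    // For a negative constant exponent, the reciprocal of the chain is taken.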
@@ -1236,19 +1105,11 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- Function *Caller = CI->getParent()->getParent();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
@@ -1273,11 +1134,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
if (!Op->getType()->isFloatTy())
One = ConstantExpr::getFPExtend(One, Op->getType());
- Module *M = Caller->getParent();
- Value *Callee =
+ Module *M = CI->getModule();
+ Value *NewCallee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
Op->getType(), B.getInt32Ty(), nullptr);
- CallInst *CI = B.CreateCall(Callee, {One, LdExpArg});
+ CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ if (const Function *F = dyn_cast<Function>(NewCallee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
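A hedged before/after for the exp2(sitofp(x)) rewrite (libm semantics): ldexp inserts the exponent directly instead of evaluating exp2.
    #include <math.h>
    double scaleBefore(int X) { return exp2((double)X); }
    double scaleAfter(int X) { return ldexp(1.0, X); } // same value exactly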
@@ -1294,12 +1155,6 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
- FunctionType *FT = Callee->getFunctionType();
- // Make sure this has 1 argument of FP type which matches the result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
// Fold fabs(x * x) -> x * x; any squared FP value must already be positive.
@@ -1311,21 +1166,14 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
// function, do that first.
- Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
- // Make sure this has 2 arguments of FP type which match the result type.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return nullptr;
-
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
if (CI->hasUnsafeAlgebra()) {
@@ -1360,13 +1208,6 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
StringRef Name = Callee->getName();
if (UnsafeFPShrink && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- FunctionType *FT = Callee->getFunctionType();
-
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
if (!CI->hasUnsafeAlgebra())
return Ret;
@@ -1392,7 +1233,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
return B.CreateFMul(OpC->getArgOperand(1),
- EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
+ emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
Callee->getAttributes()), "mul");
// log(exp2(y)) -> y*log(2)
@@ -1400,7 +1241,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
TLI->has(Func) && Func == LibFunc::exp2)
return B.CreateFMul(
OpC->getArgOperand(0),
- EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
+ emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
Callee->getName(), B, Callee->getAttributes()),
"logmul");
return Ret;
@@ -1408,21 +1249,11 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
-
Value *Ret = nullptr;
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- // FIXME: Refactor - this check is repeated all over this file and even in the
- // preceding call to shrink double -> float.
-
- // Make sure this has 1 argument of FP type, which matches the result type.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
if (!CI->hasUnsafeAlgebra())
return Ret;
@@ -1489,13 +1320,6 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
StringRef Name = Callee->getName();
if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- FunctionType *FT = Callee->getFunctionType();
-
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
@@ -1519,13 +1343,65 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
-static bool isTrigLibCall(CallInst *CI);
+static bool isTrigLibCall(CallInst *CI) {
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ // We already checked the prototype.
+ return CI->hasFnAttr(Attribute::NoUnwind) &&
+ CI->hasFnAttr(Attribute::ReadNone);
+}
+
static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
- Value *&SinCos);
+ Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
-Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospif_stret";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy, nullptr);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy, nullptr);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+}
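
At the source level, the rewrite replaces separate sinpi(x) and cospi(x) calls with a single call that yields both results; the code above only picks the ABI-appropriate return type for Darwin's __sincospi_stret. A minimal C++ sketch of the shape of the transform (the sincospi helper here is hypothetical, standing in for the stret function):

  #include <cmath>
  #include <cstdio>

  struct SinCos { double Sin, Cos; };

  // Hypothetical combined helper: computes sin(pi*x) and cos(pi*x) together.
  static SinCos sincospi(double X) {
    const double Pi = 3.14159265358979323846;
    return { std::sin(Pi * X), std::cos(Pi * X) };
  }

  int main() {
    SinCos SC = sincospi(0.25);             // one call...
    std::printf("%f %f\n", SC.Sin, SC.Cos); // ...feeds both former call sites
  }
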
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
  // Make sure the prototype is as expected; otherwise the rest of the
  // function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
@@ -1541,9 +1417,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
// Look for all compatible sinpi, cospi and sincospi calls with the same
// argument. If there are enough (in some sense) we can make the
// substitution.
+ Function *F = CI->getFunction();
for (User *U : Arg->users())
- classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls,
- SinCosCalls);
+ classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
@@ -1559,35 +1435,23 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
-static bool isTrigLibCall(CallInst *CI) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
-
- // We can only hope to do anything useful if we can ignore things like errno
- // and floating-point exceptions.
- bool AttributesSafe =
- CI->hasFnAttr(Attribute::NoUnwind) && CI->hasFnAttr(Attribute::ReadNone);
-
- // Other than that we need float(float) or double(double)
- return AttributesSafe && FT->getNumParams() == 1 &&
- FT->getReturnType() == FT->getParamType(0) &&
- (FT->getParamType(0)->isFloatTy() ||
- FT->getParamType(0)->isDoubleTy());
-}
-
-void
-LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
- SmallVectorImpl<CallInst *> &SinCalls,
- SmallVectorImpl<CallInst *> &CosCalls,
- SmallVectorImpl<CallInst *> &SinCosCalls) {
+void LibCallSimplifier::classifyArgUse(
+ Value *Val, Function *F, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
CallInst *CI = dyn_cast<CallInst>(Val);
if (!CI)
return;
+ // Don't consider calls in other functions.
+ if (CI->getFunction() != F)
+ return;
+
Function *Callee = CI->getCalledFunction();
LibFunc::Func Func;
- if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) ||
+ if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
!isTrigLibCall(CI))
return;
@@ -1614,69 +1478,12 @@ void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls,
replaceAllUsesWith(C, Res);
}
-void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
- bool UseFloat, Value *&Sin, Value *&Cos, Value *&SinCos) {
- Type *ArgTy = Arg->getType();
- Type *ResTy;
- StringRef Name;
-
- Triple T(OrigCallee->getParent()->getTargetTriple());
- if (UseFloat) {
- Name = "__sincospif_stret";
-
- assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
- // x86_64 can't use {float, float} since that would be returned in both
- // xmm0 and xmm1, which isn't what a real struct would do.
- ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
- : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
- } else {
- Name = "__sincospi_stret";
- ResTy = StructType::get(ArgTy, ArgTy, nullptr);
- }
-
- Module *M = OrigCallee->getParent();
- Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy, nullptr);
-
- if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
- // If the argument is an instruction, it must dominate all uses so put our
- // sincos call there.
- B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
- } else {
- // Otherwise (e.g. for a constant) the beginning of the function is as
- // good a place as any.
- BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
- B.SetInsertPoint(&EntryBB, EntryBB.begin());
- }
-
- SinCos = B.CreateCall(Callee, Arg, "sincospi");
-
- if (SinCos->getType()->isStructTy()) {
- Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
- Cos = B.CreateExtractValue(SinCos, 1, "cospi");
- } else {
- Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
- "sinpi");
- Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
- "cospi");
- }
-}
-
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
-static bool checkIntUnaryReturnAndParam(Function *Callee) {
- FunctionType *FT = Callee->getFunctionType();
- return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) &&
- FT->getParamType(0)->isIntegerTy();
-}
-
Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkIntUnaryReturnAndParam(Callee))
- return nullptr;
Value *Op = CI->getArgOperand(0);
// Constant fold.
@@ -1700,13 +1507,6 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
}
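
For reference, ffs() has a branch-free formulation via count-trailing-zeros, which is what makes the call attractive to fold away; a standalone sketch of the identity, assuming GCC/Clang's __builtin_ctz:

  #include <cassert>

  // ffs(x) == x ? cttz(x) + 1 : 0 (ffs is 1-based; 0 means "no bit set").
  static int ffsViaCttz(unsigned X) { return X ? __builtin_ctz(X) + 1 : 0; }

  int main() {
    assert(ffsViaCttz(0) == 0);
    assert(ffsViaCttz(1) == 1);
    assert(ffsViaCttz(12) == 3);           // 0b1100: lowest set bit is bit 2
    assert(ffsViaCttz(0x80000000u) == 32);
  }
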
Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- FT->getParamType(0) != FT->getReturnType())
- return nullptr;
-
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
Value *Pos =
@@ -1716,9 +1516,6 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
}
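
The select expansion reads "x >s -1 ? x : -x" in IR; in C++ it is the familiar conditional, with the caveat that negating INT_MIN is undefined in C++ while the IR form simply wraps:

  #include <cassert>
  #include <initializer_list>

  static int absSelect(int X) { return X > -1 ? X : -X; } // abs(x) expansion

  int main() {
    for (int X : {0, 1, -1, 42, -42})
      assert(absSelect(X) == (X < 0 ? -X : X));
  }
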
Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
- if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
- return nullptr;
-
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
@@ -1727,9 +1524,6 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
- if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
- return nullptr;
-
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
@@ -1737,9 +1531,6 @@ Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
- if (!checkIntUnaryReturnAndParam(CI->getCalledFunction()))
- return nullptr;
-
// toascii(c) -> c & 0x7f
return B.CreateAnd(CI->getArgOperand(0),
ConstantInt::get(CI->getType(), 0x7F));
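
All three character-classification folds are exact bit tricks: isdigit becomes an unsigned range check, isascii a single unsigned compare, and toascii a mask. A quick verification over the ASCII range (isascii and toascii assume a POSIX <ctype.h>):

  #include <cassert>
  #include <cctype>

  int main() {
    for (int C = 0; C < 128; ++C) {
      assert(!!isdigit(C) == ((unsigned)(C - '0') < 10)); // (c-'0') <u 10
      assert(!!isascii(C) == ((unsigned)C < 128));        // c <u 128
      assert(toascii(C) == (C & 0x7F));                   // c & 0x7f
    }
  }
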
@@ -1753,6 +1544,7 @@ static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
int StreamArg) {
+ Function *Callee = CI->getCalledFunction();
// Error reporting calls should be cold, mark them as such.
// This applies even to non-builtin calls: it is only a hint and applies to
// functions that the frontend might not understand as builtins.
@@ -1761,8 +1553,6 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
// Improving Static Branch Prediction in a Compiler
// Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
// Proceedings of PACT'98, Oct. 1998, IEEE
- Function *Callee = CI->getCalledFunction();
-
if (!CI->hasFnAttr(Attribute::Cold) &&
isReportingError(Callee, CI, StreamArg)) {
CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
@@ -1808,12 +1598,18 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
if (!CI->use_empty())
return nullptr;
- // printf("x") -> putchar('x'), even for '%'.
- if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI);
- if (CI->use_empty() || !Res)
- return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
+ // printf("x") -> putchar('x'), even for "%" and "%%".
+ if (FormatStr.size() == 1 || FormatStr == "%%")
+ return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
+
+ // printf("%s", "a") --> putchar('a')
+ if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
+ StringRef ChrStr;
+ if (!getConstantStringInfo(CI->getOperand(1), ChrStr))
+ return nullptr;
+ if (ChrStr.size() != 1)
+ return nullptr;
+ return emitPutChar(B.getInt32(ChrStr[0]), B, TLI);
}
// printf("foo\n") --> puts("foo")
@@ -1823,40 +1619,26 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, TLI);
- return (CI->use_empty() || !NewCI)
- ? NewCI
- : ConstantInt::get(CI->getType(), FormatStr.size() + 1);
+ return emitPutS(GV, B, TLI);
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI);
-
- if (CI->use_empty() || !Res)
- return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ CI->getArgOperand(1)->getType()->isIntegerTy())
+ return emitPutChar(CI->getArgOperand(1), B, TLI);
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, TLI);
- }
+ CI->getArgOperand(1)->getType()->isPointerTy())
+ return emitPutS(CI->getArgOperand(1), B, TLI);
return nullptr;
}
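
These rewrites are only sound because the guard near the top of the function bails out whenever printf's return value (the character count) is used; putchar and puts return different values. The before/after shapes as plain C++, equivalent in output but not in return value:

  #include <cstdio>

  int main() {
    std::printf("%%");      // simplifies to: std::putchar('%');
    std::printf("x");       // simplifies to: std::putchar('x');
    std::printf("%s", "a"); // simplifies to: std::putchar('a');
    std::printf("foo\n");   // simplifies to: std::puts("foo");
  }
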
Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Require one fixed pointer argument and an integer/void result.
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy()))
- return nullptr;
-
if (Value *V = optimizePrintFString(CI, B)) {
return V;
}
@@ -1909,7 +1691,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ Value *Ptr = castToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
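
The %c case stores the (truncated) character followed by a nul terminator directly, avoiding the call entirely. A sketch of the equivalence the transform relies on:

  #include <cassert>
  #include <cstdio>
  #include <cstring>

  int main() {
    char A[8], B[8];
    std::sprintf(A, "%c", 'q'); // libcall form
    B[0] = 'q';                 // simplified form: store the char...
    B[1] = '\0';                // ...then the terminating nul
    assert(std::strcmp(A, B) == 0);
  }
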
@@ -1922,7 +1704,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
+ Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
return nullptr;
Value *IncLen =
@@ -1937,13 +1719,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Require two fixed pointer arguments and an integer result.
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
if (Value *V = optimizeSPrintFString(CI, B)) {
return V;
}
@@ -1982,7 +1758,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return nullptr; // We found a format specifier.
- return EmitFWrite(
+ return emitFWrite(
CI->getArgOperand(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
CI->getArgOperand(0), B, DL, TLI);
@@ -1999,27 +1775,21 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
return nullptr;
}
Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // Require two fixed paramters as pointers and integer result.
FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
if (Value *V = optimizeFPrintFString(CI, B)) {
return V;
}
@@ -2041,16 +1811,6 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
optimizeErrorReporting(CI, B, 3);
- Function *Callee = CI->getCalledFunction();
- // Require a pointer, an integer, an integer, a pointer, returning integer.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- !FT->getParamType(2)->isIntegerTy() ||
- !FT->getParamType(3)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
-
// Get the element size and count.
ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
@@ -2065,8 +1825,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// If this is writing one byte, turn it into fputc.
  // This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI);
+ Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -2076,12 +1836,13 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
optimizeErrorReporting(CI, B, 1);
- Function *Callee = CI->getCalledFunction();
+  // Don't rewrite fputs to fwrite when optimising for size, because fwrite
+  // takes more arguments and thus requires extra MOVs.
+ if (CI->getParent()->getParent()->optForSize())
+ return nullptr;
- // Require two pointers. Also, we can't optimize if return value is used.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() || !CI->use_empty())
+ // We can't optimize if return value is used.
+ if (!CI->use_empty())
return nullptr;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
@@ -2090,20 +1851,13 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
// Known to have no uses (see above).
- return EmitFWrite(
+ return emitFWrite(
CI->getArgOperand(0),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
CI->getArgOperand(1), B, DL, TLI);
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy()))
- return nullptr;
-
// Check for a constant string.
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
@@ -2111,7 +1865,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI);
+ Value *Res = emitPutChar(B.getInt32('\n'), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -2133,10 +1887,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
IRBuilder<> &Builder) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
-
// Check for string/memory library functions.
- if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
// Make sure we never change the calling convention.
assert((ignoreCallingConv(Func) ||
CI->getCallingConv() == llvm::CallingConv::C) &&
@@ -2208,10 +1960,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
- // Command-line parameter overrides function attribute.
+ // Command-line parameter overrides instruction attribute.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (canUseUnsafeFPMath(Callee))
+ else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra())
UnsafeFPShrink = true;
// First, check for intrinsics.
@@ -2229,6 +1981,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
+ // TODO: Use foldMallocMemset() with memset intrinsic.
default:
return nullptr;
}
@@ -2253,7 +2006,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
}
// Then check for known library functions.
- if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
// We never change the calling convention.
if (!ignoreCallingConv(Func) && !isCallingConvC)
return nullptr;
@@ -2457,11 +2210,6 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
- return nullptr;
-
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
@@ -2472,11 +2220,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
- return nullptr;
-
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
@@ -2487,10 +2230,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
-
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
- return nullptr;
+ // TODO: Try foldMallocMemset() here.
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
@@ -2506,16 +2246,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
const DataLayout &DL = CI->getModule()->getDataLayout();
-
- if (!checkStringCopyLibFuncSignature(Callee, Func))
- return nullptr;
-
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
*ObjSize = CI->getArgOperand(2);
// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
- Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
@@ -2525,7 +2261,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFortifiedCallFoldable(CI, 2, 1, true))
- return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+ return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
if (OnlyLowerUnknownSize)
return nullptr;
@@ -2537,7 +2273,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Type *SizeTTy = DL.getIntPtrType(CI->getContext());
Value *LenV = ConstantInt::get(SizeTTy, Len);
- Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
// a __memcpy_chk, we still need to return the correct end pointer.
if (Ret && Func == LibFunc::stpcpy_chk)
@@ -2550,11 +2286,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
-
- if (!checkStringCopyLibFuncSignature(Callee, Func))
- return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
return Ret;
}
@@ -2577,15 +2310,15 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
- // First, check that this is a known library functions.
- if (!TLI->getLibFunc(FuncName, Func))
+  // First, check that this is a known library function and that the prototype
+  // is correct.
+ if (!TLI->getLibFunc(*Callee, Func))
return nullptr;
// We never change the calling convention.
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp
index ad6b782caf8b5..e9a368f4faa4e 100644
--- a/lib/Transforms/Utils/SplitModule.cpp
+++ b/lib/Transforms/Utils/SplitModule.cpp
@@ -13,19 +13,184 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "split-module"
+
#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include <queue>
using namespace llvm;
+namespace {
+typedef EquivalenceClasses<const GlobalValue *> ClusterMapType;
+typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType;
+typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType;
+}
+
+static void addNonConstUser(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const User *U) {
+ assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
+
+ if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ const GlobalValue *F = I->getParent()->getParent();
+ GVtoClusterMap.unionSets(GV, F);
+ } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
+ isa<GlobalVariable>(U)) {
+ GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
+ } else {
+ llvm_unreachable("Underimplemented use case");
+ }
+}
+
+// Adds all GlobalValue users of V to the same cluster as GV.
+static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const Value *V) {
+ for (auto *U : V->users()) {
+ SmallVector<const User *, 4> Worklist;
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ const User *UU = Worklist.pop_back_val();
+ // For each constant that is not a GV (a pure const) recurse.
+ if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
+ Worklist.append(UU->user_begin(), UU->user_end());
+ continue;
+ }
+ addNonConstUser(GVtoClusterMap, GV, UU);
+ }
+ }
+}
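
The worklist exists to look through constant expressions: a local global may be referenced via arbitrarily nested ConstantExprs, and only the eventual instruction or global user determines which cluster it must join. A generic, self-contained sketch of the same peeling pattern (names are illustrative, not the pass's API):

  #include <cstdio>
  #include <vector>

  struct Node { bool Transparent; std::vector<Node *> Users; int Id; };

  static void visitConcreteUsers(Node *Root) {
    std::vector<Node *> Worklist(Root->Users.begin(), Root->Users.end());
    while (!Worklist.empty()) {
      Node *U = Worklist.back();
      Worklist.pop_back();
      if (U->Transparent) { // pure constant: recurse into its users instead
        Worklist.insert(Worklist.end(), U->Users.begin(), U->Users.end());
        continue;
      }
      std::printf("concrete user %d\n", U->Id); // here: unionSets(GV, user)
    }
  }

  int main() {
    Node F{false, {}, 1};   // a function using the constant
    Node CE{true, {&F}, 2}; // a transparent constant-expression wrapper
    Node GV{false, {&CE}, 3};
    visitConcreteUsers(&GV);
  }
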
+
+// Find partitions for the module such that no locals need to be
+// globalized.
+// Try to pack those partitions evenly into N files, since this roughly
+// equals thread balancing for the backend codegen step.
+static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
+ unsigned N) {
+ // At this point module should have the proper mix of globals and locals.
+ // As we attempt to partition this module, we must not change any
+ // locals to globals.
+  DEBUG(dbgs() << "Partition module with (" << M->size() << ") functions\n");
+ ClusterMapType GVtoClusterMap;
+ ComdatMembersType ComdatMembers;
+
+ auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
+ if (GV.isDeclaration())
+ return;
+
+ if (!GV.hasName())
+ GV.setName("__llvmsplit_unnamed");
+
+ // Comdat groups must not be partitioned. For comdat groups that contain
+ // locals, record all their members here so we can keep them together.
+ // Comdat groups that only contain external globals are already handled by
+ // the MD5-based partitioning.
+ if (const Comdat *C = GV.getComdat()) {
+ auto &Member = ComdatMembers[C];
+ if (Member)
+ GVtoClusterMap.unionSets(Member, &GV);
+ else
+ Member = &GV;
+ }
+
+ // For aliases we should not separate them from their aliasees regardless
+ // of linkage.
+ if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
+ if (const GlobalObject *Base = GIS->getBaseObject())
+ GVtoClusterMap.unionSets(&GV, Base);
+ }
+
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ for (const BasicBlock &BB : *F) {
+ BlockAddress *BA = BlockAddress::lookup(&BB);
+ if (!BA || !BA->isConstantUsed())
+ continue;
+ addAllGlobalValueUsers(GVtoClusterMap, F, BA);
+ }
+ }
+
+ if (GV.hasLocalLinkage())
+ addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
+ };
+
+ std::for_each(M->begin(), M->end(), recordGVSet);
+ std::for_each(M->global_begin(), M->global_end(), recordGVSet);
+ std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
+
+  // Assign all GVs to merged clusters while balancing the number of objects
+  // in each.
+ auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
+ const std::pair<unsigned, unsigned> &b) {
+ if (a.second || b.second)
+ return a.second > b.second;
+ else
+ return a.first > b.first;
+ };
+
+ std::priority_queue<std::pair<unsigned, unsigned>,
+ std::vector<std::pair<unsigned, unsigned>>,
+ decltype(CompareClusters)>
+      BalancingQueue(CompareClusters);
+  // Pre-populate the priority queue with N empty slots.
+  for (unsigned i = 0; i < N; ++i)
+    BalancingQueue.push(std::make_pair(i, 0));
+
+ typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+ SmallVector<SortType, 64> Sets;
+ SmallPtrSet<const GlobalValue *, 32> Visited;
+
+  // To guarantee determinism, we have to sort the sets by size.
+  // When sizes are the same, use the leader's name as a tie-breaker.
+ for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
+ E = GVtoClusterMap.end(); I != E; ++I)
+ if (I->isLeader())
+ Sets.push_back(
+ std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
+ GVtoClusterMap.member_end()), I));
+
+ std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
+
+ for (auto &I : Sets) {
+    unsigned CurrentClusterID = BalancingQueue.top().first;
+    unsigned CurrentClusterSize = BalancingQueue.top().second;
+    BalancingQueue.pop();
+
+ DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
+ << ") ----> " << I.second->getData()->getName() << "\n");
+
+ for (ClusterMapType::member_iterator MI =
+ GVtoClusterMap.findLeader(I.second);
+ MI != GVtoClusterMap.member_end(); ++MI) {
+ if (!Visited.insert(*MI).second)
+ continue;
+ DEBUG(dbgs() << "----> " << (*MI)->getName()
+ << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+ ClusterIDMap[*MI] = CurrentClusterID;
+ CurrentClusterSize++;
+ }
+ // Add this set size to the number of entries in this cluster.
+    BalancingQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
+ }
+}
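
The balancing strategy is a classic greedy bin-pack: clusters are sorted largest-first and each is handed to the currently least-loaded of the N partitions via a min-heap. A self-contained C++ rendering of just that loop:

  #include <algorithm>
  #include <cstdio>
  #include <queue>
  #include <vector>

  int main() {
    const unsigned N = 3;
    std::vector<unsigned> ClusterSizes = {9, 7, 5, 4, 4, 2, 1};
    std::sort(ClusterSizes.rbegin(), ClusterSizes.rend()); // largest first

    using Slot = std::pair<unsigned, unsigned>; // (partition id, size so far)
    auto Cmp = [](const Slot &A, const Slot &B) { return A.second > B.second; };
    std::priority_queue<Slot, std::vector<Slot>, decltype(Cmp)> Queue(Cmp);
    for (unsigned I = 0; I < N; ++I)
      Queue.push({I, 0}); // N empty slots

    for (unsigned Size : ClusterSizes) {
      Slot S = Queue.top(); // least-loaded partition wins the next cluster
      Queue.pop();
      std::printf("cluster(%u) -> partition %u\n", Size, S.first);
      S.second += Size;
      Queue.push(S);
    }
  }
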
+
static void externalize(GlobalValue *GV) {
if (GV->hasLocalLinkage()) {
GV->setLinkage(GlobalValue::ExternalLinkage);
@@ -40,8 +205,8 @@ static void externalize(GlobalValue *GV) {
// Returns whether GV should be in partition (0-based) I of N.
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
- if (auto GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalObject *Base = GA->getBaseObject())
+ if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
+ if (const GlobalObject *Base = GIS->getBaseObject())
GV = Base;
StringRef Name;
@@ -62,21 +227,34 @@ static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
void llvm::SplitModule(
std::unique_ptr<Module> M, unsigned N,
- std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
- for (Function &F : *M)
- externalize(&F);
- for (GlobalVariable &GV : M->globals())
- externalize(&GV);
- for (GlobalAlias &GA : M->aliases())
- externalize(&GA);
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
+ bool PreserveLocals) {
+ if (!PreserveLocals) {
+ for (Function &F : *M)
+ externalize(&F);
+ for (GlobalVariable &GV : M->globals())
+ externalize(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ externalize(&GA);
+ for (GlobalIFunc &GIF : M->ifuncs())
+ externalize(&GIF);
+ }
+
+  // This performs splitting without the need for externalization, since
+  // externalization may not always be possible.
+ ClusterIDMapType ClusterIDMap;
+ findPartitions(M.get(), ClusterIDMap, N);
// FIXME: We should be able to reuse M as the last partition instead of
// cloning it.
- for (unsigned I = 0; I != N; ++I) {
+ for (unsigned I = 0; I < N; ++I) {
ValueToValueMapTy VMap;
std::unique_ptr<Module> MPart(
- CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
- return isInPartition(GV, I, N);
+ CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
+ if (ClusterIDMap.count(GV))
+ return (ClusterIDMap[GV] == I);
+ else
+ return isInPartition(GV, I, N);
}));
if (I != 0)
MPart->setModuleInlineAsm("");
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 1d1f602b041dc..7523ca527b680 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -58,7 +58,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "symbol-rewriter"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LegacyPassManager.h"
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 6b1d1dae5f01b..9385f825523cd 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -66,9 +66,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
"UnifiedUnreachableBlock", &F);
new UnreachableInst(F.getContext(), UnreachableBlock);
- for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
- E = UnreachableBlocks.end(); I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : UnreachableBlocks) {
BB->getInstList().pop_back(); // Remove the unreachable inst.
BranchInst::Create(UnreachableBlock, BB);
}
@@ -104,10 +102,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Loop over all of the blocks, replacing the return instruction with an
// unconditional branch.
//
- for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
- E = ReturningBlocks.end(); I != E; ++I) {
- BasicBlock *BB = *I;
-
+ for (BasicBlock *BB : ReturningBlocks) {
// Add an incoming element to the PHI node for every return instruction that
// is merging into this new block...
if (PN)
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index ed4f45c6a615d..8f85f19efe38b 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -21,17 +21,20 @@ using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
- initializeAddDiscriminatorsPass(Registry);
+ initializeAddDiscriminatorsLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeInstNamerPass(Registry);
- initializeLCSSAPass(Registry);
+ initializeLCSSAWrapperPassPass(Registry);
initializeLoopSimplifyPass(Registry);
initializeLowerInvokePass(Registry);
initializeLowerSwitchPass(Registry);
- initializePromotePassPass(Registry);
+ initializeNameAnonFunctionPass(Registry);
+ initializePromoteLegacyPassPass(Registry);
initializeUnifyFunctionExitNodesPass(Registry);
initializeInstSimplifierPass(Registry);
initializeMetaRenamerPass(Registry);
+ initializeMemorySSAWrapperPassPass(Registry);
+ initializeMemorySSAPrinterLegacyPassPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index f47ddb9f064f5..2eade8cbe8efd 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,9 +13,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
@@ -25,25 +29,326 @@ using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
void ValueMaterializer::anchor() {}
-void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) {
-}
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- ValueToValueMapTy::iterator I = VM.find(V);
-
+namespace {
+
+/// A basic block used in a BlockAddress whose function body is not yet
+/// materialized.
+struct DelayedBasicBlock {
+ BasicBlock *OldBB;
+ std::unique_ptr<BasicBlock> TempBB;
+
+ // Explicit move for MSVC.
+ DelayedBasicBlock(DelayedBasicBlock &&X)
+ : OldBB(std::move(X.OldBB)), TempBB(std::move(X.TempBB)) {}
+ DelayedBasicBlock &operator=(DelayedBasicBlock &&X) {
+ OldBB = std::move(X.OldBB);
+ TempBB = std::move(X.TempBB);
+ return *this;
+ }
+
+ DelayedBasicBlock(const BlockAddress &Old)
+ : OldBB(Old.getBasicBlock()),
+ TempBB(BasicBlock::Create(Old.getContext())) {}
+};
+
+struct WorklistEntry {
+ enum EntryKind {
+ MapGlobalInit,
+ MapAppendingVar,
+ MapGlobalAliasee,
+ RemapFunction
+ };
+ struct GVInitTy {
+ GlobalVariable *GV;
+ Constant *Init;
+ };
+ struct AppendingGVTy {
+ GlobalVariable *GV;
+ Constant *InitPrefix;
+ };
+ struct GlobalAliaseeTy {
+ GlobalAlias *GA;
+ Constant *Aliasee;
+ };
+
+ unsigned Kind : 2;
+ unsigned MCID : 29;
+ unsigned AppendingGVIsOldCtorDtor : 1;
+ unsigned AppendingGVNumNewMembers;
+ union {
+ GVInitTy GVInit;
+ AppendingGVTy AppendingGV;
+ GlobalAliaseeTy GlobalAliasee;
+ Function *RemapF;
+ } Data;
+};
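
The first three fields are bitfields sized 2 + 29 + 1, so the entry kind, the mapping-context id, and the ctor/dtor flag share one 32-bit word, leaving the member count as the only full-width field. A small illustration of the packing, assuming the usual layout where adjacent bitfields share a storage unit:

  #include <cstdint>
  #include <cstdio>

  struct Packed {
    uint32_t Kind : 2;  // four EntryKind values fit in 2 bits
    uint32_t MCID : 29; // mapping-context index
    uint32_t Flag : 1;  // AppendingGVIsOldCtorDtor analogue
  };

  int main() {
    static_assert(sizeof(Packed) == sizeof(uint32_t), "fields share one word");
    Packed P{3, (1u << 29) - 1, 1}; // maximum value of each field
    std::printf("%u %u %u\n", (unsigned)P.Kind, (unsigned)P.MCID,
                (unsigned)P.Flag);
  }
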
+
+struct MappingContext {
+ ValueToValueMapTy *VM;
+ ValueMaterializer *Materializer = nullptr;
+
+ /// Construct a MappingContext with a value map and materializer.
+ explicit MappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr)
+ : VM(&VM), Materializer(Materializer) {}
+};
+
+class MDNodeMapper;
+class Mapper {
+ friend class MDNodeMapper;
+
+#ifndef NDEBUG
+ DenseSet<GlobalValue *> AlreadyScheduled;
+#endif
+
+ RemapFlags Flags;
+ ValueMapTypeRemapper *TypeMapper;
+ unsigned CurrentMCID = 0;
+ SmallVector<MappingContext, 2> MCs;
+ SmallVector<WorklistEntry, 4> Worklist;
+ SmallVector<DelayedBasicBlock, 1> DelayedBBs;
+ SmallVector<Constant *, 16> AppendingInits;
+
+public:
+ Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
+ : Flags(Flags), TypeMapper(TypeMapper),
+ MCs(1, MappingContext(VM, Materializer)) {}
+
+ /// ValueMapper should explicitly call \a flush() before destruction.
+ ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
+
+ bool hasWorkToDo() const { return !Worklist.empty(); }
+
+ unsigned
+ registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr) {
+ MCs.push_back(MappingContext(VM, Materializer));
+ return MCs.size() - 1;
+ }
+
+ void addFlags(RemapFlags Flags);
+
+ Value *mapValue(const Value *V);
+ void remapInstruction(Instruction *I);
+ void remapFunction(Function &F);
+
+ Constant *mapConstant(const Constant *C) {
+ return cast_or_null<Constant>(mapValue(C));
+ }
+
+ /// Map metadata.
+ ///
+ /// Find the mapping for MD. Guarantees that the return will be resolved
+ /// (not an MDNode, or MDNode::isResolved() returns true).
+ Metadata *mapMetadata(const Metadata *MD);
+
+ void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID);
+ void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID);
+ void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID);
+ void scheduleRemapFunction(Function &F, unsigned MCID);
+
+ void flush();
+
+private:
+ void mapGlobalInitializer(GlobalVariable &GV, Constant &Init);
+ void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers);
+ void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
+ void remapFunction(Function &F, ValueToValueMapTy &VM);
+
+ ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
+ ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
+
+ Value *mapBlockAddress(const BlockAddress &BA);
+
+ /// Map metadata that doesn't require visiting operands.
+ Optional<Metadata *> mapSimpleMetadata(const Metadata *MD);
+
+ Metadata *mapToMetadata(const Metadata *Key, Metadata *Val);
+ Metadata *mapToSelf(const Metadata *MD);
+};
+
+class MDNodeMapper {
+ Mapper &M;
+
+ /// Data about a node in \a UniquedGraph.
+ struct Data {
+ bool HasChanged = false;
+ unsigned ID = ~0u;
+ TempMDNode Placeholder;
+
+ Data() {}
+ Data(Data &&X)
+ : HasChanged(std::move(X.HasChanged)), ID(std::move(X.ID)),
+ Placeholder(std::move(X.Placeholder)) {}
+ Data &operator=(Data &&X) {
+ HasChanged = std::move(X.HasChanged);
+ ID = std::move(X.ID);
+ Placeholder = std::move(X.Placeholder);
+ return *this;
+ }
+ };
+
+ /// A graph of uniqued nodes.
+ struct UniquedGraph {
+ SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties.
+ SmallVector<MDNode *, 16> POT; // Post-order traversal.
+
+ /// Propagate changed operands through the post-order traversal.
+ ///
+ /// Iteratively update \a Data::HasChanged for each node based on \a
+ /// Data::HasChanged of its operands, until fixed point.
+ void propagateChanges();
+
+ /// Get a forward reference to a node to use as an operand.
+ Metadata &getFwdReference(MDNode &Op);
+ };
+
+ /// Worklist of distinct nodes whose operands need to be remapped.
+ SmallVector<MDNode *, 16> DistinctWorklist;
+
+ // Storage for a UniquedGraph.
+ SmallDenseMap<const Metadata *, Data, 32> InfoStorage;
+ SmallVector<MDNode *, 16> POTStorage;
+
+public:
+ MDNodeMapper(Mapper &M) : M(M) {}
+
+ /// Map a metadata node (and its transitive operands).
+ ///
+ /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative
+ /// algorithm handles distinct nodes and uniqued node subgraphs using
+ /// different strategies.
+ ///
+ /// Distinct nodes are immediately mapped and added to \a DistinctWorklist
+ /// using \a mapDistinctNode(). Their mapping can always be computed
+ /// immediately without visiting operands, even if their operands change.
+ ///
+ /// The mapping for uniqued nodes depends on whether their operands change.
+ /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of
+  /// a node to calculate uniqued node mappings in bulk. Distinct leaves are
+ /// added to \a DistinctWorklist with \a mapDistinctNode().
+ ///
+ /// After mapping \c N itself, this function remaps the operands of the
+ /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c
+ /// N has been mapped.
+ Metadata *map(const MDNode &N);
+
+private:
+ /// Map a top-level uniqued node and the uniqued subgraph underneath it.
+ ///
+ /// This builds up a post-order traversal of the (unmapped) uniqued subgraph
+ /// underneath \c FirstN and calculates the nodes' mapping. Each node uses
+  /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its
+  /// operands use the identity mapping.
+ ///
+ /// The algorithm works as follows:
+ ///
+ /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and
+ /// save the post-order traversal in the given \a UniquedGraph, tracking
+ /// nodes' operands change.
+ ///
+ /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands
+ /// through the \a UniquedGraph until fixed point, following the rule
+  ///    that if a node changes, any node that references it must also change.
+ ///
+ /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes
+ /// (referencing new operands) where necessary.
+ Metadata *mapTopLevelUniquedNode(const MDNode &FirstN);
+
+ /// Try to map the operand of an \a MDNode.
+ ///
+ /// If \c Op is already mapped, return the mapping. If it's not an \a
+ /// MDNode, compute and return the mapping. If it's a distinct \a MDNode,
+ /// return the result of \a mapDistinctNode().
+ ///
+ /// \return None if \c Op is an unmapped uniqued \a MDNode.
+ /// \post getMappedOp(Op) only returns None if this returns None.
+ Optional<Metadata *> tryToMapOperand(const Metadata *Op);
+
+ /// Map a distinct node.
+ ///
+ /// Return the mapping for the distinct node \c N, saving the result in \a
+ /// DistinctWorklist for later remapping.
+ ///
+ /// \pre \c N is not yet mapped.
+ /// \pre \c N.isDistinct().
+ MDNode *mapDistinctNode(const MDNode &N);
+
+ /// Get a previously mapped node.
+ Optional<Metadata *> getMappedOp(const Metadata *Op) const;
+
+ /// Create a post-order traversal of an unmapped uniqued node subgraph.
+ ///
+ /// This traverses the metadata graph deeply enough to map \c FirstN. It
+  /// uses \a tryToMapOperand() (via \a Mapper::mapSimpleMetadata()), so any
+ /// metadata that has already been mapped will not be part of the POT.
+ ///
+ /// Each node that has a changed operand from outside the graph (e.g., a
+ /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata)
+ /// is marked with \a Data::HasChanged.
+ ///
+ /// \return \c true if any nodes in \c G have \a Data::HasChanged.
+ /// \post \c G.POT is a post-order traversal ending with \c FirstN.
+  /// \post \a Data::HasChanged in \c G.Info indicates whether any node needs
+ /// to change because of operands outside the graph.
+ bool createPOT(UniquedGraph &G, const MDNode &FirstN);
+
+ /// Visit the operands of a uniqued node in the POT.
+ ///
+ /// Visit the operands in the range from \c I to \c E, returning the first
+ /// uniqued node we find that isn't yet in \c G. \c I is always advanced to
+ /// where to continue the loop through the operands.
+ ///
+ /// This sets \c HasChanged if any of the visited operands change.
+ MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged);
+
+ /// Map all the nodes in the given uniqued graph.
+ ///
+ /// This visits all the nodes in \c G in post-order, using the identity
+ /// mapping or creating a new node depending on \a Data::HasChanged.
+ ///
+ /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of
+ /// their operands outside of \c G.
+ /// \pre \a Data::HasChanged is true for a node in \c G iff any of its
+ /// operands have changed.
+ /// \post \a getMappedOp() returns the mapped node for every node in \c G.
+ void mapNodesInPOT(UniquedGraph &G);
+
+ /// Remap a node's operands using the given functor.
+ ///
+ /// Iterate through the operands of \c N and update them in place using \c
+ /// mapOperand.
+ ///
+ /// \pre N.isDistinct() or N.isTemporary().
+ template <class OperandMapper>
+ void remapOperands(MDNode &N, OperandMapper mapOperand);
+};
+
+} // end namespace
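
The uniqued-subgraph strategy above hinges on UniquedGraph::propagateChanges(): a node must be rebuilt iff anything it transitively references was rebuilt, computed by iterating over the post-order list until a fixed point (cycles through temporary forward references are why a single pass is not enough). A self-contained sketch of that propagation over an index-based graph:

  #include <cassert>
  #include <vector>

  struct Node { std::vector<unsigned> Ops; bool HasChanged = false; };

  static void propagateChanges(std::vector<Node> &POT) {
    bool AnyChange;
    do { // iterate to a fixed point
      AnyChange = false;
      for (Node &N : POT)
        if (!N.HasChanged)
          for (unsigned Op : N.Ops)
            if (POT[Op].HasChanged) {
              N.HasChanged = true;
              AnyChange = true;
              break;
            }
    } while (AnyChange);
  }

  int main() {
    std::vector<Node> POT(3); // node 1 references 0; node 2 references 1
    POT[1].Ops = {0};
    POT[2].Ops = {1};
    POT[0].HasChanged = true; // an external operand of node 0 was remapped
    propagateChanges(POT);
    assert(POT[1].HasChanged && POT[2].HasChanged);
  }
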
+
+Value *Mapper::mapValue(const Value *V) {
+ ValueToValueMapTy::iterator I = getVM().find(V);
+
// If the value already exists in the map, use it.
- if (I != VM.end() && I->second) return I->second;
-
+ if (I != getVM().end()) {
+ assert(I->second && "Unexpected null mapping");
+ return I->second;
+ }
+
// If we have a materializer and it can materialize a value, use that.
- if (Materializer) {
- if (Value *NewV =
- Materializer->materializeDeclFor(const_cast<Value *>(V))) {
- VM[V] = NewV;
- if (auto *NewGV = dyn_cast<GlobalValue>(NewV))
- Materializer->materializeInitFor(
- NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V)));
+ if (auto *Materializer = getMaterializer()) {
+ if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) {
+ getVM()[V] = NewV;
return NewV;
}
}
@@ -51,13 +356,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
if (isa<GlobalValue>(V)) {
- if (Flags & RF_NullMapMissingGlobalValues) {
- assert(!(Flags & RF_IgnoreMissingEntries) &&
- "Illegal to specify both RF_NullMapMissingGlobalValues and "
- "RF_IgnoreMissingEntries");
+ if (Flags & RF_NullMapMissingGlobalValues)
return nullptr;
- }
- return VM[V] = const_cast<Value*>(V);
+ return getVM()[V] = const_cast<Value *>(V);
}
if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
@@ -70,28 +371,39 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
IA->hasSideEffects(), IA->isAlignStack());
}
-
- return VM[V] = const_cast<Value*>(V);
+
+ return getVM()[V] = const_cast<Value *>(V);
}
if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) {
const Metadata *MD = MDV->getMetadata();
+
+ if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) {
+ // Look through to grab the local value.
+ if (Value *LV = mapValue(LAM->getValue())) {
+ if (V == LAM->getValue())
+ return const_cast<Value *>(V);
+ return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV));
+ }
+
+ // FIXME: always return nullptr once Verifier::verifyDominatesUse()
+ // ensures metadata operands only reference defined SSA values.
+ return (Flags & RF_IgnoreMissingLocals)
+ ? nullptr
+ : MetadataAsValue::get(V->getContext(),
+ MDTuple::get(V->getContext(), None));
+ }
+
// If this is a module-level metadata and we know that nothing at the module
// level is changing, then use an identity mapping.
- if (!isa<LocalAsMetadata>(MD) && (Flags & RF_NoModuleLevelChanges))
- return VM[V] = const_cast<Value *>(V);
-
- auto *MappedMD = MapMetadata(MD, VM, Flags, TypeMapper, Materializer);
- if (MD == MappedMD || (!MappedMD && (Flags & RF_IgnoreMissingEntries)))
- return VM[V] = const_cast<Value *>(V);
-
- // FIXME: This assert crashes during bootstrap, but I think it should be
- // correct. For now, just match behaviour from before the metadata/value
- // split.
- //
- // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) &&
- // "Referenced metadata value not in value map");
- return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD);
+ if (Flags & RF_NoModuleLevelChanges)
+ return getVM()[V] = const_cast<Value *>(V);
+
+ // Map the metadata and turn it into a value.
+ auto *MappedMD = mapMetadata(MD);
+ if (MD == MappedMD)
+ return getVM()[V] = const_cast<Value *>(V);
+ return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD);
}
// Okay, this either must be a constant (which may or may not be mappable) or
@@ -99,25 +411,31 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
if (!C)
return nullptr;
-
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- Function *F =
- cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags, TypeMapper, Materializer));
- return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
- }
-
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return mapBlockAddress(*BA);
+
+ auto mapValueOrNull = [this](Value *V) {
+ auto Mapped = mapValue(V);
+ assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
+ "Unexpected null mapping for constant operand without "
+ "NullMapMissingGlobalValues flag");
+ return Mapped;
+ };
+
// Otherwise, we have some other constant to remap. Start by checking to see
// if all operands have an identity remapping.
unsigned OpNo = 0, NumOperands = C->getNumOperands();
Value *Mapped = nullptr;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
- Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
- if (Mapped != C) break;
+ Mapped = mapValueOrNull(Op);
+ if (!Mapped)
+ return nullptr;
+ if (Mapped != Op)
+ break;
}
-
+
// See if the type mapper wants to remap the type as well.
Type *NewTy = C->getType();
if (TypeMapper)
@@ -126,23 +444,26 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// If the result type and all operands match up, then just insert an identity
// mapping.
if (OpNo == NumOperands && NewTy == C->getType())
- return VM[V] = C;
-
+ return getVM()[V] = C;
+
// Okay, we need to create a new constant. We've already processed some or
// all of the operands, set them all up now.
SmallVector<Constant*, 8> Ops;
Ops.reserve(NumOperands);
for (unsigned j = 0; j != OpNo; ++j)
Ops.push_back(cast<Constant>(C->getOperand(j)));
-
+
// If one of the operands mismatch, push it and the other mapped operands.
if (OpNo != NumOperands) {
Ops.push_back(cast<Constant>(Mapped));
-
+
// Map the rest of the operands that aren't processed yet.
- for (++OpNo; OpNo != NumOperands; ++OpNo)
- Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
- Flags, TypeMapper, Materializer));
+ for (++OpNo; OpNo != NumOperands; ++OpNo) {
+ Mapped = mapValueOrNull(C->getOperand(OpNo));
+ if (!Mapped)
+ return nullptr;
+ Ops.push_back(cast<Constant>(Mapped));
+ }
}
Type *NewSrcTy = nullptr;
if (TypeMapper)
@@ -150,309 +471,407 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
+ return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
if (isa<ConstantArray>(C))
- return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
if (isa<ConstantStruct>(C))
- return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
if (isa<ConstantVector>(C))
- return VM[V] = ConstantVector::get(Ops);
+ return getVM()[V] = ConstantVector::get(Ops);
// If this is a no-operand constant, it must be because the type was remapped.
if (isa<UndefValue>(C))
- return VM[V] = UndefValue::get(NewTy);
+ return getVM()[V] = UndefValue::get(NewTy);
if (isa<ConstantAggregateZero>(C))
- return VM[V] = ConstantAggregateZero::get(NewTy);
+ return getVM()[V] = ConstantAggregateZero::get(NewTy);
assert(isa<ConstantPointerNull>(C));
- return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
-}
-
-static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key,
- Metadata *Val, ValueMaterializer *Materializer,
- RemapFlags Flags) {
- VM.MD()[Key].reset(Val);
- if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) {
- auto *N = dyn_cast_or_null<MDNode>(Val);
- // Need to invoke this once we have non-temporary MD.
- if (!N || !N->isTemporary())
- Materializer->replaceTemporaryMetadata(Key, Val);
+ return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
+ Function *F = cast<Function>(mapValue(BA.getFunction()));
+
+ // F may not have materialized its initializer. In that case, create a
+ // dummy basic block for now, and replace it once we've materialized all
+ // the initializers.
+ BasicBlock *BB;
+ if (F->empty()) {
+ DelayedBBs.push_back(DelayedBasicBlock(BA));
+ BB = DelayedBBs.back().TempBB.get();
+ } else {
+ BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock()));
}
- return Val;
+
+ return getVM()[&BA] = BlockAddress::get(F, BB ? BB : BA.getBasicBlock());
}
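//===-- Illustrative sketch (not from the patch) -------------------------===//
// The placeholder trick mapBlockAddress() relies on, for a Function *F whose
// body is not materialized yet: a detached temporary block anchors the
// BlockAddress until the real destination (RealBB, hypothetical) is known.
void exampleDelayedBlockAddress(Function *F, BasicBlock *RealBB) {
  std::unique_ptr<BasicBlock> TempBB(BasicBlock::Create(F->getContext()));
  BlockAddress *Placeholder = BlockAddress::get(F, TempBB.get());
  // ... materialize F's body and map its blocks, then:
  TempBB->replaceAllUsesWith(RealBB); // mirrors the DelayedBBs loop in flush()
  (void)Placeholder;
}
//===----------------------------------------------------------------------===//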
-static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD,
- ValueMaterializer *Materializer, RemapFlags Flags) {
- return mapToMetadata(VM, MD, const_cast<Metadata *>(MD), Materializer, Flags);
+Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) {
+ getVM().MD()[Key].reset(Val);
+ return Val;
}
-static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer);
+Metadata *Mapper::mapToSelf(const Metadata *MD) {
+ return mapToMetadata(MD, const_cast<Metadata *>(MD));
+}
-static Metadata *mapMetadataOp(Metadata *Op,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
+Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
if (!Op)
return nullptr;
- if (Materializer && !Materializer->isMetadataNeeded(Op))
+ if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) {
+#ifndef NDEBUG
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ assert((!*MappedOp || M.getVM().count(CMD->getValue()) ||
+ M.getVM().getMappedMD(Op)) &&
+ "Expected Value to be memoized");
+ else
+ assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) &&
+ "Expected result to be memoized");
+#endif
+ return *MappedOp;
+ }
+
+ const MDNode &N = *cast<MDNode>(Op);
+ if (N.isDistinct())
+ return mapDistinctNode(N);
+ return None;
+}
+
+MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
+ assert(N.isDistinct() && "Expected a distinct node");
+ assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
+ DistinctWorklist.push_back(cast<MDNode>(
+ (M.Flags & RF_MoveDistinctMDs)
+ ? M.mapToSelf(&N)
+ : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone()))));
+ return DistinctWorklist.back();
+}
+
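//===-- Illustrative sketch (not from the patch) -------------------------===//
// The two policies mapDistinctNode() chooses between for a distinct node N:
// move it and remap its operands in place, or clone it and remap the copy.
void exampleDistinctPolicies(MDNode *N) {
  MDNode *Moved = N;                                         // RF_MoveDistinctMDs
  MDNode *Copied = MDNode::replaceWithDistinct(N->clone());  // default behavior
  (void)Moved;
  (void)Copied;
}
//===----------------------------------------------------------------------===//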
+static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD,
+ Value *MappedV) {
+ if (CMD.getValue() == MappedV)
+ return const_cast<ConstantAsMetadata *>(&CMD);
+ return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr;
+}
+
+Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const {
+ if (!Op)
return nullptr;
- if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags,
- TypeMapper, Materializer))
- return MappedOp;
- // Use identity map if MappedOp is null and we can ignore missing entries.
- if (Flags & RF_IgnoreMissingEntries)
+ if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op))
+ return *MappedOp;
+
+ if (isa<MDString>(Op))
+ return const_cast<Metadata *>(Op);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue()));
+
+ return None;
+}
+
+Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) {
+ auto Where = Info.find(&Op);
+ assert(Where != Info.end() && "Expected a valid reference");
+
+ auto &OpD = Where->second;
+ if (!OpD.HasChanged)
return Op;
- // FIXME: This assert crashes during bootstrap, but I think it should be
- // correct. For now, just match behaviour from before the metadata/value
- // split.
- //
- // assert((Flags & RF_NullMapMissingGlobalValues) &&
- // "Referenced metadata not in value map!");
- return nullptr;
+ // Lazily construct a temporary node.
+ if (!OpD.Placeholder)
+ OpD.Placeholder = Op.clone();
+
+ return *OpD.Placeholder;
}
-/// Resolve uniquing cycles involving the given metadata.
-static void resolveCycles(Metadata *MD, bool AllowTemps) {
- if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
- if (AllowTemps && N->isTemporary())
- return;
- if (!N->isResolved()) {
- if (AllowTemps)
- // Note that this will drop RAUW support on any temporaries, which
- // blocks uniquing. If this ends up being an issue, in the future
- // we can experiment with delaying resolving these nodes until
- // after metadata is fully materialized (i.e. when linking metadata
- // as a postpass after function importing).
- N->resolveNonTemporaries();
- else
- N->resolveCycles();
- }
+template <class OperandMapper>
+void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
+ assert(!N.isUniqued() && "Expected distinct or temporary nodes");
+ for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
+ Metadata *Old = N.getOperand(I);
+ Metadata *New = mapOperand(Old);
+
+ if (Old != New)
+ N.replaceOperandWith(I, New);
}
}
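//===-- Illustrative sketch (not from the patch) -------------------------===//
// Call-site shape for the remapOperands() template above (a fragment, as if
// written inside MDNodeMapper): rewrite every operand of a distinct node
// through a hypothetical DenseMap<Metadata *, Metadata *> named Replacements.
remapOperands(*DistinctN, [&](Metadata *Old) -> Metadata * {
  auto Where = Replacements.find(Old);
  return Where == Replacements.end() ? Old : Where->second;
});
//===----------------------------------------------------------------------===//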
-/// Remap the operands of an MDNode.
-///
-/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is
-/// distinct, uniquing cycles are resolved as they're found.
-///
-/// \pre \c Node.isDistinct() or \c Node.isTemporary().
-static bool remapOperands(MDNode &Node,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(!Node.isUniqued() && "Expected temporary or distinct node");
- const bool IsDistinct = Node.isDistinct();
-
- bool AnyChanged = false;
- for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) {
- Metadata *Old = Node.getOperand(I);
- Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper,
- Materializer);
- if (Old != New) {
- AnyChanged = true;
- Node.replaceOperandWith(I, New);
-
- // Resolve uniquing cycles underneath distinct nodes on the fly so they
- // don't infect later operands.
- if (IsDistinct)
- resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata);
+namespace {
+/// An entry in the worklist for the post-order traversal.
+struct POTWorklistEntry {
+ MDNode *N; ///< Current node.
+ MDNode::op_iterator Op; ///< Current operand of \c N.
+
+ /// Track in the worklist entry whether operands have changed, to avoid
+ /// hitting the map in \a UniquedGraph.
+ bool HasChanged = false;
+
+ POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
+};
+} // end namespace
+
+bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
+ assert(G.Info.empty() && "Expected a fresh traversal");
+ assert(FirstN.isUniqued() && "Expected uniqued node in POT");
+
+ // Construct a post-order traversal of the uniqued subgraph under FirstN.
+ bool AnyChanges = false;
+ SmallVector<POTWorklistEntry, 16> Worklist;
+ Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN)));
+ (void)G.Info[&FirstN];
+ while (!Worklist.empty()) {
+ // Start or continue the traversal through this node's operands.
+ auto &WE = Worklist.back();
+ if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) {
+ // Push a new node to traverse first.
+ Worklist.push_back(POTWorklistEntry(*N));
+ continue;
}
+
+ // Push the node onto the POT.
+ assert(WE.N->isUniqued() && "Expected only uniqued nodes");
+ assert(WE.Op == WE.N->op_end() && "Expected to visit all operands");
+ auto &D = G.Info[WE.N];
+ AnyChanges |= D.HasChanged = WE.HasChanged;
+ D.ID = G.POT.size();
+ G.POT.push_back(WE.N);
+
+ // Pop the node off the worklist.
+ Worklist.pop_back();
}
+ return AnyChanges;
+}
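//===-- Illustrative sketch (not from the patch) -------------------------===//
// The iterative post-order scheme createPOT() uses, shown on a plain
// adjacency list: each worklist entry carries a cursor into its successor
// list, so a node is emitted only after everything reachable through it.
#include <set>
#include <utility>
#include <vector>

std::vector<int> postOrder(int Root,
                           const std::vector<std::vector<int>> &Succ) {
  std::vector<int> POT;
  std::set<int> Seen;
  Seen.insert(Root);
  // Each entry: (node, index of the next successor to visit).
  std::vector<std::pair<int, size_t>> Worklist(1, std::make_pair(Root, 0));
  while (!Worklist.empty()) {
    std::pair<int, size_t> &WE = Worklist.back();
    if (WE.second != Succ[WE.first].size()) {
      int Next = Succ[WE.first][WE.second++];
      if (Seen.insert(Next).second)
        Worklist.push_back(std::make_pair(Next, 0)); // descend into new nodes
      continue;
    }
    POT.push_back(WE.first); // all successors emitted; now emit this node
    Worklist.pop_back();
  }
  return POT;
}
//===----------------------------------------------------------------------===//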
- return AnyChanged;
-}
-
-/// Map a distinct MDNode.
-///
-/// Whether distinct nodes change is independent of their operands. If \a
-/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in
-/// place; effectively, they're moved from one graph to another. Otherwise,
-/// they're cloned/duplicated, and the new copy's operands are remapped.
-static Metadata *mapDistinctNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(Node->isDistinct() && "Expected distinct node");
-
- MDNode *NewMD;
- if (Flags & RF_MoveDistinctMDs)
- NewMD = const_cast<MDNode *>(Node);
- else
- NewMD = MDNode::replaceWithDistinct(Node->clone());
-
- // Remap operands later.
- DistinctWorklist.push_back(NewMD);
- return mapToMetadata(VM, Node, NewMD, Materializer, Flags);
-}
-
-/// \brief Map a uniqued MDNode.
-///
-/// Uniqued nodes may not need to be recreated (they may map to themselves).
-static Metadata *mapUniquedNode(const MDNode *Node,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) &&
- "Expected uniqued node");
-
- // Create a temporary node and map it upfront in case we have a uniquing
- // cycle. If necessary, this mapping will get updated by RAUW logic before
- // returning.
- auto ClonedMD = Node->clone();
- mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags);
- if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper,
- Materializer)) {
- // No operands changed, so use the original.
- ClonedMD->replaceAllUsesWith(const_cast<MDNode *>(Node));
- // Even though replaceAllUsesWith would have replaced the value map
- // entry, we need to explictly map with the final non-temporary node
- // to replace any temporary metadata via the callback.
- return mapToSelf(VM, Node, Materializer, Flags);
+MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged) {
+ while (I != E) {
+ Metadata *Op = *I++; // Increment even on early return.
+ if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) {
+ // Check if the operand changes.
+ HasChanged |= Op != *MappedOp;
+ continue;
+ }
+
+ // A uniqued metadata node.
+ MDNode &OpN = *cast<MDNode>(Op);
+ assert(OpN.isUniqued() &&
+ "Only uniqued operands cannot be mapped immediately");
+ if (G.Info.insert(std::make_pair(&OpN, Data())).second)
+ return &OpN; // This is a new one. Return it.
}
+ return nullptr;
+}
- // Uniquify the cloned node. Explicitly map it with the final non-temporary
- // node so that replacement of temporary metadata via the callback occurs.
- return mapToMetadata(VM, Node,
- MDNode::replaceWithUniqued(std::move(ClonedMD)),
- Materializer, Flags);
+void MDNodeMapper::UniquedGraph::propagateChanges() {
+ bool AnyChanges;
+ do {
+ AnyChanges = false;
+ for (MDNode *N : POT) {
+ auto &D = Info[N];
+ if (D.HasChanged)
+ continue;
+
+ if (!llvm::any_of(N->operands(), [&](const Metadata *Op) {
+ auto Where = Info.find(Op);
+ return Where != Info.end() && Where->second.HasChanged;
+ }))
+ continue;
+
+ AnyChanges = D.HasChanged = true;
+ }
+ } while (AnyChanges);
}
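//===-- Illustrative sketch (not from the patch) -------------------------===//
// propagateChanges() is a fixed-point computation: a node becomes dirty once
// any of its operands is dirty. The same loop over hypothetical parallel
// arrays, where Ops[N] holds the POT indices of node N's uniqued operands:
void propagateDirtyBits(std::vector<bool> &Changed,
                        const std::vector<std::vector<size_t>> &Ops) {
  bool Any;
  do {
    Any = false;
    for (size_t N = 0; N != Changed.size(); ++N) {
      if (Changed[N])
        continue;
      for (size_t Op : Ops[N])
        if (Changed[Op]) {
          Changed[N] = Any = true; // mark dirty and go around again
          break;
        }
    }
  } while (Any);
}
//===----------------------------------------------------------------------===//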
-static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<MDNode *> &DistinctWorklist,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- // If the value already exists in the map, use it.
- if (Metadata *NewMD = VM.MD().lookup(MD).get())
- return NewMD;
+void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
+ // Construct uniqued nodes, building forward references as necessary.
+ SmallVector<MDNode *, 16> CyclicNodes;
+ for (auto *N : G.POT) {
+ auto &D = G.Info[N];
+ if (!D.HasChanged) {
+ // The node hasn't changed.
+ M.mapToSelf(N);
+ continue;
+ }
- if (isa<MDString>(MD))
- return mapToSelf(VM, MD, Materializer, Flags);
-
- if (isa<ConstantAsMetadata>(MD))
- if ((Flags & RF_NoModuleLevelChanges))
- return mapToSelf(VM, MD, Materializer, Flags);
-
- if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) {
- Value *MappedV =
- MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer);
- if (VMD->getValue() == MappedV ||
- (!MappedV && (Flags & RF_IgnoreMissingEntries)))
- return mapToSelf(VM, MD, Materializer, Flags);
-
- // FIXME: This assert crashes during bootstrap, but I think it should be
- // correct. For now, just match behaviour from before the metadata/value
- // split.
- //
- // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) &&
- // "Referenced metadata not in value map!");
- if (MappedV)
- return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer,
- Flags);
- return nullptr;
+ // Remember whether this node had a placeholder.
+ bool HadPlaceholder(D.Placeholder);
+
+ // Clone the uniqued node and remap the operands.
+ TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone();
+ remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
+ if (Optional<Metadata *> MappedOp = getMappedOp(Old))
+ return *MappedOp;
+ assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
+ return &G.getFwdReference(*cast<MDNode>(Old));
+ });
+
+ auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN));
+ M.mapToMetadata(N, NewN);
+
+ // Nodes that were referenced out of order in the POT are involved in a
+ // uniquing cycle.
+ if (HadPlaceholder)
+ CyclicNodes.push_back(NewN);
}
- // Note: this cast precedes the Flags check so we always get its associated
- // assertion.
- const MDNode *Node = cast<MDNode>(MD);
+ // Resolve cycles.
+ for (auto *N : CyclicNodes)
+ if (!N->isResolved())
+ N->resolveCycles();
+}
- // If this is a module-level metadata and we know that nothing at the
- // module level is changing, then use an identity mapping.
- if (Flags & RF_NoModuleLevelChanges)
- return mapToSelf(VM, MD, Materializer, Flags);
+Metadata *MDNodeMapper::map(const MDNode &N) {
+ assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive");
+ assert(!(M.Flags & RF_NoModuleLevelChanges) &&
+ "MDNodeMapper::map assumes module-level changes");
// Require resolved nodes whenever metadata might be remapped.
- assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) &&
- "Unexpected unresolved node");
-
- if (Materializer && Node->isTemporary()) {
- assert(Flags & RF_HaveUnmaterializedMetadata);
- Metadata *TempMD =
- Materializer->mapTemporaryMetadata(const_cast<Metadata *>(MD));
- // If the above callback returned an existing temporary node, use it
- // instead of the current temporary node. This happens when earlier
- // function importing passes already created and saved a temporary
- // metadata node for the same value id.
- if (TempMD) {
- mapToMetadata(VM, MD, TempMD, Materializer, Flags);
- return TempMD;
- }
+ assert(N.isResolved() && "Unexpected unresolved node");
+
+ Metadata *MappedN =
+ N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N);
+ while (!DistinctWorklist.empty())
+ remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) {
+ if (Optional<Metadata *> MappedOp = tryToMapOperand(Old))
+ return *MappedOp;
+ return mapTopLevelUniquedNode(*cast<MDNode>(Old));
+ });
+ return MappedN;
+}
+
+Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
+ assert(FirstN.isUniqued() && "Expected uniqued node");
+
+ // Create a post-order traversal of uniqued nodes under FirstN.
+ UniquedGraph G;
+ if (!createPOT(G, FirstN)) {
+ // Return early if no nodes have changed.
+ for (const MDNode *N : G.POT)
+ M.mapToSelf(N);
+ return &const_cast<MDNode &>(FirstN);
}
- if (Node->isDistinct())
- return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
- Materializer);
+ // Update graph with all nodes that have changed.
+ G.propagateChanges();
- return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper,
- Materializer);
+ // Map all the nodes in the graph.
+ mapNodesInPOT(G);
+
+ // Return the original node, remapped.
+ return *getMappedOp(&FirstN);
}
-Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- SmallVector<MDNode *, 8> DistinctWorklist;
- Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper,
- Materializer);
+namespace {
- // When there are no module-level changes, it's possible that the metadata
- // graph has temporaries. Skip the logic to resolve cycles, since it's
- // unnecessary (and invalid) in that case.
- if (Flags & RF_NoModuleLevelChanges)
- return NewMD;
+struct MapMetadataDisabler {
+ ValueToValueMapTy &VM;
- // Resolve cycles involving the entry metadata.
- resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata);
+ MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
+ VM.disableMapMetadata();
+ }
+ ~MapMetadataDisabler() { VM.enableMapMetadata(); }
+};
- // Remap the operands of distinct MDNodes.
- while (!DistinctWorklist.empty())
- remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags,
- TypeMapper, Materializer);
+} // end namespace
- return NewMD;
+Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
+ // If the value already exists in the map, use it.
+ if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
+ return *NewMD;
+
+ if (isa<MDString>(MD))
+ return const_cast<Metadata *>(MD);
+
+ // This is module-level metadata. If nothing at the module level is
+ // changing, use an identity mapping.
+ if ((Flags & RF_NoModuleLevelChanges))
+ return const_cast<Metadata *>(MD);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
+ // Disallow recursion into metadata mapping through mapValue.
+ MapMetadataDisabler MMD(getVM());
+
+ // Don't memoize ConstantAsMetadata. Instead of lasting until the
+ // LLVMContext is destroyed, they can be deleted when the GlobalValue they
+ // reference is destroyed. These aren't super common, so the extra
+ // indirection isn't that expensive.
+ return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
+ }
+
+ assert(isa<MDNode>(MD) && "Expected a metadata node");
+
+ return None;
}
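//===-- Illustrative sketch (not from the patch) -------------------------===//
// The fast paths in mapSimpleMetadata() from a client's perspective: strings
// (and, under RF_NoModuleLevelChanges, everything) map to themselves, so only
// MDNodes ever reach MDNodeMapper. Assumes the usual ValueMapper.h includes.
void exampleMapString(LLVMContext &Ctx) {
  ValueToValueMapTy VM;
  Metadata *S = MDString::get(Ctx, "example");
  assert(MapMetadata(S, VM) == S && "MDStrings always map to themselves");
}
//===----------------------------------------------------------------------===//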
-MDNode *llvm::MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- return cast<MDNode>(MapMetadata(static_cast<const Metadata *>(MD), VM, Flags,
- TypeMapper, Materializer));
+Metadata *Mapper::mapMetadata(const Metadata *MD) {
+ assert(MD && "Expected valid metadata");
+ assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata");
+
+ if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD))
+ return *NewMD;
+
+ return MDNodeMapper(*this).map(*cast<MDNode>(MD));
+}
+
+void Mapper::flush() {
+ // Flush out the worklist of global values.
+ while (!Worklist.empty()) {
+ WorklistEntry E = Worklist.pop_back_val();
+ CurrentMCID = E.MCID;
+ switch (E.Kind) {
+ case WorklistEntry::MapGlobalInit:
+ E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init));
+ break;
+ case WorklistEntry::MapAppendingVar: {
+ unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
+ mapAppendingVariable(*E.Data.AppendingGV.GV,
+ E.Data.AppendingGV.InitPrefix,
+ E.AppendingGVIsOldCtorDtor,
+ makeArrayRef(AppendingInits).slice(PrefixSize));
+ AppendingInits.resize(PrefixSize);
+ break;
+ }
+ case WorklistEntry::MapGlobalAliasee:
+ E.Data.GlobalAliasee.GA->setAliasee(
+ mapConstant(E.Data.GlobalAliasee.Aliasee));
+ break;
+ case WorklistEntry::RemapFunction:
+ remapFunction(*E.Data.RemapF);
+ break;
+ }
+ }
+ CurrentMCID = 0;
+
+ // Now that all global values have been handled, finish the delayed
+ // basic-block fixups for block addresses.
+ while (!DelayedBBs.empty()) {
+ DelayedBasicBlock DBB = DelayedBBs.pop_back_val();
+ BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB));
+ DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB);
+ }
}
-/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by VMap.
-///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer){
+void Mapper::remapInstruction(Instruction *I) {
// Remap operands.
- for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
+ for (Use &Op : I->operands()) {
+ Value *V = mapValue(Op);
// If we aren't ignoring missing entries, assert that something happened.
if (V)
- *op = V;
+ Op = V;
else
- assert((Flags & RF_IgnoreMissingEntries) &&
+ assert((Flags & RF_IgnoreMissingLocals) &&
"Referenced value not in value map!");
}
// Remap phi nodes' incoming blocks.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ Value *V = mapValue(PN->getIncomingBlock(i));
// If we aren't ignoring missing entries, assert that something happened.
if (V)
PN->setIncomingBlock(i, cast<BasicBlock>(V));
else
- assert((Flags & RF_IgnoreMissingEntries) &&
+ assert((Flags & RF_IgnoreMissingLocals) &&
"Referenced block not in value map!");
}
}
@@ -462,11 +881,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
I->getAllMetadata(MDs);
for (const auto &MI : MDs) {
MDNode *Old = MI.second;
- MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
+ MDNode *New = cast_or_null<MDNode>(mapMetadata(Old));
if (New != Old)
I->setMetadata(MI.first, New);
}
-
+
if (!TypeMapper)
return;
@@ -491,3 +910,213 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
}
I->mutateType(TypeMapper->remapType(I->getType()));
}
+
+void Mapper::remapFunction(Function &F) {
+ // Remap the operands.
+ for (Use &Op : F.operands())
+ if (Op)
+ Op = mapValue(Op);
+
+ // Remap the metadata attachments.
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ F.getAllMetadata(MDs);
+ F.clearMetadata();
+ for (const auto &I : MDs)
+ F.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second)));
+
+ // Remap the argument types.
+ if (TypeMapper)
+ for (Argument &A : F.args())
+ A.mutateType(TypeMapper->remapType(A.getType()));
+
+ // Remap the instructions.
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ remapInstruction(&I);
+}
+
+void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers) {
+ SmallVector<Constant *, 16> Elements;
+ if (InitPrefix) {
+ unsigned NumElements =
+ cast<ArrayType>(InitPrefix->getType())->getNumElements();
+ for (unsigned I = 0; I != NumElements; ++I)
+ Elements.push_back(InitPrefix->getAggregateElement(I));
+ }
+
+ PointerType *VoidPtrTy;
+ Type *EltTy;
+ if (IsOldCtorDtor) {
+ // FIXME: This upgrade is done during linking to support the C API. See
+ // also IRLinker::linkAppendingVarProto() in IRMover.cpp.
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
+ auto &ST = *cast<StructType>(NewMembers.front()->getType());
+ Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+ EltTy = StructType::get(GV.getContext(), Tys, false);
+ }
+
+ for (auto *V : NewMembers) {
+ Constant *NewV;
+ if (IsOldCtorDtor) {
+ auto *S = cast<ConstantStruct>(V);
+ auto *E1 = mapValue(S->getOperand(0));
+ auto *E2 = mapValue(S->getOperand(1));
+ Value *Null = Constant::getNullValue(VoidPtrTy);
+ NewV =
+ ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+ } else {
+ NewV = cast_or_null<Constant>(mapValue(V));
+ }
+ Elements.push_back(NewV);
+ }
+
+ GV.setInitializer(ConstantArray::get(
+ cast<ArrayType>(GV.getType()->getElementType()), Elements));
+}
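//===-- Illustrative sketch (not from the patch) -------------------------===//
// What the IsOldCtorDtor path above does to each llvm.global_ctors entry:
// the legacy two-field form gains a null i8* associated-data pointer.
//
//   { i32 65535, void ()* @ctor }  ->  { i32 65535, void ()* @ctor, i8* null }
//===----------------------------------------------------------------------===//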
+
+void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapGlobalInit;
+ WE.MCID = MCID;
+ WE.Data.GVInit.GV = &GV;
+ WE.Data.GVInit.Init = &Init;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapAppendingVar;
+ WE.MCID = MCID;
+ WE.Data.AppendingGV.GV = &GV;
+ WE.Data.AppendingGV.InitPrefix = InitPrefix;
+ WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor;
+ WE.AppendingGVNumNewMembers = NewMembers.size();
+ Worklist.push_back(WE);
+ AppendingInits.append(NewMembers.begin(), NewMembers.end());
+}
+
+void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapGlobalAliasee;
+ WE.MCID = MCID;
+ WE.Data.GlobalAliasee.GA = &GA;
+ WE.Data.GlobalAliasee.Aliasee = &Aliasee;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ assert(AlreadyScheduled.insert(&F).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::RemapFunction;
+ WE.MCID = MCID;
+ WE.Data.RemapF = &F;
+ Worklist.push_back(WE);
+}
+
+void Mapper::addFlags(RemapFlags Flags) {
+ assert(!hasWorkToDo() && "Expected to have flushed the worklist");
+ this->Flags = this->Flags | Flags;
+}
+
+static Mapper *getAsMapper(void *pImpl) {
+ return reinterpret_cast<Mapper *>(pImpl);
+}
+
+namespace {
+
+class FlushingMapper {
+ Mapper &M;
+
+public:
+ explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
+ assert(!M.hasWorkToDo() && "Expected to be flushed");
+ }
+ ~FlushingMapper() { M.flush(); }
+ Mapper *operator->() const { return &M; }
+};
+
+} // end namespace
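//===-- Illustrative sketch (not from the patch) -------------------------===//
// The RAII shape FlushingMapper gives every public entry point: perform the
// mapping, then drain whatever work it scheduled, even on early return.
Value *exampleMapValue(void *pImpl, const Value &V) {
  FlushingMapper M(pImpl); // asserts the worklist is empty on entry
  return M->mapValue(&V);  // ~FlushingMapper calls flush() after the mapping
}
//===----------------------------------------------------------------------===//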
+
+ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer)
+ : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
+
+ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
+
+unsigned
+ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer) {
+ return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer);
+}
+
+void ValueMapper::addFlags(RemapFlags Flags) {
+ FlushingMapper(pImpl)->addFlags(Flags);
+}
+
+Value *ValueMapper::mapValue(const Value &V) {
+ return FlushingMapper(pImpl)->mapValue(&V);
+}
+
+Constant *ValueMapper::mapConstant(const Constant &C) {
+ return cast_or_null<Constant>(mapValue(C));
+}
+
+Metadata *ValueMapper::mapMetadata(const Metadata &MD) {
+ return FlushingMapper(pImpl)->mapMetadata(&MD);
+}
+
+MDNode *ValueMapper::mapMDNode(const MDNode &N) {
+ return cast_or_null<MDNode>(mapMetadata(N));
+}
+
+void ValueMapper::remapInstruction(Instruction &I) {
+ FlushingMapper(pImpl)->remapInstruction(&I);
+}
+
+void ValueMapper::remapFunction(Function &F) {
+ FlushingMapper(pImpl)->remapFunction(F);
+}
+
+void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
+ Constant &Init,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID);
+}
+
+void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAppendingVariable(
+ GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ getAsMapper(pImpl)->scheduleRemapFunction(F, MCID);
+}
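//===-- Illustrative sketch (not from the patch) -------------------------===//
// Typical client use of the ValueMapper interface defined above, assuming
// OldV and NewV live in the same context as the Function F being rewritten:
void exampleRemapFunction(Function &F, Value *OldV, Value *NewV) {
  ValueToValueMapTy VM;
  VM[OldV] = NewV;                                // seed the value mapping
  ValueMapper Mapper(VM, RF_IgnoreMissingLocals); // tolerate unmapped locals
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      Mapper.remapInstruction(I);                 // rewrite operands via VM
}
//===----------------------------------------------------------------------===//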