Diffstat (limited to 'lib/Transforms/Utils')
40 files changed, 7781 insertions, 2941 deletions
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 409326eba401f..7e50d4bb447ef 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -44,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
   else if (Size <= 512) Res = Size + 64;
   else if (Size <= 4096) Res = Size + 128;
   else                   Res = Size + 256;
-  return RoundUpToAlignment(Res, Alignment);
+  return alignTo(Res, Alignment);
 }

 void
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 0262358fa3d57..d034905b6572b 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,7 +52,9 @@
 // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
 //===----------------------------------------------------------------------===//

+#include "llvm/Transforms/Utils/AddDiscriminators.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
@@ -72,20 +74,22 @@ using namespace llvm;
 #define DEBUG_TYPE "add-discriminators"

 namespace {
-struct AddDiscriminators : public FunctionPass {
+// The legacy pass of AddDiscriminators.
+struct AddDiscriminatorsLegacyPass : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid
-  AddDiscriminators() : FunctionPass(ID) {
-    initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+  AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
+    initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
   }

   bool runOnFunction(Function &F) override;
 };
-}
-char AddDiscriminators::ID = 0;
-INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators",
+} // end anonymous namespace
+
+char AddDiscriminatorsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
                       "Add DWARF path discriminators", false, false)
-INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
+INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
                     "Add DWARF path discriminators", false, false)

 // Command line option to disable discriminator generation even in the
@@ -95,13 +99,9 @@ static cl::opt<bool> NoDiscriminators(
     "no-discriminators", cl::init(false),
     cl::desc("Disable generation of discriminator information."));

+// Create the legacy AddDiscriminatorsPass.
 FunctionPass *llvm::createAddDiscriminatorsPass() {
-  return new AddDiscriminators();
-}
-
-static bool hasDebugInfo(const Function &F) {
-  DISubprogram *S = getDISubprogram(&F);
-  return S != nullptr;
+  return new AddDiscriminatorsLegacyPass();
 }

 /// \brief Assign DWARF discriminators.
@@ -155,13 +155,13 @@
 /// lexical block for I2 and all the instruction in B2 that share the same
 /// file and line location as I2. This new lexical block will have a
 /// different discriminator number than I1.
-bool AddDiscriminators::runOnFunction(Function &F) {
+static bool addDiscriminators(Function &F) {
   // If the function has debug information, but the user has disabled
   // discriminators, do nothing.
   // Simlarly, if the function has no debug info, do nothing.
   // Finally, if this module is built with dwarf versions earlier than 4,
   // do nothing (discriminator support is a DWARF 4 feature).
-  if (NoDiscriminators || !hasDebugInfo(F) ||
+  if (NoDiscriminators || !F.getSubprogram() ||
       F.getParent()->getDwarfVersion() < 4)
     return false;

@@ -173,8 +173,11 @@ bool AddDiscriminators::runOnFunction(Function &F) {
   typedef std::pair<StringRef, unsigned> Location;
   typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap;
   typedef DenseMap<Location, BBScopeMap> LocationBBMap;
+  typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
+  typedef DenseSet<Location> LocationSet;

   LocationBBMap LBM;
+  LocationDiscriminatorMap LDM;

   // Traverse all instructions in the function. If the source line location
   // of the instruction appears in other basic block, assign a new
@@ -199,8 +202,7 @@ bool AddDiscriminators::runOnFunction(Function &F) {
         auto *Scope = DIL->getScope();
         auto *File = Builder.createFile(DIL->getFilename(),
                                         Scope->getDirectory());
-        NewScope = Builder.createLexicalBlockFile(
-            Scope, File, DIL->computeNewDiscriminator());
+        NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
       }
       I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
                                     NewScope, DIL->getInlinedAt()));
@@ -217,32 +219,40 @@ bool AddDiscriminators::runOnFunction(Function &F) {
   // Sample base profile needs to distinguish different function calls within
   // a same source line for correct profile annotation.
   for (BasicBlock &B : F) {
-    const DILocation *FirstDIL = NULL;
+    LocationSet CallLocations;
     for (auto &I : B.getInstList()) {
       CallInst *Current = dyn_cast<CallInst>(&I);
       if (!Current || isa<DbgInfoIntrinsic>(&I))
         continue;

       DILocation *CurrentDIL = Current->getDebugLoc();
-      if (FirstDIL) {
-        if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() &&
-            CurrentDIL->getFilename() == FirstDIL->getFilename()) {
-          auto *Scope = FirstDIL->getScope();
-          auto *File = Builder.createFile(FirstDIL->getFilename(),
-                                          Scope->getDirectory());
-          auto *NewScope = Builder.createLexicalBlockFile(
-              Scope, File, FirstDIL->computeNewDiscriminator());
-          Current->setDebugLoc(DILocation::get(
-              Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope,
-              CurrentDIL->getInlinedAt()));
-          Changed = true;
-        } else {
-          FirstDIL = CurrentDIL;
-        }
-      } else {
-        FirstDIL = CurrentDIL;
+      if (!CurrentDIL)
+        continue;
+      Location L =
+          std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
+      if (!CallLocations.insert(L).second) {
+        auto *Scope = CurrentDIL->getScope();
+        auto *File = Builder.createFile(CurrentDIL->getFilename(),
+                                        Scope->getDirectory());
+        auto *NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
+        Current->setDebugLoc(DILocation::get(Ctx, CurrentDIL->getLine(),
+                                             CurrentDIL->getColumn(), NewScope,
+                                             CurrentDIL->getInlinedAt()));
+        Changed = true;
       }
     }
   }
   return Changed;
 }
+
+bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
+  return addDiscriminators(F);
+}
+PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
+                                             AnalysisManager<Function> &AM) {
+  if (!addDiscriminators(F))
+    return PreservedAnalyses::all();
+
+  // FIXME: should be all()
+  return PreservedAnalyses::none();
+}
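The rewritten pass above replaces DILocation::computeNewDiscriminator() with a per-function counter map: each (file, line) Location owns a counter, and ++LDM[L] hands out 1, 2, 3, ... for successive copies of the same location. A minimal standalone sketch of that allocation scheme, in plain C++ (std::map stands in for llvm::DenseMap; all names are illustrative, not LLVM API):

// Each (file, line) gets a monotonically increasing counter, so the first
// duplicate at a location receives discriminator 1, the next 2, and so on.
#include <iostream>
#include <map>
#include <string>
#include <utility>

using Location = std::pair<std::string, unsigned>; // (filename, line)

int main() {
  std::map<Location, unsigned> LDM; // per-function discriminator counters

  // Two instructions mapping to foo.c:7 in different basic blocks: the
  // second gets a fresh discriminator via pre-increment, exactly like
  // ++LDM[L] in the patch.
  Location L{"foo.c", 7};
  unsigned First = ++LDM[L];  // 1
  unsigned Second = ++LDM[L]; // 2
  std::cout << First << " " << Second << "\n";
}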
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 72db980cf572a..b90349d3cdad1 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -31,8 +31,6 @@
 #include <algorithm>
 using namespace llvm;

-/// DeleteDeadBlock - Delete the specified block, which must have no
-/// predecessors.
 void llvm::DeleteDeadBlock(BasicBlock *BB) {
   assert((pred_begin(BB) == pred_end(BB) ||
           // Can delete self loop.
@@ -61,12 +59,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
   BB->eraseFromParent();
 }

-/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
-/// any single-entry PHI nodes in it, fold them away. This handles the case
-/// when all entries to the PHI nodes in a block are guaranteed equal, such as
-/// when the block has exactly one predecessor.
 void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
-                                   MemoryDependenceAnalysis *MemDep) {
+                                   MemoryDependenceResults *MemDep) {
   if (!isa<PHINode>(BB->begin()))
     return;

   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
@@ -82,11 +76,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
   }
 }

-
-/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
-/// is dead. Also recursively delete any operands that become dead as
-/// a result. This includes tracing the def-use list from the PHI to see if
-/// it is ultimately unused or if it reaches an unused cycle.
 bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
   // Recursively deleting a PHI may cause multiple PHIs to be deleted
   // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
@@ -103,11 +92,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
   return Changed;
 }

-/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
-/// if possible. The return value indicates success or failure.
 bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
                                      LoopInfo *LI,
-                                     MemoryDependenceAnalysis *MemDep) {
+                                     MemoryDependenceResults *MemDep) {
   // Don't merge away blocks who have their address taken.
   if (BB->hasAddressTaken())
     return false;
@@ -165,10 +152,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
   if (DomTreeNode *DTN = DT->getNode(BB)) {
     DomTreeNode *PredDTN = DT->getNode(PredBB);
     SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
-    for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
-                                                  DE = Children.end();
-         DI != DE; ++DI)
-      DT->changeImmediateDominator(*DI, PredDTN);
+    for (DomTreeNode *DI : Children)
+      DT->changeImmediateDominator(DI, PredDTN);

     DT->eraseNode(BB);
   }
@@ -183,9 +168,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
   return true;
 }

-/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
-/// with a value, then remove and delete the original instruction.
-///
 void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
                                 BasicBlock::iterator &BI, Value *V) {
   Instruction &I = *BI;
@@ -200,11 +182,6 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
   BI = BIL.erase(BI);
 }

-
-/// ReplaceInstWithInst - Replace the instruction specified by BI with the
-/// instruction specified by I. The original instruction is deleted and BI is
-/// updated to point to the new instruction.
-///
 void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
                                BasicBlock::iterator &BI, Instruction *I) {
   assert(I->getParent() == nullptr &&
@@ -225,16 +202,11 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
   BI = New;
 }

-/// ReplaceInstWithInst - Replace the instruction specified by From with the
-/// instruction specified by To.
-///
 void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
   BasicBlock::iterator BI(From);
   ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
 }

-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
 BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
                             LoopInfo *LI) {
   unsigned SuccNum = GetSuccessorNumber(BB, Succ);
@@ -266,8 +238,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F,
                                      const CriticalEdgeSplittingOptions &Options) {
   unsigned NumBroken = 0;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    TerminatorInst *TI = I->getTerminator();
+  for (BasicBlock &BB : F) {
+    TerminatorInst *TI = BB.getTerminator();
     if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
       for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
         if (SplitCriticalEdge(TI, i, Options))
@@ -276,11 +248,6 @@ llvm::SplitAllCriticalEdges(Function &F,
   return NumBroken;
 }

-/// SplitBlock - Split the specified block at the specified instruction - every
-/// thing before SplitPt stays in Old and everything starting with SplitPt moves
-/// to a new block. The two blocks are joined by an unconditional branch and
-/// the loop info is updated.
-///
 BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
                              DominatorTree *DT, LoopInfo *LI) {
   BasicBlock::iterator SplitIt = SplitPt->getIterator();
@@ -297,22 +264,17 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
   if (DT)
     // Old dominates New. New node dominates all other nodes dominated by Old.
     if (DomTreeNode *OldNode = DT->getNode(Old)) {
-      std::vector<DomTreeNode *> Children;
-      for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
-           I != E; ++I)
-        Children.push_back(*I);
+      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());

       DomTreeNode *NewNode = DT->addNewBlock(New, Old);
-      for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
-           E = Children.end(); I != E; ++I)
-        DT->changeImmediateDominator(*I, NewNode);
+      for (DomTreeNode *I : Children)
+        DT->changeImmediateDominator(I, NewNode);
     }

   return New;
 }

-/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA
-/// analysis information.
+/// Update DominatorTree, LoopInfo, and LCCSA analysis information.
 static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
                                       ArrayRef<BasicBlock *> Preds,
                                       DominatorTree *DT, LoopInfo *LI,
@@ -331,10 +293,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
   // this split will affect loops.
   bool IsLoopEntry = !!L;
   bool SplitMakesNewLoopHeader = false;
-  for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end();
-       i != e; ++i) {
-    BasicBlock *Pred = *i;
-
+  for (BasicBlock *Pred : Preds) {
     // If we need to preserve LCSSA, determine if any of the preds is a loop
     // exit.
     if (PreserveLCSSA)
@@ -362,9 +321,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
   // loops enclose them, and select the most-nested loop which contains the
   // loop containing the block being split.
   Loop *InnermostPredLoop = nullptr;
-  for (ArrayRef<BasicBlock*>::iterator
-       i = Preds.begin(), e = Preds.end(); i != e; ++i) {
-    BasicBlock *Pred = *i;
+  for (BasicBlock *Pred : Preds) {
     if (Loop *PredLoop = LI->getLoopFor(Pred)) {
       // Seek a loop which actually contains the block being split (to avoid
       // adjacent loops).
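Note why SplitBlock copies OldNode's dominator-tree children into a std::vector before reparenting them: changeImmediateDominator removes each child from the list being walked, so iterating the live child list would invalidate the iterators. A small self-contained illustration of the same snapshot-then-mutate idiom, using a toy Node type rather than LLVM's DomTreeNode:

#include <algorithm>
#include <vector>

struct Node {
  Node *Parent = nullptr;
  std::vector<Node *> Children;
};

// Moving a child erases it from the old parent's child list, which is
// exactly the mutation that would invalidate a live iteration.
static void reparent(Node &Child, Node &NewParent) {
  auto &Sibs = Child.Parent->Children;
  Sibs.erase(std::find(Sibs.begin(), Sibs.end(), &Child));
  Child.Parent = &NewParent;
  NewParent.Children.push_back(&Child);
}

int main() {
  Node Old, New, A, B;
  A.Parent = B.Parent = &Old;
  Old.Children = {&A, &B};

  // Snapshot first, as SplitBlock does with
  // std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
  std::vector<Node *> Snapshot(Old.Children.begin(), Old.Children.end());
  for (Node *C : Snapshot)
    reparent(*C, New);
}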
@@ -388,8 +345,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
   }
 }

-/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
-/// from NewBB. This also updates AliasAnalysis, if available.
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
 static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
                            ArrayRef<BasicBlock *> Preds, BranchInst *BI,
                            bool HasLoopExit) {
@@ -456,21 +413,6 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
   }
 }

-/// SplitBlockPredecessors - This method introduces at least one new basic block
-/// into the function and moves some of the predecessors of BB to be
-/// predecessors of the new block. The new predecessors are indicated by the
-/// Preds array. The new block is given a suffix of 'Suffix'. Returns new basic
-/// block to which predecessors from Preds are now pointing.
-///
-/// If BB is a landingpad block then additional basicblock might be introduced.
-/// It will have suffix of 'Suffix'+".split_lp".
-/// See SplitLandingPadPredecessors for more details on this case.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
-/// preserve LoopSimplify (because it's complicated to handle the case where one
-/// of the edges being split is an exit of a loop with other exits).
-///
 BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
                                          ArrayRef<BasicBlock *> Preds,
                                          const char *Suffix, DominatorTree *DT,
@@ -529,19 +471,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
   return NewBB;
 }

-/// SplitLandingPadPredecessors - This method transforms the landing pad,
-/// OrigBB, by introducing two new basic blocks into the function. One of those
-/// new basic blocks gets the predecessors listed in Preds. The other basic
-/// block gets the remaining predecessors of OrigBB. The landingpad instruction
-/// OrigBB is clone into both of the new basic blocks. The new blocks are given
-/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
-/// it does not preserve LoopSimplify (because it's complicated to handle the
-/// case where one of the edges being split is an exit of a loop with other
-/// exits).
-///
 void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
                                        ArrayRef<BasicBlock *> Preds,
                                        const char *Suffix1, const char *Suffix2,
@@ -603,9 +532,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
   BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());

   // Move the remaining edges from OrigBB to point to NewBB2.
-  for (SmallVectorImpl<BasicBlock*>::iterator
-       i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
-    (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+  for (BasicBlock *NewBB2Pred : NewBB2Preds)
+    NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);

   // Update DominatorTree, LoopInfo, and LCCSA analysis information.
   HasLoopExit = false;
@@ -646,11 +574,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
   }
 }

-/// FoldReturnIntoUncondBranch - This method duplicates the specified return
-/// instruction into a predecessor which ends in an unconditional branch. If
-/// the return instruction returns a value defined by a PHI, propagate the
-/// right value into the return. It returns the new return instruction in the
-/// predecessor.
 ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
                                              BasicBlock *Pred) {
   Instruction *UncondBranch = Pred->getTerminator();
@@ -689,31 +612,10 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
   return cast<ReturnInst>(NewRet);
 }

-/// SplitBlockAndInsertIfThen - Split the containing block at the
-/// specified instruction - everything before and including SplitBefore stays
-/// in the old basic block, and everything after SplitBefore is moved to a
-/// new block. The two blocks are connected by a conditional branch
-/// (with value of Cmp being the condition).
-/// Before:
-///   Head
-///   SplitBefore
-///   Tail
-/// After:
-///   Head
-///   if (Cond)
-///     ThenBlock
-///   SplitBefore
-///   Tail
-///
-/// If Unreachable is true, then ThenBlock ends with
-/// UnreachableInst, otherwise it branches to Tail.
-/// Returns the NewBasicBlock's terminator.
-
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
-                                                Instruction *SplitBefore,
-                                                bool Unreachable,
-                                                MDNode *BranchWeights,
-                                                DominatorTree *DT) {
+TerminatorInst *
+llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+                                bool Unreachable, MDNode *BranchWeights,
+                                DominatorTree *DT, LoopInfo *LI) {
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
   TerminatorInst *HeadOldTerm = Head->getTerminator();
@@ -735,7 +637,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
       std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());

       DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
-      for (auto Child : Children)
+      for (DomTreeNode *Child : Children)
         DT->changeImmediateDominator(Child, NewNode);

       // Head dominates ThenBlock.
@@ -743,23 +645,15 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
     }
   }

+  if (LI) {
+    Loop *L = LI->getLoopFor(Head);
+    L->addBasicBlockToLoop(ThenBlock, *LI);
+    L->addBasicBlockToLoop(Tail, *LI);
+  }
+
   return CheckTerm;
 }

-/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
-/// but also creates the ElseBlock.
-/// Before:
-///   Head
-///   SplitBefore
-///   Tail
-/// After:
-///   Head
-///   if (Cond)
-///     ThenBlock
-///   else
-///     ElseBlock
-///   SplitBefore
-///   Tail
 void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
                                          TerminatorInst **ThenTerm,
                                          TerminatorInst **ElseTerm,
                                          MDNode *BranchWeights) {
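SplitBlockAndInsertIfThen now takes an optional LoopInfo and registers both new blocks with the enclosing loop. A hedged usage sketch against the updated signature (instrumentBefore is a hypothetical caller; note the hunk above does not null-check the loop, so this assumes Inst sits inside a loop whenever LI is non-null):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Insert "if (Cond) { ... }" before Inst, keeping LoopInfo up to date so a
// caller running under the loop pass manager does not invalidate it.
static void instrumentBefore(Instruction *Inst, Value *Cond, LoopInfo *LI) {
  TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(
      Cond, Inst, /*Unreachable=*/false, /*BranchWeights=*/nullptr,
      /*DT=*/nullptr, LI);
  IRBuilder<> IRB(ThenTerm);
  // ... emit the conditional code at IRB's insert point ...
  (void)IRB;
}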
@@ -781,15 +675,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
 }

-/// GetIfCondition - Given a basic block (BB) with two predecessors,
-/// check to see if the merge at this block is due
-/// to an "if condition". If so, return the boolean condition that determines
-/// which entry into BB will be taken. Also, return by references the block
-/// that will be entered from if the condition is true, and the block that will
-/// be entered if the condition is false.
-///
-/// This does no checking to see if the true/false blocks have large or unsavory
-/// instructions in them.
 Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                             BasicBlock *&IfFalse) {
   PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 95825991cee96..49b646a041f50 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -76,11 +76,10 @@ FunctionPass *llvm::createBreakCriticalEdgesPass() {
 // Implementation of the external critical edge manipulation functions
 //===----------------------------------------------------------------------===//

-/// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
-/// may require new PHIs in the new exit block. This function inserts the
-/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB
-/// is the new loop exit block, and DestBB is the old loop exit, now the
-/// successor of SplitBB.
+/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
+/// exit block. This function inserts the new PHIs, as needed. Preds is a list
+/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
+/// the old loop exit, now the successor of SplitBB.
 static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
                                        BasicBlock *SplitBB,
                                        BasicBlock *DestBB) {
@@ -112,25 +111,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
   }
 }

-/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge. This will update DominatorTree information if it
-/// is available, thus calling this pass will not invalidate either of them.
-/// This returns the new block if the edge was split, null otherwise.
-///
-/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
-/// specified successor will be merged into the same critical edge block.
-/// This is most commonly interesting with switch instructions, which may
-/// have many edges to any one destination. This ensures that all edges to that
-/// dest go to one block instead of each going to a different block, but isn't
-/// the standard definition of a "critical edge".
-///
-/// It is invalid to call this function on a critical edge that starts at an
-/// IndirectBrInst. Splitting these edges will almost always create an invalid
-/// program because the address of the new block won't be the one that is jumped
-/// to.
-///
-BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
-                                    const CriticalEdgeSplittingOptions &Options) {
+BasicBlock *
+llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+                        const CriticalEdgeSplittingOptions &Options) {
   if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
     return nullptr;

diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 64b44a6b79194..f4260a9ff9804 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -13,6 +13,7 @@

 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -25,81 +26,742 @@

 using namespace llvm;

-/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
-Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+#define DEBUG_TYPE "build-libcalls"
+
+//- Infer Attributes ---------------------------------------------------------//
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+
+static bool setDoesNotAccessMemory(Function &F) {
+  if (F.doesNotAccessMemory())
+    return false;
+  F.setDoesNotAccessMemory();
+  ++NumReadNone;
+  return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+  if (F.onlyReadsMemory())
+    return false;
+  F.setOnlyReadsMemory();
+  ++NumReadOnly;
+  return true;
+}
+
+static bool setOnlyAccessesArgMemory(Function &F) {
+  if (F.onlyAccessesArgMemory())
+    return false;
+  F.setOnlyAccessesArgMemory();
+  ++NumArgMemOnly;
+  return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+  if (F.doesNotThrow())
+    return false;
+  F.setDoesNotThrow();
+  ++NumNoUnwind;
+  return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned n) {
+  if (F.doesNotCapture(n))
+    return false;
+  F.setDoesNotCapture(n);
+  ++NumNoCapture;
+  return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+  if (F.onlyReadsMemory(n))
+    return false;
+  F.setOnlyReadsMemory(n);
+  ++NumReadOnlyArg;
+  return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned n) {
+  if (F.doesNotAlias(n))
+    return false;
+  F.setDoesNotAlias(n);
+  ++NumNoAlias;
+  return true;
+}
+
+static bool setNonNull(Function &F, unsigned n) {
+  assert((n != AttributeSet::ReturnIndex || F.getReturnType()->isPointerTy()) &&
+         "nonnull applies only to pointers");
+  if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
+    return false;
+  F.addAttribute(n, Attribute::NonNull);
+  ++NumNonNull;
+  return true;
+}
+
+bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
+  LibFunc::Func TheLibFunc;
+  if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
+    return false;
+
+  bool Changed = false;
+  switch (TheLibFunc) {
+  case LibFunc::strlen:
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::strchr:
+  case LibFunc::strrchr:
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+  case LibFunc::strtol:
+  case LibFunc::strtod:
+  case LibFunc::strtof:
+  case LibFunc::strtoul:
+  case LibFunc::strtoll:
+  case LibFunc::strtold:
+  case LibFunc::strtoull:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::strcpy:
+  case LibFunc::stpcpy:
+  case LibFunc::strcat:
+  case LibFunc::strncat:
+  case LibFunc::strncpy:
+  case LibFunc::stpncpy:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::strxfrm:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::strcmp:      // 0,1
+  case LibFunc::strspn:      // 0,1
+  case LibFunc::strncmp:     // 0,1
+  case LibFunc::strcspn:     // 0,1
+  case LibFunc::strcoll:     // 0,1
+  case LibFunc::strcasecmp:  // 0,1
+  case LibFunc::strncasecmp: //
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::strstr:
+  case LibFunc::strpbrk:
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::strtok:
+  case LibFunc::strtok_r:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::scanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::setbuf:
+  case LibFunc::setvbuf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::strdup:
+  case LibFunc::strndup:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::stat:
+  case LibFunc::statvfs:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::sscanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::sprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::snprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 3);
+    Changed |= setOnlyReadsMemory(F, 3);
+    return Changed;
+  case LibFunc::setitimer:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 3);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
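Every setter above follows the same idempotent shape: return false if the attribute is already present, otherwise set it, bump the matching STATISTIC counter, and return true so inferLibFuncAttributes can accumulate an accurate Changed flag. A library-free C++ sketch of that pattern (Function, the Attr enum, and the counter here are stand-ins, not LLVM types):

#include <bitset>
#include <cstdio>

enum Attr { NoUnwind = 0, ReadOnly = 1, MaxAttr = 2 };

struct Function {
  std::bitset<MaxAttr> Attrs;
};

static unsigned NumNoUnwind = 0;

static bool setDoesNotThrow(Function &F) {
  if (F.Attrs.test(NoUnwind))
    return false; // already inferred: report "no change"
  F.Attrs.set(NoUnwind);
  ++NumNoUnwind; // mirrors the STATISTIC counters above
  return true;
}

int main() {
  Function F;
  bool Changed = setDoesNotThrow(F); // true: attribute newly added
  Changed |= setDoesNotThrow(F);     // second call is a no-op
  std::printf("%d %u\n", Changed, NumNoUnwind); // prints "1 1"
}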
+  case LibFunc::system:
+    // May throw; "system" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::malloc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::memcmp:
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::memchr:
+  case LibFunc::memrchr:
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+  case LibFunc::modf:
+  case LibFunc::modff:
+  case LibFunc::modfl:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::memcpy:
+  case LibFunc::memccpy:
+  case LibFunc::memmove:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::memcpy_chk:
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+  case LibFunc::memalign:
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::mkdir:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::mktime:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::realloc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::read:
+    // May throw; "read" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::rewind:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::rmdir:
+  case LibFunc::remove:
+  case LibFunc::realpath:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::rename:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::readlink:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::write:
+    // May throw; "write" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::bcopy:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::bcmp:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::bzero:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::calloc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::chmod:
+  case LibFunc::chown:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::ctermid:
+  case LibFunc::clearerr:
+  case LibFunc::closedir:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::atoi:
+  case LibFunc::atol:
+  case LibFunc::atof:
+  case LibFunc::atoll:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::access:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::fopen:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::fdopen:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::feof:
+  case LibFunc::free:
+  case LibFunc::fseek:
+  case LibFunc::ftell:
+  case LibFunc::fgetc:
+  case LibFunc::fseeko:
+  case LibFunc::ftello:
+  case LibFunc::fileno:
+  case LibFunc::fflush:
+  case LibFunc::fclose:
+  case LibFunc::fsetpos:
+  case LibFunc::flockfile:
+  case LibFunc::funlockfile:
+  case LibFunc::ftrylockfile:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::ferror:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F);
+    return Changed;
+  case LibFunc::fputc:
+  case LibFunc::fstat:
+  case LibFunc::frexp:
+  case LibFunc::frexpf:
+  case LibFunc::frexpl:
+  case LibFunc::fstatvfs:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::fgets:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 3);
+    return Changed;
+  case LibFunc::fread:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 4);
+    return Changed;
+  case LibFunc::fwrite:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 4);
+    // FIXME: readonly #1?
+    return Changed;
+  case LibFunc::fputs:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::fscanf:
+  case LibFunc::fprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::fgetpos:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::getc:
+  case LibFunc::getlogin_r:
+  case LibFunc::getc_unlocked:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::getenv:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::gets:
+  case LibFunc::getchar:
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+  case LibFunc::getitimer:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::getpwnam:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::ungetc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::uname:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::unlink:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::unsetenv:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::utime:
+  case LibFunc::utimes:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::putc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::puts:
+  case LibFunc::printf:
+  case LibFunc::perror:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::pread:
+    // May throw; "pread" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::pwrite:
+    // May throw; "pwrite" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::putchar:
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+  case LibFunc::popen:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::pclose:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::vscanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::vsscanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::vfscanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::valloc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::vprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::vfprintf:
+  case LibFunc::vsprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::vsnprintf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 3);
+    Changed |= setOnlyReadsMemory(F, 3);
+    return Changed;
+  case LibFunc::open:
+    // May throw; "open" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::opendir:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::tmpfile:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::times:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::htonl:
+  case LibFunc::htons:
+  case LibFunc::ntohl:
+  case LibFunc::ntohs:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAccessMemory(F);
+    return Changed;
+  case LibFunc::lstat:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::lchown:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::qsort:
+    // May throw; places call through function pointer.
+    Changed |= setDoesNotCapture(F, 4);
+    return Changed;
+  case LibFunc::dunder_strdup:
+  case LibFunc::dunder_strndup:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::dunder_strtok_r:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::under_IO_getc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::under_IO_putc:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::dunder_isoc99_scanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::stat64:
+  case LibFunc::lstat64:
+  case LibFunc::statvfs64:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::dunder_isoc99_sscanf:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::fopen64:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  case LibFunc::fseeko64:
+  case LibFunc::ftello64:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    return Changed;
+  case LibFunc::tmpfile64:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotAlias(F, 0);
+    return Changed;
+  case LibFunc::fstat64:
+  case LibFunc::fstatvfs64:
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::open64:
+    // May throw; "open" is a valid pthread cancellation point.
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
+    return Changed;
+  case LibFunc::gettimeofday:
+    // Currently some platforms have the restrict keyword on the arguments to
+    // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+    // arguments.
+    Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    return Changed;
+  case LibFunc::Znwj: // new(unsigned int)
+  case LibFunc::Znwm: // new(unsigned long)
+  case LibFunc::Znaj: // new[](unsigned int)
+  case LibFunc::Znam: // new[](unsigned long)
+  case LibFunc::msvc_new_int: // new(unsigned int)
+  case LibFunc::msvc_new_longlong: // new(unsigned long long)
+  case LibFunc::msvc_new_array_int: // new[](unsigned int)
+  case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+    // Operator new always returns a nonnull noalias pointer
+    Changed |= setNonNull(F, AttributeSet::ReturnIndex);
+    Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+    return Changed;
+  //TODO: add LibFunc entries for:
+  //case LibFunc::memset_pattern4:
+  //case LibFunc::memset_pattern8:
+  case LibFunc::memset_pattern16:
+    Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
+    return Changed;
+  // int __nvvm_reflect(const char *)
+  case LibFunc::nvvm_reflect:
+    Changed |= setDoesNotAccessMemory(F);
+    Changed |= setDoesNotThrow(F);
+    return Changed;
+
+  default:
+    // FIXME: It'd be really nice to cover all the library functions we're
+    // aware of here.
+    return false;
+  }
+}
+
+//- Emit LibCalls ------------------------------------------------------------//
+
+Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
   unsigned AS = V->getType()->getPointerAddressSpace();
   return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
 }

-/// EmitStrLen - Emit a call to the strlen function to the builder, for the
-/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::strlen))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS[2];
-  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
-  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
-  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+  Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Constant *StrLen = M->getOrInsertFunction(
-      "strlen", AttributeSet::get(M->getContext(), AS),
-      DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr);
-  CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+  Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
+                                            B.getInt8PtrTy(), nullptr);
+  inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
+  CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
   if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());

   return CI;
 }
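With the attribute logic centralized, each emitter shrinks to the same recipe: declare the function with getOrInsertFunction, run inferLibFuncAttributes over the declaration, and build the call. A hedged usage sketch of the renamed emitStrLen (replaceWithStrlen is a hypothetical caller, not part of the patch):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

// Emit "strlen(Str)" at B's insert point; returns an intptr_t-sized length,
// or nullptr when the target library has no strlen.
static Value *replaceWithStrlen(Value *Str, IRBuilder<> &B,
                                const TargetLibraryInfo *TLI) {
  const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
  return emitStrLen(Str, B, DL, TLI);
}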
-/// EmitStrChr - Emit a call to the strchr function to the builder, for the
-/// specified pointer and character. Ptr is required to be some pointer type,
-/// and the return value has 'i8*' type.
-Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
                         const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::strchr))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
-  AttributeSet AS =
-      AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+  Module *M = B.GetInsertBlock()->getModule();
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
-  Constant *StrChr = M->getOrInsertFunction("strchr",
-                                            AttributeSet::get(M->getContext(),
-                                                              AS),
-                                            I8Ptr, I8Ptr, I32Ty, nullptr);
+  Constant *StrChr =
+      M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr);
+  inferLibFuncAttributes(*M->getFunction("strchr"), *TLI);
   CallInst *CI = B.CreateCall(
-      StrChr, {CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
+      StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
   if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }

-/// EmitStrNCmp - Emit a call to the strncmp function.
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::strncmp))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS[3];
-  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
-  AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
-  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
-  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+  Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *StrNCmp = M->getOrInsertFunction(
-      "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
-      B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+  Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(),
+                                          B.getInt8PtrTy(), B.getInt8PtrTy(),
+                                          DL.getIntPtrType(Context), nullptr);
+  inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI);
   CallInst *CI = B.CreateCall(
-      StrNCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "strncmp");
+      StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp");

   if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
@@ -107,64 +769,46 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,

   return CI;
 }

-/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
-/// specified pointer arguments.
-Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
                         const TargetLibraryInfo *TLI, StringRef Name) {
   if (!TLI->has(LibFunc::strcpy))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS[2];
-  AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
-  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
-                            Attribute::NoUnwind);
+  Module *M = B.GetInsertBlock()->getModule();
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrCpy = M->getOrInsertFunction(Name,
-                                         AttributeSet::get(M->getContext(), AS),
-                                         I8Ptr, I8Ptr, I8Ptr, nullptr);
+  Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr);
+  inferLibFuncAttributes(*M->getFunction(Name), *TLI);
   CallInst *CI =
-      B.CreateCall(StrCpy, {CastToCStr(Dst, B), CastToCStr(Src, B)}, Name);
+      B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
   if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }

-/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
-/// specified pointer arguments.
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
                          const TargetLibraryInfo *TLI, StringRef Name) {
   if (!TLI->has(LibFunc::strncpy))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS[2];
-  AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
-  AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
-                            Attribute::NoUnwind);
+  Module *M = B.GetInsertBlock()->getModule();
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrNCpy = M->getOrInsertFunction(Name,
-                                          AttributeSet::get(M->getContext(),
-                                                            AS),
-                                          I8Ptr, I8Ptr, I8Ptr,
+  Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
                                           Len->getType(), nullptr);
+  inferLibFuncAttributes(*M->getFunction(Name), *TLI);
   CallInst *CI = B.CreateCall(
-      StrNCpy, {CastToCStr(Dst, B), CastToCStr(Src, B), Len}, "strncpy");
+      StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy");
   if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }

-/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
-/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
-/// are pointers.
-Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
                            IRBuilder<> &B, const DataLayout &DL,
                            const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::memcpy_chk))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Module *M = B.GetInsertBlock()->getModule();
   AttributeSet AS;
   AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
                          Attribute::NoUnwind);
@@ -173,30 +817,26 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
       "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
       B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
       DL.getIntPtrType(Context), nullptr);
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
+  Dst = castToCStr(Dst, B);
+  Src = castToCStr(Src, B);
   CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
   if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }

-/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
-/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::memchr))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS;
-  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
-  AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
+  Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemChr = M->getOrInsertFunction(
-      "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
-      B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr);
-  CallInst *CI = B.CreateCall(MemChr, {CastToCStr(Ptr, B), Val, Len}, "memchr");
+  Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(),
+                                         B.getInt8PtrTy(), B.getInt32Ty(),
+                                         DL.getIntPtrType(Context), nullptr);
+  inferLibFuncAttributes(*M->getFunction("memchr"), *TLI);
+  CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr");

   if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
@@ -204,25 +844,19 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,

   return CI;
 }

-/// EmitMemCmp - Emit a call to the memcmp function.
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::memcmp))
     return nullptr;

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  AttributeSet AS[3];
-  AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
-  AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
-  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
-  AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
-
+  Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemCmp = M->getOrInsertFunction(
-      "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
-      B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+  Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(),
+                                         B.getInt8PtrTy(), B.getInt8PtrTy(),
+                                         DL.getIntPtrType(Context), nullptr);
+  inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI);
   CallInst *CI = B.CreateCall(
-      MemCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "memcmp");
+      MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp");

   if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
@@ -231,7 +865,8 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
 }

 /// Append a suffix to the function name according to the type of 'Op'.
-static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) {
+static void appendTypeSuffix(Value *Op, StringRef &Name,
+                             SmallString<20> &NameBuffer) {
   if (!Op->getType()->isDoubleTy()) {
     NameBuffer += Name;

@@ -242,19 +877,14 @@ static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBu
     Name = NameBuffer;
   }
-  return;
 }

-/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
-/// 'floor'). This function is known to take a single of type matching 'Op' and
-/// returns one value with the same type. If 'Op' is a long double, 'l' is
-/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
                                   const AttributeSet &Attrs) {
   SmallString<20> NameBuffer;
-  AppendTypeSuffix(Op, Name, NameBuffer);
+  appendTypeSuffix(Op, Name, NameBuffer);

-  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Module *M = B.GetInsertBlock()->getModule();
   Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
                                          Op->getType(), nullptr);
   CallInst *CI = B.CreateCall(Callee, Op, Name);
@@ -265,19 +895,14 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
   return CI;
 }

-/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name'
-/// (e.g. 'fmin'). This function is known to take type matching 'Op1' and 'Op2'
-/// and return one value with the same type. If 'Op1/Op2' are long double, 'l'
-/// is added as the suffix of name, if 'Op1/Op2' is a float, we add a 'f'
-/// suffix.
-Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, IRBuilder<> &B, const AttributeSet &Attrs) { SmallString<20> NameBuffer; - AppendTypeSuffix(Op1, Name, NameBuffer); + appendTypeSuffix(Op1, Name, NameBuffer); - Module *M = B.GetInsertBlock()->getParent()->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), - Op1->getType(), Op2->getType(), nullptr); + Module *M = B.GetInsertBlock()->getModule(); + Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), + Op2->getType(), nullptr); CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); CI->setAttributes(Attrs); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) @@ -286,14 +911,12 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, return CI; } -/// EmitPutChar - Emit a call to the putchar function. This assumes that Char -/// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, +Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return nullptr; - Module *M = B.GetInsertBlock()->getParent()->getParent(); + Module *M = B.GetInsertBlock()->getModule(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty(), nullptr); CallInst *CI = B.CreateCall(PutChar, @@ -308,54 +931,31 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, return CI; } -/// EmitPutS - Emit a call to the puts function. This assumes that Str is -/// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, +Value *llvm::emitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return nullptr; - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[2]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - - Value *PutS = M->getOrInsertFunction("puts", - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - nullptr); - CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + Module *M = B.GetInsertBlock()->getModule(); + Value *PutS = + M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr); + inferLibFuncAttributes(*M->getFunction("puts"), *TLI); + CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); return CI; } -/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is -/// an integer and File is a pointer to FILE. 
-Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, +Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return nullptr; - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[2]; - AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); - AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - Constant *F; + Module *M = B.GetInsertBlock()->getModule(); + Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(), + File->getType(), nullptr); if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt32Ty(), File->getType(), - nullptr); - else - F = M->getOrInsertFunction("fputc", - B.getInt32Ty(), - B.getInt32Ty(), - File->getType(), nullptr); + inferLibFuncAttributes(*M->getFunction("fputc"), *TLI); Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, "chari"); CallInst *CI = B.CreateCall(F, {Char, File}, "fputc"); @@ -365,66 +965,40 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, return CI; } -/// EmitFPutS - Emit a call to the puts function. Str is required to be a -/// pointer and File is a pointer to FILE. -Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, +Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return nullptr; - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[3]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); - AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + Module *M = B.GetInsertBlock()->getModule(); StringRef FPutsName = TLI->getName(LibFunc::fputs); - Constant *F; + Constant *F = M->getOrInsertFunction( + FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr); if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - File->getType(), nullptr); - else - F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), - B.getInt8PtrTy(), - File->getType(), nullptr); - CallInst *CI = B.CreateCall(F, {CastToCStr(Str, B), File}, "fputs"); + inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); return CI; } -/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is -/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, +Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return nullptr; - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[3]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture); - AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); - Constant *F; + Constant *F = M->getOrInsertFunction( + FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(), + nullptr); if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction( - FWriteName, AttributeSet::get(M->getContext(), AS), - DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context), - DL.getIntPtrType(Context), File->getType(), nullptr); - else - F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context), - B.getInt8PtrTy(), DL.getIntPtrType(Context), - DL.getIntPtrType(Context), File->getType(), - nullptr); + inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI); CallInst *CI = - B.CreateCall(F, {CastToCStr(Ptr, B), Size, + B.CreateCall(F, {castToCStr(Ptr, B), Size, ConstantInt::get(DL.getIntPtrType(Context), 1), File}); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 8308a9b69149d..5aec0dce34db8 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -11,7 +11,9 @@ add_llvm_library(LLVMTransformUtils CodeExtractor.cpp CtorUtils.cpp DemoteRegToStack.cpp + Evaluator.cpp FlattenCFG.cpp + FunctionImportUtils.cpp GlobalStatus.cpp InlineFunction.cpp InstructionNamer.cpp @@ -26,10 +28,13 @@ add_llvm_library(LLVMTransformUtils LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + MemorySSA.cpp MetaRenamer.cpp ModuleUtils.cpp + NameAnonFunctions.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp + SanitizerStats.cpp SimplifyCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 6454afb8bc42d..c5ca56360fc88 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -119,6 +119,15 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex, OldAttrs.getFnAttributes())); + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. @@ -163,65 +172,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, TypeMapper, Materializer); } -// Find the MDNode which corresponds to the subprogram data that described F. 
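The getAllMetadata/addMetadata loop added to CloneFunctionInto above subsumes the hand-rolled subprogram bookkeeping being deleted below: every function-level attachment is remapped through the VMap and re-attached to the clone. A hedged sketch of the same copy for the degenerate case where no remapping is needed (copyFunctionMetadata is an illustrative name, not part of this patch):

static void copyFunctionMetadata(const Function *OldF, Function *NewF) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  OldF->getAllMetadata(MDs);
  for (const auto &MD : MDs)
    // Verbatim copy; the real loop pipes MD.second through MapMetadata so
    // references into the old function are rewritten to the clone.
    NewF->addMetadata(MD.first, *MD.second);
}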
-static DISubprogram *FindSubprogram(const Function *F, - DebugInfoFinder &Finder) { - for (DISubprogram *Subprogram : Finder.subprograms()) { - if (Subprogram->describes(F)) - return Subprogram; - } - return nullptr; -} - -// Add an operand to an existing MDNode. The new operand will be added at the -// back of the operand list. -static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs, - Metadata *NewSP) { - SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs.size() + 1); - for (auto *SP : SPs) - NewSPs.push_back(SP); - NewSPs.push_back(NewSP); - CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs)); -} - -// Clone the module-level debug info associated with OldFunc. The cloned data -// will point to NewFunc instead. -static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap) { - DebugInfoFinder Finder; - Finder.processModule(*OldFunc->getParent()); - - const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); - if (!OldSubprogramMDNode) return; - - auto *NewSubprogram = - cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); - NewFunc->setSubprogram(NewSubprogram); - - for (auto *CU : Finder.compile_units()) { - auto Subprograms = CU->getSubprograms(); - // If the compile unit's function list contains the old function, it should - // also contain the new one. - for (auto *SP : Subprograms) { - if (SP == OldSubprogramMDNode) { - AddOperand(CU, Subprograms, NewSubprogram); - break; - } - } - } -} - -/// Return a copy of the specified function, but without -/// embedding the function into another module. Also, any references specified -/// in the VMap are changed to refer to their mapped value instead of the -/// original one. If any of the arguments to the function are in the VMap, -/// the arguments are deleted from the resultant function. The VMap is -/// updated to include mappings from all of the instructions and basicblocks in -/// the function from their old to new values. +/// Return a copy of the specified function and add it to that function's +/// module. Also, any references specified in the VMap are changed to refer to +/// their mapped value instead of the original one. If any of the arguments to +/// the function are in the VMap, the arguments are deleted from the resultant +/// function. The VMap is updated to include mappings from all of the +/// instructions and basicblocks in the function from their old to new values. /// -Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, - bool ModuleLevelChanges, +Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo) { std::vector<Type*> ArgTypes; @@ -237,7 +195,8 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, ArgTypes, F->getFunctionType()->isVarArg()); // Create the new function... - Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName()); + Function *NewF = + Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent()); // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); @@ -247,11 +206,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, VMap[&I] = &*DestI++; // Add mapping to VMap } - if (ModuleLevelChanges) - CloneDebugInfoMetadata(NewF, F, VMap); - SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
- CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CodeInfo); + return NewF; } @@ -338,9 +296,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Value *MappedV = VMap.lookup(V)) V = MappedV; - VMap[&*II] = V; - delete NewInst; - continue; + if (!NewInst->mayHaveSideEffects()) { + VMap[&*II] = V; + delete NewInst; + continue; + } } } @@ -372,7 +332,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... if (!Cond) { - Value *V = VMap[BI->getCondition()]; + Value *V = VMap.lookup(BI->getCondition()); Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -388,7 +348,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); if (!Cond) { // Or known constant after constant prop in the callee... - Value *V = VMap[SI->getCondition()]; + Value *V = VMap.lookup(SI->getCondition()); Cond = dyn_cast_or_null<ConstantInt>(V); } if (Cond) { // Constant fold to uncond branch! @@ -475,7 +435,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (const BasicBlock &BI : *OldFunc) { - Value *V = VMap[&BI]; + Value *V = VMap.lookup(&BI); BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. @@ -519,7 +479,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { - Value *V = VMap[PN->getIncomingBlock(pred)]; + Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), VMap, @@ -529,7 +489,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); - --pred, --e; // Revisit the next entry. + --pred; // Revisit the next entry. + --e; } } } @@ -558,10 +519,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { - for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), - E = PredCount.end(); PCI != E; ++PCI) { - BasicBlock *Pred = PCI->first; - for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) + for (const auto &PCI : PredCount) { + BasicBlock *Pred = PCI.first; + for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } @@ -684,7 +644,7 @@ void llvm::remapInstructionsInBlocks( for (auto *BB : Blocks) for (auto &Inst : *BB) RemapInstruction(&Inst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } /// \brief Clones a loop \p OrigLoop. 
Returns the loop and the blocks in \p @@ -697,6 +657,8 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl<BasicBlock *> &Blocks) { + assert(OrigLoop->getSubLoops().empty() && + "Loop to be cloned cannot have inner loop"); Function *F = OrigLoop->getHeader()->getParent(); Loop *ParentLoop = OrigLoop->getParentLoop(); @@ -727,13 +689,19 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, // Update LoopInfo. NewLoop->addBasicBlockToLoop(NewBB, *LI); - // Update DominatorTree. - BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); + // Add DominatorTree node. After seeing all blocks, update to correct IDom. + DT->addNewBlock(NewBB, NewPH); Blocks.push_back(NewBB); } + for (BasicBlock *BB : OrigLoop->getBlocks()) { + // Update DominatorTree. + BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]), + cast<BasicBlock>(VMap[IDomBB])); + } + // Move them physically from the end of the block list. F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), NewPH); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index ab083353ece6b..17e34c4ffa0f5 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::CloneModule(const Module *M, std::unique_ptr<Module> llvm::CloneModule( const Module *M, ValueToValueMapTy &VMap, - std::function<bool(const GlobalValue *)> ShouldCloneDefinition) { + function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { // First off, we need to create the new module. std::unique_ptr<Module> New = llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext()); @@ -53,7 +53,7 @@ std::unique_ptr<Module> llvm::CloneModule( for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { GlobalVariable *GV = new GlobalVariable(*New, - I->getType()->getElementType(), + I->getValueType(), I->isConstant(), I->getLinkage(), (Constant*) nullptr, I->getName(), (GlobalVariable*) nullptr, @@ -64,12 +64,11 @@ std::unique_ptr<Module> llvm::CloneModule( } // Loop over the functions in the module, making external functions as before - for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *NF = - Function::Create(cast<FunctionType>(I->getType()->getElementType()), - I->getLinkage(), I->getName(), New.get()); - NF->copyAttributesFrom(&*I); - VMap[&*I] = NF; + for (const Function &I : *M) { + Function *NF = Function::Create(cast<FunctionType>(I.getValueType()), + I.getLinkage(), I.getName(), New.get()); + NF->copyAttributesFrom(&I); + VMap[&I] = NF; } // Loop over the aliases in the module @@ -109,6 +108,9 @@ std::unique_ptr<Module> llvm::CloneModule( // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { + if (I->isDeclaration()) + continue; + GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); if (!ShouldCloneDefinition(&*I)) { // Skip after setting the correct linkage for an external reference. @@ -121,27 +123,31 @@ std::unique_ptr<Module> llvm::CloneModule( // Similarly, copy over function bodies now... 
// - for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast<Function>(VMap[&*I]); - if (!ShouldCloneDefinition(&*I)) { + for (const Function &I : *M) { + if (I.isDeclaration()) + continue; + + Function *F = cast<Function>(VMap[&I]); + if (!ShouldCloneDefinition(&I)) { // Skip after setting the correct linkage for an external reference. F->setLinkage(GlobalValue::ExternalLinkage); + // Personality function is not valid on a declaration. + F->setPersonalityFn(nullptr); continue; } - if (!I->isDeclaration()) { - Function::arg_iterator DestI = F->arg_begin(); - for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); - ++J) { - DestI->setName(J->getName()); - VMap[&*J] = &*DestI++; - } - - SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns); + + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end(); + ++J) { + DestI->setName(J->getName()); + VMap[&*J] = &*DestI++; } - if (I->hasPersonalityFn()) - F->setPersonalityFn(MapValue(I->getPersonalityFn(), VMap)); + SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns); + + if (I.hasPersonalityFn()) + F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); } // And aliases diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 823696d88e652..9f2181f87cee1 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -77,15 +77,15 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - for (IteratorT I = BBBegin, E = BBEnd; I != E; ++I) { - if (!Result.insert(*I)) + do { + if (!Result.insert(*BBBegin)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!isBlockValidForExtraction(**I)) { + if (!isBlockValidForExtraction(**BBBegin)) { Result.clear(); return Result; } - } + } while (++BBBegin != BBEnd); #ifndef NDEBUG for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), @@ -159,23 +159,18 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { - for (SetVector<BasicBlock *>::const_iterator I = Blocks.begin(), - E = Blocks.end(); - I != E; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
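Concretely: in a region containing %x = add i32 %a, 1 and %y = mul i32 %x, 2, where %a is defined before the region and %y is read after it, %a becomes an input and %y an output. A standalone toy version of the classification the loop below performs (ToyInst and the string-based sets are illustrative, not LLVM types):

#include <set>
#include <string>
#include <vector>

struct ToyInst {
  std::string Def;              // value this instruction defines
  std::vector<std::string> Ops; // values it reads
};

// Operands defined outside the region are inputs; values defined inside the
// region but read outside it are outputs.
static void findInputsOutputsToy(const std::vector<ToyInst> &Region,
                                 const std::set<std::string> &UsedOutside,
                                 std::set<std::string> &Inputs,
                                 std::set<std::string> &Outputs) {
  std::set<std::string> DefinedHere;
  for (const ToyInst &I : Region)
    DefinedHere.insert(I.Def);
  for (const ToyInst &I : Region) {
    for (const std::string &Op : I.Ops)
      if (!DefinedHere.count(Op))
        Inputs.insert(Op);
    if (UsedOutside.count(I.Def))
      Outputs.insert(I.Def);
  }
}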
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) { - for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); - OI != OE; ++OI) + for (Instruction &II : *BB) { + for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; + ++OI) if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); - for (User *U : II->users()) + for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { - Outputs.insert(&*II); + Outputs.insert(&II); break; } } @@ -263,25 +258,21 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { } void CodeExtractor::splitReturnBlocks() { - for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) - if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { + for (BasicBlock *Block : Blocks) + if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) { BasicBlock *New = - (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret"); + Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. - DomTreeNode *OldNode = DT->getNode(*I); - SmallVector<DomTreeNode*, 8> Children; - for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); - DI != DE; ++DI) - Children.push_back(*DI); + DomTreeNode *OldNode = DT->getNode(Block); + SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), + OldNode->end()); - DomTreeNode *NewNode = DT->addNewBlock(New, *I); + DomTreeNode *NewNode = DT->addNewBlock(New, Block); - for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) - DT->changeImmediateDominator(*I, NewNode); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); } } } @@ -310,28 +301,26 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, std::vector<Type*> paramTy; // Add the types of the input values to the function's argument list - for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end(); - i != e; ++i) { - const Value *value = *i; + for (Value *value : inputs) { DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. 
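For example, with AggregateArgs disabled, one i32 input and one i32 output yield the extracted signature void f(i32, i32*): inputs are passed by value and each output is returned through a pointer parameter, exactly as the loop below arranges. A sketch of building that type directly with the IR API (the function name and the in-scope LLVMContext are assumptions for illustration):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static llvm::FunctionType *exampleExtractedType(llvm::LLVMContext &Ctx) {
  std::vector<llvm::Type *> ParamTy;
  ParamTy.push_back(llvm::Type::getInt32Ty(Ctx));       // i32 input, by value
  ParamTy.push_back(llvm::PointerType::getUnqual(       // i32 output, via ptr
      llvm::Type::getInt32Ty(Ctx)));
  return llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), ParamTy,
                                 /*isVarArg=*/false);
}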
- for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end(); - I != E; ++I) { - DEBUG(dbgs() << "instr used in func: " << **I << "\n"); + for (Value *output : outputs) { + DEBUG(dbgs() << "instr used in func: " << *output << "\n"); if (AggregateArgs) - paramTy.push_back((*I)->getType()); + paramTy.push_back(output->getType()); else - paramTy.push_back(PointerType::getUnqual((*I)->getType())); + paramTy.push_back(PointerType::getUnqual(output->getType())); } - DEBUG(dbgs() << "Function type: " << *RetTy << " f("); - for (std::vector<Type*>::iterator i = paramTy.begin(), - e = paramTy.end(); i != e; ++i) - DEBUG(dbgs() << **i << ", "); - DEBUG(dbgs() << ")\n"); + DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : paramTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); StructType *StructTy; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { @@ -372,9 +361,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, RewriteVal = &*AI++; std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); - use != useE; ++use) - if (Instruction* inst = dyn_cast<Instruction>(*use)) + for (User *use : Users) + if (Instruction *inst = dyn_cast<Instruction>(use)) if (Blocks.count(inst->getParent())) inst->replaceUsesOfWith(inputs[i], RewriteVal); } @@ -429,19 +417,19 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, LLVMContext &Context = newFunction->getContext(); // Add inputs as params, or to be filled into the struct - for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) + for (Value *input : inputs) if (AggregateArgs) - StructValues.push_back(*i); + StructValues.push_back(input); else - params.push_back(*i); + params.push_back(input); // Create allocas for the outputs - for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { + for (Value *output : outputs) { if (AggregateArgs) { - StructValues.push_back(*i); + StructValues.push_back(output); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc", + new AllocaInst(output->getType(), nullptr, output->getName() + ".loc", &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); params.push_back(alloca); @@ -522,9 +510,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, std::map<BasicBlock*, BasicBlock*> ExitBlockMap; unsigned switchVal = 0; - for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(), - e = Blocks.end(); i != e; ++i) { - TerminatorInst *TI = (*i)->getTerminator(); + for (BasicBlock *Block : Blocks) { + TerminatorInst *TI = Block->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) if (!Blocks.count(TI->getSuccessor(i))) { BasicBlock *OldTarget = TI->getSuccessor(i); @@ -576,10 +563,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Make sure we are looking at the original successor block, not // at a newly inserted exit block, which won't be in the dominator // info. 
- for (std::map<BasicBlock*, BasicBlock*>::iterator I = - ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I) - if (DefBlock == I->second) { - DefBlock = I->first; + for (const auto &I : ExitBlockMap) + if (DefBlock == I.second) { + DefBlock = I.first; break; } @@ -677,13 +663,12 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - for (SetVector<BasicBlock*>::const_iterator i = Blocks.begin(), - e = Blocks.end(); i != e; ++i) { + for (BasicBlock *Block : Blocks) { // Delete the basic block from the old function, and the list of blocks - oldBlocks.remove(*i); + oldBlocks.remove(Block); // Insert this basic block into the new function - newBlocks.push_back(*i); + newBlocks.push_back(Block); } } @@ -721,9 +706,9 @@ Function *CodeExtractor::extractCodeRegion() { findInputsOutputs(inputs, outputs); SmallPtrSet<BasicBlock *, 1> ExitBlocks; - for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) - for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + for (BasicBlock *Block : Blocks) + for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; + ++SI) if (!Blocks.count(*SI)) ExitBlocks.insert(*SI); NumExitBlocks = ExitBlocks.size(); diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp new file mode 100644 index 0000000000000..cd130abf45192 --- /dev/null +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -0,0 +1,596 @@ +//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Function evaluator for LLVM IR. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "evaluator" + +using namespace llvm; + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); + +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. 
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantAggregate>(C)) { + for (Value *Op : C->operands()) + if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) + return false; + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C).second) + return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); +} + +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/*_odr/linkonce linkage or external globals. + return GV->hasUniqueInitializer(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasUniqueInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. 
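Taken together, these rules admit a global address plus a constant offset but reject, say, an address divided by a constant (the &X/42 case from the file comment), and the isGEPWithNoNotionalOverIndexing check that follows enforces the in-bounds requirement just described. A toy mirror of the value recursion over a simplified constant-expression tree (ToyConst and simpleEnoughToy are illustrative, not LLVM types):

struct ToyConst {
  enum Kind { GlobalAddr, Int, Add, Div, Bitcast } K;
  const ToyConst *LHS = nullptr;
  const ToyConst *RHS = nullptr;
};

// Leaves commit trivially, casts recurse, Add is allowed only as
// simple + constant-int; anything else (e.g. Div) has no relocation
// that can encode it.
static bool simpleEnoughToy(const ToyConst *C) {
  switch (C->K) {
  case ToyConst::GlobalAddr:
  case ToyConst::Int:
    return true;
  case ToyConst::Bitcast:
    return simpleEnoughToy(C->LHS);
  case ToyConst::Add:
    return C->RHS->K == ToyConst::Int && simpleEnoughToy(C->LHS);
  case ToyConst::Div: // e.g. &X/42
    return false;
  }
  return false;
}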
+ if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); + } + } + + return false; +} + +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. +Constant *Evaluator::ComputeLoadResult(Constant *P) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); + if (I != MutatedMemory.end()) return I->second; + + // Access it. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + return nullptr; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return nullptr; // don't know how to evaluate. +} + +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, + BasicBlock *&NextBB) { + // This is the main evaluation loop. + while (1) { + Constant *InstResult = nullptr; + + DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (!SI->isSimple()) { + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + Constant *Ptr = getVal(SI->getOperand(1)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + if (!isSimpleEnoughPointerToCommit(Ptr)) { + // If this is too complex for us to commit, reject it. + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + return false; + } + + Constant *Val = getVal(SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { + DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val + << "\n"); + return false; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast) { + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. 
+ Ptr = CE->getOperand(0); + + Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); + return false; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + } + } + + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(BO->getOperand(0)), + getVal(BO->getOperand(1))); + DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult + << "\n"); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(CI->getOperand(0)), + getVal(CI->getOperand(1))); + DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(CI->getOperand(0)), + CI->getType()); + DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), + getVal(SI->getOperand(1)), + getVal(SI->getOperand(2))); + DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult + << "\n"); + } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { + InstResult = ConstantExpr::getExtractValue( + getVal(EVI->getAggregateOperand()), EVI->getIndices()); + DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { + InstResult = ConstantExpr::getInsertValue( + getVal(IVI->getAggregateOperand()), + getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); + DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(*i)); + InstResult = + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); + DEBUG(dbgs() << "Found a GEP! 
Simplifying: " << *InstResult + << "\n"); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + + if (!LI->isSimple()) { + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + + Constant *Ptr = getVal(LI->getOperand(0)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); + } + InstResult = ComputeLoadResult(Ptr); + if (!InstResult) { + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) { + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. + } + Type *Ty = AI->getAllocatedType(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); + DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { + CallSite CS(&*CurInst); + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { + DEBUG(dbgs() << "Ignoring debug info.\n"); + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa<InlineAsm>(CS.getCalledValue())) { + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { + if (MSI->isVolatile()) { + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + Constant *Val = getVal(MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; + } + } + + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) { + DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); + return false; + } + ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Type *ElemTy = GV->getValueType(); + if (!Size->isAllOnesValue() && + Size->getValue().getLimitedValue() >= + DL.getTypeStoreSize(ElemTy)) { + Invariants.insert(GV); + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } + } + // Continue even if we do nothing. + ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; + } + + DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); + return false; + } + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); + if (!Callee || Callee->isInterposable()) { + DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } + + SmallVector<Constant*, 8> Formals; + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) + Formals.push_back(getVal(*i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + InstResult = C; + DEBUG(dbgs() << "Constant folded function call. Result: " << + *InstResult << "\n"); + } else { + DEBUG(dbgs() << "Can not constant fold function call.\n"); + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) { + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + return false; + } + + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. + ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = RetVal; + + if (InstResult) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " + << *InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } + } + } else if (isa<TerminatorInst>(CurInst)) { + DEBUG(dbgs() << "Found a terminator instruction.\n"); + + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NextBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NextBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(SI->getCondition())); + if (!Val) return false; // Cannot determine. + NextBB = SI->findCaseValue(Val).getCaseSuccessor(); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NextBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (isa<ReturnInst>(CurInst)) { + NextBB = nullptr; + } else { + // invoke, unwind, resume, unreachable. + DEBUG(dbgs() << "Can not handle terminator."); + return false; // Cannot handle this terminator. + } + + // We succeeded at evaluating this block! + DEBUG(dbgs() << "Successfully evaluated block.\n"); + return true; + } else { + // Did not know how to evaluate this! + DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); + return false; + } + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, DL, TLI); + + setVal(&*CurInst, InstResult); + } + + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + return true; + } + + // Advance program counter. + ++CurInst; + } +} + +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. 
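In use, the evaluator is driven through EvaluateFunction, defined next. A hedged sketch of a caller in the style of this class's GlobalOpt client, assuming an Evaluator constructor taking (const DataLayout &, const TargetLibraryInfo *) and a parameterless function F:

static Constant *tryEvaluate(Function *F, const DataLayout &DL,
                             const TargetLibraryInfo *TLI) {
  Evaluator Eval(DL, TLI);
  Constant *RetVal = nullptr;
  SmallVector<Constant *, 1> NoArgs; // F takes no parameters in this sketch
  if (!Eval.EvaluateFunction(F, RetVal, NoArgs))
    return nullptr; // hit something the evaluator cannot model
  return RetVal;    // may be null if F returns void
}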
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) + return false; + + CallStack.push_back(F); + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + setVal(&*AI, ActualArgs[ArgNo]); + + // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + // we can only evaluate any one basic block at most once. This set keeps + // track of what we have executed so we can detect recursive cases etc. + SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; + + // CurBB - The current basic block we're evaluating. + BasicBlock *CurBB = &F->front(); + + BasicBlock::iterator CurInst = CurBB->begin(); + + while (1) { + BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. + DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); + + if (!EvaluateBlock(CurInst, NextBB)) + return false; + + if (!NextBB) { + // Successfully running until there's no next block means that we found + // the return. Fill in the return value and pop the call stack. + ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); + if (RI->getNumOperands()) + RetVal = getVal(RI->getOperand(0)); + CallStack.pop_back(); + return true; + } + + // Okay, we succeeded in evaluating this control flow. See if we have + // executed the new block before. If so, we have a looping function, + // which we cannot evaluate in reasonable time. + if (!ExecutedBlocks.insert(NextBB).second) + return false; // looped! + + // Okay, we have never been in this block before. Check to see if there + // are any PHI nodes. If so, evaluate them with information about where + // we came from. + PHINode *PN = nullptr; + for (CurInst = NextBB->begin(); + (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) + setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); + + // Advance to the next block. + CurBB = NextBB; + } +} + diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp new file mode 100644 index 0000000000000..fcb25baf32167 --- /dev/null +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -0,0 +1,243 @@ +//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the FunctionImportGlobalProcessing class, used +// to perform the necessary global value handling for function importing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +using namespace llvm; + +/// Checks if we should import SGV as a definition, otherwise import as a +/// declaration. +bool FunctionImportGlobalProcessing::doImportAsDefinition( + const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) { + + // For alias, we tie the definition to the base object.
Extract it and recurse + if (auto *GA = dyn_cast<GlobalAlias>(SGV)) { + if (GA->hasWeakAnyLinkage()) + return false; + const GlobalObject *GO = GA->getBaseObject(); + if (!GO->hasLinkOnceODRLinkage()) + return false; + return FunctionImportGlobalProcessing::doImportAsDefinition( + GO, GlobalsToImport); + } + // Only import the globals requested for importing. + if (GlobalsToImport->count(SGV)) + return true; + // Otherwise no. + return false; +} + +bool FunctionImportGlobalProcessing::doImportAsDefinition( + const GlobalValue *SGV) { + if (!isPerformingImport()) + return false; + return FunctionImportGlobalProcessing::doImportAsDefinition(SGV, + GlobalsToImport); +} + +bool FunctionImportGlobalProcessing::doPromoteLocalToGlobal( + const GlobalValue *SGV) { + assert(SGV->hasLocalLinkage()); + // Both the imported references and the original local variable must + // be promoted. + if (!isPerformingImport() && !isModuleExporting()) + return false; + + // Local const variables never need to be promoted unless they are address + // taken. The imported uses can simply use the clone created in this module. + // For now we are conservative in determining which variables are not + // address taken by checking the unnamed addr flag. To be more aggressive, + // the address taken information must be checked earlier during parsing + // of the module and recorded in the summary index for use when importing + // from that module. + auto *GVar = dyn_cast<GlobalVariable>(SGV); + if (GVar && GVar->isConstant() && GVar->hasGlobalUnnamedAddr()) + return false; + + if (GVar && GVar->hasSection()) + // Some sections like "__DATA,__cfstring" are "magic" and promotion is not + // allowed. Just disable promotion on any GVar with sections right now. + return false; + + // Eventually we only need to promote functions in the exporting module that + // are referenced by a potentially exported function (i.e. one that is in the + // summary index). + return true; +} + +std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV) { + // For locals that must be promoted to global scope, ensure that + // the promoted name uniquely identifies the copy in the original module, + // using the ID assigned during combined index creation. When importing, + // we rename all locals (not just those that are promoted) in order to + // avoid naming conflicts between locals imported from different modules. + if (SGV->hasLocalLinkage() && + (doPromoteLocalToGlobal(SGV) || isPerformingImport())) + return ModuleSummaryIndex::getGlobalNameForLocal( + SGV->getName(), + ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier())); + return SGV->getName(); +} + +GlobalValue::LinkageTypes +FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV) { + // Any local variable that is referenced by an exported function needs + // to be promoted to global scope. Since we don't currently know which + // functions reference which local variables/functions, we must treat + // all as potentially exported if this module is exporting anything. + if (isModuleExporting()) { + if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV)) + return GlobalValue::ExternalLinkage; + return SGV->getLinkage(); + } + + // Otherwise, if we aren't importing, no linkage change is needed. 
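As a concrete instance of the promotion path above: an internal-linkage global that must be promoted is given an index-derived, module-unique name via getGlobalNameForLocal, external linkage, and (in processGlobalForThinLTO below) hidden visibility so the promoted symbol does not escape the final link. A sketch of just that transformation, with NewName standing in for the index-derived name; the switch that follows then maps every source linkage for the import case:

static void promoteLocal(GlobalValue &GV, StringRef NewName) {
  assert(GV.hasLocalLinkage() && "only locals are promoted");
  GV.setName(NewName);
  GV.setLinkage(GlobalValue::ExternalLinkage);
  // Hide the promoted symbol so it cannot be referenced outside the DSO.
  GV.setVisibility(GlobalValue::HiddenVisibility);
}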
+ if (!isPerformingImport()) + return SGV->getLinkage(); + + switch (SGV->getLinkage()) { + case GlobalValue::ExternalLinkage: + // External definitions are converted to available_externally + // definitions upon import, so that they are available for inlining + // and/or optimization, but are turned into declarations later + // during the EliminateAvailableExternally pass. + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + // An imported external declaration stays external. + return SGV->getLinkage(); + + case GlobalValue::AvailableExternallyLinkage: + // An imported available_externally definition converts + // to external if imported as a declaration. + if (!doImportAsDefinition(SGV)) + return GlobalValue::ExternalLinkage; + // An imported available_externally declaration stays that way. + return SGV->getLinkage(); + + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + // These both stay the same when importing the definition. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::WeakAnyLinkage: + // Can't import weak_any definitions correctly, or we might change the + // program semantics, since the linker will pick the first weak_any + // definition and importing would change the order they are seen by the + // linker. The module linking caller needs to enforce this. + assert(!doImportAsDefinition(SGV)); + // If imported as a declaration, it becomes external_weak. + return SGV->getLinkage(); + + case GlobalValue::WeakODRLinkage: + // For weak_odr linkage, there is a guarantee that all copies will be + // equivalent, so the issue described above for weak_any does not exist, + // and the definition can be imported. It can be treated similarly + // to an imported externally visible global value. + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + + case GlobalValue::AppendingLinkage: + // It would be incorrect to import an appending linkage variable, + // since it would cause global constructors/destructors to be + // executed multiple times. This should have already been handled + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage. + return GlobalValue::AppendingLinkage; + + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + // If we are promoting the local to global scope, it is handled + // similarly to a normal externally visible global. + if (doPromoteLocalToGlobal(SGV)) { + if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + } + // A non-promoted imported local definition stays local. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::ExternalWeakLinkage: + // External weak doesn't apply to definitions, must be a declaration. + assert(!doImportAsDefinition(SGV)); + // Linkage stays external_weak. + return SGV->getLinkage(); + + case GlobalValue::CommonLinkage: + // Linkage stays common on definitions. + // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage(); + } + + llvm_unreachable("unknown linkage type"); +} + +void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + if (GV.hasLocalLinkage() && + (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { + GV.setName(getName(&GV)); + GV.setLinkage(getLinkage(&GV)); + if (!GV.hasLocalLinkage()) + GV.setVisibility(GlobalValue::HiddenVisibility); + } else + GV.setLinkage(getLinkage(&GV)); + + // Remove functions imported as available externally defs from comdats, + // as this is a declaration for the linker, and will be dropped eventually. + // It is illegal for comdats to contain declarations. + auto *GO = dyn_cast_or_null<GlobalObject>(&GV); + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + // The IRMover should not have placed any imported declarations in + // a comdat, so the only declaration that should be in a comdat + // at this point would be a definition imported as available_externally. + assert(GO->hasAvailableExternallyLinkage() && + "Expected comdat on definition (possibly available external)"); + GO->setComdat(nullptr); + } +} + +void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { + if (!moduleCanBeRenamedForThinLTO(M)) { + // We would have blocked importing from this module by suppressing index + // generation. We still may be able to import into this module though. + assert(!isPerformingImport() && + "Should have blocked importing from module with local used in ASM"); + return; + } + + for (GlobalVariable &GV : M.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : M) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : M.aliases()) + processGlobalForThinLTO(GA); +} + +bool FunctionImportGlobalProcessing::run() { + processGlobalsForThinLTO(); + return false; +} + +bool llvm::renameModuleForThinLTO( + Module &M, const ModuleSummaryIndex &Index, + DenseSet<const GlobalValue *> *GlobalsToImport) { + FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport); + return ThinLTOProcessing.run(); +} diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 3893a752503b6..266be41fbeadd 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -20,11 +20,11 @@ using namespace llvm; /// and release, then return AcquireRelease. /// static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { - if (X == Acquire && Y == Release) - return AcquireRelease; - if (Y == Acquire && X == Release) - return AcquireRelease; - return (AtomicOrdering)std::max(X, Y); + if (X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) + return AtomicOrdering::AcquireRelease; + if (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release) + return AtomicOrdering::AcquireRelease; + return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y); } /// It is safe to destroy a constant iff it is only used by constants itself. 
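For orientation, the promotion machinery above is driven through the renameModuleForThinLTO entry point that closes this file. A minimal sketch of a caller (hypothetical helper name; header paths assume a tree of this vintage, and a null GlobalsToImport set is taken to mean no import is in progress, per the isPerformingImport() guards above):

#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"

// Promote/rename locals in M on the exporting side of a ThinLTO link:
// promoted locals get external linkage, hidden visibility, and a
// module-unique suffix derived from the module hash, so later imports
// cannot introduce name collisions.
static bool prepareForThinLTOExport(llvm::Module &M,
                                    const llvm::ModuleSummaryIndex &Index) {
  return llvm::renameModuleForThinLTO(M, Index, /*GlobalsToImport=*/nullptr);
}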
@@ -105,7 +105,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } } - if (StoredVal == GV->getInitializer()) { + if (GV->hasInitializer() && StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::InitializerStored) GS.StoredType = GlobalStatus::InitializerStored; } else if (isa<LoadInst>(StoredVal) && @@ -185,4 +185,4 @@ GlobalStatus::GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), StoredOnceValue(nullptr), AccessingFunction(nullptr), HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), - Ordering(NotAtomic) {} + Ordering(AtomicOrdering::NotAtomic) {} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 79282a2a703b3..1fbb19d2b8add 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -427,6 +427,17 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) continue; + // We do not need to (and in fact, cannot) convert possibly throwing calls + // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into + // invokes. The caller's "segment" of the deoptimization continuation + // attached to the newly inlined @llvm.experimental_deoptimize + // (resp. @llvm.experimental.guard) call should contain the exception + // handling logic, if any. + if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || + F->getIntrinsicID() == Intrinsic::experimental_guard) + continue; + if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { // This call is nested inside a funclet. If that funclet has an unwind // destination within the inlinee, then unwinding out of this call would @@ -677,6 +688,34 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, UnwindDest->removePredecessor(InvokeBB); } +/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata, +/// that metadata should be propagated to all memory-accessing cloned +/// instructions. +static void PropagateParallelLoopAccessMetadata(CallSite CS, + ValueToValueMapTy &VMap) { + MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); + if (!M) + return; + + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast<Instruction>(VMI->second); + if (!NI) + continue; + + if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { + M = MDNode::concatenate(PM, M); + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } else if (NI->mayReadOrWriteMemory()) { + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); + } + } +} + /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. Were this not done, then @@ -693,12 +732,11 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // inter-procedural alias analysis passes. We can revisit this if it becomes // an efficiency or overhead problem. 
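The strongerOrdering hunk above is fallout from AtomicOrdering becoming a scoped enum: its enumerators no longer convert to int implicitly, so std::max needs explicit casts. A self-contained illustration of the same pattern (illustrative enum with an abbreviated enumerator list, not the real LLVM type):

#include <algorithm>

enum class Ordering : unsigned {
  NotAtomic, Monotonic, Acquire, Release, AcquireRelease
};

// Return the stronger of two orderings. Acquire and release combine into
// acq_rel; otherwise the weakest-to-strongest declaration order decides.
static Ordering strongerOf(Ordering X, Ordering Y) {
  if ((X == Ordering::Acquire && Y == Ordering::Release) ||
      (Y == Ordering::Acquire && X == Ordering::Release))
    return Ordering::AcquireRelease;
  return (Ordering)std::max((unsigned)X, (unsigned)Y);
}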
- for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end(); - I != IE; ++I) - for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) { - if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope)) + for (const BasicBlock &I : *CalledFunc) + for (const Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); - if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias)) + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); } @@ -720,20 +758,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // the noalias scopes and the lists of those scopes. SmallVector<TempMDTuple, 16> DummyNodes; DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; - for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); - I != IE; ++I) { + for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); - MDMap[*I].reset(DummyNodes.back().get()); + MDMap[I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old // metadata references with either a dummy node or an already-created new // node. - for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); - I != IE; ++I) { + for (const MDNode *I : MD) { SmallVector<Metadata *, 4> NewOps; - for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) { - const Metadata *V = (*I)->getOperand(i); + for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { + const Metadata *V = I->getOperand(i); if (const MDNode *M = dyn_cast<MDNode>(V)) NewOps.push_back(MDMap[M]); else @@ -741,7 +777,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { } MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); - MDTuple *TempM = cast<MDTuple>(MDMap[*I]); + MDTuple *TempM = cast<MDTuple>(MDMap[I]); assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); @@ -801,10 +837,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const Function *CalledFunc = CS.getCalledFunction(); SmallVector<const Argument *, 4> NoAliasArgs; - for (const Argument &I : CalledFunc->args()) { - if (I.hasNoAliasAttr() && !I.hasNUses(0)) - NoAliasArgs.push_back(&I); - } + for (const Argument &Arg : CalledFunc->args()) + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); if (NoAliasArgs.empty()) return; @@ -885,17 +920,16 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, IsArgMemOnlyCall = true; } - for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(), - AE = ICS.arg_end(); AI != AE; ++AI) { + for (Value *Arg : ICS.args()) { // We need to check the underlying objects of all arguments, not just // the pointer arguments, because we might be passing pointers as // integers, etc. // However, if we know that the call only accesses pointer arguments, // then we only need to check the pointer arguments. 
- if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy()) + if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy()) continue; - PtrArgs.push_back(*AI); + PtrArgs.push_back(Arg); } } @@ -913,9 +947,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, SmallVector<Metadata *, 4> Scopes, NoAliases; SmallSetVector<const Argument *, 4> NAPtrArgs; - for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { + for (const Value *V : PtrArgs) { SmallVector<Value *, 4> Objects; - GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]), + GetUnderlyingObjects(const_cast<Value*>(V), Objects, DL, /* LI = */ nullptr); for (Value *O : Objects) @@ -1228,7 +1262,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { /// Rebuild the entire inlined-at chain for this instruction so that the top of /// the chain now is inlined-at the new call site. static DebugLoc -updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, +updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode, + LLVMContext &Ctx, DenseMap<const DILocation *, DILocation *> &IANodes) { SmallVector<DILocation *, 3> InlinedAtLocations; DILocation *Last = InlinedAtNode; @@ -1249,8 +1284,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, // Starting from the top, rebuild the nodes to point to the new inlined-at // location (then rebuilding the rest of the chain behind it) and update the // map of already-constructed inlined-at nodes. - for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), - InlinedAtLocations.rend())) { + for (const DILocation *MD : reverse(InlinedAtLocations)) { Last = IANodes[MD] = DILocation::getDistinct( Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); } @@ -1264,7 +1298,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { - DebugLoc TheCallDL = TheCall->getDebugLoc(); + const DebugLoc &TheCallDL = TheCall->getDebugLoc(); if (!TheCallDL) return; @@ -1422,6 +1456,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + // Determine if we are dealing with a call in an EHPad which does not unwind + // to caller. + bool EHPadForCallUnwindsLocally = false; + if (CallSiteEHPad && CS.isCall()) { + UnwindDestMemoTy FuncletUnwindMap; + Value *CallSiteUnwindDestToken = + getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); + + EHPadForCallUnwindsLocally = + CallSiteUnwindDestToken && + !isa<ConstantTokenNone>(CallSiteUnwindDestToken); + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = --Caller->end(); @@ -1552,6 +1599,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add noalias metadata if necessary. AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); + // Propagate llvm.mem.parallel_loop_access if necessary. + PropagateParallelLoopAccessMetadata(CS, VMap); + // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. 
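The PropagateParallelLoopAccessMetadata step invoked just above reduces to a single merge rule applied to every cloned instruction: concatenate with whatever parallel-loop metadata is already present, otherwise tag instructions that touch memory. A stripped-down sketch of that rule (hypothetical helper name, mirroring the loop body defined earlier in this file's hunks):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Merge the call site's parallel-loop metadata M into a cloned instruction.
static void mergeParallelLoopAccess(llvm::Instruction *NI, llvm::MDNode *M) {
  unsigned Kind = llvm::LLVMContext::MD_mem_parallel_loop_access;
  if (llvm::MDNode *PM = NI->getMetadata(Kind))
    NI->setMetadata(Kind, llvm::MDNode::concatenate(PM, M));
  else if (NI->mayReadOrWriteMemory())
    NI->setMetadata(Kind, M);
}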
if (IFI.ACT) @@ -1602,7 +1652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } - bool InlinedMustTailCalls = false; + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; if (CallInst *CI = dyn_cast<CallInst>(TheCall)) @@ -1615,6 +1665,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (!CI) continue; + if (Function *F = CI->getCalledFunction()) + InlinedDeoptimizeCalls |= + F->getIntrinsicID() == Intrinsic::experimental_deoptimize; + // We need to reduce the strength of any inlined tail calls. For // musttail, we have to avoid introducing potential unbounded stack // growth. For example, if functions 'f' and 'g' are mutually recursive @@ -1677,11 +1731,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, builder.CreateLifetimeStart(AI, AllocaSize); for (ReturnInst *RI : Returns) { - // Don't insert llvm.lifetime.end calls between a musttail call and a - // return. The return kills all local allocas. + // Don't insert llvm.lifetime.end calls between a musttail or deoptimize + // call and a return. The return kills all local allocas. if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) continue; + if (InlinedDeoptimizeCalls && + RI->getParent()->getTerminatingDeoptimizeCall()) + continue; IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } } @@ -1702,10 +1759,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (ReturnInst *RI : Returns) { - // Don't insert llvm.stackrestore calls between a musttail call and a - // return. The return will restore the stack pointer. + // Don't insert llvm.stackrestore calls between a musttail or deoptimize + // call and a return. The return will restore the stack pointer. if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) continue; + if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) + continue; IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); } } @@ -1758,7 +1817,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); else NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); - NewInst->setDebugLoc(I->getDebugLoc()); NewInst->takeName(I); I->replaceAllUsesWith(NewInst); I->eraseFromParent(); @@ -1766,6 +1824,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, OpBundles.clear(); } + // It is problematic if the inlinee has a cleanupret which unwinds to + // caller and we inline it into a call site which doesn't unwind but into + // an EH pad that does. Such an edge must be dynamically unreachable. + // As such, we replace the cleanupret with unreachable. + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) + if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) + changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); + Instruction *I = BB->getFirstNonPHI(); if (!I->isEHPad()) continue; @@ -1781,6 +1847,64 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + if (InlinedDeoptimizeCalls) { + // We need to at least remove the deoptimizing returns from the Return set, + // so that the control flow from those returns does not get merged into the + // caller (but terminate it instead). 
If the caller's return type does not + // match the callee's return type, we also need to change the return type of + // the intrinsic. + if (Caller->getReturnType() == TheCall->getType()) { + auto NewEnd = remove_if(Returns, [](ReturnInst *RI) { + return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; + }); + Returns.erase(NewEnd, Returns.end()); + } else { + SmallVector<ReturnInst *, 8> NormalReturns; + Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( + Caller->getParent(), Intrinsic::experimental_deoptimize, + {Caller->getReturnType()}); + + for (ReturnInst *RI : Returns) { + CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); + if (!DeoptCall) { + NormalReturns.push_back(RI); + continue; + } + + // The calling convention on the deoptimize call itself may be bogus, + // since the code we're inlining may have undefined behavior (and may + // never actually execute at runtime); but all + // @llvm.experimental.deoptimize declarations have to have the same + // calling convention in a well-formed module. + auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); + NewDeoptIntrinsic->setCallingConv(CallingConv); + auto *CurBB = RI->getParent(); + RI->eraseFromParent(); + + SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), + DeoptCall->arg_end()); + + SmallVector<OperandBundleDef, 1> OpBundles; + DeoptCall->getOperandBundlesAsDefs(OpBundles); + DeoptCall->eraseFromParent(); + assert(!OpBundles.empty() && + "Expected at least the deopt operand bundle"); + + IRBuilder<> Builder(CurBB); + CallInst *NewDeoptCall = + Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); + NewDeoptCall->setCallingConv(CallingConv); + if (NewDeoptCall->getType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(NewDeoptCall); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + } + // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been // musttail. Therefore it's safe to return without merging control into the diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index da890a2970051..8a1973d1db051 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -37,13 +37,13 @@ namespace { if (!AI->hasName() && !AI->getType()->isVoidTy()) AI->setName("arg"); - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!BB->hasName()) - BB->setName("bb"); - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->hasName() && !I->getType()->isVoidTy()) - I->setName("tmp"); + for (BasicBlock &BB : F) { + if (!BB.hasName()) + BB.setName("bb"); + + for (Instruction &I : BB) + if (!I.hasName() && !I.getType()->isVoidTy()) + I.setName("tmp"); } return true; } diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 5687afa61e2a6..5a90dcb033b2a 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -390,6 +390,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), Rem->getOperand(1), Builder); + // Check whether this is the insert point while Rem is still valid. 
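One detail worth calling out in the deoptimize handling above: @llvm.experimental.deoptimize is overloaded only on its return type, so retargeting the inlined calls at the caller's return type is just a matter of requesting a new declaration. The same step in isolation (hypothetical helper name):

#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

// Materialize @llvm.experimental.deoptimize with return type RetTy, as the
// inliner does above when caller and callee return types differ.
static llvm::Function *getDeoptimizeDecl(llvm::Module &M, llvm::Type *RetTy) {
  return llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::experimental_deoptimize, {RetTy});
}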
+  bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint();
   Rem->replaceAllUsesWith(Remainder);
   Rem->dropAllReferences();
   Rem->eraseFromParent();
@@ -397,7 +399,7 @@
   // If we didn't actually generate a urem instruction, we're done.
   // This happens for example if the inputs were constant. In this case the
   // Builder insertion point was unchanged.
-  if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked())
+  if (IsInsertPoint)
     return true;
   BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
@@ -446,6 +448,9 @@ bool llvm::expandDivision(BinaryOperator *Div) {
   // Lower the code to unsigned division, and reset Div to point to the udiv.
   Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
                                                Div->getOperand(1), Builder);
+
+  // Check whether this is the insert point while Div is still valid.
+  bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint();
   Div->replaceAllUsesWith(Quotient);
   Div->dropAllReferences();
   Div->eraseFromParent();
@@ -453,7 +458,7 @@
   // If we didn't actually generate a udiv instruction, we're done.
   // This happens for example if the inputs were constant. In this case the
   // Builder insertion point was unchanged.
-  if (Div == Builder.GetInsertPoint().getNodePtrUnchecked())
+  if (IsInsertPoint)
     return true;
   BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index b4b2e148dfbb1..9658966779b9a 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -27,10 +27,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -41,6 +42,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PredIteratorCache.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
@@ -52,154 +54,156 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 /// Return true if the specified block is in the list.
 static bool isExitBlock(BasicBlock *BB,
                         const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
-  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
-    if (ExitBlocks[i] == BB)
-      return true;
-  return false;
+  return find(ExitBlocks, BB) != ExitBlocks.end();
 }
 
-/// Given an instruction in the loop, check to see if it has any uses that are
-/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the
-/// uses.
-static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
-                               const SmallVectorImpl<BasicBlock *> &ExitBlocks,
-                               PredIteratorCache &PredCache, LoopInfo *LI) {
+/// For every instruction from the worklist, check to see if it has any uses
+/// that are outside the current loop. If so, insert LCSSA PHI nodes and
+/// rewrite the uses.
+bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, + DominatorTree &DT, LoopInfo &LI) { SmallVector<Use *, 16> UsesToRewrite; + SmallVector<BasicBlock *, 8> ExitBlocks; + PredIteratorCache PredCache; + bool Changed = false; - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (Inst.getType()->isTokenTy()) - return false; + while (!Worklist.empty()) { + UsesToRewrite.clear(); + ExitBlocks.clear(); - BasicBlock *InstBB = Inst.getParent(); + Instruction *I = Worklist.pop_back_val(); + BasicBlock *InstBB = I->getParent(); + Loop *L = LI.getLoopFor(InstBB); + L->getExitBlocks(ExitBlocks); - for (Use &U : Inst.uses()) { - Instruction *User = cast<Instruction>(U.getUser()); - BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(User)) - UserBB = PN->getIncomingBlock(U); + if (ExitBlocks.empty()) + continue; - if (InstBB != UserBB && !L.contains(UserBB)) - UsesToRewrite.push_back(&U); - } + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I->getType()->isTokenTy()) + continue; - // If there are no uses outside the loop, exit with no change. - if (UsesToRewrite.empty()) - return false; + for (Use &U : I->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(U); - ++NumLCSSA; // We are applying the transformation + if (InstBB != UserBB && !L->contains(UserBB)) + UsesToRewrite.push_back(&U); + } - // Invoke instructions are special in that their result value is not available - // along their unwind edge. The code below tests to see whether DomBB - // dominates the value, so adjust DomBB to the normal destination block, - // which is effectively where the value is first usable. - BasicBlock *DomBB = Inst.getParent(); - if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst)) - DomBB = Inv->getNormalDest(); + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) + continue; - DomTreeNode *DomNode = DT.getNode(DomBB); + ++NumLCSSA; // We are applying the transformation - SmallVector<PHINode *, 16> AddedPHIs; - SmallVector<PHINode *, 8> PostProcessPHIs; + // Invoke instructions are special in that their result value is not + // available along their unwind edge. The code below tests to see whether + // DomBB dominates the value, so adjust DomBB to the normal destination + // block, which is effectively where the value is first usable. + BasicBlock *DomBB = InstBB; + if (InvokeInst *Inv = dyn_cast<InvokeInst>(I)) + DomBB = Inv->getNormalDest(); - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst.getType(), Inst.getName()); + DomTreeNode *DomNode = DT.getNode(DomBB); - // Insert the LCSSA phi's into all of the exit blocks dominated by the - // value, and add them to the Phi's map. - for (BasicBlock *ExitBB : ExitBlocks) { - if (!DT.dominates(DomNode, DT.getNode(ExitBB))) - continue; + SmallVector<PHINode *, 16> AddedPHIs; + SmallVector<PHINode *, 8> PostProcessPHIs; - // If we already inserted something for this BB, don't reprocess it. 
-    if (SSAUpdate.HasValueForBlock(ExitBB))
-      continue;
+    SSAUpdater SSAUpdate;
+    SSAUpdate.Initialize(I->getType(), I->getName());
-    PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
-                                  Inst.getName() + ".lcssa", &ExitBB->front());
+    // Insert the LCSSA phi's into all of the exit blocks dominated by the
+    // value, and add them to the Phi's map.
+    for (BasicBlock *ExitBB : ExitBlocks) {
+      if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+        continue;
-    // Add inputs from inside the loop for this PHI.
-    for (BasicBlock *Pred : PredCache.get(ExitBB)) {
-      PN->addIncoming(&Inst, Pred);
+      // If we already inserted something for this BB, don't reprocess it.
+      if (SSAUpdate.HasValueForBlock(ExitBB))
+        continue;
-      // If the exit block has a predecessor not within the loop, arrange for
-      // the incoming value use corresponding to that predecessor to be
-      // rewritten in terms of a different LCSSA PHI.
-      if (!L.contains(Pred))
-        UsesToRewrite.push_back(
-            &PN->getOperandUse(PN->getOperandNumForIncomingValue(
-                PN->getNumIncomingValues() - 1)));
+      PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
+                                    I->getName() + ".lcssa", &ExitBB->front());
+
+      // Add inputs from inside the loop for this PHI.
+      for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+        PN->addIncoming(I, Pred);
+
+        // If the exit block has a predecessor not within the loop, arrange for
+        // the incoming value use corresponding to that predecessor to be
+        // rewritten in terms of a different LCSSA PHI.
+        if (!L->contains(Pred))
+          UsesToRewrite.push_back(
+              &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+                  PN->getNumIncomingValues() - 1)));
+      }
+
+      AddedPHIs.push_back(PN);
+
+      // Remember that this phi makes the value alive in this block.
+      SSAUpdate.AddAvailableValue(ExitBB, PN);
+
+      // LoopSimplify might fail to simplify some loops (e.g. when indirect
+      // branches are involved). In such situations, it might happen that an
+      // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
+      // create PHIs in such an exit block, we are also inserting PHIs into L2's
+      // header. This could break LCSSA form for L2 because these inserted PHIs
+      // can also have uses outside of L2. Remember all PHIs in such situations
+      // so we can revisit them later on. FIXME: Remove this if indirectbr
+      // support in LoopSimplify gets improved.
+      if (auto *OtherLoop = LI.getLoopFor(ExitBB))
+        if (!L->contains(OtherLoop))
+          PostProcessPHIs.push_back(PN);
+    }
-    AddedPHIs.push_back(PN);
-
-    // Remember that this phi makes the value alive in this block.
-    SSAUpdate.AddAvailableValue(ExitBB, PN);
-
-    // LoopSimplify might fail to simplify some loops (e.g. when indirect
-    // branches are involved). In such situations, it might happen that an exit
-    // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
-    // PHIs in such an exit block, we are also inserting PHIs into L2's header.
-    // This could break LCSSA form for L2 because these inserted PHIs can also
-    // have uses outside of L2. Remember all PHIs in such situation as to
-    // revisit than later on. FIXME: Remove this if indirectbr support into
-    // LoopSimplify gets improved.
-    if (auto *OtherLoop = LI->getLoopFor(ExitBB))
-      if (!L.contains(OtherLoop))
-        PostProcessPHIs.push_back(PN);
-  }
+    // Rewrite all uses outside the loop in terms of the new PHIs we just
+    // inserted.
+    for (Use *UseToRewrite : UsesToRewrite) {
+      // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses + // in the same block. It assumes the PHI we inserted is at the end of the + // block. + Instruction *User = cast<Instruction>(UseToRewrite->getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UseToRewrite); + + if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); + UseToRewrite->set(&UserBB->front()); + continue; + } - // Rewrite all uses outside the loop in terms of the new PHIs we just - // inserted. - for (Use *UseToRewrite : UsesToRewrite) { - // If this use is in an exit block, rewrite to use the newly inserted PHI. - // This is required for correctness because SSAUpdate doesn't handle uses in - // the same block. It assumes the PHI we inserted is at the end of the - // block. - Instruction *User = cast<Instruction>(UseToRewrite->getUser()); - BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(User)) - UserBB = PN->getIncomingBlock(*UseToRewrite); - - if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { - // Tell the VHs that the uses changed. This updates SCEV's caches. - if (UseToRewrite->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); - UseToRewrite->set(&UserBB->front()); - continue; + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UseToRewrite); } - // Otherwise, do full PHI insertion. - SSAUpdate.RewriteUse(*UseToRewrite); - } + // Post process PHI instructions that were inserted into another disjoint + // loop and update their exits properly. + for (auto *PostProcessPN : PostProcessPHIs) { + if (PostProcessPN->use_empty()) + continue; - // Post process PHI instructions that were inserted into another disjoint loop - // and update their exits properly. - for (auto *I : PostProcessPHIs) { - if (I->use_empty()) - continue; + // Reprocess each PHI instruction. + Worklist.push_back(PostProcessPN); + } - BasicBlock *PHIBB = I->getParent(); - Loop *OtherLoop = LI->getLoopFor(PHIBB); - SmallVector<BasicBlock *, 8> EBs; - OtherLoop->getExitBlocks(EBs); - if (EBs.empty()) - continue; + // Remove PHI nodes that did not have any uses rewritten. + for (PHINode *PN : AddedPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); - // Recurse and re-process each PHI instruction. FIXME: we should really - // convert this entire thing to a worklist approach where we process a - // vector of instructions... - processInstruction(*OtherLoop, *I, DT, EBs, PredCache, LI); + Changed = true; } - - // Remove PHI nodes that did not have any uses rewritten. 
- for (PHINode *PN : AddedPHIs) - if (PN->use_empty()) - PN->eraseFromParent(); - - return true; + return Changed; } /// Return true if the specified block dominates at least @@ -209,11 +213,9 @@ blockDominatesAnExit(BasicBlock *BB, DominatorTree &DT, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { DomTreeNode *DomNode = DT.getNode(BB); - for (BasicBlock *ExitBB : ExitBlocks) - if (DT.dominates(DomNode, DT.getNode(ExitBB))) - return true; - - return false; + return llvm::any_of(ExitBlocks, [&](BasicBlock * EB) { + return DT.dominates(DomNode, DT.getNode(EB)); + }); } bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, @@ -227,10 +229,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, if (ExitBlocks.empty()) return false; - PredIteratorCache PredCache; + SmallVector<Instruction *, 8> Worklist; // Look at all the instructions in the loop, checking to see if they have uses - // outside the loop. If so, rewrite those uses. + // outside the loop. If so, put them into the worklist to rewrite those uses. for (BasicBlock *BB : L.blocks()) { // For large loops, avoid use-scanning by using dominance information: In // particular, if a block does not dominate any of the loop exits, then none @@ -246,9 +248,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, !isa<PHINode>(I.user_back()))) continue; - Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI); + Worklist.push_back(&I); } } + Changed = formLCSSAForInstructions(Worklist, DT, *LI); // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. @@ -274,11 +277,20 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, return Changed; } +/// Process all loops in the function, inner-most out. +static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT, + ScalarEvolution *SE) { + bool Changed = false; + for (auto &L : *LI) + Changed |= formLCSSARecursively(*L, DT, LI, SE); + return Changed; +} + namespace { -struct LCSSA : public FunctionPass { +struct LCSSAWrapperPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - LCSSA() : FunctionPass(ID) { - initializeLCSSAPass(*PassRegistry::getPassRegistry()); + LCSSAWrapperPass() : FunctionPass(ID) { + initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry()); } // Cached analysis information for the current function. 
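The worklist conversion above also turns the per-instruction logic into a reusable utility: a caller seeds the worklist with instructions whose uses escape their loop, and exit-block discovery, PHI insertion, and reprocessing of PHIs that land in disjoint loop headers all happen inside. A minimal usage sketch (hypothetical wrapper, assuming the declaration sits alongside formLCSSA in LoopUtils.h):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

// Put a single escaping value into LCSSA form via the new worklist API.
static bool rewriteOneEscapingValue(llvm::Instruction *I,
                                    llvm::DominatorTree &DT,
                                    llvm::LoopInfo &LI) {
  llvm::SmallVector<llvm::Instruction *, 1> Worklist;
  Worklist.push_back(I);
  return llvm::formLCSSAForInstructions(Worklist, DT, LI);
}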
@@ -298,6 +310,7 @@ struct LCSSA : public FunctionPass { AU.addRequired<LoopInfoWrapperPass>(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); @@ -305,30 +318,39 @@ struct LCSSA : public FunctionPass { }; } -char LCSSA::ID = 0; -INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) +char LCSSAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) -INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) - -Pass *llvm::createLCSSAPass() { return new LCSSA(); } -char &llvm::LCSSAID = LCSSA::ID; +INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", + false, false) +Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); } +char &llvm::LCSSAID = LCSSAWrapperPass::ID; -/// Process all loops in the function, inner-most out. -bool LCSSA::runOnFunction(Function &F) { - bool Changed = false; +/// Transform \p F into loop-closed SSA form. +bool LCSSAWrapperPass::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); SE = SEWP ? &SEWP->getSE() : nullptr; - // Simplify each loop nest in the function. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= formLCSSARecursively(**I, *DT, LI, SE); - - return Changed; + return formLCSSAOnAllLoops(LI, *DT, SE); } +PreservedAnalyses LCSSAPass::run(Function &F, AnalysisManager<Function> &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + if (!formLCSSAOnAllLoops(&LI, DT, SE)) + return PreservedAnalyses::all(); + + // FIXME: This should also 'preserve the CFG'. + PreservedAnalyses PA; + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + PA.preserve<ScalarEvolutionAnalysis>(); + return PA; +} diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index abc9b65f7a394..f1838d891466e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -42,11 +42,13 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "local" @@ -148,9 +150,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SmallVector<uint32_t, 8> Weights; for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; ++MD_i) { - ConstantInt *CI = - mdconst::dyn_extract<ConstantInt>(MD->getOperand(MD_i)); - assert(CI); + auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i)); Weights.push_back(CI->getValue().getZExtValue()); } // Merge weight of this case to the default weight. 
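With the wrapper/pass split made above, the new pass manager can schedule LCSSA directly. A minimal sketch of wiring it up by hand (assumes the analysis manager already has the usual function analyses registered):

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/LCSSA.h"

// Run the new-PM LCSSAPass over a single function.
static void runLCSSA(llvm::Function &F,
                     llvm::AnalysisManager<llvm::Function> &AM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::LCSSAPass());
  FPM.run(F, AM);
}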
@@ -321,8 +321,12 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
         II->getIntrinsicID() == Intrinsic::lifetime_end)
       return isa<UndefValue>(II->getArgOperand(1));
 
-    // Assumptions are dead if their condition is trivially true.
-    if (II->getIntrinsicID() == Intrinsic::assume) {
+    // Assumptions are dead if their condition is trivially true. Guards on
+    // true are operationally no-ops. In the future we can consider more
+    // sophisticated tradeoffs for guards considering potential for check
+    // widening, but for now we keep things simple.
+    if (II->getIntrinsicID() == Intrinsic::assume ||
+        II->getIntrinsicID() == Intrinsic::experimental_guard) {
       if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
         return !Cond->isZero();
 
@@ -452,14 +456,23 @@ simplifyAndDCEInstruction(Instruction *I,
   if (Value *SimpleV = SimplifyInstruction(I, DL)) {
     // Add the users to the worklist. CAREFUL: an instruction can use itself,
     // in the case of a phi node.
-    for (User *U : I->users())
-      if (U != I)
+    for (User *U : I->users()) {
+      if (U != I) {
         WorkList.insert(cast<Instruction>(U));
+      }
+    }
 
     // Replace the instruction with its simplified value.
-    I->replaceAllUsesWith(SimpleV);
-    I->eraseFromParent();
-    return true;
+    bool Changed = false;
+    if (!I->use_empty()) {
+      I->replaceAllUsesWith(SimpleV);
+      Changed = true;
+    }
+    if (isInstructionTriviallyDead(I, TLI)) {
+      I->eraseFromParent();
+      Changed = true;
+    }
+    return Changed;
   }
   return false;
 }
@@ -486,7 +499,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
   // Iterate over the original function, only adding insts to the worklist
   // if they actually need to be revisited. This avoids having to pre-init
   // the worklist with the entire function's worth of instructions.
-  for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) {
+  for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+       BI != E;) {
     assert(!BI->isTerminator());
     Instruction *I = &*BI;
     ++BI;
@@ -1025,7 +1039,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
 ///
 /// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) {
+static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
+                              Instruction *I) {
   // Since we can't guarantee that the original dbg.declare intrinsic
   // is removed by LowerDbgDeclare(), we need to make sure that we are
   // not inserting the same dbg.value intrinsic over and over.
@@ -1035,7 +1050,8 @@
     if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
       if (DVI->getValue() == I->getOperand(0) &&
           DVI->getOffset() == 0 &&
-          DVI->getVariable() == DIVar)
+          DVI->getVariable() == DIVar &&
+          DVI->getExpression() == DIExpr)
         return true;
   }
   return false;
@@ -1049,9 +1065,6 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   auto *DIExpr = DDI->getExpression();
   assert(DIVar && "Missing variable");
 
-  if (LdStHasDebugValue(DIVar, SI))
-    return true;
-
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
   Argument *ExtendedArg = nullptr;
@@ -1066,25 +1079,25 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   // to the alloca described by DDI, if its first operand is an extend,
   // we're guaranteed that before extension, the value was narrower than
   // the size of the described variable.
- SmallVector<uint64_t, 3> NewDIExpr; + SmallVector<uint64_t, 3> Ops; unsigned PieceOffset = 0; // If this already is a bit piece, we drop the bit piece from the expression // and record the offset. if (DIExpr->isBitPiece()) { - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); + Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); PieceOffset = DIExpr->getBitPieceOffset(); } else { - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); } - NewDIExpr.push_back(dwarf::DW_OP_bit_piece); - NewDIExpr.push_back(PieceOffset); //Offset + Ops.push_back(dwarf::DW_OP_bit_piece); + Ops.push_back(PieceOffset); // Offset const DataLayout &DL = DDI->getModule()->getDataLayout(); - NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, - Builder.createExpression(NewDIExpr), - DDI->getDebugLoc(), SI); - } - else + Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size + auto NewDIExpr = Builder.createExpression(Ops); + if (!LdStHasDebugValue(DIVar, NewDIExpr, SI)) + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr, + DDI->getDebugLoc(), SI); + } else if (!LdStHasDebugValue(DIVar, DIExpr, SI)) Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, DDI->getDebugLoc(), SI); return true; @@ -1098,7 +1111,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - if (LdStHasDebugValue(DIVar, LI)) + if (LdStHasDebugValue(DIVar, DIExpr, LI)) return true; // We are now tracking the loaded value instead of the address. In the @@ -1140,12 +1153,14 @@ bool llvm::LowerDbgDeclare(Function &F) { // the stack slot (and at a lexical-scope granularity). Later // passes will attempt to elide the stack slot. if (AI && !isArray(AI)) { - for (User *U : AI->users()) - if (StoreInst *SI = dyn_cast<StoreInst>(U)) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - else if (LoadInst *LI = dyn_cast<LoadInst>(U)) + for (auto &AIUse : AI->uses()) { + User *U = AIUse.getUser(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AIUse.getOperandNo() == 1) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - else if (CallInst *CI = dyn_cast<CallInst>(U)) { + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. 
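The DIExpression surgery earlier in this hunk follows a fixed recipe: copy the existing expression elements, then append DW_OP_bit_piece with an offset and a size. The same recipe in isolation (hypothetical helper; the Dwarf.h path is the one used by this era of the tree):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/Support/Dwarf.h"

// Append an (offset, size) bit-piece descriptor to an existing expression.
static llvm::DIExpression *withBitPiece(llvm::DIBuilder &DIB,
                                        llvm::DIExpression *Expr,
                                        uint64_t OffsetInBits,
                                        uint64_t SizeInBits) {
  llvm::SmallVector<uint64_t, 8> Ops(Expr->elements_begin(),
                                     Expr->elements_end());
  Ops.push_back(llvm::dwarf::DW_OP_bit_piece);
  Ops.push_back(OffsetInBits);
  Ops.push_back(SizeInBits);
  return DIB.createExpression(Ops);
}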
@@ -1157,6 +1172,7 @@ bool llvm::LowerDbgDeclare(Function &F) { DIB.createExpression(NewDIExpr), DDI->getDebugLoc(), CI); } + } DDI->eraseFromParent(); } } @@ -1175,6 +1191,38 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return nullptr; } +static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) { + Expr.push_back(dwarf::DW_OP_deref); +} + +static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) { + if (Offset > 0) { + Expr.push_back(dwarf::DW_OP_plus); + Expr.push_back(Offset); + } else if (Offset < 0) { + Expr.push_back(dwarf::DW_OP_minus); + Expr.push_back(-Offset); + } +} + +static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder, + DIExpression *DIExpr, bool Deref, + int Offset) { + if (!Deref && !Offset) + return DIExpr; + // Create a copy of the original DIDescriptor for user variable, prepending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. + SmallVector<uint64_t, 4> NewDIExpr; + if (Deref) + DIExprAddDeref(NewDIExpr); + DIExprAddOffset(NewDIExpr, Offset); + if (DIExpr) + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + return Builder.createExpression(NewDIExpr); +} + bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, bool Deref, int Offset) { @@ -1186,25 +1234,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - if (Deref || Offset) { - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector<uint64_t, 4> NewDIExpr; - if (Deref) - NewDIExpr.push_back(dwarf::DW_OP_deref); - if (Offset > 0) { - NewDIExpr.push_back(dwarf::DW_OP_plus); - NewDIExpr.push_back(Offset); - } else if (Offset < 0) { - NewDIExpr.push_back(dwarf::DW_OP_minus); - NewDIExpr.push_back(-Offset); - } - if (DIExpr) - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); - DIExpr = Builder.createExpression(NewDIExpr); - } + DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. @@ -1219,12 +1249,73 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, Deref, Offset); } -void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, + DIBuilder &Builder, int Offset) { + DebugLoc Loc = DVI->getDebugLoc(); + auto *DIVar = DVI->getVariable(); + auto *DIExpr = DVI->getExpression(); + assert(DIVar && "Missing variable"); + + // This is an alloca-based llvm.dbg.value. The first thing it should do with + // the alloca pointer is dereference it. Otherwise we don't know how to handle + // it and give up. + if (!DIExpr || DIExpr->getNumElements() < 1 || + DIExpr->getElement(0) != dwarf::DW_OP_deref) + return; + + // Insert the offset immediately after the first deref. + // We could just change the offset argument of dbg.value, but it's unsigned... 
+ if (Offset) { + SmallVector<uint64_t, 4> NewDIExpr; + DIExprAddDeref(NewDIExpr); + DIExprAddOffset(NewDIExpr, Offset); + NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); + DIExpr = Builder.createExpression(NewDIExpr); + } + + Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, + Loc, DVI); + DVI->eraseFromParent(); +} + +void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, int Offset) { + if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) { + Use &U = *UI++; + if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) + replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); + } +} + +unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { + unsigned NumDeadInst = 0; + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != &BB->front()) { + // Delete the next to last instruction. + Instruction *Inst = &*--EndInst->getIterator(); + if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { + EndInst = Inst; + continue; + } + if (!isa<DbgInfoIntrinsic>(Inst)) + ++NumDeadInst; + Inst->eraseFromParent(); + } + return NumDeadInst; +} + +unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - (*SI)->removePredecessor(BB); + for (BasicBlock *Successor : successors(BB)) + Successor->removePredecessor(BB); // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. @@ -1237,12 +1328,15 @@ void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { new UnreachableInst(I->getContext(), I); // All instructions after this are dead. + unsigned NumInstrsRemoved = 0; BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); BB->getInstList().erase(BBI++); + ++NumInstrsRemoved; } + return NumInstrsRemoved; } /// changeToCall - Convert the specified invoke into a normal call. @@ -1280,36 +1374,52 @@ static bool markAliveBlocks(Function &F, // Do a quick scan of the basic block, turning any obviously unreachable // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + for (Instruction &I : *BB) { // Assumptions that are known to be false are equivalent to unreachable. // Also, if the condition is undefined, then we make the choice most // beneficial to the optimizer, and choose that to also be unreachable. 
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI)) + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { if (II->getIntrinsicID() == Intrinsic::assume) { - bool MakeUnreachable = false; - if (isa<UndefValue>(II->getArgOperand(0))) - MakeUnreachable = true; - else if (ConstantInt *Cond = - dyn_cast<ConstantInt>(II->getArgOperand(0))) - MakeUnreachable = Cond->isZero(); - - if (MakeUnreachable) { + if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(&*BBI, false); + changeToUnreachable(II, false); Changed = true; break; } } - if (CallInst *CI = dyn_cast<CallInst>(BBI)) { + if (II->getIntrinsicID() == Intrinsic::experimental_guard) { + // A call to the guard intrinsic bails out of the current compilation + // unit if the predicate passed to it is false. If the predicate is a + // constant false, then we know the guard will bail out of the current + // compile unconditionally, so all code following it is dead. + // + // Note: unlike in llvm.assume, it is not "obviously profitable" for + // guards to treat `undef` as `false` since a guard on `undef` can + // still be useful for widening. + if (match(II->getArgOperand(0), m_Zero())) + if (!isa<UnreachableInst>(II->getNextNode())) { + changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); + Changed = true; + break; + } + } + } + + if (auto *CI = dyn_cast<CallInst>(&I)) { + Value *Callee = CI->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(CI, /*UseLLVMTrap=*/false); + Changed = true; + break; + } if (CI->doesNotReturn()) { // If we found a call to a no-return function, insert an unreachable // instruction after it. Make sure there isn't *already* one there // though. - ++BBI; - if (!isa<UnreachableInst>(BBI)) { + if (!isa<UnreachableInst>(CI->getNextNode())) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(&*BBI, false); + changeToUnreachable(CI->getNextNode(), false); Changed = true; } break; @@ -1319,7 +1429,7 @@ static bool markAliveBlocks(Function &F, // Store to undef and store to null are undefined and used to signal that // they should be changed to unreachable by passes that can't modify the // CFG. - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (auto *SI = dyn_cast<StoreInst>(&I)) { // Don't touch volatile stores. if (SI->isVolatile()) continue; @@ -1393,9 +1503,9 @@ static bool markAliveBlocks(Function &F, } Changed |= ConstantFoldTerminator(BB, true); - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (Reachable.insert(*SI).second) - Worklist.push_back(*SI); + for (BasicBlock *Successor : successors(BB)) + if (Reachable.insert(Successor).second) + Worklist.push_back(Successor); } while (!Worklist.empty()); return Changed; } @@ -1438,7 +1548,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { - SmallPtrSet<BasicBlock*, 128> Reachable; + SmallPtrSet<BasicBlock*, 16> Reachable; bool Changed = markAliveBlocks(F, Reachable); // If there are unreachable blocks in the CFG... 
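A side effect of the markAliveBlocks rework above: changeToUnreachable now reports how many instructions it deleted, so callers can account for removed code without re-counting. Minimal usage (hypothetical wrapper name):

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

// Everything from I to the end of its block becomes unreachable; returns
// the number of instructions deleted. No llvm.trap is inserted here.
static unsigned truncateBlockAt(llvm::Instruction *I) {
  return llvm::changeToUnreachable(I, /*UseLLVMTrap=*/false);
}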
@@ -1454,10 +1564,9 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { if (Reachable.count(&*BB)) continue; - for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE; - ++SI) - if (Reachable.count(*SI)) - (*SI)->removePredecessor(&*BB); + for (BasicBlock *Successor : successors(&*BB)) + if (Reachable.count(Successor)) + Successor->removePredecessor(&*BB); if (LVI) LVI->eraseBlock(&*BB); BB->dropAllReferences(); @@ -1495,6 +1604,7 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); break; case LLVMContext::MD_noalias: + case LLVMContext::MD_mem_parallel_loop_access: K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); break; case LLVMContext::MD_range: @@ -1566,7 +1676,7 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, UI != UE;) { Use &U = *UI++; auto *I = cast<Instruction>(U.getUser()); - if (DT.dominates(BB, I->getParent())) { + if (DT.properlyDominates(BB, I->getParent())) { U.set(To); DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " << *To << " in " << *U << "\n"); @@ -1577,18 +1687,18 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, } bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { - if (isa<IntrinsicInst>(CS.getInstruction())) - // Most LLVM intrinsics are things which can never take a safepoint. - // As a result, we don't need to have the stack parsable at the - // callsite. This is a highly useful optimization since intrinsic - // calls are fairly prevalent, particularly in debug builds. - return true; - // Check if the function is specifically marked as a gc leaf function. if (CS.hasFnAttr("gc-leaf-function")) return true; - if (const Function *F = CS.getCalledFunction()) - return F->hasFnAttribute("gc-leaf-function"); + if (const Function *F = CS.getCalledFunction()) { + if (F->hasFnAttribute("gc-leaf-function")) + return true; + + if (auto IID = F->getIntrinsicID()) + // Most LLVM intrinsics do not take safepoints. + return IID != Intrinsic::experimental_gc_statepoint && + IID != Intrinsic::experimental_deoptimize; + } return false; } @@ -1723,7 +1833,23 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If the AndMask is zero for this bit, clear the bit. if ((AndMask & Bit) == 0) Result->Provenance[i] = BitPart::Unset; + return Result; + } + // If this is a zext instruction zero extend the result. + if (I->getOpcode() == Instruction::ZExt) { + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + auto NarrowBitWidth = + cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth(); + for (unsigned i = 0; i < NarrowBitWidth; ++i) + Result->Provenance[i] = Res->Provenance[i]; + for (unsigned i = NarrowBitWidth; i < BitWidth; ++i) + Result->Provenance[i] = BitPart::Unset; return Result; } } @@ -1754,7 +1880,7 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, /// Given an OR instruction, check to see if this is a bitreverse /// idiom. If so, insert the new intrinsic and return true. 
-bool llvm::recognizeBitReverseOrBSwapIdiom( +bool llvm::recognizeBSwapOrBitReverseIdiom( Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl<Instruction *> &InsertedInsts) { if (Operator::getOpcode(I) != Instruction::Or) @@ -1766,6 +1892,15 @@ bool llvm::recognizeBitReverseOrBSwapIdiom( return false; // Can't do vectors or integers > 128 bits. unsigned BW = ITy->getBitWidth(); + unsigned DemandedBW = BW; + IntegerType *DemandedTy = ITy; + if (I->hasOneUse()) { + if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) { + DemandedTy = cast<IntegerType>(Trunc->getType()); + DemandedBW = DemandedTy->getBitWidth(); + } + } + // Try to find all the pieces corresponding to the bswap. std::map<Value *, Optional<BitPart>> BPS; auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); @@ -1775,11 +1910,12 @@ bool llvm::recognizeBitReverseOrBSwapIdiom( // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; - for (unsigned i = 0; i < BW; ++i) { - OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); + bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; + for (unsigned i = 0; i < DemandedBW; ++i) { + OKForBSwap &= + bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW); OKForBitReverse &= - bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); + bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW); } Intrinsic::ID Intrin; @@ -1790,7 +1926,51 @@ bool llvm::recognizeBitReverseOrBSwapIdiom( else return false; + if (ITy != DemandedTy) { + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + IntegerType *ProviderTy = cast<IntegerType>(Provider->getType()); + // We may need to truncate the provider. + if (DemandedTy != ProviderTy) { + auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, + "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + auto *CI = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(CI); + auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + InsertedInsts.push_back(ExtInst); + return true; + } + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); return true; } + +// CodeGen has special handling for some string functions that may replace +// them with target-specific intrinsics. Since that'd skip our interceptors +// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses, +// we mark affected calls as NoBuiltin, which will disable optimization +// in CodeGen. 
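+// For example (editor's illustration): a call like memcmp(a, b, 4) may be
+// lowered by CodeGen to an inline 32-bit load/compare with no libcall at all,
+// so MSan's memcmp interceptor never runs and the accesses to a and b go
+// unchecked; marking the call NoBuiltin keeps the interceptable libcall.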
+void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI, + const TargetLibraryInfo *TLI) { + Function *F = CI->getCalledFunction(); + LibFunc::Func Func; + if (!F || F->hasLocalLinkage() || !F->hasName() || + !TLI->getLibFunc(F->getName(), Func)) + return; + switch (Func) { + default: break; + case LibFunc::memcmp: + case LibFunc::memchr: + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcmp: + case LibFunc::strlen: + case LibFunc::strnlen: + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin); + break; + } +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 1fa469595d168..b3a928bf77531 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,6 +37,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -489,14 +490,9 @@ ReprocessLoop: DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " << P->getName() << "\n"); - // Inform each successor of each dead pred. - for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI) - (*SI)->removePredecessor(P); // Zap the dead pred's terminator and replace it with unreachable. TerminatorInst *TI = P->getTerminator(); - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - P->getTerminator()->eraseFromParent(); - new UnreachableInst(P->getContext(), P); + changeToUnreachable(TI, /*UseLLVMTrap=*/false); Changed = true; } } @@ -506,14 +502,13 @@ ReprocessLoop: // trip count computations. SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - E = ExitingBlocks.end(); I != E; ++I) - if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + for (BasicBlock *ExitingBlock : ExitingBlocks) + if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " - << (*I)->getName() << "\n"); + << ExitingBlock->getName() << "\n"); BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); @@ -545,9 +540,7 @@ ReprocessLoop: SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); - for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), - E = ExitBlockSet.end(); I != E; ++I) { - BasicBlock *ExitBlock = *I; + for (BasicBlock *ExitBlock : ExitBlockSet) { for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); PI != PE; ++PI) // Must be exactly this loop: no subloops, parent loops, or non-loop preds @@ -691,8 +684,10 @@ ReprocessLoop: } DT->eraseNode(ExitingBlock); - BI->getSuccessor(0)->removePredecessor(ExitingBlock); - BI->getSuccessor(1)->removePredecessor(ExitingBlock); + BI->getSuccessor(0)->removePredecessor( + ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); + BI->getSuccessor(1)->removePredecessor( + ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); ExitingBlock->eraseFromParent(); } } @@ -731,11 +726,6 @@ namespace { initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); } - DominatorTree *DT; - LoopInfo *LI; - ScalarEvolution *SE; - AssumptionCache *AC; - bool runOnFunction(Function &F) override; void 
getAnalysisUsage(AnalysisUsage &AU) const override { @@ -753,7 +743,8 @@ namespace { AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); - AU.addPreserved<DependenceAnalysis>(); + AU.addPreservedID(LCSSAID); + AU.addPreserved<DependenceAnalysisWrapperPass>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -768,9 +759,6 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -783,20 +771,64 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); - SE = SEWP ? &SEWP->getSE() : nullptr; - AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; + AssumptionCache *AC = + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); +#ifndef NDEBUG + if (PreserveLCSSA) { + assert(DT && "DT not available."); + assert(LI && "LI not available."); + bool InLCSSA = + all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); }); + assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken."); + } +#endif // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); +#ifndef NDEBUG + if (PreserveLCSSA) { + bool InLCSSA = + all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); }); + assert(InLCSSA && "LCSSA is broken after loop-simplify."); + } +#endif return Changed; } +PreservedAnalyses LoopSimplifyPass::run(Function &F, + AnalysisManager<Function> &AM) { + bool Changed = false; + LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); + ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); + AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); + + // FIXME: This pass should verify that the loops on which it's operating + // are in canonical SSA form, and that the pass itself preserves this form. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + PA.preserve<ScalarEvolutionAnalysis>(); + PA.preserve<DependenceAnalysis>(); + return PA; +} + // FIXME: Restore this code when we re-enable verification in verifyAnalysis // below. 
#if 0 diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index eea9237ba80c6..7f1f78fa8b411 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -34,6 +34,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -44,9 +45,14 @@ using namespace llvm; STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); -/// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by VMap. -static inline void RemapInstruction(Instruction *I, +static cl::opt<bool> +UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden, + cl::desc("Allow runtime unrolled loops to be unrolled " + "with epilog instead of prolog.")); + +/// Convert the instruction operands from referencing the current values into +/// those specified by VMap. +static inline void remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); @@ -64,8 +70,8 @@ static inline void RemapInstruction(Instruction *I, } } -/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it -/// only has one predecessor, and that predecessor only has one successor. +/// Folds a basic block into its predecessor if it only has one predecessor, and +/// that predecessor only has one successor. /// The LoopInfo Analysis that is passed will be kept consistent. If folding is /// successful references to the containing loop must be removed from /// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have @@ -73,8 +79,9 @@ static inline void RemapInstruction(Instruction *I, /// of loops that have already been forgotten to prevent redundant, expensive /// calls to ScalarEvolution::forgetLoop. Returns the new combined block. static BasicBlock * -FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, - SmallPtrSetImpl<Loop *> &ForgottenLoops) { +foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, + SmallPtrSetImpl<Loop *> &ForgottenLoops, + DominatorTree *DT) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -106,7 +113,16 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, // OldName will be valid until erased. StringRef OldName = BB->getName(); - // Erase basic block from the function... + // Erase the old block and update dominator info. + if (DT) + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(OnlyPred); + SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); + for (auto *DI : Children) + DT->changeImmediateDominator(DI, PredDTN); + + DT->eraseNode(BB); + } // ScalarEvolution holds references to loop exit blocks. if (SE) { @@ -126,6 +142,35 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, return OnlyPred; } +/// Check if unrolling created a situation where we need to insert phi nodes to +/// preserve LCSSA form. 
+/// \param Blocks is a vector of basic blocks representing the unrolled loop.
+/// \param L is the outer loop.
+/// It's possible that some of the blocks are in L, and some are not. In this
+/// case, if there is a use outside L and the definition is inside L, we need
+/// to insert a phi-node, otherwise LCSSA will be broken.
+/// The function is just a helper function for llvm::UnrollLoop that returns
+/// true if this situation occurs, indicating that LCSSA needs to be fixed.
+static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
+                                     LoopInfo *LI) {
+  for (BasicBlock *BB : Blocks) {
+    if (LI->getLoopFor(BB) == L)
+      continue;
+    for (Instruction &I : *BB) {
+      for (Use &U : I.operands()) {
+        if (auto Def = dyn_cast<Instruction>(U)) {
+          Loop *DefLoop = LI->getLoopFor(Def->getParent());
+          if (!DefLoop)
+            continue;
+          if (DefLoop->contains(L))
+            return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
 /// if unrolling was successful, or false if the loop was unmodified. Unrolling
 /// can only fail when the loop's latch block is not terminated by a conditional
@@ -155,7 +200,7 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE,
 ///
 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
 /// DominatorTree if they are non-null.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                       bool AllowRuntime, bool AllowExpensiveTripCount,
                       unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                       DominatorTree *DT, AssumptionCache *AC,
@@ -218,20 +263,48 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   bool CompletelyUnroll = Count == TripCount;
   SmallVector<BasicBlock *, 4> ExitBlocks;
   L->getExitBlocks(ExitBlocks);
-  Loop *ParentL = L->getParentLoop();
-  bool AllExitsAreInsideParentLoop = !ParentL ||
-      std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
-                  [&](BasicBlock *BB) { return ParentL->contains(BB); });
+  std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
+
+  // Go through all exits of L and see if there are any phi-nodes there. We just
+  // conservatively assume that they're inserted to preserve LCSSA form, which
+  // means that complete unrolling might break this form. We need to either fix
+  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+  // now we just recompute LCSSA for the outer loop, but it should be possible
+  // to fix it in-place.
+  bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
+      std::any_of(ExitBlocks.begin(), ExitBlocks.end(),
+                  [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
 
   // We assume a run-time trip count if the compiler cannot
   // figure out the loop trip count and the unroll-runtime
   // flag is specified.
   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
 
-  if (RuntimeTripCount &&
-      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
-                               PreserveLCSSA))
-    return false;
+  // Loops containing convergent instructions must have a count that divides
+  // their TripMultiple.
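+  // For example (editor's note, values illustrative): with TripMultiple == 4,
+  // unrolling by Count == 2 needs no remainder loop (4 % 2 == 0), so any
+  // convergent calls stay control-flow uniform; Count == 3 would require a
+  // remainder loop, which the assertion below forbids when a convergent
+  // operation is present.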
+  DEBUG(
+      {
+        bool HasConvergent = false;
+        for (auto &BB : L->blocks())
+          for (auto &I : *BB)
+            if (auto CS = CallSite(&I))
+              HasConvergent |= CS.isConvergent();
+        assert((!HasConvergent || TripMultiple % Count == 0) &&
+               "Unroll count must divide trip multiple if loop contains a "
+               "convergent operation.");
+      });
+  // Don't output the runtime loop remainder if TripMultiple is a multiple of
+  // Count. Such a remainder is never needed, and is unsafe if the loop
+  // contains a convergent instruction.
+  if (RuntimeTripCount && TripMultiple % Count != 0 &&
+      !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
+                                  UnrollRuntimeEpilog, LI, SE, DT,
+                                  PreserveLCSSA)) {
+    if (Force)
+      RuntimeTripCount = false;
+    else
+      return false;
+  }
 
   // Notify ScalarEvolution that the loop will be substantially changed,
   // if not outright eliminated.
@@ -308,6 +381,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
 
   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+  std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
   for (unsigned It = 1; It != Count; ++It) {
     std::vector<BasicBlock*> NewBlocks;
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -349,13 +423,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       if (*BB == Header)
         // Loop over all of the PHI nodes in the block, changing them to use
         // the incoming values from the previous block.
-        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
-          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+        for (PHINode *OrigPHI : OrigPHINode) {
+          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
           Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
           if (Instruction *InValI = dyn_cast<Instruction>(InVal))
             if (It > 1 && L->contains(InValI))
               InVal = LastValueMap[InValI];
-          VMap[OrigPHINode[i]] = InVal;
+          VMap[OrigPHI] = InVal;
           New->getInstList().erase(NewPHI);
         }
 
@@ -366,11 +440,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
         LastValueMap[VI->first] = VI->second;
 
       // Add phi entries for newly created values to all exit blocks.
-      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
-           SI != SE; ++SI) {
-        if (L->contains(*SI))
+      for (BasicBlock *Succ : successors(*BB)) {
+        if (L->contains(Succ))
           continue;
-        for (BasicBlock::iterator BBI = (*SI)->begin();
+        for (BasicBlock::iterator BBI = Succ->begin();
              PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
           Value *Incoming = phi->getIncomingValueForBlock(*BB);
           ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
@@ -387,18 +460,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
 
       Latches.push_back(New);
       NewBlocks.push_back(New);
+      UnrolledLoopBlocks.push_back(New);
+
+      // Update DomTree: since we just copy the loop body, and each copy has a
+      // dedicated entry block (copy of the header block), this header's copy
+      // dominates all copied blocks. That means dominance relations in the
+      // copied body are the same as in the original body.
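+      // For instance (editor's illustration): if the original body is
+      // Header -> {A, B} -> Latch with idom(A) = idom(B) = Header, then in
+      // copy It we get idom(Header_It) = Latch_(It-1) and idom(A_It) =
+      // idom(B_It) = Header_It, mirroring the original dominance relations.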
+      if (DT) {
+        if (*BB == Header)
+          DT->addNewBlock(New, Latches[It - 1]);
+        else {
+          auto BBDomNode = DT->getNode(*BB);
+          auto BBIDom = BBDomNode->getIDom();
+          BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+          DT->addNewBlock(
+              New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+        }
+      }
     }
 
     // Remap all instructions in the most recent iteration
-    for (unsigned i = 0; i < NewBlocks.size(); ++i)
-      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
-           E = NewBlocks[i]->end(); I != E; ++I)
-        ::RemapInstruction(&*I, LastValueMap);
+    for (BasicBlock *NewBlock : NewBlocks)
+      for (Instruction &I : *NewBlock)
+        ::remapInstruction(&I, LastValueMap);
   }
 
   // Loop over the PHI nodes in the original block, setting incoming values.
-  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
-    PHINode *PN = OrigPHINode[i];
+  for (PHINode *PN : OrigPHINode) {
     if (CompletelyUnroll) {
       PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
       Header->getInstList().erase(PN);
@@ -453,11 +541,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       // Remove phi operands at this loop exit
       if (Dest != LoopExit) {
         BasicBlock *BB = Latches[i];
-        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
-             SI != SE; ++SI) {
-          if (*SI == Headers[i])
+        for (BasicBlock *Succ : successors(BB)) {
+          if (Succ == Headers[i])
             continue;
-          for (BasicBlock::iterator BBI = (*SI)->begin();
+          for (BasicBlock::iterator BBI = Succ->begin();
                PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
             Phi->removeIncomingValue(BB, false);
           }
@@ -468,16 +555,43 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       Term->eraseFromParent();
     }
   }
+
+  // Update dominators of blocks we might reach through exits.
+  // The immediate dominator of such a block might change, because we add more
+  // routes that can lead to the exit: we can now reach it from the copied
+  // iterations too. Thus, the new idom of the block will be the nearest common
+  // dominator of the previous idom and the common dominator of all copies of
+  // the previous idom. This is equivalent to the nearest common dominator of
+  // the previous idom and the first latch, which dominates all copies of the
+  // previous idom.
+  if (DT && Count > 1) {
+    for (auto *BB : OriginalLoopBlocks) {
+      auto *BBDomNode = DT->getNode(BB);
+      SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+      for (auto *ChildDomNode : BBDomNode->getChildren()) {
+        auto *ChildBB = ChildDomNode->getBlock();
+        if (!L->contains(ChildBB))
+          ChildrenToUpdate.push_back(ChildBB);
+      }
+      BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
+      for (auto *ChildBB : ChildrenToUpdate)
+        DT->changeImmediateDominator(ChildBB, NewIDom);
+    }
+  }
 
   // Merge adjacent basic blocks, if possible.
   SmallPtrSet<Loop *, 4> ForgottenLoops;
-  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
-    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+  for (BasicBlock *Latch : Latches) {
+    BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
     if (Term->isUnconditional()) {
       BasicBlock *Dest = Term->getSuccessor(0);
-      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
-                                                      ForgottenLoops))
+      if (BasicBlock *Fold =
+              foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) {
+        // Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold); + UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), + UnrolledLoopBlocks.end(), Dest), + UnrolledLoopBlocks.end()); + } } } @@ -485,10 +599,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // whole function's cache. AC->clear(); - // FIXME: Reconstruct dom info, because it is not preserved properly. - // Incrementally updating domtree after loop unrolling would be easy. - if (DT) + // FIXME: We only preserve DT info for complete unrolling now. Incrementally + // updating domtree after partial loop unrolling should also be easy. + if (DT && !CompletelyUnroll) DT->recalculate(*L->getHeader()->getParent()); + else if (DT) + DEBUG(DT->verifyDomTree()); // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll) { @@ -508,19 +624,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // go. const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); - for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), - BBE = NewLoopBlocks.end(); BB != BBE; ++BB) - for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { + for (BasicBlock *BB : NewLoopBlocks) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; - if (isInstructionTriviallyDead(Inst)) - (*BB)->getInstList().erase(Inst); - else if (Value *V = SimplifyInstruction(Inst, DL)) - if (LI->replacementPreservesLCSSAForm(Inst, V)) { + if (Value *V = SimplifyInstruction(Inst, DL)) + if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); - (*BB)->getInstList().erase(Inst); - } + if (isInstructionTriviallyDead(Inst)) + BB->getInstList().erase(Inst); } + } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; @@ -530,6 +644,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (CompletelyUnroll) LI->markAsRemoved(L); + // After complete unrolling most of the blocks should be contained in OuterL. + // However, some of them might happen to be out of OuterL (e.g. if they + // precede a loop exit). In this case we might need to insert PHI nodes in + // order to preserve LCSSA form. + // We don't need to check this if we already know that we need to fix LCSSA + // form. + // TODO: For now we just recompute LCSSA for the outer loop in this case, but + // it should be possible to fix it in-place. + if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) + NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); + // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any @@ -538,7 +663,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); + simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. 
The unrolled
    // loop's last loop latch is guaranteed to be in the outermost loop after
@@ -548,7 +673,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
     while (OuterL->getParentLoop() != LatchLoop)
       OuterL = OuterL->getParentLoop();
 
-    if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
+    if (NeedToFixLCSSA)
       formLCSSARecursively(*OuterL, *DT, LI, SE);
     else
       assert(OuterL->isLCSSAForm(*DT) &&
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0d68f18ad0e5e..861a50cf354d8 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -16,8 +16,8 @@
 // case, we need to generate code to execute these 'left over' iterations.
 //
 // The current strategy generates an if-then-else sequence prior to the
-// unrolled loop to execute the 'left over' iterations. Other strategies
-// include generate a loop before or after the unrolled loop.
+// unrolled loop to execute the 'left over' iterations, which are run either
+// before (prolog) or after (epilog) the unrolled loop.
 //
 //===----------------------------------------------------------------------===//
 
@@ -60,91 +60,220 @@ STATISTIC(NumRuntimeUnrolled,
 /// than the unroll factor.
 ///
 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
-                          BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
-                          BasicBlock *OrigPH, BasicBlock *NewPH,
-                          ValueToValueMapTy &VMap, DominatorTree *DT,
-                          LoopInfo *LI, bool PreserveLCSSA) {
+                          BasicBlock *PrologExit, BasicBlock *PreHeader,
+                          BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
+                          DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
   BasicBlock *Latch = L->getLoopLatch();
   assert(Latch && "Loop must have a latch");
+  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
 
   // Create a PHI node for each outgoing value from the original loop
   // (which means it is an outgoing value from the prolog code too).
   // The new PHI node is inserted in the prolog end basic block.
-  // The new PHI name is added as an operand of a PHI node in either
+  // The new PHI node value is added as an operand of a PHI node in either
   // the loop header or the loop exit block.
-  for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
-       SBI != SBE; ++SBI) {
-    for (BasicBlock::iterator BBI = (*SBI)->begin();
-         PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
-
+  for (BasicBlock *Succ : successors(Latch)) {
+    for (Instruction &BBI : *Succ) {
+      PHINode *PN = dyn_cast<PHINode>(&BBI);
+      // Exit when we passed all PHI nodes.
+      if (!PN)
+        break;
       // Add a new PHI node to the prolog end block and add the
       // appropriate incoming values.
-      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
-                                       PrologEnd->getTerminator());
+      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+                                       PrologExit->getFirstNonPHI());
       // Adding a value to the new PHI node from the original loop preheader.
       // This is the value that skips all the prolog code.
       if (L->contains(PN)) {
-        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+                           PreHeader);
       } else {
-        NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
+        NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
       }
 
       Value *V = PN->getIncomingValueForBlock(Latch);
       if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (L->contains(I)) {
-          V = VMap[I];
+          V = VMap.lookup(I);
        }
      }
       // Adding a value to the new PHI node from the last prolog block
       // that was created.
- NewPN->addIncoming(V, LastPrologBB); + NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { - PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); + PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { - PN->addIncoming(NewPN, PrologEnd); + PN->addIncoming(NewPN, PrologExit); } } } - // Create a branch around the orignal loop, which is taken if there are no + // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. - Instruction *InsertPt = PrologEnd->getTerminator(); + Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); - // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1) - // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and - // and all of the iterations of this loop were executed by the prologue. Note - // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow. + // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) + // This means %xtraiter is (BECount + 1) and all of the iterations of this + // loop were executed by the prologue. Note that if BECount <u (Count - 1) + // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); + SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPH); + B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + InsertPt->eraseFromParent(); +} + +/// Connect the unrolling epilog code to the original loop. +/// The unrolling epilog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Update PHI nodes at the unrolling loop exit and epilog loop exit +/// - Create PHI nodes at the unrolling loop exit to combine +/// values that exit the unrolling loop code and jump around it. +/// - Update PHI operands in the epilog loop by the new PHI nodes +/// - Branch around the epilog loop if extra iters (ModVal) is zero. +/// +static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, + BasicBlock *Exit, BasicBlock *PreHeader, + BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop must have a latch"); + BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); + + // Loop structure should be the following: + // + // PreHeader + // NewPreHeader + // Header + // ... + // Latch + // NewExit (PN) + // EpilogPreHeader + // EpilogHeader + // ... + // EpilogLatch + // Exit (EpilogPN) + + // Update PHI nodes at NewExit and Exit. + for (Instruction &BBI : *NewExit) { + PHINode *PN = dyn_cast<PHINode>(&BBI); + // Exit when we passed all PHI nodes. 
+    if (!PN)
+      break;
+    // PN should be used in another PHI located in the Exit block, because
+    // Exit was split by SplitBlockPredecessors into Exit and NewExit.
+    // Basically it should look like:
+    // NewExit:
+    //   PN = PHI [I, Latch]
+    // ...
+    // Exit:
+    //   EpilogPN = PHI [PN, EpilogPreHeader]
+    //
+    // The incoming block is EpilogPreHeader instead of NewExit because
+    // NewExit was split one more time to get EpilogPreHeader.
+    assert(PN->hasOneUse() && "The phi should have 1 use");
+    PHINode *EpilogPN = cast<PHINode>(PN->use_begin()->getUser());
+    assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+    // Add incoming PreHeader from branch around the Loop
+    PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+
+    Value *V = PN->getIncomingValueForBlock(Latch);
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (I && L->contains(I))
+      // If the value comes from an instruction in the loop, add the VMap value.
+      V = VMap.lookup(I);
+    // For an instruction outside the loop, a constant, or an undef value,
+    // insert the value itself.
+    EpilogPN->addIncoming(V, EpilogLatch);
+
+    assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+           "EpilogPN should have EpilogPreHeader incoming block");
+    // Change EpilogPreHeader incoming block to NewExit.
+    EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+                               NewExit);
+    // Now PHIs should look like:
+    // NewExit:
+    //   PN = PHI [I, Latch], [undef, PreHeader]
+    // ...
+    // Exit:
+    //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+  }
+
+  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+  // Update corresponding PHI nodes in epilog loop.
+  for (BasicBlock *Succ : successors(Latch)) {
+    // Skip this as we already updated phis in exit blocks.
+    if (!L->contains(Succ))
+      continue;
+    for (Instruction &BBI : *Succ) {
+      PHINode *PN = dyn_cast<PHINode>(&BBI);
+      // Exit when we passed all PHI nodes.
+      if (!PN)
+        break;
+      // Add new PHI nodes to the loop exit block and update epilog
+      // PHIs with the new PHI values.
+      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+                                       NewExit->getFirstNonPHI());
+      // Adding a value to the new PHI node from the unrolling loop preheader.
+      NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+      // Adding a value to the new PHI node from the unrolling loop latch.
+      NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+
+      // Update the existing PHI node operand with the value from the new PHI
+      // node. The corresponding instruction in the epilog loop should be a PHI.
+      PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+      VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+    }
+  }
+
+  Instruction *InsertPt = NewExit->getTerminator();
+  IRBuilder<> B(InsertPt);
+  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
+  assert(Exit && "Loop must have a single exit block only");
+  // Split the exit to maintain loop canonicalization guarantees
+  SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+                         PreserveLCSSA);
+  // Add the branch to the exit block (around the unrolling loop)
+  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+  InsertPt->eraseFromParent();
+}
+
 /// Create a clone of the blocks in a loop and connect them together.
-/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
-/// loop will be created including all cloned blocks, and the iterator of it
-/// switches to count NewIter down to 0.
+/// If CreateRemainderLoop is false, loop structure will not be cloned;
+/// otherwise a new loop will be created including all cloned blocks, and its
+/// induction variable counts NewIter down to 0.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// If the loop structure is cloned, InsertTop should be the new preheader and
+/// InsertBot the new loop exit.
 ///
-static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
+static void CloneLoopBlocks(Loop *L, Value *NewIter,
+                            const bool CreateRemainderLoop,
+                            const bool UseEpilogRemainder,
                             BasicBlock *InsertTop, BasicBlock *InsertBot,
+                            BasicBlock *Preheader,
                             std::vector<BasicBlock *> &NewBlocks,
                             LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                             LoopInfo *LI) {
-  BasicBlock *Preheader = L->getLoopPreheader();
+  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   Function *F = Header->getParent();
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
   LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
 
   Loop *NewLoop = nullptr;
   Loop *ParentLoop = L->getParentLoop();
-  if (!UnrollProlog) {
+  if (CreateRemainderLoop) {
     NewLoop = new Loop();
     if (ParentLoop)
       ParentLoop->addChildLoop(NewLoop);
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
   // For each block in the original loop, create a new copy,
   // and update the value map with the newly created values.
   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
-    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
+    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
     NewBlocks.push_back(NewBB);
 
     if (NewLoop)
@@ -176,19 +305,20 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
       // For the first block, add a CFG connection to this newly
       // created block.
       InsertTop->getTerminator()->setSuccessor(0, NewBB);
-    }
+
     if (Latch == *BB) {
-      // For the last block, if UnrollProlog is true, create a direct jump to
-      // InsertBot. If not, create a loop back to cloned head.
+      // For the last block, if CreateRemainderLoop is false, create a direct
+      // jump to InsertBot. If not, create a loop back to cloned head.
       VMap.erase((*BB)->getTerminator());
       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
       IRBuilder<> Builder(LatchBR);
-      if (UnrollProlog) {
+      if (!CreateRemainderLoop) {
         Builder.CreateBr(InsertBot);
       } else {
-        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
+        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+                                          suffix + ".iter",
                                           FirstLoopBB->getFirstNonPHI());
         Value *IdxSub =
             Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
   // cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - if (UnrollProlog) { - VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); - cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); + if (!CreateRemainderLoop) { + if (UseEpilogRemainder) { + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + NewPHI->removeIncomingValue(Latch, false); + } else { + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); + cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); + } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); @@ -217,8 +353,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); - if (VMap[InVal]) - NewPHI->setIncomingValue(idx, VMap[InVal]); + if (Value *V = VMap.lookup(InVal)) + NewPHI->setIncomingValue(idx, V); } } if (NewLoop) { @@ -254,11 +390,11 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, } } -/// Insert code in the prolog code when unrolling a loop with a +/// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// /// This method assumes that the loop unroll factor is total number -/// of loop bodes in the loop after unrolling. (Some folks refer +/// of loop bodies in the loop after unrolling. (Some folks refer /// to the unroll factor as the number of *extra* copies added). /// We assume also that the loop unroll factor is a power-of-two. So, after /// unrolling the loop, the number of loop bodies executed is 2, @@ -266,37 +402,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for /// the switch instruction is generated. /// +/// ***Prolog case*** /// extraiters = tripcount % loopfactor /// if (extraiters == 0) jump Loop: -/// else jump Prol +/// else jump Prol: /// Prol: LoopBody; /// extraiters -= 1 // Omitted if unroll factor is 2. /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. -/// if (tripcount < loopfactor) jump End +/// if (tripcount < loopfactor) jump End: /// Loop: /// ... /// End: /// -bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, - bool AllowExpensiveTripCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - bool PreserveLCSSA) { +/// ***Epilog case*** +/// extraiters = tripcount % loopfactor +/// if (tripcount < loopfactor) jump LoopExit: +/// unroll_iters = tripcount - extraiters +/// Loop: LoopBody; (executes unroll_iter times); +/// unroll_iter -= 1 +/// if (unroll_iter != 0) jump Loop: +/// LoopExit: +/// if (extraiters == 0) jump EpilExit: +/// Epil: LoopBody; (executes extraiters times) +/// extraiters -= 1 // Omitted if unroll factor is 2. +/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. +/// EpilExit: + +bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, + bool UseEpilogRemainder, + LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. 
- if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) + if (!L->isLoopSimplifyForm()) + return false; + BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop + if (!Exit) return false; - // Use Scalar Evolution to compute the trip count. This allows more - // loops to be unrolled than relying on induction var simplification + // Use Scalar Evolution to compute the trip count. This allows more loops to + // be unrolled than relying on induction var simplification. if (!SE) return false; - // Only unroll loops with a computable trip count and the trip count needs - // to be an int value (allowing a pointer type is a TODO item) + // Only unroll loops with a computable trip count, and the trip count needs + // to be an int value (allowing a pointer type is a TODO item). const SCEV *BECountSC = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) @@ -304,21 +459,19 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); - // Add 1 since the backedge count doesn't include the first loop iteration + // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; BasicBlock *Header = L->getHeader(); + BasicBlock *PreHeader = L->getLoopPreheader(); + BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); - if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) - return false; - - // We only handle cases when the unroll factor is a power of 2. - // Count is the loop unroll factor, the number of extra copies added + 1. - if (!isPowerOf2_32(Count)) + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // This constraint lets us deal with an overflowing trip count easily; see the @@ -326,51 +479,115 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, if (Log2_32(Count) > BEWidth) return false; - // If this loop is nested, then the loop unroller changes the code in - // parent loop, so the Scalar Evolution pass needs to be run again + // If this loop is nested, then the loop unroller changes the code in the + // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); - BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); - // It helps to splits the original preheader twice, one for the end of the - // prolog code and one for a new loop preheader - BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); - BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); - BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + // Loop structure is the following: + // + // PreHeader + // Header + // ... + // Latch + // Exit + + BasicBlock *NewPreHeader; + BasicBlock *NewExit = nullptr; + BasicBlock *PrologExit = nullptr; + BasicBlock *EpilogPreHeader = nullptr; + BasicBlock *PrologPreHeader = nullptr; + + if (UseEpilogRemainder) { + // If epilog remainder + // Split PreHeader to insert a branch around loop for unrolling. 
+    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+    NewPreHeader->setName(PreHeader->getName() + ".new");
+    // Split Exit to create phi nodes from branch above.
+    SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+    NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+                                     DT, LI, PreserveLCSSA);
+    // Split NewExit to insert epilog remainder loop.
+    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+  } else {
+    // If prolog remainder
+    // Split the original preheader twice to insert the prolog remainder loop.
+    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+                            DT, LI);
+    PrologExit->setName(Header->getName() + ".prol.loopexit");
+    // Split PrologExit to get NewPreHeader.
+    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+    NewPreHeader->setName(PreHeader->getName() + ".new");
+  }
+  // Loop structure should be the following:
+  //  Epilog                 Prolog
+  //
+  //  PreHeader              PreHeader
+  // *NewPreHeader          *PrologPreHeader
+  //  Header                *PrologExit
+  //  ...                   *NewPreHeader
+  //  Latch                  Header
+  // *NewExit                ...
+  // *EpilogPreHeader        Latch
+  //  Exit                   Exit
+
+  // Calculate the conditions for the branch around the unrolled loop in the
+  // epilog case, and around the prolog remainder loop in the prolog case.
   // Compute the number of extra iterations required, which is:
-  //  extra iterations = run-time trip count % (loop unroll factor + 1)
+  //  extra iterations = run-time trip count % loop unroll factor
+  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
   Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                           PreHeaderBR);
-
   IRBuilder<> B(PreHeaderBR);
-  Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
-
-  // If ModVal is zero, we know that either
-  //  1. there are no iteration to be run in the prologue loop
-  //  OR
-  //  2. the addition computing TripCount overflowed
-  //
-  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
-  // number of iterations that remain to be run in the original loop is a
-  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
-  // explicitly check this above).
-
-  Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
-
-  // Branch to either the extra iterations or the cloned/unrolled loop
-  // We will fix up the true branch label when adding loop body copies
-  B.CreateCondBr(BranchVal, PEnd, PEnd);
-  assert(PreHeaderBR->isUnconditional() &&
-         PreHeaderBR->getSuccessor(0) == PEnd &&
-         "CFG edges in Preheader are not correct");
+  Value *ModVal;
+  // Calculate ModVal = (BECount + 1) % Count.
+  // Note that TripCount is BECount + 1.
+  if (isPowerOf2_32(Count)) {
+    // When Count is a power of 2 we don't need BECount in the epilog case;
+    // however, we'll need it for the branch around the unrolling loop in the
+    // prolog case.
+    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+    // If ModVal is zero, we know that either:
+    //  1. There are no iterations to be run in the prolog/epilog loop.
+    // OR
+    //  2. The addition computing TripCount overflowed.
+    //
+    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+    // the number of iterations that remain to be run in the original loop is
+    // a multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth
+    // (we explicitly check this above).
+  } else {
+    // As (BECount + 1) can potentially overflow in unsigned arithmetic, we
+    // instead compute (BECount % Count) + 1, which is overflow-safe because
+    // BECount % Count < Count.
+    Value *ModValTmp = B.CreateURem(BECount,
+                                    ConstantInt::get(BECount->getType(),
+                                                     Count));
+    Value *ModValAdd = B.CreateAdd(ModValTmp,
+                                   ConstantInt::get(ModValTmp->getType(), 1));
+    // At that point (BECount % Count) + 1 could be equal to Count, so we take
+    // the remainder modulo Count one more time. For example, BECount == 5 and
+    // Count == 3 give 5 % 3 + 1 == 3, and the second urem folds that to 0
+    // extra iterations, matching the true trip count of 6, an exact multiple
+    // of 3.
+    ModVal = B.CreateURem(ModValAdd,
+                          ConstantInt::get(BECount->getType(), Count),
+                          "xtraiter");
+  }
+  Value *BranchVal =
+      UseEpilogRemainder ? B.CreateICmpULT(BECount,
+                                           ConstantInt::get(BECount->getType(),
+                                                            Count - 1)) :
+                           B.CreateIsNotNull(ModVal, "lcmp.mod");
+  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
+  // Branch to either the remainder (extra iterations) loop or the unrolled
+  // loop.
+  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
   PreHeaderBR->eraseFromParent();
   Function *F = Header->getParent();
   // Get an ordered list of blocks in the loop to help with the ordering of the
-  // cloned blocks in the prolog code
+  // cloned blocks in the prolog/epilog code
   LoopBlocksDFS LoopBlocks(L);
   LoopBlocks.perform(LI);
 
@@ -382,34 +599,80 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
   std::vector<BasicBlock *> NewBlocks;
   ValueToValueMapTy VMap;
 
-  bool UnrollPrologue = Count == 2;
+  // For unroll factor 2 the remainder loop will have exactly 1 iteration.
+  // Do not create a 1-iteration loop.
+  bool CreateRemainderLoop = (Count != 2);
 
   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
   // the loop, otherwise we create a cloned loop to execute the extra
   // iterations. This function adds the appropriate CFG connections.
-  CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
-                  VMap, LI);
-
-  // Insert the cloned blocks into function just before the original loop
-  F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
-                                NewBlocks[0]->getIterator(), F->end());
-
-  // Rewrite the cloned instruction operands to use the values
-  // created when the clone is created.
-  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
-    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
-                              E = NewBlocks[i]->end();
-         I != E; ++I) {
-      RemapInstruction(&*I, VMap,
-                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+  BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+  CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
+                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+
+  // Insert the cloned blocks into the function.
+  F->getBasicBlockList().splice(InsertBot->getIterator(),
+                                F->getBasicBlockList(),
+                                NewBlocks[0]->getIterator(),
+                                F->end());
+
+  // Loop structure should be the following:
+  //  Epilog                 Prolog
+  //
+  //  PreHeader              PreHeader
+  //  NewPreHeader           PrologPreHeader
+  //  Header                 PrologHeader
+  //  ...                    ...
+  //  Latch                  PrologLatch
+  //  NewExit                PrologExit
+  //  EpilogPreHeader        NewPreHeader
+  //  EpilogHeader           Header
+  //  ...                    ...
+  //  EpilogLatch            Latch
+  //  Exit                   Exit
+
+  // Rewrite the cloned instruction operands to use the values created when the
+  // clone is created.
+  for (BasicBlock *BB : NewBlocks) {
+    for (Instruction &I : *BB) {
+      RemapInstruction(&I, VMap,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
     }
   }
 
-  // Connect the prolog code to the original loop and update the
-  // PHI functions.
-  BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
-  ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
-                PreserveLCSSA);
+  if (UseEpilogRemainder) {
+    // Connect the epilog code to the original loop and update the
+    // PHI functions.
+    ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
+                  PreserveLCSSA);
+
+    // Update the counter in the unrolled loop: it must run unroll_iter
+    // iterations, which is a multiple of Count.
+    IRBuilder<> B2(NewPreHeader->getTerminator());
+    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+    B2.SetInsertPoint(LatchBR);
+    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+                                      Header->getFirstNonPHI());
+    Value *IdxSub =
+        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+                     NewIdx->getName() + ".nsub");
+    Value *IdxCmp;
+    if (LatchBR->getSuccessor(0) == Header)
+      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
+    else
+      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
+    NewIdx->addIncoming(TestVal, NewPreHeader);
+    NewIdx->addIncoming(IdxSub, Latch);
+    LatchBR->setCondition(IdxCmp);
+  } else {
+    // Connect the prolog code to the original loop and update the
+    // PHI functions.
+    ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
+                  VMap, DT, LI, PreserveLCSSA);
+  }
   NumRuntimeUnrolled++;
   return true;
 }
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index fa958e913b7bd..3902c67c6a013 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -11,13 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
@@ -423,7 +430,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
   default:
     return InstDesc(false, I);
   case Instruction::PHI:
-    return InstDesc(I, Prev.getMinMaxKind());
+    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
   case Instruction::Sub:
   case Instruction::Add:
     return InstDesc(Kind == RK_IntegerAdd, I);
@@ -466,12 +473,10 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
 
 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                           RecurrenceDescriptor &RedDes) {
-  bool HasFunNoNaNAttr = false;
   BasicBlock *Header = TheLoop->getHeader();
   Function &F = *Header->getParent();
-  if (F.hasFnAttribute("no-nans-fp-math"))
-    HasFunNoNaNAttr =
-        F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+  bool HasFunNoNaNAttr =
+      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
 
   if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
     DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
@@ -514,6 +519,43 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
   return false;
 }
 
+bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+                                                  DominatorTree *DT) {
+
+  // Ensure the phi node is in the loop header and has two incoming values.
+  if (Phi->getParent() != TheLoop->getHeader() ||
+      Phi->getNumIncomingValues() != 2)
+    return false;
+
+  // Ensure the loop has a preheader and a single latch block. The loop
+  // vectorizer will need the latch to set up the next iteration of the loop.
+  auto *Preheader = TheLoop->getLoopPreheader();
+  auto *Latch = TheLoop->getLoopLatch();
+  if (!Preheader || !Latch)
+    return false;
+
+  // Ensure the phi node's incoming blocks are the loop preheader and latch.
+  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+      Phi->getBasicBlockIndex(Latch) < 0)
+    return false;
+
+  // Get the previous value. The previous value comes from the latch edge while
+  // the initial value comes from the preheader edge.
+  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+    return false;
+
+  // Ensure every user of the phi node is dominated by the previous value. The
+  // dominance requirement ensures the loop vectorizer will not need to
+  // vectorize the initial value prior to the first iteration of the loop.
+  for (User *U : Phi->users())
+    if (auto *I = dyn_cast<Instruction>(U))
+      if (!DT->dominates(Previous, I))
+        return false;
+
+  return true;
+}
+
 /// This function returns the identity element (or neutral element) for
 /// the operation K.
 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
@@ -612,61 +654,120 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
 }
 
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         ConstantInt *Step)
-    : StartValue(Start), IK(K), StepValue(Step) {
+                                         const SCEV *Step)
+    : StartValue(Start), IK(K), Step(Step) {
   assert(IK != IK_NoInduction && "Not an induction");
+
+  // Start value type should match the induction kind and the value
+  // itself should not be null.
   assert(StartValue && "StartValue is null");
-  assert(StepValue && !StepValue->isZero() && "StepValue is zero");
   assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
          "StartValue is not a pointer for pointer induction");
   assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
          "StartValue is not an integer for integer induction");
-  assert(StepValue->getType()->isIntegerTy() &&
-         "StepValue is not an integer");
+
+  // Check the Step Value. It should be a non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
}
int InductionDescriptor::getConsecutiveDirection() const {
- if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
- return StepValue->getSExtValue();
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
return 0;
}
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
+ ScalarEvolution *SE,
+ const DataLayout& DL) const {
+
+ SCEVExpander Exp(*SE, DL, "induction");
switch (IK) {
- case IK_IntInduction:
+ case IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
"Index type does not match StartValue type");
- if (StepValue->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
- return B.CreateAdd(StartValue, Index);
- case IK_PtrInduction:
- assert(Index->getType() == StepValue->getType() &&
+ // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
+ // and calculate (Start + Index * Step) for all cases, without
+ // special handling for "isOne" and "isMinusOne".
+ // But in practice the resulting code gets worse. We mix SCEV
+ // expressions and ADD/SUB operations and end up with redundant
+ // intermediate values calculated in different ways, which
+ // InstCombine is unable to reduce.
+
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isOne())
+ return B.CreateAdd(StartValue, Index);
+ const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
+ SE->getMulExpr(Step, SE->getSCEV(Index)));
+ return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+ }
+ case IK_PtrInduction: {
+ assert(Index->getType() == Step->getType() &&
"Index type does not match StepValue type");
- if (StepValue->isMinusOne())
- Index = B.CreateNeg(Index);
- else if (!StepValue->isOne())
- Index = B.CreateMul(Index, StepValue);
+ assert(isa<SCEVConstant>(Step) &&
+ "Expected constant step for pointer induction");
+ const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
+ Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
return B.CreateGEP(nullptr, StartValue, Index);
-
+ }
case IK_NoInduction:
return nullptr;
}
llvm_unreachable("invalid enum");
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
- InductionDescriptor &D) {
+bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D,
+ bool Assume) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
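In scalar terms, transform() materializes Start + Index * Step for an induction, eliding the multiply when the constant step is 1 or -1 so that InstCombine sees simple adds and subs. A minimal sketch of that arithmetic, assuming a constant step (illustrative, not part of the patch):

// Illustrative only: the value of an integer induction at a given index.
long long intInductionAt(long long Start, long long Index, long long Step) {
  if (Step == -1)
    return Start - Index;      // mirrors the CreateSub fast path above
  if (Step == 1)
    return Start + Index;      // mirrors the CreateAdd fast path above
  return Start + Index * Step; // general form, expanded via SCEV
}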
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ return isInductionPHI(Phi, PSE.getSE(), D, AR);
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+ ScalarEvolution *SE,
+ InductionDescriptor &D,
+ const SCEV *Expr) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
return false;
// Check that the PHI is consecutive.
- const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
if (!AR) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
}
@@ -678,17 +779,22 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
return false;
- ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, CV);
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step);
return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ // The step of a pointer induction must be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
Type *PointerElementType = PhiTy->getPointerElementType();
// The pointer stride cannot be determined if the pointer element type is not
// sized.
@@ -703,8 +809,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
- auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
-
+ auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
+ true /* signed */);
D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
return true;
}
@@ -727,3 +833,137 @@ SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
return UsedOutside;
}
+
+void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
+ // By definition, all loop passes need the LoopInfo analysis and the
+ // Dominator tree it depends on. Because they all participate in the loop
+ // pass manager, they must also preserve these.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
+ // here because users shouldn't directly get them from this header.
+ extern char &LoopSimplifyID;
+ extern char &LCSSAID;
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+
+ // Loop passes are designed to run inside of a loop pass manager which means
+ // that any function analyses they require must be required by the first loop
+ // pass in the manager (so that it is computed before the loop pass manager
+ // runs) and preserved by all loop passes in the manager. To make this
+ // reasonably robust, the set needed for most loop passes is maintained here.
+ // If your loop pass requires an analysis not listed here, you will need to
+ // carefully audit the loop pass manager nesting structure that results.
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+}
+
+/// Manually defined generic "LoopPass" dependency initialization. This is used
+/// to initialize the exact set of passes from above in \c
+/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
+/// with:
+///
+/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
+///
+/// As-if "LoopPass" were a pass.
+void llvm::initializeLoopPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+ INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+}
+
+/// \brief Find string metadata for loop
+///
+/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
+/// operand or null otherwise. If the string metadata is not found return
+/// Optional's not-a-value.
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+ StringRef Name) {
+ MDNode *LoopID = TheLoop->getLoopID();
+ // Return None if LoopID is null.
+ if (!LoopID)
+ return None;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over LoopID operands and look for MDString Metadata
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+ // Return the value if the MDString matches the expected name.
+ if (Name.equals(S->getString()))
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+ }
+ return None;
+}
+
+/// Returns true if the instruction in a loop is guaranteed to execute at least
+/// once.
+bool llvm::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
+ // We have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ // If there's a throw in the header block, we can't guarantee we'll reach
+ // Inst.
+ return !SafetyInfo->HeaderMayThrow;
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (SafetyInfo->MayThrow)
+ return false;
+
+ // Get the exit blocks for the current loop.
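Callers of findStringMetadataForLoop have to distinguish three outcomes: metadata absent, present without a value, and present with a value. A hedged consumer sketch; the metadata name and the boolean interpretation are assumptions for illustration:

// Illustrative only: assumes the loop carries boolean-style metadata.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

static bool isLoopDistributeEnabled(Loop *L) {
  Optional<const MDOperand *> Op =
      findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
  if (!Op)
    return false;          // metadata not present on the loop
  if (!*Op)
    return false;          // present, but carries no value operand
  if (auto *C = mdconst::dyn_extract<ConstantInt>(**Op))
    return !C->isZero();   // present with a constant value
  return false;
}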
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
+ return false;
+
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
+ // FIXME: In general, we have to prove that the loop isn't an infinite loop.
+ // See http://llvm.org/PR24078. (The "ExitBlocks.empty()" check above is
+ // just a special case of this.)
+ return true;
+}
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index 9a2a06cf68915..b3c61691da30a 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,11 +18,18 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
+static cl::opt<bool>
+ AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+ cl::Hidden,
+ cl::desc("Add no-alias annotation for instructions that "
+ "are disambiguated by memchecks"));
+
LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
DominatorTree *DT, ScalarEvolution *SE,
bool UseLAIChecks)
@@ -32,12 +39,12 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
assert(L->getLoopPreheader() && "No preheader");
if (UseLAIChecks) {
setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
- setSCEVChecks(LAI.PSE.getUnionPredicate());
+ setSCEVChecks(LAI.getPSE().getUnionPredicate());
}
}
void LoopVersioning::setAliasChecks(
- const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
AliasChecks = std::move(Checks);
}
@@ -56,9 +63,8 @@ void LoopVersioning::versionLoop(
BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
- assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
- const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
SCEVRuntimeCheck =
@@ -71,7 +77,7 @@ void LoopVersioning::versionLoop(
if (MemRuntimeCheck && SCEVRuntimeCheck) {
RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
- SCEVRuntimeCheck, "ldist.safe");
+ SCEVRuntimeCheck, "lver.safe");
if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
I->insertBefore(RuntimeCheckBB->getTerminator());
} else
@@ -119,16 +125,14 @@ void LoopVersioning::addPHINodes(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
assert(PHIBlock && "No single successor to loop exit block");
+ PHINode *PN;
+ // First add a single-operand PHI for each DefsUsedOutside if one does not
+ // exist yet.
for (auto *Inst : DefsUsedOutside) {
- auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
- PHINode *PN;
-
- // First see if we have a single-operand PHI with the value defined by the
+ // See if we have a single-operand PHI with the value defined by the
// original loop.
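Note the polarity of the combined condition built in versionLoop(): both MemRuntimeCheck and SCEVRuntimeCheck are true when a check fails, so the OR ("lver.safe") routes control to the unoptimized copy whenever any check fires. A source-level analogy of the emitted structure (illustrative only):

// Illustrative only: the CFG shape versionLoop() produces.
void versionedShape(bool memCheckFailed, bool scevCheckFailed) {
  if (memCheckFailed || scevCheckFailed) {
    // non-versioned loop: the conservative original runs
  } else {
    // versioned loop: runtime checks proved the accesses disjoint
  }
}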
for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have on predecessor");
if (PN->getIncomingValue(0) == Inst)
break;
}
@@ -141,7 +145,179 @@ void LoopVersioning::addPHINodes(
User->replaceUsesOfWith(Inst, PN);
PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
}
- // Add the new incoming value from the non-versioned loop.
- PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock());
}
+
+ // Then for each PHI add the operand for the edge from the cloned loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+ "Exit block should only have one predecessor");
+
+ // If the definition was cloned, use that; otherwise use the same value.
+ Value *ClonedValue = PN->getIncomingValue(0);
+ auto Mapped = VMap.find(ClonedValue);
+ if (Mapped != VMap.end())
+ ClonedValue = Mapped->second;
+
+ PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
+ }
+}
+
+void LoopVersioning::prepareNoAliasMetadata() {
+ // We need to turn the no-alias relation between pointer checking groups into
+ // no-aliasing annotations between instructions.
+ //
+ // We accomplish this by mapping each pointer checking group (a set of
+ // pointers memchecked together) to an alias scope and then also mapping each
+ // group to the list of scopes it can't alias.
+
+ const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+
+ // First allocate an aliasing scope for each pointer checking group.
+ //
+ // While traversing through the checking groups in the loop, also create a
+ // reverse map from pointers to the pointer checking group they were assigned
+ // to.
+ MDBuilder MDB(Context);
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+ for (const auto &Group : RtPtrChecking->CheckingGroups) {
+ GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+ for (unsigned PtrIdx : Group.Members)
+ PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+ }
+
+ // Go through the checks and for each pointer group, collect the scopes for
+ // each non-aliasing pointer group.
+ DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
+ SmallVector<Metadata *, 4>>
+ GroupToNonAliasingScopes;
+
+ for (const auto &Check : AliasChecks)
+ GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+ // Finally, transform the above to actually map to scope list which is what
+ // the metadata uses.
+
+ for (auto Pair : GroupToNonAliasingScopes)
+ GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
+}
+
+void LoopVersioning::annotateLoopWithNoAlias() {
+ if (!AnnotateNoAlias)
+ return;
+
+ // First prepare the maps.
+ prepareNoAliasMetadata();
+
+ // Add the scope and no-alias metadata to the instructions.
+ for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+ annotateInstWithNoAlias(I);
+ }
+}
+
+void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst) {
+ if (!AnnotateNoAlias)
+ return;
+
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+ const Value *Ptr = isa<LoadInst>(OrigInst)
+ ? cast<LoadInst>(OrigInst)->getPointerOperand()
+ : cast<StoreInst>(OrigInst)->getPointerOperand();
+
+ // Find the group for the pointer and then add the scope metadata.
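A minimal MDBuilder sketch of the metadata scheme prepareNoAliasMetadata builds, reduced to two hypothetical checking groups (all names assumed; the real code derives the scopes from RuntimePointerChecking):

// Illustrative only: one shared domain, one anonymous scope per group; an
// instruction advertises its own scope and declares no-alias with the
// scopes of groups the memchecks proved disjoint.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void tagDisjointGroups(Instruction *I, LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
  MDNode *OwnScope = MDB.createAnonymousAliasScope(Domain);
  MDNode *OtherScope = MDB.createAnonymousAliasScope(Domain);
  I->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, {OwnScope}));
  I->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, {OtherScope}));
}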
+ auto Group = PtrToGroup.find(Ptr);
+ if (Group != PtrToGroup.end()) {
+ VersionedInst->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Context, GroupToScope[Group->second])));
+
+ // Add the no-alias metadata.
+ auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
+ if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+ VersionedInst->setMetadata(
+ LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_noalias),
+ NonAliasingScopeList->second));
+ }
+}
+
+namespace {
+/// \brief Also expose this as a pass. Currently this is only used for
+/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
+/// array accesses from the loop.
+class LoopVersioningPass : public FunctionPass {
+public:
+ LoopVersioningPass() : FunctionPass(ID) {
+ initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L);
+ if (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ LoopVersioning LVer(LAI, L, LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+#define LVER_OPTION "loop-versioning"
+#define DEBUG_TYPE LVER_OPTION
+
+char LoopVersioningPass::ID;
+static const char LVer_name[] = "Loop Versioning";
+
+INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopVersioningPass() {
+ return new LoopVersioningPass();
+}
}
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index b0ad4d5e84a1b..1b31c5ae580a1 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,14 +14,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "lowerinvoke" @@ -53,8 +52,8 @@ FunctionPass *llvm::createLowerInvokePass() { bool LowerInvoke::runOnFunction(Function &F) { bool Changed = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + for (BasicBlock &BB : F) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), @@ -69,10 +68,10 @@ bool LowerInvoke::runOnFunction(Function &F) { BranchInst::Create(II->getNormalDest(), II); // Remove any PHI node entries from the exception destination. - II->getUnwindDest()->removePredecessor(&*BB); + II->getUnwindDest()->removePredecessor(&BB); // Remove the invoke instruction now. - BB->getInstList().erase(II); + BB.getInstList().erase(II); ++NumInvokes; Changed = true; } diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 52beb1542497e..5c07469869ff7 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -59,12 +59,6 @@ namespace { bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - // This is a cluster of orthogonal Transforms - AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreservedID(LowerInvokePassID); - } - struct CaseRange { ConstantInt* Low; ConstantInt* High; @@ -192,8 +186,8 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } // Remove incoming values in the reverse order to prevent invalidating // *successive* index. - for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III) - PN->removeIncomingValue(*III); + for (unsigned III : reverse(Indices)) + PN->removeIncomingValue(III); } } diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile deleted file mode 100644 index d1e9336d67f02..0000000000000 --- a/lib/Transforms/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMTransformUtils -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index aa1e35ddba024..1419254bcb4f4 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -12,12 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; @@ -26,51 +27,11 @@ using namespace llvm; STATISTIC(NumPromoted, "Number of alloca's promoted"); -namespace { - struct PromotePass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(ID) { - initializePromotePassPass(*PassRegistry::getPassRegistry()); - } - - // runOnFunction - To run this pass, first we calculate the alloca - // instructions that are safe for promotion, then we promote each one. - // - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.setPreservesCFG(); - // This is a cluster of orthogonal Transforms - AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreservedID(LowerSwitchID); - AU.addPreservedID(LowerInvokePassID); - } - }; -} // end of anonymous namespace - -char PromotePass::ID = 0; -INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", - false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", - false, false) - -bool PromotePass::runOnFunction(Function &F) { - std::vector<AllocaInst*> Allocas; - - BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - - if (F.hasFnAttribute(Attribute::OptimizeNone)) - return false; - - bool Changed = false; - - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - AssumptionCache &AC = - getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); +static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, + AssumptionCache &AC) { + std::vector<AllocaInst *> Allocas; + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + bool Changed = false; while (1) { Allocas.clear(); @@ -78,22 +39,69 @@ bool PromotePass::runOnFunction(Function &F) { // Find allocas that are safe to promote, by looking at all instructions in // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 
if (isAllocaPromotable(AI)) Allocas.push_back(AI); - if (Allocas.empty()) break; + if (Allocas.empty()) + break; PromoteMemToReg(Allocas, DT, nullptr, &AC); NumPromoted += Allocas.size(); Changed = true; } - return Changed; } +PreservedAnalyses PromotePass::run(Function &F, AnalysisManager<Function> &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + if (!promoteMemoryToRegister(F, DT, AC)) + return PreservedAnalyses::all(); + + // FIXME: This should also 'preserve the CFG'. + return PreservedAnalyses::none(); +} + +namespace { +struct PromoteLegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromoteLegacyPass() : FunctionPass(ID) { + initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. + // + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + AssumptionCache &AC = + getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + return promoteMemoryToRegister(F, DT, AC); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.setPreservesCFG(); + } + }; +} // end of anonymous namespace + +char PromoteLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " + "Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", + false, false) + // createPromoteMemoryToRegister - Provide an entry point to create this pass. // FunctionPass *llvm::createPromoteMemoryToRegisterPass() { - return new PromotePass(); + return new PromoteLegacyPass(); } diff --git a/lib/Transforms/Utils/MemorySSA.cpp b/lib/Transforms/Utils/MemorySSA.cpp new file mode 100644 index 0000000000000..8ba3cae43b188 --- /dev/null +++ b/lib/Transforms/Utils/MemorySSA.cpp @@ -0,0 +1,1361 @@ +//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSA class. 
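Before the implementation details, a hedged orientation sketch: once MemorySSA is built for a function, the typical query is "which write does this read observe?", answered through the walker defined below (the scenario is assumed; the API names are from this file):

// Illustrative only: query the clobbering definition of a load.
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

void inspectLoad(MemorySSA &MSSA, LoadInst *LI) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(LI);
  if (MSSA.isLiveOnEntryDef(Clobber)) {
    // No write inside the function affects this load; it reads state
    // that was live on entry.
  }
}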
+// +//===----------------------------------------------------------------===// +#include "llvm/Transforms/Utils/MemorySSA.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include <algorithm> + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups"); +STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits"); +STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts"); + +INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) + +INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) + +static cl::opt<bool> + VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Verify MemorySSA in legacy printer pass.")); + +namespace llvm { +/// \brief An assembly annotator class to print Memory SSA information in +/// comments. +class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { + friend class MemorySSA; + const MemorySSA *MSSA; + +public: + MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) + OS << "; " << *MA << "\n"; + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + OS << "; " << *MA << "\n"; + } +}; + +/// \brief A MemorySSAWalker that does AA walks and caching of lookups to +/// disambiguate accesses. +/// +/// FIXME: The current implementation of this can take quadratic space in rare +/// cases. This can be fixed, but it is something to note until it is fixed. +/// +/// In order to trigger this behavior, you need to store to N distinct locations +/// (that AA can prove don't alias), perform M stores to other memory +/// locations that AA can prove don't alias any of the initial N locations, and +/// then load from all of the N locations. In this case, we insert M cache +/// entries for each of the N loads. 
+/// +/// For example: +/// define i32 @foo() { +/// %a = alloca i32, align 4 +/// %b = alloca i32, align 4 +/// store i32 0, i32* %a, align 4 +/// store i32 0, i32* %b, align 4 +/// +/// ; Insert M stores to other memory that doesn't alias %a or %b here +/// +/// %c = load i32, i32* %a, align 4 ; Caches M entries in +/// ; CachedUpwardsClobberingAccess for the +/// ; MemoryLocation %a +/// %d = load i32, i32* %b, align 4 ; Caches M entries in +/// ; CachedUpwardsClobberingAccess for the +/// ; MemoryLocation %b +/// +/// ; For completeness' sake, loading %a or %b again would not cache *another* +/// ; M entries. +/// %r = add i32 %c, %d +/// ret i32 %r +/// } +class MemorySSA::CachingWalker final : public MemorySSAWalker { +public: + CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); + ~CachingWalker() override; + + MemoryAccess *getClobberingMemoryAccess(const Instruction *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + MemoryLocation &) override; + void invalidateInfo(MemoryAccess *) override; + +protected: + struct UpwardsMemoryQuery; + MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &, + const MemoryLocation &); + + void doCacheInsert(const MemoryAccess *, MemoryAccess *, + const UpwardsMemoryQuery &, const MemoryLocation &); + + void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &, + const MemoryLocation &); + +private: + MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &, + UpwardsMemoryQuery &, bool); + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); + bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &, + const MemoryLocation &Loc) const; + void verifyRemoved(MemoryAccess *); + SmallDenseMap<ConstMemoryAccessPair, MemoryAccess *> + CachedUpwardsClobberingAccess; + DenseMap<const MemoryAccess *, MemoryAccess *> CachedUpwardsClobberingCall; + AliasAnalysis *AA; + DominatorTree *DT; +}; +} + +namespace { +struct RenamePassData { + DomTreeNode *DTN; + DomTreeNode::const_iterator ChildIt; + MemoryAccess *IncomingVal; + + RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, + MemoryAccess *M) + : DTN(D), ChildIt(It), IncomingVal(M) {} + void swap(RenamePassData &RHS) { + std::swap(DTN, RHS.DTN); + std::swap(ChildIt, RHS.ChildIt); + std::swap(IncomingVal, RHS.IncomingVal); + } +}; +} + +namespace llvm { +/// \brief Rename a single basic block into MemorySSA form. +/// Uses the standard SSA renaming algorithm. +/// \returns The new incoming value. +MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, + MemoryAccess *IncomingVal) { + auto It = PerBlockAccesses.find(BB); + // Skip most processing if the list is empty. + if (It != PerBlockAccesses.end()) { + AccessList *Accesses = It->second.get(); + for (MemoryAccess &L : *Accesses) { + switch (L.getValueID()) { + case Value::MemoryUseVal: + cast<MemoryUse>(&L)->setDefiningAccess(IncomingVal); + break; + case Value::MemoryDefVal: + // We can't legally optimize defs, because we only allow single + // memory phis/uses on operations, and if we optimize these, we can + // end up with multiple reaching defs. 
Uses do not have this + // problem, since they do not produce a value + cast<MemoryDef>(&L)->setDefiningAccess(IncomingVal); + IncomingVal = &L; + break; + case Value::MemoryPhiVal: + IncomingVal = &L; + break; + } + } + } + + // Pass through values to our successors + for (const BasicBlock *S : successors(BB)) { + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + Phi->addIncoming(IncomingVal, BB); + } + + return IncomingVal; +} + +/// \brief This is the standard SSA renaming algorithm. +/// +/// We walk the dominator tree in preorder, renaming accesses, and then filling +/// in phi nodes in our successors. +void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, + SmallPtrSet<BasicBlock *, 16> &Visited) { + SmallVector<RenamePassData, 32> WorkStack; + IncomingVal = renameBlock(Root->getBlock(), IncomingVal); + WorkStack.push_back({Root, Root->begin(), IncomingVal}); + Visited.insert(Root->getBlock()); + + while (!WorkStack.empty()) { + DomTreeNode *Node = WorkStack.back().DTN; + DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; + IncomingVal = WorkStack.back().IncomingVal; + + if (ChildIt == Node->end()) { + WorkStack.pop_back(); + } else { + DomTreeNode *Child = *ChildIt; + ++WorkStack.back().ChildIt; + BasicBlock *BB = Child->getBlock(); + Visited.insert(BB); + IncomingVal = renameBlock(BB, IncomingVal); + WorkStack.push_back({Child, Child->begin(), IncomingVal}); + } + } +} + +/// \brief Compute dominator levels, used by the phi insertion algorithm above. +void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) + DomLevels[*DFI] = DFI.getPathLength() - 1; +} + +/// \brief This handles unreachable block accesses by deleting phi nodes in +/// unreachable blocks, and marking all other unreachable MemoryAccess's as +/// being uses of the live on entry definition. +void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { + assert(!DT->isReachableFromEntry(BB) && + "Reachable block found while handling unreachable blocks"); + + // Make sure phi nodes in our reachable successors end up with a + // LiveOnEntryDef for our incoming edge, even though our block is forward + // unreachable. We could just disconnect these blocks from the CFG fully, + // but we do not right now. + for (const BasicBlock *S : successors(BB)) { + if (!DT->isReachableFromEntry(S)) + continue; + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + Phi->addIncoming(LiveOnEntryDef.get(), BB); + } + + auto It = PerBlockAccesses.find(BB); + if (It == PerBlockAccesses.end()) + return; + + auto &Accesses = It->second; + for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { + auto Next = std::next(AI); + // If we have a phi, just remove it. We are going to replace all + // users with live on entry. 
+ if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
+ UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
+ else
+ Accesses->erase(AI);
+ AI = Next;
+ }
+}
+
+MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
+ : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+ NextID(0) {
+ buildMemorySSA();
+}
+
+MemorySSA::MemorySSA(MemorySSA &&MSSA)
+ : AA(MSSA.AA), DT(MSSA.DT), F(MSSA.F),
+ ValueToMemoryAccess(std::move(MSSA.ValueToMemoryAccess)),
+ PerBlockAccesses(std::move(MSSA.PerBlockAccesses)),
+ LiveOnEntryDef(std::move(MSSA.LiveOnEntryDef)),
+ Walker(std::move(MSSA.Walker)), NextID(MSSA.NextID) {
+ // Update the Walker MSSA pointer so it doesn't point to the moved-from MSSA
+ // object any more.
+ Walker->MSSA = this;
+}
+
+MemorySSA::~MemorySSA() {
+ // Drop all our references
+ for (const auto &Pair : PerBlockAccesses)
+ for (MemoryAccess &MA : *Pair.second)
+ MA.dropAllReferences();
+}
+
+MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
+ auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
+
+ if (Res.second)
+ Res.first->second = make_unique<AccessList>();
+ return Res.first->second.get();
+}
+
+void MemorySSA::buildMemorySSA() {
+ // We create an access to represent "live on entry", for things like
+ // arguments or users of globals, where the memory they use is defined before
+ // the beginning of the function. We do not actually insert it into the IR.
+ // We do not define a live on exit for the immediate uses, and thus our
+ // semantics do *not* imply that something with no immediate uses can simply
+ // be removed.
+ BasicBlock &StartingPoint = F.getEntryBlock();
+ LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
+ &StartingPoint, NextID++);
+
+ // We maintain lists of memory accesses per-block, trading memory for time. We
+ // could just look up the memory access for every possible instruction in the
+ // stream.
+ SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
+ SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
+ // Go through each block, figure out where defs occur, and chain together all
+ // the accesses.
+ for (BasicBlock &B : F) {
+ bool InsertIntoDef = false;
+ AccessList *Accesses = nullptr;
+ for (Instruction &I : B) {
+ MemoryUseOrDef *MUD = createNewAccess(&I);
+ if (!MUD)
+ continue;
+ InsertIntoDef |= isa<MemoryDef>(MUD);
+
+ if (!Accesses)
+ Accesses = getOrCreateAccessList(&B);
+ Accesses->push_back(MUD);
+ }
+ if (InsertIntoDef)
+ DefiningBlocks.insert(&B);
+ if (Accesses)
+ DefUseBlocks.insert(&B);
+ }
+
+ // Compute live-in.
+ // Live-in is normally defined as "all the blocks on the path from each def to
+ // each of its uses".
+ // MemoryDef's are implicit uses of previous state, so they are also uses.
+ // This means we don't really have def-only instructions. The only
+ // MemoryDef's that are not really uses are those that are of the LiveOnEntry
+ // variable (because LiveOnEntry can reach anywhere, and every def is a
+ // must-kill of LiveOnEntry).
+ // In theory, you could precisely compute live-in by using alias-analysis to
+ // disambiguate defs and uses to see which really pair up with which.
+ // In practice, this would be really expensive and difficult. So we simply
+ // assume all defs are also uses that need to be kept live.
+ // Because of this, the end result of this live-in computation will be "the
+ // entire set of basic blocks that reach any use".
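As a worked example of the access chains this construction produces, consider a straight-line function; the numbering follows the print format defined later in this file, and liveOnEntry is the synthetic def created above (illustrative only):

// Illustrative only: the MemorySSA annotations for each statement.
void renameExample(int *a, int *b) {
  *a = 1;      // 1 = MemoryDef(liveOnEntry)
  int x = *a;  // MemoryUse(1)
  *b = x;      // 2 = MemoryDef(1): defs chain through all prior state
  int y = *b;  // MemoryUse(2)
  (void)y;
}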
+
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(DefUseBlocks.begin(),
+ DefUseBlocks.end());
+ // Now that we have a set of blocks where a value is live-in, recursively add
+ // predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too.
+ LiveInBlockWorklist.append(pred_begin(BB), pred_end(BB));
+ }
+
+ // Determine where our MemoryPhis should go
+ ForwardIDFCalculator IDFs(*DT);
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.setLiveInBlocks(LiveInBlocks);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.calculate(IDFBlocks);
+
+ // Now place MemoryPhi nodes.
+ for (auto &BB : IDFBlocks) {
+ // Insert phi node
+ AccessList *Accesses = getOrCreateAccessList(BB);
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+ // Phis are always placed at the front of the block.
+ Accesses->push_front(Phi);
+ }
+
+ // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
+ // filled in with all blocks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
+
+ MemorySSAWalker *Walker = getWalker();
+
+ // Now optimize the MemoryUse's defining access to point to the nearest
+ // dominating clobbering def.
+ // This ensures that MemoryUse's that are killed by the same store are
+ // immediate users of that store, one of the invariants we guarantee.
+ for (auto DomNode : depth_first(DT)) {
+ BasicBlock *BB = DomNode->getBlock();
+ auto AI = PerBlockAccesses.find(BB);
+ if (AI == PerBlockAccesses.end())
+ continue;
+ AccessList *Accesses = AI->second.get();
+ for (auto &MA : *Accesses) {
+ if (auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ Instruction *Inst = MU->getMemoryInst();
+ MU->setDefiningAccess(Walker->getClobberingMemoryAccess(Inst));
+ }
+ }
+ }
+
+ // Mark the uses in unreachable blocks as live on entry, so that they go
+ // somewhere.
+ for (auto &BB : F)
+ if (!Visited.count(&BB))
+ markUnreachableAsLiveOnEntry(&BB);
+}
+
+MemorySSAWalker *MemorySSA::getWalker() {
+ if (Walker)
+ return Walker.get();
+
+ Walker = make_unique<CachingWalker>(this, AA, DT);
+ return Walker.get();
+}
+
+MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
+ assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
+ AccessList *Accesses = getOrCreateAccessList(BB);
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+ // Phis are always placed at the front of the block.
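A worked example of where the IDF computation places MemoryPhis: a store on one arm of a diamond forces a merge at the join block (illustrative only; numbering as in the print format later in this file):

// Illustrative only: the join block receives a MemoryPhi.
void diamondPhi(bool Cond, int *P) {
  if (Cond)
    *P = 42; // 1 = MemoryDef(liveOnEntry) in the taken arm
  // join: 2 = MemoryPhi({then, 1}, {entry, liveOnEntry})
  int V = *P; // MemoryUse(2)
  (void)V;
}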
+ Accesses->push_front(Phi);
+ return Phi;
+}
+
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+ MemoryAccess *Definition) {
+ assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+ MemoryUseOrDef *NewAccess = createNewAccess(I);
+ assert(
+ NewAccess != nullptr &&
+ "Tried to create a memory access for a non-memory touching instruction");
+ NewAccess->setDefiningAccess(Definition);
+ return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
+ MemoryAccess *Definition,
+ const BasicBlock *BB,
+ InsertionPlace Point) {
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(BB);
+ if (Point == Beginning) {
+ // It goes after any phi nodes
+ auto AI = std::find_if(
+ Accesses->begin(), Accesses->end(),
+ [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+
+ Accesses->insert(AI, NewAccess);
+ } else {
+ Accesses->push_back(NewAccess);
+ }
+
+ return NewAccess;
+}
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+ Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
+ return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+ auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+ Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
+ return NewAccess;
+}
+
+/// \brief Helper function to create new memory accesses
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+ // The assume intrinsic has a control dependency which we model by claiming
+ // that it writes arbitrarily. Ignore that fake memory dependency here.
+ // FIXME: Replace this special casing with a more accurate modelling of
+ // assume's control dependency.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ return nullptr;
+
+ // Find out what effect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I);
+ bool Def = bool(ModRef & MRI_Mod);
+ bool Use = bool(ModRef & MRI_Ref);
+
+ // It's possible for an instruction to not modify memory at all. During
+ // construction, we ignore such instructions.
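The decision above reduces to a small table: anything that may write becomes a MemoryDef (even if it also reads), a pure read becomes a MemoryUse, and instructions with no memory effect get no access at all. A minimal mirror of that classification (illustrative only):

// Illustrative only: how ModRef bits map to MemorySSA access kinds.
enum class MSSAKind { None, Use, Def };
MSSAKind classifyForMSSA(bool Mod, bool Ref) {
  if (Mod)
    return MSSAKind::Def; // store, or a call that may write (and read)
  if (Ref)
    return MSSAKind::Use; // load, or a read-only call
  return MSSAKind::None;  // ignored during construction
}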
+ if (!Def && !Use) + return nullptr; + + assert((Def || Use) && + "Trying to create a memory access with a non-memory instruction"); + + MemoryUseOrDef *MUD; + if (Def) + MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); + else + MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent()); + ValueToMemoryAccess.insert(std::make_pair(I, MUD)); + return MUD; +} + +MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock, + enum InsertionPlace Where) { + // Handle the initial case + if (Where == Beginning) + // The only thing that could define us at the beginning is a phi node + if (MemoryPhi *Phi = getMemoryAccess(UseBlock)) + return Phi; + + DomTreeNode *CurrNode = DT->getNode(UseBlock); + // Need to be defined by our dominator + if (Where == Beginning) + CurrNode = CurrNode->getIDom(); + Where = End; + while (CurrNode) { + auto It = PerBlockAccesses.find(CurrNode->getBlock()); + if (It != PerBlockAccesses.end()) { + auto &Accesses = It->second; + for (MemoryAccess &RA : reverse(*Accesses)) { + if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA)) + return &RA; + } + } + CurrNode = CurrNode->getIDom(); + } + return LiveOnEntryDef.get(); +} + +/// \brief Returns true if \p Replacer dominates \p Replacee . +bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, + const MemoryAccess *Replacee) const { + if (isa<MemoryUseOrDef>(Replacee)) + return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); + const auto *MP = cast<MemoryPhi>(Replacee); + // For a phi node, the use occurs in the predecessor block of the phi node. + // Since we may occur multiple times in the phi node, we have to check each + // operand to ensure Replacer dominates each operand where Replacee occurs. + for (const Use &Arg : MP->operands()) { + if (Arg.get() != Replacee && + !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) + return false; + } + return true; +} + +/// \brief If all arguments of a MemoryPHI are defined by the same incoming +/// argument, return that argument. +static MemoryAccess *onlySingleValue(MemoryPhi *MP) { + MemoryAccess *MA = nullptr; + + for (auto &Arg : MP->operands()) { + if (!MA) + MA = cast<MemoryAccess>(Arg); + else if (MA != Arg) + return nullptr; + } + return MA; +} + +/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. +/// +/// Because of the way the intrusive list and use lists work, it is important to +/// do removal in the right order. 
+void MemorySSA::removeFromLookups(MemoryAccess *MA) { + assert(MA->use_empty() && + "Trying to remove memory access that still has uses"); + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) + MUD->setDefiningAccess(nullptr); + // Invalidate our walker's cache if necessary + if (!isa<MemoryUse>(MA)) + Walker->invalidateInfo(MA); + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + Value *MemoryInst; + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + MemoryInst = MUD->getMemoryInst(); + } else { + MemoryInst = MA->getBlock(); + } + ValueToMemoryAccess.erase(MemoryInst); + + auto AccessIt = PerBlockAccesses.find(MA->getBlock()); + std::unique_ptr<AccessList> &Accesses = AccessIt->second; + Accesses->erase(MA); + if (Accesses->empty()) + PerBlockAccesses.erase(AccessIt); +} + +void MemorySSA::removeMemoryAccess(MemoryAccess *MA) { + assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); + // We can only delete phi nodes if they have no uses, or we can replace all + // uses with a single definition. + MemoryAccess *NewDefTarget = nullptr; + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) { + // Note that it is sufficient to know that all edges of the phi node have + // the same argument. If they do, by the definition of dominance frontiers + // (which we used to place this phi), that argument must dominate this phi, + // and thus, must dominate the phi's uses, and so we will not hit the assert + // below. + NewDefTarget = onlySingleValue(MP); + assert((NewDefTarget || MP->use_empty()) && + "We can't delete this memory phi"); + } else { + NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess(); + } + + // Re-point the uses at our defining access + if (!MA->use_empty()) + MA->replaceAllUsesWith(NewDefTarget); + + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + removeFromLookups(MA); +} + +void MemorySSA::print(raw_ostream &OS) const { + MemorySSAAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void MemorySSA::dump() const { + MemorySSAAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +void MemorySSA::verifyMemorySSA() const { + verifyDefUses(F); + verifyDomination(F); + verifyOrdering(F); +} + +/// \brief Verify that the order and existence of MemoryAccesses matches the +/// order and existence of memory affecting instructions. +void MemorySSA::verifyOrdering(Function &F) const { + // Walk all the blocks, comparing what the lookups think and what the access + // lists think, as well as the order in the blocks vs the order in the access + // lists. 
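A hedged sketch of the intended client usage: a transform that deletes a memory instruction drops the corresponding access first, letting removeMemoryAccess re-point the use chain at the defining access (the dead-instruction scenario is assumed):

// Illustrative only: keep MemorySSA consistent while erasing.
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

void eraseWithMemorySSA(MemorySSA &MSSA, Instruction *I) {
  if (MemoryAccess *MA = MSSA.getMemoryAccess(I))
    MSSA.removeMemoryAccess(MA); // uses of MA now use MA's defining access
  I->eraseFromParent();
}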
+ SmallVector<MemoryAccess *, 32> ActualAccesses;
+ for (BasicBlock &B : F) {
+ const AccessList *AL = getBlockAccesses(&B);
+ MemoryAccess *Phi = getMemoryAccess(&B);
+ if (Phi)
+ ActualAccesses.push_back(Phi);
+ for (Instruction &I : B) {
+ MemoryAccess *MA = getMemoryAccess(&I);
+ assert((!MA || AL) && "We have memory affecting instructions "
+ "in this block but they are not in the "
+ "access list");
+ if (MA)
+ ActualAccesses.push_back(MA);
+ }
+ // Either we hit the assert, really have no accesses, or we have both
+ // accesses and an access list
+ if (!AL)
+ continue;
+ assert(AL->size() == ActualAccesses.size() &&
+ "We don't have the same number of accesses in the block as on the "
+ "access list");
+ auto ALI = AL->begin();
+ auto AAI = ActualAccesses.begin();
+ while (ALI != AL->end() && AAI != ActualAccesses.end()) {
+ assert(&*ALI == *AAI && "Not the same accesses in the same order");
+ ++ALI;
+ ++AAI;
+ }
+ ActualAccesses.clear();
+ }
+}
+
+/// \brief Verify the domination properties of MemorySSA by checking that each
+/// definition dominates all of its uses.
+void MemorySSA::verifyDomination(Function &F) const {
+ for (BasicBlock &B : F) {
+ // Phi nodes are attached to basic blocks
+ if (MemoryPhi *MP = getMemoryAccess(&B)) {
+ for (User *U : MP->users()) {
+ BasicBlock *UseBlock;
+ // Phi operands are used on edges, we simulate the right domination by
+ // acting as if the use occurred at the end of the predecessor block.
+ if (MemoryPhi *P = dyn_cast<MemoryPhi>(U)) {
+ for (const auto &Arg : P->operands()) {
+ if (Arg == MP) {
+ UseBlock = P->getIncomingBlock(Arg);
+ break;
+ }
+ }
+ } else {
+ UseBlock = cast<MemoryAccess>(U)->getBlock();
+ }
+ (void)UseBlock;
+ assert(DT->dominates(MP->getBlock(), UseBlock) &&
+ "Memory PHI does not dominate its uses");
+ }
+ }
+
+ for (Instruction &I : B) {
+ MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
+ if (!MD)
+ continue;
+
+ for (User *U : MD->users()) {
+ BasicBlock *UseBlock;
+ (void)UseBlock;
+ // Things are allowed to flow to phi nodes over their predecessor edge.
+ if (auto *P = dyn_cast<MemoryPhi>(U)) {
+ for (const auto &Arg : P->operands()) {
+ if (Arg == MD) {
+ UseBlock = P->getIncomingBlock(Arg);
+ break;
+ }
+ }
+ } else {
+ UseBlock = cast<MemoryAccess>(U)->getBlock();
+ }
+ assert(DT->dominates(MD->getBlock(), UseBlock) &&
+ "Memory Def does not dominate its uses");
+ }
+ }
+ }
+}
+
+/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
+/// appears in the use list of \p Def.
+///
+/// llvm_unreachable is used instead of asserts because this may be called in
+/// a build without asserts. In that case, we don't want this to turn into a
+/// nop.
+void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
+ // The live on entry use may cause us to get a NULL def here
+ if (!Def) {
+ if (!isLiveOnEntryDef(Use))
+ llvm_unreachable("Null def but use does not point to live on entry def");
+ } else if (std::find(Def->user_begin(), Def->user_end(), Use) ==
+ Def->user_end()) {
+ llvm_unreachable("Did not find use in def's use list");
+ }
+}
+
+/// \brief Verify the immediate use information, by walking all the memory
+/// accesses and verifying that, for each use, it appears in the
+/// appropriate def's use list
+void MemorySSA::verifyDefUses(Function &F) const {
+ for (BasicBlock &B : F) {
+ // Phi nodes are attached to basic blocks
+ if (MemoryPhi *Phi = getMemoryAccess(&B)) {
+ assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+ pred_begin(&B), pred_end(&B))) &&
+ "Incomplete MemoryPhi Node");
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ }
+
+ for (Instruction &I : B) {
+ if (MemoryAccess *MA = getMemoryAccess(&I)) {
+ assert(isa<MemoryUseOrDef>(MA) &&
+ "Found a phi node not attached to a bb");
+ verifyUseInDefs(cast<MemoryUseOrDef>(MA)->getDefiningAccess(), MA);
+ }
+ }
+ }
+}
+
+MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const {
+ return ValueToMemoryAccess.lookup(I);
+}
+
+MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
+ return cast_or_null<MemoryPhi>(getMemoryAccess((const Value *)BB));
+}
+
+/// \brief Determine, for two memory accesses in the same block,
+/// whether \p Dominator dominates \p Dominatee.
+/// \returns True if \p Dominator dominates \p Dominatee.
+bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
+ const MemoryAccess *Dominatee) const {
+
+ assert((Dominator->getBlock() == Dominatee->getBlock()) &&
+ "Asking for local domination when accesses are in different blocks!");
+
+ // A node dominates itself.
+ if (Dominatee == Dominator)
+ return true;
+
+ // When Dominatee is defined on function entry, it is not dominated by another
+ // memory access.
+ if (isLiveOnEntryDef(Dominatee))
+ return false;
+
+ // When Dominator is defined on function entry, it dominates the other memory
+ // access.
+ if (isLiveOnEntryDef(Dominator)) + return true; + + // Get the access list for the block + const AccessList *AccessList = getBlockAccesses(Dominator->getBlock()); + AccessList::const_reverse_iterator It(Dominator->getIterator()); + + // If we hit the beginning of the access list before we hit dominatee, we must + // dominate it + return std::none_of(It, AccessList->rend(), + [&](const MemoryAccess &MA) { return &MA == Dominatee; }); +} + +const static char LiveOnEntryStr[] = "liveOnEntry"; + +void MemoryDef::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + + OS << getID() << " = MemoryDef("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryPhi::print(raw_ostream &OS) const { + bool First = true; + OS << getID() << " = MemoryPhi("; + for (const auto &Op : operands()) { + BasicBlock *BB = getIncomingBlock(Op); + MemoryAccess *MA = cast<MemoryAccess>(Op); + if (!First) + OS << ','; + else + First = false; + + OS << '{'; + if (BB->hasName()) + OS << BB->getName(); + else + BB->printAsOperand(OS, false); + OS << ','; + if (unsigned ID = MA->getID()) + OS << ID; + else + OS << LiveOnEntryStr; + OS << '}'; + } + OS << ')'; +} + +MemoryAccess::~MemoryAccess() {} + +void MemoryUse::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + OS << "MemoryUse("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryAccess::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +char MemorySSAPrinterLegacyPass::ID = 0; + +MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { + initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); +} + +bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { + auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSA.print(dbgs()); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + return false; +} + +char MemorySSAAnalysis::PassID; + +MemorySSA MemorySSAAnalysis::run(Function &F, AnalysisManager<Function> &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + return MemorySSA(F, &AA, &DT); +} + +PreservedAnalyses MemorySSAPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "MemorySSA for function: " << F.getName() << "\n"; + AM.getResult<MemorySSAAnalysis>(F).print(OS); + + return PreservedAnalyses::all(); +} + +PreservedAnalyses MemorySSAVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + AM.getResult<MemorySSAAnalysis>(F).verifyMemorySSA(); + + return PreservedAnalyses::all(); +} + +char MemorySSAWrapperPass::ID = 0; + +MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { + initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } + +void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); +} + +bool MemorySSAWrapperPass::runOnFunction(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MSSA.reset(new MemorySSA(F, &AA, &DT)); + return false; +} + +void 
MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
+
+void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
+  MSSA->print(OS);
+}
+
+MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
+
+MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
+                                        DominatorTree *D)
+    : MemorySSAWalker(M), AA(A), DT(D) {}
+
+MemorySSA::CachingWalker::~CachingWalker() {}
+
+struct MemorySSA::CachingWalker::UpwardsMemoryQuery {
+  // True if we saw a phi whose predecessor was a backedge
+  bool SawBackedgePhi;
+  // True if our original query started off as a call
+  bool IsCall;
+  // The pointer location we started the query with. This will be empty if
+  // IsCall is true.
+  MemoryLocation StartingLoc;
+  // This is the instruction we were querying about.
+  const Instruction *Inst;
+  // Set of visited Instructions for this query.
+  DenseSet<MemoryAccessPair> Visited;
+  // Vector of visited call accesses for this query. This is separated out
+  // because you can always cache and look up the result of call queries
+  // (i.e., when IsCall == true) for every call in the chain. The calls have
+  // no AA location associated with them, and thus no context dependence.
+  SmallVector<const MemoryAccess *, 32> VisitedCalls;
+  // The MemoryAccess we actually got called with, used to test local
+  // domination.
+  const MemoryAccess *OriginalAccess;
+
+  UpwardsMemoryQuery()
+      : SawBackedgePhi(false), IsCall(false), Inst(nullptr),
+        OriginalAccess(nullptr) {}
+
+  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+      : SawBackedgePhi(false), IsCall(ImmutableCallSite(Inst)), Inst(Inst),
+        OriginalAccess(Access) {}
+};
+
+void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
+
+  // TODO: We can do much better cache invalidation with differently stored
+  // caches. For now, for MemoryUses, we simply remove them from the cache,
+  // and kill the entire call/non-call cache for everything else. The problem
+  // is that for phis or defs, we'd need to follow the use chains down and
+  // invalidate anything below us in the chain that currently terminates at
+  // this access.
+
+  // See if this is a MemoryUse; if so, just remove the cached info. A
+  // MemoryUse is by definition never a barrier, so nothing in the cache could
+  // point to this use. In that case, we only need to invalidate the info for
+  // the use itself.
+
+  if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
+    UpwardsMemoryQuery Q;
+    Instruction *I = MU->getMemoryInst();
+    Q.IsCall = bool(ImmutableCallSite(I));
+    Q.Inst = I;
+    if (!Q.IsCall)
+      Q.StartingLoc = MemoryLocation::get(I);
+    doCacheRemove(MA, Q, Q.StartingLoc);
+  } else {
+    // If it is not a use, the best we can do right now is destroy the cache.
+    CachedUpwardsClobberingCall.clear();
+    CachedUpwardsClobberingAccess.clear();
+  }
+
+#ifdef EXPENSIVE_CHECKS
+  // Run this only when expensive checks are enabled.
+  verifyRemoved(MA);
+#endif
+}
+
+void MemorySSA::CachingWalker::doCacheRemove(const MemoryAccess *M,
+                                             const UpwardsMemoryQuery &Q,
+                                             const MemoryLocation &Loc) {
+  if (Q.IsCall)
+    CachedUpwardsClobberingCall.erase(M);
+  else
+    CachedUpwardsClobberingAccess.erase({M, Loc});
+}
+
+void MemorySSA::CachingWalker::doCacheInsert(const MemoryAccess *M,
+                                             MemoryAccess *Result,
+                                             const UpwardsMemoryQuery &Q,
+                                             const MemoryLocation &Loc) {
+  // This is fine for Phis, since there are times where we can't optimize them.
+  // Making a def its own clobber is never correct, though.
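+  // (Sketch of why: doCacheLookup is consulted before walking past an access,
+  // so a cache entry mapping a def to itself would make the walk stop at the
+  // def and report it as its own clobbering access.)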
+ assert((Result != M || isa<MemoryPhi>(M)) && + "Something can't clobber itself!"); + ++NumClobberCacheInserts; + if (Q.IsCall) + CachedUpwardsClobberingCall[M] = Result; + else + CachedUpwardsClobberingAccess[{M, Loc}] = Result; +} + +MemoryAccess * +MemorySSA::CachingWalker::doCacheLookup(const MemoryAccess *M, + const UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) { + ++NumClobberCacheLookups; + MemoryAccess *Result; + + if (Q.IsCall) + Result = CachedUpwardsClobberingCall.lookup(M); + else + Result = CachedUpwardsClobberingAccess.lookup({M, Loc}); + + if (Result) + ++NumClobberCacheHits; + return Result; +} + +bool MemorySSA::CachingWalker::instructionClobbersQuery( + const MemoryDef *MD, UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) const { + Instruction *DefMemoryInst = MD->getMemoryInst(); + assert(DefMemoryInst && "Defining instruction not actually an instruction"); + + if (!Q.IsCall) + return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod; + + // If this is a call, mark it for caching + if (ImmutableCallSite(DefMemoryInst)) + Q.VisitedCalls.push_back(MD); + ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst)); + return I != MRI_NoModRef; +} + +MemoryAccessPair MemorySSA::CachingWalker::UpwardsDFSWalk( + MemoryAccess *StartingAccess, const MemoryLocation &Loc, + UpwardsMemoryQuery &Q, bool FollowingBackedge) { + MemoryAccess *ModifyingAccess = nullptr; + + auto DFI = df_begin(StartingAccess); + for (auto DFE = df_end(StartingAccess); DFI != DFE;) { + MemoryAccess *CurrAccess = *DFI; + if (MSSA->isLiveOnEntryDef(CurrAccess)) + return {CurrAccess, Loc}; + // If this is a MemoryDef, check whether it clobbers our current query. This + // needs to be done before consulting the cache, because the cache reports + // the clobber for CurrAccess. If CurrAccess is a clobber for this query, + // and we ask the cache for information first, then we might skip this + // clobber, which is bad. + if (auto *MD = dyn_cast<MemoryDef>(CurrAccess)) { + // If we hit the top, stop following this path. + // While we can do lookups, we can't sanely do inserts here unless we were + // to track everything we saw along the way, since we don't know where we + // will stop. + if (instructionClobbersQuery(MD, Q, Loc)) { + ModifyingAccess = CurrAccess; + break; + } + } + if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc)) + return {CacheResult, Loc}; + + // We need to know whether it is a phi so we can track backedges. + // Otherwise, walk all upward defs. + if (!isa<MemoryPhi>(CurrAccess)) { + ++DFI; + continue; + } + +#ifndef NDEBUG + // The loop below visits the phi's children for us. Because phis are the + // only things with multiple edges, skipping the children should always lead + // us to the end of the loop. + // + // Use a copy of DFI because skipChildren would kill our search stack, which + // would make caching anything on the way back impossible. + auto DFICopy = DFI; + assert(DFICopy.skipChildren() == DFE && + "Skipping phi's children doesn't end the DFS?"); +#endif + + const MemoryAccessPair PHIPair(CurrAccess, Loc); + + // Don't try to optimize this phi again if we've already tried to do so. + if (!Q.Visited.insert(PHIPair).second) { + ModifyingAccess = CurrAccess; + break; + } + + std::size_t InitialVisitedCallSize = Q.VisitedCalls.size(); + + // Recurse on PHI nodes, since we need to change locations. + // TODO: Allow graphtraits on pairs, which would turn this whole function + // into a normal single depth first walk. 
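+    // In outline: walk each (incoming access, translated location) pair of
+    // the phi below; if every argument reaches one common def, the phi can be
+    // bypassed in favor of that def, otherwise the phi itself is the clobber.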
+    MemoryAccess *FirstDef = nullptr;
+    for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
+         MPI != MPE; ++MPI) {
+      bool Backedge =
+          !FollowingBackedge &&
+          DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
+
+      MemoryAccessPair CurrentPair =
+          UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge);
+      // All the phi arguments should reach the same point if we can bypass
+      // this phi. The alternative is that they hit this phi node, which
+      // means we can skip this argument.
+      if (FirstDef && CurrentPair.first != PHIPair.first &&
+          CurrentPair.first != FirstDef) {
+        ModifyingAccess = CurrAccess;
+        break;
+      }
+
+      if (!FirstDef)
+        FirstDef = CurrentPair.first;
+    }
+
+    // If the phi-argument loop ran to completion, every argument reached the
+    // same def, so we can bypass the phi in favor of FirstDef. If we exited
+    // it early, the phi itself is the clobbering access.
+    if (!ModifyingAccess) {
+      assert(FirstDef && "Found a Phi with no upward defs?");
+      ModifyingAccess = FirstDef;
+    } else {
+      // If we can't optimize this Phi, then we can't safely cache any of the
+      // calls we visited when trying to optimize it. Wipe them out now.
+      Q.VisitedCalls.resize(InitialVisitedCallSize);
+    }
+    break;
+  }
+
+  if (!ModifyingAccess)
+    return {MSSA->getLiveOnEntryDef(), Q.StartingLoc};
+
+  const BasicBlock *OriginalBlock = StartingAccess->getBlock();
+  assert(DFI.getPathLength() > 0 && "We dropped our path?");
+  unsigned N = DFI.getPathLength();
+  // If we found a clobbering def, the last element in the path will be our
+  // clobber, so we don't want to cache that to itself. OTOH, if we optimized a
+  // phi, we can add the last thing in the path to the cache, since that won't
+  // be the result.
+  if (DFI.getPath(N - 1) == ModifyingAccess)
+    --N;
+  for (; N > 1; --N) {
+    MemoryAccess *CacheAccess = DFI.getPath(N - 1);
+    BasicBlock *CurrBlock = CacheAccess->getBlock();
+    if (!FollowingBackedge)
+      doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+    if (DT->dominates(CurrBlock, OriginalBlock) &&
+        (CurrBlock != OriginalBlock || !FollowingBackedge ||
+         MSSA->locallyDominates(CacheAccess, StartingAccess)))
+      break;
+  }
+
+  // Cache everything else on the way back. The caller should cache
+  // StartingAccess for us.
+  for (; N > 1; --N) {
+    MemoryAccess *CacheAccess = DFI.getPath(N - 1);
+    doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+  }
+
+  return {ModifyingAccess, Loc};
+}
+
+/// \brief Walk the use-def chains starting at \p StartingAccess and find
+/// the MemoryAccess that actually clobbers Loc.
+///
+/// \returns our clobbering memory access
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
+  return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first;
+}
+
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+    MemoryAccess *StartingAccess, MemoryLocation &Loc) {
+  if (isa<MemoryPhi>(StartingAccess))
+    return StartingAccess;
+
+  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
+  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
+    return StartingUseOrDef;
+
+  Instruction *I = StartingUseOrDef->getMemoryInst();
+
+  // Conservatively, fences are always clobbers, so don't perform the walk if
+  // we hit a fence.
+  if (!ImmutableCallSite(I) && I->isFenceLike())
+    return StartingUseOrDef;
+
+  UpwardsMemoryQuery Q;
+  Q.OriginalAccess = StartingUseOrDef;
+  Q.StartingLoc = Loc;
+  Q.Inst = StartingUseOrDef->getMemoryInst();
+  Q.IsCall = false;
+
+  if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc))
+    return CacheResult;
+
+  // Unlike the other function, do not walk to the def of a def, because we are
+  // handed something we already believe is the clobbering access.
+  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
+                                     ? StartingUseOrDef->getDefiningAccess()
+                                     : StartingUseOrDef;
+
+  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+  // Only cache this if it wouldn't make Clobber point to itself.
+  if (Clobber != StartingAccess)
+    doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc);
+  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *StartingUseOrDef << "\n");
+  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *Clobber << "\n");
+  return Clobber;
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(const Instruction *I) {
+  // There should be no way to look up an instruction and get a phi as the
+  // access, since we only map BBs to PHIs. So, this must be a use or def.
+  auto *StartingAccess = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(I));
+
+  bool IsCall = bool(ImmutableCallSite(I));
+
+  // We can't sanely do anything with fences; they conservatively clobber all
+  // memory and have no locations to get pointers from to try to disambiguate.
+  if (!IsCall && I->isFenceLike())
+    return StartingAccess;
+
+  UpwardsMemoryQuery Q;
+  Q.OriginalAccess = StartingAccess;
+  Q.IsCall = IsCall;
+  if (!Q.IsCall)
+    Q.StartingLoc = MemoryLocation::get(I);
+  Q.Inst = I;
+  if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
+    return CacheResult;
+
+  // Start with the thing we already think clobbers this location.
+  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+  // At this point, DefiningAccess may be the live on entry def.
+  // If it is, we will not get a better result.
+  if (MSSA->isLiveOnEntryDef(DefiningAccess))
+    return DefiningAccess;
+
+  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
+  // DFS won't cache a result for DefiningAccess. So, if DefiningAccess isn't
+  // our clobber, be sure that it gets a cache entry, too.
+  if (Result != DefiningAccess)
+    doCacheInsert(DefiningAccess, Result, Q, Q.StartingLoc);
+  doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc);
+  // TODO: When this implementation is more mature, we may want to figure out
+  // what this additional caching buys us. It's most likely A Good Thing.
+  if (Q.IsCall)
+    for (const MemoryAccess *MA : Q.VisitedCalls)
+      if (MA != Result)
+        doCacheInsert(MA, Result, Q, Q.StartingLoc);
+
+  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *DefiningAccess << "\n");
+  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *Result << "\n");
+
+  return Result;
+}
+
+// Verify that MA doesn't exist in any of the caches.
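+// This is the EXPENSIVE_CHECKS counterpart to invalidateInfo above: after an
+// access has been invalidated, no cache key or cached result may still
+// mention it.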
+void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) { +#ifndef NDEBUG + for (auto &P : CachedUpwardsClobberingAccess) + assert(P.first.first != MA && P.second != MA && + "Found removed MemoryAccess in cache."); + for (auto &P : CachedUpwardsClobberingCall) + assert(P.first != MA && P.second != MA && + "Found removed MemoryAccess in cache."); +#endif // !NDEBUG +} + +MemoryAccess * +DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) + return Use->getDefiningAccess(); + return MA; +} + +MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, MemoryLocation &) { + if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) + return Use->getDefiningAccess(); + return StartingAccess; +} +} diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index 9ec28a3f3d47c..eb91885186244 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -21,8 +20,8 @@ using namespace llvm; -static void appendToGlobalArray(const char *Array, - Module &M, Function *F, int Priority) { +static void appendToGlobalArray(const char *Array, Module &M, Function *F, + int Priority, Constant *Data) { IRBuilder<> IRB(M.getContext()); FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); @@ -31,15 +30,26 @@ static void appendToGlobalArray(const char *Array, SmallVector<Constant *, 16> CurrentCtors; StructType *EltTy; if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { - // If there is a global_ctors array, use the existing struct type, which can - // have 2 or 3 fields. - ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType()); - EltTy = cast<StructType>(ATy->getElementType()); + ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType()); + StructType *OldEltTy = cast<StructType>(ATy->getElementType()); + // Upgrade a 2-field global array type to the new 3-field format if needed. + if (Data && OldEltTy->getNumElements() < 3) + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + IRB.getInt8PtrTy(), nullptr); + else + EltTy = OldEltTy; if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); - for (unsigned i = 0; i != n; ++i) - CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); + for (unsigned i = 0; i != n; ++i) { + auto Ctor = cast<Constant>(Init->getOperand(i)); + if (EltTy != OldEltTy) + Ctor = ConstantStruct::get( + EltTy, Ctor->getAggregateElement((unsigned)0), + Ctor->getAggregateElement(1), + Constant::getNullValue(IRB.getInt8PtrTy()), nullptr); + CurrentCtors.push_back(Ctor); + } } GVCtor->eraseFromParent(); } else { @@ -54,7 +64,8 @@ static void appendToGlobalArray(const char *Array, CSVals[1] = F; // FIXME: Drop support for the two element form in LLVM 4.0. if (EltTy->getNumElements() >= 3) - CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy()); + CSVals[2] = Data ? 
ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+                       : Constant::getNullValue(IRB.getInt8PtrTy());
 Constant *RuntimeCtorInit =
     ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
@@ -70,29 +81,12 @@ static void appendToGlobalArray(const char *Array,
                                 GlobalValue::AppendingLinkage, NewInit, Array);
 }
 
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
-  appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
+  appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
 }
 
-void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
-  appendToGlobalArray("llvm.global_dtors", M, F, Priority);
-}
-
-GlobalVariable *
-llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
-                                 bool CompilerUsed) {
-  const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
-  GlobalVariable *GV = M.getGlobalVariable(Name);
-  if (!GV || !GV->hasInitializer())
-    return GV;
-
-  const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
-  for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
-    Value *Op = Init->getOperand(I);
-    GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
-    Set.insert(G);
-  }
-  return GV;
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
+  appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
 }
 
 Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
@@ -132,4 +126,3 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
   }
   return std::make_pair(Ctor, InitFunction);
 }
-
diff --git a/lib/Transforms/Utils/NameAnonFunctions.cpp b/lib/Transforms/Utils/NameAnonFunctions.cpp
new file mode 100644
index 0000000000000..c4f3839d8482a
--- /dev/null
+++ b/lib/Transforms/Utils/NameAnonFunctions.cpp
@@ -0,0 +1,102 @@
+//===- NameAnonFunctions.cpp - Name anonymous functions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements naming anonymous functions to make sure they can be
+// referred to by ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+// Compute a "unique" hash for the module based on the names of its public
+// functions and globals.
+class ModuleHasher {
+  Module &TheModule;
+  std::string TheHash;
+
+public:
+  ModuleHasher(Module &M) : TheModule(M) {}
+
+  /// Return the lazily computed hash.
+  std::string &get() {
+    if (!TheHash.empty())
+      // Cache hit :)
+      return TheHash;
+
+    MD5 Hasher;
+    for (auto &F : TheModule) {
+      if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName())
+        continue;
+      auto Name = F.getName();
+      Hasher.update(Name);
+    }
+    for (auto &GV : TheModule.globals()) {
+      if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName())
+        continue;
+      auto Name = GV.getName();
+      Hasher.update(Name);
+    }
+
+    // Now return the result.
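+    // Note that the hash depends only on the names hashed above, so it is
+    // stable as long as the module's public symbol names do not change.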
+ MD5::MD5Result Hash; + Hasher.final(Hash); + SmallString<32> Result; + MD5::stringifyResult(Hash, Result); + TheHash = Result.str(); + return TheHash; + } +}; + +// Rename all the anon functions in the module +bool llvm::nameUnamedFunctions(Module &M) { + bool Changed = false; + ModuleHasher ModuleHash(M); + int count = 0; + for (auto &F : M) { + if (F.hasName()) + continue; + F.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++)); + Changed = true; + } + return Changed; +} + +namespace { + +// Simple pass that provides a name to every anon function. +class NameAnonFunction : public ModulePass { + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + const char *getPassName() const override { return "Name Anon Functions"; } + + explicit NameAnonFunction() : ModulePass(ID) {} + + bool runOnModule(Module &M) override { return nameUnamedFunctions(M); } +}; +char NameAnonFunction::ID = 0; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(NameAnonFunction, "name-anon-functions", + "Provide a name to nameless functions", false, false) +INITIALIZE_PASS_END(NameAnonFunction, "name-anon-functions", + "Provide a name to nameless functions", false, false) + +namespace llvm { +ModulePass *createNameAnonFunctionPass() { return new NameAnonFunction(); } +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index c4f9b9f614078..cbf385d563399 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -523,7 +523,7 @@ void PromoteMem2Reg::run() { AllocaInfo Info; LargeBlockInfo LBI; - IDFCalculator IDF(DT); + ForwardIDFCalculator IDF(DT); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -802,7 +802,8 @@ void PromoteMem2Reg::ComputeLiveInBlocks( // actually live-in here. LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); LiveInBlockWorklist.pop_back(); - --i, --e; + --i; + --e; break; } diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp new file mode 100644 index 0000000000000..9afd175c10ed5 --- /dev/null +++ b/lib/Transforms/Utils/SanitizerStats.cpp @@ -0,0 +1,108 @@ +//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements code generation for sanitizer statistics gathering. 
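+//
+// In outline, the module's statistics live in an internal global of type
+// roughly { i8*, i32, [N x [2 x i8*]] }: create() appends one entry and emits
+// a call to __sanitizer_stat_report for it, and finish() registers the
+// finished global with __sanitizer_stat_init from a global constructor.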
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SanitizerStats.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) { + StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2); + EmptyModuleStatsTy = makeModuleStatsTy(); + + ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false, + GlobalValue::InternalLinkage, nullptr); +} + +ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() { + return ArrayType::get(StatTy, Inits.size()); +} + +StructType *SanitizerStatReport::makeModuleStatsTy() { + return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()), + Type::getInt32Ty(M->getContext()), + makeModuleStatsArrayTy()}); +} + +void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { + Function *F = B.GetInsertBlock()->getParent(); + Module *M = F->getParent(); + PointerType *Int8PtrTy = B.getInt8PtrTy(); + IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout()); + ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2); + + Inits.push_back(ConstantArray::get( + StatTy, + {Constant::getNullValue(Int8PtrTy), + ConstantExpr::getIntToPtr( + ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() - + kSanitizerStatKindBits)), + Int8PtrTy)})); + + FunctionType *StatReportTy = + FunctionType::get(B.getVoidTy(), Int8PtrTy, false); + Constant *StatReport = M->getOrInsertFunction( + "__sanitizer_stat_report", StatReportTy); + + auto InitAddr = ConstantExpr::getGetElementPtr( + EmptyModuleStatsTy, ModuleStatsGV, + ArrayRef<Constant *>{ + ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2), + ConstantInt::get(IntPtrTy, Inits.size() - 1), + }); + B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy)); +} + +void SanitizerStatReport::finish() { + if (Inits.empty()) { + ModuleStatsGV->eraseFromParent(); + return; + } + + PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(M->getContext()); + Type *VoidTy = Type::getVoidTy(M->getContext()); + + // Create a new ModuleStatsGV to replace the old one. We can't just set the + // old one's initializer because its type is different. + auto NewModuleStatsGV = new GlobalVariable( + *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage, + ConstantStruct::getAnon( + {Constant::getNullValue(Int8PtrTy), + ConstantInt::get(Int32Ty, Inits.size()), + ConstantArray::get(makeModuleStatsArrayTy(), Inits)})); + ModuleStatsGV->replaceAllUsesWith( + ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType())); + ModuleStatsGV->eraseFromParent(); + + // Create a global constructor to register NewModuleStatsGV. 
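+  // The constructor body is just a call to __sanitizer_stat_init on the new
+  // global; appendToGlobalCtors below adds it to llvm.global_ctors.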
+ auto F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, "", M); + auto BB = BasicBlock::Create(M->getContext(), "", F); + IRBuilder<> B(BB); + + FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false); + Constant *StatInit = M->getOrInsertFunction( + "__sanitizer_stat_init", StatInitTy); + + B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); + B.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e484b690597e9..0504646c304ef 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" @@ -45,6 +44,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <map> @@ -58,17 +58,18 @@ using namespace PatternMatch; // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the // minimum reasonable default. -static cl::opt<unsigned> -PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), - cl::desc("Control the amount of phi node folding to perform (default = 2)")); +static cl::opt<unsigned> PHINodeFoldingThreshold( + "phi-node-folding-threshold", cl::Hidden, cl::init(2), + cl::desc( + "Control the amount of phi node folding to perform (default = 2)")); -static cl::opt<bool> -DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), - cl::desc("Duplicate return instructions into unconditional branches")); +static cl::opt<bool> DupRet( + "simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); static cl::opt<bool> -SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), - cl::desc("Sink common instructions down to the end block")); + SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); static cl::opt<bool> HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), @@ -96,48 +97,54 @@ static cl::opt<unsigned> MaxSpeculationDepth( "speculatively executed instructions")); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); -STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); -STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); -STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); +STATISTIC(NumLinearMaps, + "Number of switch instructions turned into linear mapping"); +STATISTIC(NumLookupTables, + "Number of switch instructions turned into lookup tables"); +STATISTIC( + NumLookupTablesHoles, + "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSinkCommons, + "Number of common instructions sunk down to the end block"); 
STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { - // The first field contains the value that the switch produces when a certain - // case group is selected, and the second field is a vector containing the - // cases composing the case group. - typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> +// The first field contains the value that the switch produces when a certain +// case group is selected, and the second field is a vector containing the +// cases composing the case group. +typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> SwitchCaseResultVectorTy; - // The first field contains the phi node that generates a result of the switch - // and the second field contains the value generated for a certain case in the - // switch for that PHI. - typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; +// The first field contains the phi node that generates a result of the switch +// and the second field contains the value generated for a certain case in the +// switch for that PHI. +typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; - /// ValueEqualityComparisonCase - Represents a case of a switch. - struct ValueEqualityComparisonCase { - ConstantInt *Value; - BasicBlock *Dest; +/// ValueEqualityComparisonCase - Represents a case of a switch. +struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; - ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) : Value(Value), Dest(Dest) {} - bool operator<(ValueEqualityComparisonCase RHS) const { - // Comparing pointers is ok as we only rely on the order for uniquing. - return Value < RHS.Value; - } + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. 
+ return Value < RHS.Value; + } - bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } - }; + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } +}; class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout &DL; unsigned BonusInstThreshold; AssumptionCache *AC; + SmallPtrSetImpl<BasicBlock *> *LoopHeaders; Value *isValueEqualityComparison(TerminatorInst *TI); - BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector<ValueEqualityComparisonCase> &Cases); + BasicBlock *GetValueEqualityComparisonCases( + TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder); @@ -152,13 +159,15 @@ class SimplifyCFGOpt { bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); - bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); - bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, AssumptionCache *AC) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), + LoopHeaders(LoopHeaders) {} bool run(BasicBlock *BB); }; } @@ -166,19 +175,19 @@ public: /// Return true if it is safe to merge these two /// terminator instructions together. static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { - if (SI1 == SI2) return false; // Can't merge with self! + if (SI1 == SI2) + return false; // Can't merge with self! // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); - SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) - if (SI1Succs.count(*I)) - for (BasicBlock::iterator BBI = (*I)->begin(); - isa<PHINode>(BBI); ++BBI) { + for (BasicBlock *Succ : successors(SI2BB)) + if (SI1Succs.count(Succ)) + for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { PHINode *PN = cast<PHINode>(BBI); if (PN->getIncomingValueForBlock(SI1BB) != PN->getIncomingValueForBlock(SI2BB)) @@ -191,11 +200,12 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { /// Return true if it is safe and profitable to merge these two terminator /// instructions together, where SI1 is an unconditional branch. PhiNodes will /// store all PHI nodes in common successors. -static bool isProfitableToFoldUnconditional(BranchInst *SI1, - BranchInst *SI2, - Instruction *Cond, - SmallVectorImpl<PHINode*> &PhiNodes) { - if (SI1 == SI2) return false; // Can't merge with self! 
+static bool +isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, + Instruction *Cond, + SmallVectorImpl<PHINode *> &PhiNodes) { + if (SI1 == SI2) + return false; // Can't merge with self! assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in @@ -204,7 +214,8 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); - if (!Ci2) return false; + if (!Ci2) + return false; if (!(Cond->getOperand(0) == Ci2->getOperand(0) && Cond->getOperand(1) == Ci2->getOperand(1)) && !(Cond->getOperand(0) == Ci2->getOperand(1) && @@ -213,11 +224,10 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); - SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) - if (SI1Succs.count(*I)) - for (BasicBlock::iterator BBI = (*I)->begin(); - isa<PHINode>(BBI); ++BBI) { + SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + for (BasicBlock *Succ : successors(SI2BB)) + if (SI1Succs.count(Succ)) + for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { PHINode *PN = cast<PHINode>(BBI); if (PN->getIncomingValueForBlock(SI1BB) != Cond || !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) @@ -233,11 +243,11 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, /// of Succ. static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { - if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do + if (!isa<PHINode>(Succ->begin())) + return; // Quick exit if nothing to do PHINode *PN; - for (BasicBlock::iterator I = Succ->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) + for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } @@ -270,7 +280,7 @@ static unsigned ComputeSpeculationCost(const User *I, /// V plus its non-dominating operands. If that cost is greater than /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - SmallPtrSetImpl<Instruction*> *AggressiveInsts, + SmallPtrSetImpl<Instruction *> *AggressiveInsts, unsigned &CostRemaining, const TargetTransformInfo &TTI, unsigned Depth = 0) { @@ -294,7 +304,8 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // We don't want to allow weird loops that might have the "if condition" in // the bottom of this block. - if (PBB == BB) return false; + if (PBB == BB) + return false; // If this instruction is defined in a block that contains an unconditional // branch to BB, then it must be in the 'conditional' part of the "if @@ -305,10 +316,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. - if (!AggressiveInsts) return false; + if (!AggressiveInsts) + return false; // If we have seen this instruction before, don't count it again. - if (AggressiveInsts->count(I)) return true; + if (AggressiveInsts->count(I)) + return true; // Okay, it looks like the instruction IS in the "condition". 
Check to
   // see if it's a cheap instruction to unconditionally compute, and if it
@@ -366,8 +379,8 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
     if (CI->getType() == PtrTy)
       return CI;
     else
-      return cast<ConstantInt>
-        (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+      return cast<ConstantInt>(
+          ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
   }
   return nullptr;
 }
@@ -403,11 +416,11 @@ struct ConstantComparesGatherer {
   operator=(const ConstantComparesGatherer &) = delete;
 
 private:
-
   /// Try to set the current value used for the comparison; it succeeds only if
   /// it wasn't set before or if the new value is the same as the old one.
   bool setValueOnce(Value *NewVal) {
-    if(CompValue && CompValue != NewVal) return false;
+    if (CompValue && CompValue != NewVal)
+      return false;
     CompValue = NewVal;
     return (CompValue != nullptr);
   }
@@ -424,35 +437,99 @@ private:
     ICmpInst *ICI;
     ConstantInt *C;
     if (!((ICI = dyn_cast<ICmpInst>(I)) &&
-         (C = GetConstantInt(I->getOperand(1), DL)))) {
+          (C = GetConstantInt(I->getOperand(1), DL)))) {
      return false;
    }
 
    Value *RHSVal;
-    ConstantInt *RHSC;
+    const APInt *RHSC;
 
    // Pattern match a special case
    // (x & ~2^z) == y --> x == y || x == y|2^z
    // This undoes a transformation done by instcombine to fuse 2 compares.
-    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
+
+      // It's a little bit hard to see why the following transformations are
+      // correct. Here is a CVC3 program to verify them for 64-bit values:
+
+      /*
+      ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
+      x : BITVECTOR(64);
+      y : BITVECTOR(64);
+      z : BITVECTOR(64);
+      mask : BITVECTOR(64) = BVSHL(ONE, z);
+      QUERY( (y & ~mask = y) =>
+             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+      );
+      QUERY( (y | mask = y) =>
+             ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+      );
+      */
+
+      // Please note that each pattern must be a dual implication (<--> or
+      // iff). One directional implication can create spurious matches. If the
+      // implication is only one-way, an unsatisfiable condition on the left
+      // side can imply a satisfiable condition on the right side. Dual
+      // implication ensures that satisfiable conditions are transformed to
+      // other satisfiable conditions and unsatisfiable conditions are
+      // transformed to other unsatisfiable conditions.
+
+      // Here is a concrete example of an unsatisfiable condition on the left
+      // implying a satisfiable condition on the right:
+      //
+      // mask = (1 << z)
+      // (x & ~mask) == y --> (x == y || x == (y | mask))
+      //
+      // Substituting y = 3, z = 0 yields:
+      // (x & -2) == 3 --> (x == 3 || x == 2)
+
+      // Pattern match a special case:
+      /*
+      QUERY( (y & ~mask = y) =>
+             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+      );
+      */
      if (match(ICI->getOperand(0),
-               m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
-        APInt Not = ~RHSC->getValue();
-        if (Not.isPowerOf2()) {
+                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
+        APInt Mask = ~*RHSC;
+        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
- if(!setValueOnce(RHSVal)) + if (!setValueOnce(RHSVal)) + return false; + + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), + C->getValue() | Mask)); + UsedICmps++; + return true; + } + } + + // Pattern match a special case: + /* + QUERY( (y | mask = y) => + ((x | mask = y) <=> (x = y OR x = (y & ~mask))) + ); + */ + if (match(ICI->getOperand(0), + m_Or(m_Value(RHSVal), m_APInt(RHSC)))) { + APInt Mask = *RHSC; + if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { + // If we already have a value for the switch, it has to match! + if (!setValueOnce(RHSVal)) return false; Vals.push_back(C); Vals.push_back(ConstantInt::get(C->getContext(), - C->getValue() | Not)); + C->getValue() & ~Mask)); UsedICmps++; return true; } } // If we already have a value for the switch, it has to match! - if(!setValueOnce(ICI->getOperand(0))) + if (!setValueOnce(ICI->getOperand(0))) return false; UsedICmps++; @@ -467,8 +544,8 @@ private: // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. Value *CandidateVal = I->getOperand(0); - if(match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)))) { - Span = Span.subtract(RHSC->getValue()); + if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) { + Span = Span.subtract(*RHSC); CandidateVal = RHSVal; } @@ -484,7 +561,7 @@ private: } // If we already have a value for the switch, it has to match! - if(!setValueOnce(CandidateVal)) + if (!setValueOnce(CandidateVal)) return false; // Add all values from the range to the set @@ -493,7 +570,6 @@ private: UsedICmps++; return true; - } /// Given a potentially 'or'd or 'and'd together collection of icmp @@ -507,18 +583,22 @@ private: // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector<Value *, 8> DFT; + SmallPtrSet<Value *, 8> Visited; // Initialize + Visited.insert(V); DFT.push_back(V); - while(!DFT.empty()) { + while (!DFT.empty()) { V = DFT.pop_back_val(); if (Instruction *I = dyn_cast<Instruction>(V)) { // If it is a || (or && depending on isEQ), process the operands. if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) { - DFT.push_back(I->getOperand(1)); - DFT.push_back(I->getOperand(0)); + if (Visited.insert(I->getOperand(1)).second) + DFT.push_back(I->getOperand(1)); + if (Visited.insert(I->getOperand(0)).second) + DFT.push_back(I->getOperand(0)); continue; } @@ -541,7 +621,6 @@ private: } } }; - } static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { @@ -556,7 +635,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { } TI->eraseFromParent(); - if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond); + if (Cond) + RecursivelyDeleteTriviallyDeadInstructions(Cond); } /// Return true if the specified terminator checks @@ -566,8 +646,9 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. 
- if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()), - pred_end(SI->getParent())) <= 128) + if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= + 128) CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) @@ -589,46 +670,44 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { /// Given a value comparison instruction, /// decode all of the 'cases' that it represents and return the 'default' block. -BasicBlock *SimplifyCFGOpt:: -GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector<ValueEqualityComparisonCase> - &Cases) { +BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( + TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cases.reserve(SI->getNumCases()); - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) - Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(), - i.getCaseSuccessor())); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) + Cases.push_back( + ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor())); return SI->getDefaultDest(); } BranchInst *BI = cast<BranchInst>(TI); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); - Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), - DL), - Succ)); + Cases.push_back(ValueEqualityComparisonCase( + GetConstantInt(ICI->getOperand(1), DL), Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } - /// Given a vector of bb/value pairs, remove any entries /// in the list that match the specified block. -static void EliminateBlockCases(BasicBlock *BB, - std::vector<ValueEqualityComparisonCase> &Cases) { +static void +EliminateBlockCases(BasicBlock *BB, + std::vector<ValueEqualityComparisonCase> &Cases) { Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); } /// Return true if there are any keys in C1 that exist in C2 as well. -static bool -ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, - std::vector<ValueEqualityComparisonCase > &C2) { +static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, + std::vector<ValueEqualityComparisonCase> &C2) { std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; // Make V1 be smaller than V2. if (V1->size() > V2->size()) std::swap(V1, V2); - if (V1->size() == 0) return false; + if (V1->size() == 0) + return false; if (V1->size() == 1) { // Just scan V2. ConstantInt *TheVal = (*V1)[0].Value; @@ -657,30 +736,30 @@ ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, /// also a value comparison with the same value, and if that comparison /// determines the outcome of this comparison. If so, simplify TI. This does a /// very limited form of jump threading. -bool SimplifyCFGOpt:: -SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred, - IRBuilder<> &Builder) { +bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( + TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); - if (!PredVal) return false; // Not a value comparison in predecessor. + if (!PredVal) + return false; // Not a value comparison in predecessor. 
Value *ThisVal = isValueEqualityComparison(TI); assert(ThisVal && "This isn't a value comparison!!"); - if (ThisVal != PredVal) return false; // Different predicates. + if (ThisVal != PredVal) + return false; // Different predicates. // TODO: Preserve branch weight metadata, similarly to how // FoldValueComparisonIntoPredecessors preserves it. // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), - PredCases); - EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + BasicBlock *PredDef = + GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. // Find information about how control leaves this block. std::vector<ValueEqualityComparisonCase> ThisCases; BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); - EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. // If TI's block is the default block from Pred's comparison, potentially // simplify TI based on this knowledge. @@ -697,13 +776,14 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisCases.size() == 1 && "Branch can only have one case!"); // Insert the new branch. Instruction *NI = Builder.CreateBr(ThisDef); - (void) NI; + (void)NI; // Remove PHI node entries for the dead edge. ThisCases[0].Dest->removePredecessor(TI->getParent()); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -711,7 +791,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. - SmallPtrSet<Constant*, 16> DeadCases; + SmallPtrSet<Constant *, 16> DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].Value); @@ -732,7 +812,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, --i; if (DeadCases.count(i.getCaseValue())) { if (HasWeight) { - std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + std::swap(Weights[i.getCaseIndex() + 1], Weights.back()); Weights.pop_back(); } i.getCaseSuccessor()->removePredecessor(TI->getParent()); @@ -741,8 +821,8 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } if (HasWeight && Weights.size() >= 2) SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()). - createBranchWeights(Weights)); + MDBuilder(SI->getParent()->getContext()) + .createBranchWeights(Weights)); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -755,7 +835,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { if (TIV) - return false; // Cannot handle multiple values coming to this block. + return false; // Cannot handle multiple values coming to this block. TIV = PredCases[i].Value; } assert(TIV && "No edge from pred to succ?"); @@ -770,53 +850,53 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } // If not handled by any explicit cases, it is handled by the default case. 
- if (!TheRealDest) TheRealDest = ThisDef; + if (!TheRealDest) + TheRealDest = ThisDef; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; - for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) - if (*SI != CheckEdge) - (*SI)->removePredecessor(TIBB); + for (BasicBlock *Succ : successors(TIBB)) + if (Succ != CheckEdge) + Succ->removePredecessor(TIBB); else CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); - (void) NI; + (void)NI; DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); EraseTerminatorInstAndDCECond(TI); return true; } namespace { - /// This class implements a stable ordering of constant - /// integers that does not depend on their address. This is important for - /// applications that sort ConstantInt's to ensure uniqueness. - struct ConstantIntOrdering { - bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { - return LHS->getValue().ult(RHS->getValue()); - } - }; +/// This class implements a stable ordering of constant +/// integers that does not depend on their address. This is important for +/// applications that sort ConstantInt's to ensure uniqueness. +struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } +}; } static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2) { const ConstantInt *LHS = *P1; const ConstantInt *RHS = *P2; - if (LHS->getValue().ult(RHS->getValue())) - return 1; - if (LHS->getValue() == RHS->getValue()) + if (LHS == RHS) return 0; - return -1; + return LHS->getValue().ult(RHS->getValue()) ? 1 : -1; } -static inline bool HasBranchWeights(const Instruction* I) { +static inline bool HasBranchWeights(const Instruction *I) { MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof); if (ProfMD && ProfMD->getOperand(0)) - if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) return MDS->getString().equals("branch_weights"); return false; @@ -837,7 +917,7 @@ static void GetBranchWeights(TerminatorInst *TI, // If TI is a conditional eq, the default case is the false case, // and the corresponding branch-weight data is at index 2. We swap the // default weight to be the first entry. - if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { assert(Weights.size() == 2); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) @@ -862,17 +942,17 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) { bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); - Value *CV = isValueEqualityComparison(TI); // CondVal + Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); bool Changed = false; - SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); while (!Preds.empty()) { BasicBlock *Pred = Preds.pop_back_val(); // See if the predecessor is a comparison with the same value. 
TerminatorInst *PTI = Pred->getTerminator(); - Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { // Figure out which 'cases' to copy from SI to PSI. @@ -885,7 +965,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Based on whether the default edge from PTI goes to BB or not, fill in // PredCases and PredDefault with the new switch cases we would like to // build. - SmallVector<BasicBlock*, 8> NewSuccessors; + SmallVector<BasicBlock *, 8> NewSuccessors; // Update the branch weight metadata along the way SmallVector<uint64_t, 8> Weights; @@ -915,7 +995,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. - std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest != BB) PTIHandled.insert(PredCases[i].Value); @@ -925,13 +1005,14 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (PredHasWeights || SuccHasWeights) { // Increase weight for the default case. - Weights[0] += Weights[i+1]; - std::swap(Weights[i+1], Weights.back()); + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); Weights.pop_back(); } PredCases.pop_back(); - --i; --e; + --i; + --e; } // Reconstruct the new switch statement we will be building. @@ -952,8 +1033,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // The default weight is at index 0, so weight for the ith case // should be at index i+1. Scale the cases from successor by // PredDefaultWeight (Weights[0]). - Weights.push_back(Weights[0] * SuccWeights[i+1]); - ValidTotalSuccWeight += SuccWeights[i+1]; + Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; } } @@ -969,21 +1050,22 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. 
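Two pieces of the weight arithmetic above are worth spelling out: a predecessor case that targeted BB is absorbed into the predecessor's default weight, and each case copied over from the successor is scaled by that default weight, since such a case can only fire after the predecessor's default edge was taken. A simplified, self-checking sketch with assumed example weights (the real code also rescales the remaining default; this shows just the two steps quoted above):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Assumed profile: [PredDefault, PredCase->BB]; successor [Default, Case].
  std::vector<uint64_t> Weights = {8, 2};
  const uint64_t SuccWeights[] = {1, 3};
  // A case that jumped to BB folds into the default weight...
  Weights[0] += Weights[1];
  Weights.pop_back(); // Weights == {10}
  // ...and a case inherited from the successor is scaled by it.
  Weights.push_back(Weights[0] * SuccWeights[1]);
  assert(Weights[0] == 10 && Weights[1] == 30);
  return 0;
}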
- std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; - std::map<ConstantInt*, uint64_t> WeightsForHandled; + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == BB) { PTIHandled.insert(PredCases[i].Value); if (PredHasWeights || SuccHasWeights) { - WeightsForHandled[PredCases[i].Value] = Weights[i+1]; - std::swap(Weights[i+1], Weights.back()); + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); Weights.pop_back(); } std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); - --i; --e; + --i; + --e; } // Okay, now we know which constants were sent to BB from the @@ -995,17 +1077,16 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); - PTIHandled.erase(BBCases[i].Value);// This constant is taken care of + PTIHandled.erase( + BBCases[i].Value); // This constant is taken care of } // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = - PTIHandled.begin(), - E = PTIHandled.end(); I != E; ++I) { + for (ConstantInt *I : PTIHandled) { if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[*I]); - PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); NewSuccessors.push_back(BBDefault); } } @@ -1024,8 +1105,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, } // Now that the successors are updated, create the new Switch instruction. - SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, - PredCases.size()); + SwitchInst *NewSI = + Builder.CreateSwitch(CV, PredDefault, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); for (ValueEqualityComparisonCase &V : PredCases) NewSI->addCase(V.Value, V.Dest); @@ -1036,9 +1117,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - NewSI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(MDWeights)); + NewSI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(BB->getContext()).createBranchWeights(MDWeights)); } EraseTerminatorInstAndDCECond(PTI); @@ -1052,8 +1133,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create(BB->getContext(), - "infloop", BB->getParent()); + InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", + BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); } NewSI->setSuccessor(i, InfLoopBlock); @@ -1070,13 +1151,13 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // can't hoist the invoke, as there is nowhere to put the select in this case. 
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { - for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + for (BasicBlock *Succ : successors(BB1)) { PHINode *PN; - for (BasicBlock::iterator BBI = SI->begin(); + for (BasicBlock::iterator BBI = Succ->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); - if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) { + if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { return false; } } @@ -1096,8 +1177,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As // such, we currently just scan for obviously identical instructions in an // identical order. - BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. - BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); @@ -1135,12 +1216,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_range, - LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, - LLVMContext::MD_align, LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access}; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Changed = true; @@ -1165,9 +1250,9 @@ HoistTerminator: if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) return Changed; - for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + for (BasicBlock *Succ : successors(BB1)) { PHINode *PN; - for (BasicBlock::iterator BBI = SI->begin(); + for (BasicBlock::iterator BBI = Succ->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); @@ -1178,7 +1263,7 @@ HoistTerminator: // eliminate undefined control flow then converting it to a select. if (passingValueIsAlwaysUndefined(BB1V, PN) || passingValueIsAlwaysUndefined(BB2V, PN)) - return Changed; + return Changed; if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) return Changed; @@ -1196,27 +1281,28 @@ HoistTerminator: NT->takeName(I1); } - IRBuilder<true, NoFolder> Builder(NT); + IRBuilder<NoFolder> Builder(NT); // Hoisting one of the terminators from our successor is a great thing. // Unfortunately, the successors of the if/else blocks may have PHI nodes in // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI // nodes, so we insert select instruction to compute the final result. 
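When the two sides feed different values into a successor PHI, the code that follows manufactures at most one select per distinct (BB1V, BB2V) pair and reuses it across all PHIs; the InsertedSelects map provides the deduplication. The find-or-create shape, as a plain-C++ sketch (the tiny Value struct and the ownership-free allocation are illustration-only assumptions):

#include <map>
#include <string>
#include <utility>

struct Value { std::string Name; };

// Reuse one select per distinct pair of disagreeing incoming values.
static Value *getOrCreateSelect(
    std::map<std::pair<Value *, Value *>, Value *> &InsertedSelects,
    Value *TrueV, Value *FalseV) {
  Value *&Sel = InsertedSelects[std::make_pair(TrueV, FalseV)];
  if (!Sel) // first time we see this pair: create the merged value
    Sel = new Value{TrueV->Name + "." + FalseV->Name};
  return Sel;
}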
- std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects; - for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; + for (BasicBlock *Succ : successors(BB1)) { PHINode *PN; - for (BasicBlock::iterator BBI = SI->begin(); + for (BasicBlock::iterator BBI = Succ->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); - if (BB1V == BB2V) continue; + if (BB1V == BB2V) + continue; // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; if (!SI) - SI = cast<SelectInst> - (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, - BB1V->getName()+"."+BB2V->getName())); + SI = cast<SelectInst>( + Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName() + "." + BB2V->getName(), BI)); // Make the PHI node use the select for all incoming values for BB1/BB2 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -1226,8 +1312,8 @@ HoistTerminator: } // Update any PHI nodes in our new successors. - for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) - AddPredecessorToBlock(*SI, BIParent, BB1); + for (BasicBlock *Succ : successors(BB1)) + AddPredecessorToBlock(Succ, BIParent, BB1); EraseTerminatorInstAndDCECond(BI); return true; @@ -1280,10 +1366,12 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { RI2 = BB2->getInstList().rbegin(), RE2 = BB2->getInstList().rend(); // Skip debug info. - while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) + ++RI1; if (RI1 == RE1) return false; - while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) + ++RI2; if (RI2 == RE2) return false; // Skip the unconditional branches. @@ -1293,10 +1381,12 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { bool Changed = false; while (RI1 != RE1 && RI2 != RE2) { // Skip debug info. - while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) + ++RI1; if (RI1 == RE1) return Changed; - while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) + ++RI2; if (RI2 == RE2) return Changed; @@ -1305,22 +1395,19 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // I1 and I2 should have a single use in the same PHI node, and they // perform the same operation. // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa<PHINode>(I1) || isa<PHINode>(I2) || - isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || - I1->isEHPad() || I2->isEHPad() || + if (isa<PHINode>(I1) || isa<PHINode>(I2) || isa<TerminatorInst>(I1) || + isa<TerminatorInst>(I2) || I1->isEHPad() || I2->isEHPad() || isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || - !I1->hasOneUse() || !I2->hasOneUse() || - !JointValueMap.count(InstPair) + !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair)) return Changed; // Check whether we should swap the operands of ICmpInst. // TODO: Add support for commutativity.
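The operand-swap test above (the subject of the commutativity TODO) reduces to one condition: no operand slot already matches between the two compares, but at least one operand matches the opposite slot, so swapping I2's operands and inverting its predicate may make the instructions identical. Isolated as a plain-C++ predicate over assumed opaque operand handles:

// True when exchanging (B0, B1) could line the second compare up with the
// first: nothing matches slot-for-slot, yet something matches cross-slot.
static bool worthSwappingOperands(const void *A0, const void *A1,
                                  const void *B0, const void *B1) {
  return A0 != B0 && A1 != B1 && (A0 == B1 || A1 == B0);
}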
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); bool SwapOpnds = false; - if (ICmp1 && ICmp2 && - ICmp1->getOperand(0) != ICmp2->getOperand(0) && + if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) && ICmp1->getOperand(1) != ICmp2->getOperand(1) && (ICmp1->getOperand(0) == ICmp2->getOperand(1) || ICmp1->getOperand(1) == ICmp2->getOperand(0))) { @@ -1343,8 +1430,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { continue; // Early exit if we have more-than one pair of different operands or if // we need a PHI node to replace a constant. - if (Op1Idx != ~0U || - isa<Constant>(I1->getOperand(I)) || + if (Op1Idx != ~0U || isa<Constant>(I1->getOperand(I)) || isa<Constant>(I2->getOperand(I))) { // If we can't sink the instructions, undo the swapping. if (SwapOpnds) @@ -1379,7 +1465,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // We need to update RE1 and RE2 if we are going to sink the first // instruction in the basic block down. - bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front()); // Sink the instruction. BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), BB1->getInstList(), I1); @@ -1444,22 +1530,26 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, Value *StorePtr = StoreToHoist->getPointerOperand(); // Look for a store to the same pointer in BrBB. - unsigned MaxNumInstToLookAt = 10; - for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), - RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { - Instruction *CurI = &*RI; + unsigned MaxNumInstToLookAt = 9; + for (Instruction &CurI : reverse(*BrBB)) { + if (!MaxNumInstToLookAt) + break; + // Skip debug info. + if (isa<DbgInfoIntrinsic>(CurI)) + continue; + --MaxNumInstToLookAt; // Could be calling an instruction that effects memory like free(). - if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI)) return nullptr; - StoreInst *SI = dyn_cast<StoreInst>(CurI); - // Found the previous store make sure it stores to the same location. - if (SI && SI->getPointerOperand() == StorePtr) - // Found the previous store, return its value operand. - return SI->getValueOperand(); - else if (SI) + if (auto *SI = dyn_cast<StoreInst>(&CurI)) { + // Found the previous store make sure it stores to the same location. + if (SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); return nullptr; // Unknown store. + } } return nullptr; @@ -1562,11 +1652,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Do not hoist the instruction if any of its operands are defined but not // used in BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); - i != e; ++i) { + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); - if (!OpI || OpI->getParent() != BB || - OpI->mayHaveSideEffects()) + if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) continue; // Not a candidate for sinking. ++SinkCandidateUseCounts[OpI]; @@ -1576,8 +1664,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Consider any sink candidates which are only used in CondBB as costs for // speculation. 
Note, while we iterate over a DenseMap here, we are summing // and so iteration order isn't significant. - for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I = - SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); + for (SmallDenseMap<Instruction *, unsigned, 4>::iterator + I = SinkCandidateUseCounts.begin(), + E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->getNumUses() == I->second) { ++SpeculationCost; @@ -1613,8 +1702,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = 2 * PHINodeFoldingThreshold * - TargetTransformInfo::TCC_Basic; + unsigned MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) return false; @@ -1637,19 +1726,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Insert a select of the value of the speculated store. if (SpeculatedStoreValue) { - IRBuilder<true, NoFolder> Builder(BI); + IRBuilder<NoFolder> Builder(BI); Value *TrueV = SpeculatedStore->getValueOperand(); Value *FalseV = SpeculatedStoreValue; if (Invert) std::swap(TrueV, FalseV); - Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + - "." + FalseV->getName()); + Value *S = Builder.CreateSelect( + BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); SpeculatedStore->setOperand(0, S); } // Metadata can be dependent on the condition we are hoisting above. // Conservatively strip all metadata on the instruction. - for (auto &I: *ThenBB) + for (auto &I : *ThenBB) I.dropUnknownNonDebugMetadata(); // Hoist the instructions. @@ -1657,7 +1746,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, ThenBB->begin(), std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. - IRBuilder<true, NoFolder> Builder(BI); + IRBuilder<NoFolder> Builder(BI); for (BasicBlock::iterator I = EndBB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) { unsigned OrigI = PN->getBasicBlockIndex(BB); @@ -1675,8 +1764,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, Value *TrueV = ThenV, *FalseV = OrigV; if (Invert) std::swap(TrueV, FalseV); - Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, - TrueV->getName() + "." + FalseV->getName()); + Value *V = Builder.CreateSelect( + BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); PN->setIncomingValue(OrigI, V); PN->setIncomingValue(ThenI, V); } @@ -1685,19 +1774,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return true; } -/// \returns True if this block contains a CallInst with the NoDuplicate -/// attribute. -static bool HasNoDuplicateCall(const BasicBlock *BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - const CallInst *CI = dyn_cast<CallInst>(I); - if (!CI) - continue; - if (CI->cannotDuplicate()) - return true; - } - return false; -} - /// Return true if we can thread a branch across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); @@ -1706,14 +1782,16 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; - if (Size > 10) return false; // Don't clone large BB's. 
+ if (Size > 10) + return false; // Don't clone large BB's. ++Size; // We can only support instructions that do not define values that are // live outside of the current basic block. for (User *U : BBI->users()) { Instruction *UI = cast<Instruction>(U); - if (UI->getParent() != BB || isa<PHINode>(UI)) return false; + if (UI->getParent() != BB || isa<PHINode>(UI)) + return false; } // Looks ok, continue checking. @@ -1740,32 +1818,41 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { } // Now we know that this block has multiple preds and two succs. - if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + if (!BlockIsSimpleEnoughToThreadThrough(BB)) + return false; - if (HasNoDuplicateCall(BB)) return false; + // Can't fold blocks that contain noduplicate or convergent calls. + if (llvm::any_of(*BB, [](const Instruction &I) { + const CallInst *CI = dyn_cast<CallInst>(&I); + return CI && (CI->cannotDuplicate() || CI->isConvergent()); + })) + return false; // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (!CB || !CB->getType()->isIntegerTy(1)) continue; + if (!CB || !CB->getType()->isIntegerTy(1)) + continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - if (RealDest == BB) continue; // Skip self loops. + if (RealDest == BB) + continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. - if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + if (isa<IndirectBrInst>(PredBB->getTerminator())) + continue; // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting // the edge we are about to create. - BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), - RealDest->getName()+".critedge", - RealDest->getParent(), RealDest); + BasicBlock *EdgeBB = + BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", + RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); // Update PHI nodes. @@ -1775,7 +1862,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { // instructions into EdgeBB. We know that there will be no uses of the // cloned instructions outside of EdgeBB. BasicBlock::iterator InsertPt = EdgeBB->begin(); - DenseMap<Value*, Value*> TranslateMap; // Track translated values. + DenseMap<Value *, Value *> TranslateMap; // Track translated values. for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast<PHINode>(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); @@ -1783,26 +1870,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { } // Clone the instruction. Instruction *N = BBI->clone(); - if (BBI->hasName()) N->setName(BBI->getName()+".c"); + if (BBI->hasName()) + N->setName(BBI->getName() + ".c"); // Update operands due to translation. 
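The new any_of check above folds the old HasNoDuplicateCall helper into its caller and widens it: convergent calls must not be duplicated either, so they now block the fold too. The same shape with std::any_of over an assumed per-instruction record (all names here are illustrative):

#include <algorithm>
#include <vector>

struct InstInfo {
  bool IsCall;
  bool CannotDuplicate;
  bool IsConvergent;
};

// One unclonable call disqualifies the whole block from being threaded.
static bool blockHasUnclonableCall(const std::vector<InstInfo> &BB) {
  return std::any_of(BB.begin(), BB.end(), [](const InstInfo &I) {
    return I.IsCall && (I.CannotDuplicate || I.IsConvergent);
  });
}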
- for (User::op_iterator i = N->op_begin(), e = N->op_end(); - i != e; ++i) { - DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i); + for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { + DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i); if (PI != TranslateMap.end()) *i = PI->second; } // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, DL)) { - TranslateMap[&*BBI] = V; - delete N; // Instruction folded away, don't need actual inst + if (!BBI->use_empty()) + TranslateMap[&*BBI] = V; + if (!N->mayHaveSideEffects()) { + delete N; // Instruction folded away, don't need actual inst + N = nullptr; + } } else { - // Insert the new instruction into its new home. - EdgeBB->getInstList().insert(InsertPt, N); if (!BBI->use_empty()) TranslateMap[&*BBI] = N; } + // Insert the new instruction into its new home. + if (N) + EdgeBB->getInstList().insert(InsertPt, N); } // Loop over all of the edges from PredBB to BB, changing them to branch @@ -1852,7 +1944,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. - SmallPtrSet<Instruction*, 4> AggressiveInsts; + SmallPtrSet<Instruction *, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; MaxCostVal0 *= TargetTransformInfo::TCC_Basic; @@ -1876,7 +1968,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); - if (!PN) return true; + if (!PN) + return true; // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. @@ -1886,10 +1979,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, isa<BinaryOperator>(IfCond))) return false; - // If we all PHI nodes are promotable, check to make sure that all - // instructions in the predecessor blocks can be promoted as well. If - // not, we won't be able to get rid of the control flow, so it's not - // worth promoting to select instructions. + // If all PHI nodes are promotable, check to make sure that all instructions + // in the predecessor blocks can be promoted as well. If not, we won't be able + // to get rid of the control flow, so it's not worth promoting to select + // instructions. BasicBlock *DomBlock = nullptr; BasicBlock *IfBlock1 = PN->getIncomingBlock(0); BasicBlock *IfBlock2 = PN->getIncomingBlock(1); @@ -1897,11 +1990,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, IfBlock1 = nullptr; } else { DomBlock = *pred_begin(IfBlock1); - for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) + for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I); + ++I) if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. - // Because of this, we won't be able to get rid of the control - // flow, so the xform is not worth it. + // Because of this, we won't be able to get rid of the control flow, so + // the xform is not worth it. 
return false; } } @@ -1910,11 +2004,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, IfBlock2 = nullptr; } else { DomBlock = *pred_begin(IfBlock2); - for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) + for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I); + ++I) if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. - // Because of this, we won't be able to get rid of the control - // flow, so the xform is not worth it. + // Because of this, we won't be able to get rid of the control flow, so + // the xform is not worth it. return false; } } @@ -1925,7 +2020,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); - IRBuilder<true, NoFolder> Builder(InsertPt); + IRBuilder<NoFolder> Builder(InsertPt); // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. @@ -1940,13 +2035,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. - Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - SelectInst *NV = - cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); - PN->replaceAllUsesWith(NV); - NV->takeName(PN); + Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt); + PN->replaceAllUsesWith(Sel); + Sel->takeName(PN); PN->eraseFromParent(); } @@ -2029,51 +2123,32 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } else if (isa<UndefValue>(TrueValue)) { TrueValue = FalseValue; } else { - TrueValue = Builder.CreateSelect(BrCond, TrueValue, - FalseValue, "retval"); + TrueValue = + Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI); } } - Value *RI = !TrueValue ? - Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); + Value *RI = + !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); - (void) RI; + (void)RI; DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI - << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); EraseTerminatorInstAndDCECond(BI); return true; } -/// Given a conditional BranchInstruction, retrieve the probabilities of the -/// branch taking each edge. Fills in the two APInt parameters and returns true, -/// or returns false if no or invalid metadata was found. 
-static bool ExtractBranchMetadata(BranchInst *BI, - uint64_t &ProbTrue, uint64_t &ProbFalse) { - assert(BI->isConditional() && - "Looking for probabilities on unconditional branch?"); - MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); - if (!ProfileData || ProfileData->getNumOperands() != 3) return false; - ConstantInt *CITrue = - mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); - ConstantInt *CIFalse = - mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); - if (!CITrue || !CIFalse) return false; - ProbTrue = CITrue->getValue().getZExtValue(); - ProbFalse = CIFalse->getValue().getZExtValue(); - return true; -} - /// Return true if the given instruction is available /// in its predecessor block. If yes, the instruction will be removed. static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) return false; - for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) { - Instruction *PBI = &*I; + for (Instruction &I : *PB) { + Instruction *PBI = &I; // Check whether Inst and PBI generate the same value. if (Inst->isIdenticalTo(PBI)) { Inst->replaceAllUsesWith(PBI); @@ -2084,6 +2159,29 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { return false; } +/// Return true if either PBI or BI has branch weight available, and store +/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does +/// not have branch weight, use 1:1 as its weight. +static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, + uint64_t &PredTrueWeight, + uint64_t &PredFalseWeight, + uint64_t &SuccTrueWeight, + uint64_t &SuccFalseWeight) { + bool PredHasWeights = + PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight); + bool SuccHasWeights = + BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight); + if (PredHasWeights || SuccHasWeights) { + if (!PredHasWeights) + PredTrueWeight = PredFalseWeight = 1; + if (!SuccHasWeights) + SuccTrueWeight = SuccFalseWeight = 1; + return true; + } else { + return false; + } +} + /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. @@ -2103,8 +2201,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { if (PBI->isConditional() && (BI->getSuccessor(0) == PBI->getSuccessor(0) || BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { Instruction *Curr = &*I++; if (isa<CmpInst>(Curr)) { Cond = Curr; @@ -2122,13 +2219,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) - return false; + return false; // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = ++Cond->getIterator(); // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; + while (isa<DbgInfoIntrinsic>(CondIt)) + ++CondIt; if (&*CondIt != BI) return false; @@ -2139,7 +2237,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // as "bonus instructions", and only allow this transformation when the // number of the bonus instructions does not exceed a certain threshold. 
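extractPredSuccWeights above has an easy-to-miss contract: it succeeds when either branch carries weights and fills in the missing side with a neutral 1:1 split, so callers never need a three-way case split. The defaulting logic as a self-contained, self-checking sketch (the function name is an assumption):

#include <cassert>
#include <cstdint>

static bool defaultMissingWeights(bool PredHas, bool SuccHas,
                                  uint64_t &PredT, uint64_t &PredF,
                                  uint64_t &SuccT, uint64_t &SuccF) {
  if (!PredHas && !SuccHas)
    return false; // neither side has profile data: nothing to merge
  if (!PredHas)
    PredT = PredF = 1; // treat the unprofiled side as an even split
  if (!SuccHas)
    SuccT = SuccF = 1;
  return true;
}

int main() {
  uint64_t PT = 0, PF = 0, ST = 6, SF = 2;
  assert(defaultMissingWeights(false, true, PT, PF, ST, SF));
  assert(PT == 1 && PF == 1 && ST == 6 && SF == 2);
  return 0;
}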
unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != I; ++I) { + for (auto I = BB->begin(); Cond != &*I; ++I) { // Ignore dbg intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; @@ -2168,7 +2266,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; // Finally, don't infinitely unroll conditional loops. - BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; if (TrueDest == BB || FalseDest == BB) return false; @@ -2180,10 +2278,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - SmallVector<PHINode*, 4> PHIs; + SmallVector<PHINode *, 4> PHIs; if (!PBI || PBI->isUnconditional() || - (BI->isConditional() && - !SafeToMergeTerminators(BI, PBI)) || + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; @@ -2193,16 +2290,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { bool InvertPredCond = false; if (BI->isConditional()) { - if (PBI->getSuccessor(0) == TrueDest) + if (PBI->getSuccessor(0) == TrueDest) { Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) + } else if (PBI->getSuccessor(1) == FalseDest) { Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else + } else if (PBI->getSuccessor(0) == FalseDest) { + Opc = Instruction::And; + InvertPredCond = true; + } else if (PBI->getSuccessor(1) == TrueDest) { + Opc = Instruction::Or; + InvertPredCond = true; + } else { continue; + } } else { if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) continue; @@ -2219,8 +2319,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, - PBI->getCondition()->getName()+".not"); + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); } PBI->setCondition(NewCond); @@ -2234,12 +2334,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // We already make sure Cond is the last instruction before BI. Therefore, // all instructions before Cond other than DbgInfoIntrinsic are bonus // instructions. - for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) { + for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { if (isa<DbgInfoIntrinsic>(BonusInst)) continue; Instruction *NewBonusInst = BonusInst->clone(); RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); VMap[&*BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about @@ -2258,49 +2358,49 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // two conditions together. 
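The four-way opcode selection above is easiest to audit as a table: whichever successor of PBI coincides with a successor of BI fixes both the combining opcode and whether PBI's condition must be inverted first. A sketch that encodes the table directly (the enum and struct are assumptions, and opaque pointers stand in for blocks):

enum class Combine { Or, And, None };

struct FoldPlan {
  Combine Opc;
  bool InvertPredCond;
};

// succ0 == TrueDest : PBI-true already goes where BI-true goes   -> Or
// succ1 == FalseDest: PBI-false already goes where BI-false goes -> And
// succ0 == FalseDest: as And, but PBI's condition must be negated
// succ1 == TrueDest : as Or, but PBI's condition must be negated
static FoldPlan planFold(const void *Succ0, const void *Succ1,
                         const void *TrueDest, const void *FalseDest) {
  if (Succ0 == TrueDest)
    return {Combine::Or, false};
  if (Succ1 == FalseDest)
    return {Combine::And, false};
  if (Succ0 == FalseDest)
    return {Combine::And, true};
  if (Succ1 == TrueDest)
    return {Combine::Or, true};
  return {Combine::None, false};
}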
Instruction *New = Cond->clone(); RemapInstruction(New, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); PredBlock->getInstList().insert(PBI->getIterator(), New); New->takeName(Cond); Cond->setName(New->getName() + ".old"); if (BI->isConditional()) { - Instruction *NewCond = - cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), - New, "or.cond")); + Instruction *NewCond = cast<Instruction>( + Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, - PredFalseWeight); - bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, - SuccFalseWeight); + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); SmallVector<uint64_t, 8> NewWeights; if (PBI->getSuccessor(0) == BB) { - if (PredHasWeights && SuccHasWeights) { + if (HasWeights) { // PBI: br i1 %x, BB, FalseDest // BI: br i1 %y, TrueDest, FalseDest - //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + // TrueWeight for PBI * FalseWeight for BI. // We assume that total weights of a BranchInst can fit into 32 bits. // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + - SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); } AddPredecessorToBlock(TrueDest, PredBlock, BB); PBI->setSuccessor(0, TrueDest); } if (PBI->getSuccessor(1) == BB) { - if (PredHasWeights && SuccHasWeights) { + if (HasWeights) { // PBI: br i1 %x, TrueDest, BB // BI: br i1 %y, TrueDest, FalseDest - //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + - SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); - //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredTrueWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. NewWeights.push_back(PredFalseWeight * SuccFalseWeight); } AddPredecessorToBlock(FalseDest, PredBlock, BB); @@ -2310,51 +2410,42 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); - SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end()); - PBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()). - createBranchWeights(MDWeights)); + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), + NewWeights.end()); + PBI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(BI->getContext()).createBranchWeights(MDWeights)); } else PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { // Update PHI nodes in the common successors. 
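The merged weights above conserve total probability mass: for the succ(0) == BB case, PredTrue*SuccTrue plus PredFalse*(SuccFalse+SuccTrue) + PredTrue*SuccFalse factors back into (PredTrue+PredFalse)*(SuccTrue+SuccFalse). A checkable sketch of that identity, plus one plausible way to implement the "halve the weights" step that FitWeights performs (the example numbers and the fitWeights body are assumptions, not the LLVM implementation):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Halve every weight until the largest fits in 32 bits.
static void fitWeights(std::vector<uint64_t> &W) {
  while (*std::max_element(W.begin(), W.end()) > UINT32_MAX)
    for (uint64_t &X : W)
      X >>= 1;
}

int main() {
  const uint64_t PT = 3, PF = 5, ST = 7, SF = 2; // assumed branch weights
  std::vector<uint64_t> NewW = {PT * ST, PF * (SF + ST) + PT * SF};
  assert(NewW[0] + NewW[1] == (PT + PF) * (ST + SF)); // mass is preserved
  fitWeights(NewW); // a no-op here; matters once the products overflow
  return 0;
}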
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { ConstantInt *PBI_C = cast<ConstantInt>( - PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); Instruction *MergedCond = nullptr; if (PBI->getSuccessor(0) == TrueDest) { // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) // is false: !PBI_Cond and BI_Value - Instruction *NotCond = - cast<Instruction>(Builder.CreateNot(PBI->getCondition(), - "not.cond")); - MergedCond = - cast<Instruction>(Builder.CreateBinOp(Instruction::And, - NotCond, New, - "and.cond")); + Instruction *NotCond = cast<Instruction>( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast<Instruction>( + Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond")); if (PBI_C->isOne()) - MergedCond = - cast<Instruction>(Builder.CreateBinOp(Instruction::Or, - PBI->getCondition(), MergedCond, - "or.cond")); + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); } else { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = - cast<Instruction>(Builder.CreateBinOp(Instruction::And, - PBI->getCondition(), New, - "and.cond")); + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::And, PBI->getCondition(), New, "and.cond")); if (PBI_C->isOne()) { - Instruction *NotCond = - cast<Instruction>(Builder.CreateNot(PBI->getCondition(), - "not.cond")); - MergedCond = - cast<Instruction>(Builder.CreateBinOp(Instruction::Or, - NotCond, MergedCond, - "or.cond")); + Instruction *NotCond = cast<Instruction>( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast<Instruction>(Builder.CreateBinOp( + Instruction::Or, NotCond, MergedCond, "or.cond")); } } // Update PHI Node. @@ -2371,9 +2462,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // could replace PBI's branch probabilities with BI's. // Copy any debug value intrinsics into the end of PredBlock. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (isa<DbgInfoIntrinsic>(*I)) - I->clone()->insertBefore(PBI); + for (Instruction &I : *BB) + if (isa<DbgInfoIntrinsic>(I)) + I.clone()->insertBefore(PBI); return true; } @@ -2417,7 +2508,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, // where OtherBB is the single other predecessor of BB's only successor. PHINode *PHI = nullptr; BasicBlock *Succ = BB->getSingleSuccessor(); - + for (auto I = Succ->begin(); isa<PHINode>(I); ++I) if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) { PHI = cast<PHINode>(I); @@ -2443,8 +2534,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, PHI->addIncoming(V, BB); for (BasicBlock *PredBB : predecessors(Succ)) if (PredBB != BB) - PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()), - PredBB); + PHI->addIncoming( + AlternativeV ? 
AlternativeV : UndefValue::get(V->getType()), PredBB); return PHI; } @@ -2481,10 +2572,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return N <= PHINodeFoldingThreshold; }; - if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) || - !IsWorthwhile(PFB) || - !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || + !IsWorthwhile(QFB))) return false; // For every pointer, there must be exactly two stores, one coming from @@ -2561,7 +2651,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, QStore->eraseFromParent(); PStore->eraseFromParent(); - + return true; } @@ -2593,7 +2683,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { // We model triangles as a type of diamond with a nullptr "true" block. // Triangles are canonicalized so that the fallthrough edge is represented by // a true condition, as in the diagram above. - // + // BasicBlock *PTB = PBI->getSuccessor(0); BasicBlock *PFB = PBI->getSuccessor(1); BasicBlock *QTB = QBI->getSuccessor(0); @@ -2622,8 +2712,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { // the post-dominating block, and the non-fallthroughs must only have one // predecessor. auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { - return BB->getSinglePredecessor() == P && - BB->getSingleSuccessor() == S; + return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; if (!PostBB || !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || @@ -2637,7 +2726,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { // OK, this is a sequence of two diamonds or triangles. // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. - SmallPtrSet<Value *,4> PStoreAddresses, QStoreAddresses; + SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses; for (auto *BB : {PTB, PFB}) { if (!BB) continue; @@ -2652,7 +2741,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { if (StoreInst *SI = dyn_cast<StoreInst>(&I)) QStoreAddresses.insert(SI->getPointerOperand()); } - + set_intersect(PStoreAddresses, QStoreAddresses); // set_intersect mutates PStoreAddresses in place. Rename it here to make it // clear what it contains. @@ -2684,9 +2773,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), - CondIsTrue)); - return true; // Nuke the branch on constant. + BI->setCondition( + ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); + return true; // Nuke the branch on constant. } // Otherwise, if there are multiple predecessors, insert a PHI that merges @@ -2702,13 +2791,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Any predecessor where the condition is not computable we keep symbolic. 
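mergeConditionalStores only proceeds for addresses written on both sides, which the surrounding code computes by collecting each diamond's store destinations and intersecting the two sets; LLVM's set_intersect mutates its first argument in place. The same filter with std:: containers (names are illustrative):

#include <iterator>
#include <set>

// Keep only the addresses stored by both sides; only those are candidates
// for a single merged conditional store after the two diamonds.
static void intersectInPlace(std::set<const void *> &PAddrs,
                             const std::set<const void *> &QAddrs) {
  for (auto It = PAddrs.begin(); It != PAddrs.end();)
    It = QAddrs.count(*It) ? std::next(It) : PAddrs.erase(It);
}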
for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; - if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && - PBI != BI && PBI->isConditional() && - PBI->getCondition() == BI->getCondition() && + if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && + PBI->isConditional() && PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), - CondIsTrue), P); + NewPN->addIncoming( + ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), + P); } else { NewPN->addIncoming(BI->getCondition(), P); } @@ -2723,19 +2812,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (CE->canTrap()) return false; - // If BI is reached from the true path of PBI and PBI's condition implies - // BI's condition, we know the direction of the BI branch. - if (PBI->getSuccessor(0) == BI->getParent() && - isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && - PBI->getSuccessor(0) != PBI->getSuccessor(1) && - BB->getSinglePredecessor()) { - // Turn this into a branch on constant. - auto *OldCond = BI->getCondition(); - BI->setCondition(ConstantInt::getTrue(BB->getContext())); - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return true; // Nuke the branch on constant. - } - // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. @@ -2753,16 +2829,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, return false; int PBIOp, BIOp; - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - PBIOp = BIOp = 0; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - PBIOp = 0, BIOp = 1; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - PBIOp = 1, BIOp = 0; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - PBIOp = BIOp = 1; - else + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + PBIOp = 0; + BIOp = 0; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + PBIOp = 0; + BIOp = 1; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + PBIOp = 1; + BIOp = 0; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + PBIOp = 1; + BIOp = 1; + } else { return false; + } // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will @@ -2780,8 +2861,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); unsigned NumPhis = 0; - for (BasicBlock::iterator II = CommonDest->begin(); - isa<PHINode>(II); ++II, ++NumPhis) { + for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); + ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. return false; @@ -2804,7 +2885,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2815,8 +2895,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (OtherDest == BB) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. 
:) - BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), - "infloop", BB->getParent()); + BasicBlock *InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; } @@ -2828,13 +2908,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); - IRBuilder<true, NoFolder> Builder(PBI); + IRBuilder<NoFolder> Builder(PBI); if (PBIOp) - PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not"); + PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not"); Value *BICond = BI->getCondition(); if (BIOp) - BICond = Builder.CreateNot(BICond, BICond->getName()+".not"); + BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); @@ -2846,15 +2926,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, - PredFalseWeight); - bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, - SuccFalseWeight); - if (PredHasWeights && SuccHasWeights) { - uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; - uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; - uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; - uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + uint64_t PredCommon, PredOther, SuccCommon, SuccOther; + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); + if (HasWeights) { + PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; + SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; // The weight to CommonDest should be PredCommon * SuccTotal + // PredOther * SuccCommon. // The weight to OtherDest should be PredOther * SuccOther. @@ -2885,9 +2965,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, Value *PBIV = PN->getIncomingValue(PBBIdx); if (BIV != PBIV) { // Insert a select in PBI to pick the right value. - Value *NV = cast<SelectInst> - (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux")); + SelectInst *NV = cast<SelectInst>( + Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); PN->setIncomingValue(PBBIdx, NV); + // Although the select has the same condition as PBI, the original branch + // weights for PBI do not apply to the new select because the select's + // 'logical' edges are incoming edges of the phi that is eliminated, not + // the outgoing edges of PBI. + if (HasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to PredCommonDest should be PredCommon * SuccTotal. + // The weight to PredOtherDest should be PredOther * SuccCommon. 
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), + PredOther * SuccCommon}; + + FitWeights(NewWeights); + + NV->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()) + .createBranchWeights(NewWeights[0], NewWeights[1])); + } } } @@ -2907,7 +3007,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, uint32_t TrueWeight, - uint32_t FalseWeight){ + uint32_t FalseWeight) { // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -2942,8 +3042,8 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); if (TrueWeight != FalseWeight) NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(OldTerm->getContext()). - createBranchWeights(TrueWeight, FalseWeight)); + MDBuilder(OldTerm->getContext()) + .createBranchWeights(TrueWeight, FalseWeight)); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this @@ -2988,16 +3088,16 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { if (HasWeights) { GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { - TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). - getSuccessorIndex()]; - FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). - getSuccessorIndex()]; + TrueWeight = + (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()]; + FalseWeight = + (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()]; } } // Perform the actual simplification. - return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, - TrueWeight, FalseWeight); + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight, + FalseWeight); } // Replaces @@ -3017,8 +3117,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, - 0, 0); + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, + 0); } /// This is called when we find an icmp instruction @@ -3046,7 +3146,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // If the block has any PHIs in it or the icmp has multiple uses, it is too // complex. - if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false; + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) + return false; Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); @@ -3055,7 +3156,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); - if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false; + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) + return false; SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) @@ -3104,7 +3206,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // If the icmp is a SETEQ, then the default dest gets false, the new edge gets // true in the PHI. 
Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); - Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) std::swap(DefaultCst, NewCst); @@ -3116,21 +3218,21 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // Okay, the switch goes to this block on a default value. Add an edge from // the switch to the merge point on the compared value. - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", - BB->getParent(), BB); + BasicBlock *NewBB = + BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SmallVector<uint64_t, 8> Weights; bool HasWeights = HasBranchWeights(SI); if (HasWeights) { GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { // Split weight for default case to case for "Cst". - Weights[0] = (Weights[0]+1) >> 1; + Weights[0] = (Weights[0] + 1) >> 1; Weights.push_back(Weights[0]); SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()). - createBranchWeights(MDWeights)); + SI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(SI->getContext()).createBranchWeights(MDWeights)); } } SI->addCase(Cst, NewBB); @@ -3149,7 +3251,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, const DataLayout &DL) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (!Cond) return false; + if (!Cond) + return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of @@ -3158,13 +3261,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // Try to gather values from a chain of and/or to be turned into a switch ConstantComparesGatherer ConstantCompare(Cond, DL); // Unpack the result - SmallVectorImpl<ConstantInt*> &Values = ConstantCompare.Vals; + SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; Value *CompVal = ConstantCompare.CompValue; unsigned UsedICmps = ConstantCompare.UsedICmps; Value *ExtraCase = ConstantCompare.Extra; // If we didn't have a multiply compared value, fail. - if (!CompVal) return false; + if (!CompVal) + return false; // Avoid turning single icmps into a switch. if (UsedICmps <= 1) @@ -3179,20 +3283,23 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just a conditional branch. - if (ExtraCase && Values.size() < 2) return false; + if (ExtraCase && Values.size() < 2) + return false; // TODO: Preserve branch weight metadata, similarly to how // FoldValueComparisonIntoPredecessors preserves it. // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); - BasicBlock *EdgeBB = BI->getSuccessor(0); - if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) + std::swap(DefaultBB, EdgeBB); BasicBlock *BB = BI->getParent(); DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() - << " cases into SWITCH. BB is:\n" << *BB); + << " cases into SWITCH. BB is:\n" + << *BB); // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. 
Split the block @@ -3216,7 +3323,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, AddPredecessorToBlock(EdgeBB, BB, NewBB); DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase - << "\nEXTRABB = " << *BB); + << "\nEXTRABB = " << *BB); BB = NewBB; } @@ -3237,11 +3344,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. - for (BasicBlock::iterator BBI = EdgeBB->begin(); - isa<PHINode>(BBI); ++BBI) { + for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) { PHINode *PN = cast<PHINode>(BBI); Value *InVal = PN->getIncomingValueForBlock(BB); - for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) PN->addIncoming(InVal, BB); } @@ -3270,7 +3376,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { // Check that there are no other instructions except for debug intrinsics // between the phi of landing pads (RI->getValue()) and resume instruction. BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), - E = RI->getIterator(); + E = RI->getIterator(); while (++I != E) if (!isa<DbgInfoIntrinsic>(I)) return false; @@ -3279,8 +3385,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { auto *PhiLPInst = cast<PHINode>(RI->getValue()); // Check incoming blocks to see if any of them are trivial. - for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); - Idx != End; Idx++) { + for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End; + Idx++) { auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); @@ -3289,8 +3395,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { if (IncomingBB->getUniqueSuccessor() != BB) continue; - auto *LandingPad = - dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); + auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI()); // Not the landing pad that caused the control to branch here. if (IncomingValue != LandingPad) continue; @@ -3310,7 +3415,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { } // If no trivial unwind blocks, don't do any simplifications. - if (TrivialUnwindBlocks.empty()) return false; + if (TrivialUnwindBlocks.empty()) + return false; // Turn all invokes that unwind here into calls. for (auto *TrivialBB : TrivialUnwindBlocks) { @@ -3346,8 +3452,8 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); - assert (RI->getValue() == LPInst && - "Resume must unwind the exception that caused control to here"); + assert(RI->getValue() == LPInst && + "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); @@ -3363,10 +3469,12 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { // The landingpad is now unreachable. Zap it. 
BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); return true; } -bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { +static bool removeEmptyCleanup(CleanupReturnInst *RI) { // If this is a trivial cleanup pad that executes no instructions, it can be // eliminated. If the cleanup pad continues to the caller, any predecessor // that is an EH pad will be updated to continue to the caller and any @@ -3381,12 +3489,29 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { // This isn't an empty cleanup. return false; - // Check that there are no other instructions except for debug intrinsics. + // We cannot kill the pad if it has multiple uses. This typically arises + // from unreachable basic blocks. + if (!CPInst->hasOneUse()) + return false; + + // Check that there are no other instructions except for benign intrinsics. BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) + while (++I != E) { + auto *II = dyn_cast<IntrinsicInst>(I); + if (!II) return false; + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + // If the cleanup return we are simplifying unwinds to the caller, this will // set UnwindDest to nullptr. BasicBlock *UnwindDest = RI->getUnwindDest(); @@ -3430,7 +3555,7 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { // removing, we need to merge that PHI node's incoming values into // DestPN. for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); - SrcIdx != SrcE; ++SrcIdx) { + SrcIdx != SrcE; ++SrcIdx) { DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), SrcPN->getIncomingBlock(SrcIdx)); } @@ -3484,13 +3609,63 @@ bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { return true; } +// Try to merge two cleanuppads together. +static bool mergeCleanupPad(CleanupReturnInst *RI) { + // Skip any cleanuprets which unwind to caller; there is nothing to merge + // with. + BasicBlock *UnwindDest = RI->getUnwindDest(); + if (!UnwindDest) + return false; + + // If this cleanupret isn't the only predecessor of this cleanuppad, it + // wouldn't be safe to merge without code duplication. + if (UnwindDest->getSinglePredecessor() != RI->getParent()) + return false; + + // Verify that our cleanuppad's unwind destination is another cleanuppad. + auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front()); + if (!SuccessorCleanupPad) + return false; + + CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); + // Replace any uses of the successor cleanuppad with the predecessor pad. + // The only uses of the successor cleanuppad should be its cleanupret and + // funclet bundle operands. + SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad); + // Remove the old cleanuppad. + SuccessorCleanupPad->eraseFromParent(); + // Now, we simply replace the cleanupret with a branch to the unwind + // destination. + BranchInst::Create(UnwindDest, RI->getParent()); + RI->eraseFromParent(); + + return true; +} + +bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { + // It is possible to transiently have an undef cleanuppad operand because we + // have deleted some, but not all, dead blocks. + // Eventually, this block will be deleted.
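The intrinsic whitelist above is the heart of removeEmptyCleanup: debug intrinsics and lifetime.end markers do not make a cleanup worth keeping. The same test, factored into a helper for clarity (a sketch against the LLVM API of this era; the helper name is invented):

    // Return true if everything strictly between the cleanuppad and its
    // cleanupret is ignorable when deciding whether the pad is empty.
    static bool isBenignCleanupBody(CleanupPadInst *Pad, CleanupReturnInst *Ret) {
      BasicBlock::iterator I = Pad->getIterator(), E = Ret->getIterator();
      while (++I != E) {
        auto *II = dyn_cast<IntrinsicInst>(I);
        if (!II)
          return false; // a real instruction: the cleanup does work
        switch (II->getIntrinsicID()) {
        case Intrinsic::dbg_declare:
        case Intrinsic::dbg_value:
        case Intrinsic::lifetime_end:
          break; // no unwind-time semantics
        default:
          return false;
        }
      }
      return true;
    }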
+ if (isa<UndefValue>(RI->getOperand(0))) + return false; + + if (mergeCleanupPad(RI)) + return true; + + if (removeEmptyCleanup(RI)) + return true; + + return false; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); - if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) + return false; // Find predecessors that end with branches. - SmallVector<BasicBlock*, 8> UncondBranchPreds; - SmallVector<BranchInst*, 8> CondBranchPreds; + SmallVector<BasicBlock *, 8> UncondBranchPreds; + SmallVector<BranchInst *, 8> CondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; TerminatorInst *PTI = P->getTerminator(); @@ -3507,14 +3682,17 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); + << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } // If we eliminated all predecessors of the block, delete the block now. - if (pred_empty(BB)) + if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); + } return true; } @@ -3547,7 +3725,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // Do not delete instructions that can have side effects which might cause // the unreachable to not be reachable; specifically, calls and volatile // operations may have this effect. - if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; + if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) + break; if (BBI->mayHaveSideEffects()) { if (auto *SI = dyn_cast<StoreInst>(BBI)) { @@ -3589,9 +3768,10 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. - if (&BB->front() != UI) return Changed; + if (&BB->front() != UI) + return Changed; - SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); IRBuilder<> Builder(TI); @@ -3613,12 +3793,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) if (i.getCaseSuccessor() == BB) { BB->removePredecessor(SI->getParent()); SI->removeCase(i); - --i; --e; + --i; + --e; Changed = true; } } else if (auto *II = dyn_cast<InvokeInst>(TI)) { @@ -3667,10 +3848,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } // If this block is now dead, remove it. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) { + if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. BB->eraseFromParent(); + if (LoopHeaders) + LoopHeaders->erase(BB); return true; } @@ -3699,25 +3881,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { // Partition the cases into two sets with different destinations. 
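The TurnSwitchRangeIntoICmp code that follows collapses a switch whose cases for one destination form a contiguous run into a subtract plus a single unsigned compare. In C terms (a sketch, not the patch's code):

    int before(int x) {
      switch (x) { // cases 3..5 share one destination
      case 3: case 4: case 5:
        return 1;
      default:
        return 0;
      }
    }

    int after(int x) {
      // Offset by the first case, then one unsigned compare covers the run:
      // values below 3 wrap to large unsigned numbers and fail the test.
      return ((unsigned)x - 3u) < 3u ? 1 : 0;
    }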
BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; - SmallVector <ConstantInt *, 16> CasesA; - SmallVector <ConstantInt *, 16> CasesB; + SmallVector<ConstantInt *, 16> CasesA; + SmallVector<ConstantInt *, 16> CasesB; for (SwitchInst::CaseIt I : SI->cases()) { BasicBlock *Dest = I.getCaseSuccessor(); - if (!DestA) DestA = Dest; + if (!DestA) + DestA = Dest; if (Dest == DestA) { CasesA.push_back(I.getCaseValue()); continue; } - if (!DestB) DestB = Dest; + if (!DestB) + DestB = Dest; if (Dest == DestB) { CasesB.push_back(I.getCaseValue()); continue; } - return false; // More than two destinations. + return false; // More than two destinations. } - assert(DestA && DestB && "Single-destination switch should have been folded."); + assert(DestA && DestB && + "Single-destination switch should have been folded."); assert(DestA != DestB); assert(DestB != SI->getDefaultDest()); assert(!CasesB.empty() && "There must be non-default cases."); @@ -3741,7 +3926,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { // Start building the compare and branch. Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); - Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size()); + Constant *NumCases = + ConstantInt::get(Offset->getType(), ContiguousCases->size()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) @@ -3773,21 +3959,24 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { FalseWeight /= 2; } NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()).createBranchWeights( - (uint32_t)TrueWeight, (uint32_t)FalseWeight)); + MDBuilder(SI->getContext()) + .createBranchWeights((uint32_t)TrueWeight, + (uint32_t)FalseWeight)); } } // Prune obsolete incoming values off the successors' PHI nodes. for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { unsigned PreviousEdges = ContiguousCases->size(); - if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges; + if (ContiguousDest == SI->getDefaultDest()) + ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); - if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; + if (OtherDest == SI->getDefaultDest()) + ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } @@ -3807,32 +3996,38 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, APInt KnownZero(Bits, 0), KnownOne(Bits, 0); computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + // We can also eliminate cases by determining that their values are outside of + // the limited range of the condition based on how many significant (non-sign) + // bits are in the condition value. + unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; + unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; + // Gather dead cases. 
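The new ComputeNumSignBits test above gives EliminateDeadSwitchCases a second way to prove a case dead, alongside the known-bits test. A self-contained arithmetic mirror of both tests, with 64-bit masks standing in for APInt (names are illustrative):

    #include <cstdint>

    // KnownZero/KnownOne: bits proven zero/one in the switch condition.
    // MaxSignificantBits: the condition's width minus its redundant sign bits.
    static bool caseIsDead(int64_t CaseVal, uint64_t KnownZero,
                           uint64_t KnownOne, unsigned MaxSignificantBits) {
      uint64_t V = (uint64_t)CaseVal;
      if (V & KnownZero)
        return true; // the case value sets a bit known to be zero
      if ((V & KnownOne) != KnownOne)
        return true; // the case value clears a bit known to be one
      // Minimum signed bits needed for CaseVal: shrink while the top two
      // bits agree, i.e. while the sign bit is redundant.
      unsigned MinSignedBits = 64;
      while (MinSignedBits > 1 && ((V >> (MinSignedBits - 1)) & 1) ==
                                      ((V >> (MinSignedBits - 2)) & 1))
        --MinSignedBits;
      return MinSignedBits > MaxSignificantBits; // value cannot occur at all
    }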
- SmallVector<ConstantInt*, 8> DeadCases; - for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { - if ((I.getCaseValue()->getValue() & KnownZero) != 0 || - (I.getCaseValue()->getValue() & KnownOne) != KnownOne) { - DeadCases.push_back(I.getCaseValue()); - DEBUG(dbgs() << "SimplifyCFG: switch case '" - << I.getCaseValue() << "' is dead.\n"); + SmallVector<ConstantInt *, 8> DeadCases; + for (auto &Case : SI->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { + DeadCases.push_back(Case.getCaseValue()); + DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } } - // If we can prove that the cases must cover all possible values, the - // default destination becomes dead and we can remove it. If we know some + // If we can prove that the cases must cover all possible values, the + // default destination becomes dead and we can remove it. If we know some // of the bits in the value, we can use that to more precisely compute the // number of possible unique case values. bool HasDefault = - !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - const unsigned NumUnknownBits = Bits - - (KnownZero.Or(KnownOne)).countPopulation(); + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const unsigned NumUnknownBits = + Bits - (KnownZero.Or(KnownOne)).countPopulation(); assert(NumUnknownBits <= Bits); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && + NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(), - SI->getParent(), ""); + BasicBlock *NewDefault = + SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); SI->setDefaultDest(&*NewDefault); SplitBlock(&*NewDefault, &NewDefault->front()); auto *OldTI = NewDefault->getTerminator(); @@ -3849,12 +4044,12 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, } // Remove dead cases from the switch. - for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { - SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); + for (ConstantInt *DeadCase : DeadCases) { + SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase); assert(Case != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); if (HasWeight) { - std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + std::swap(Weights[Case.getCaseIndex() + 1], Weights.back()); Weights.pop_back(); } @@ -3865,8 +4060,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()). - createBranchWeights(MDWeights)); + MDBuilder(SI->getParent()->getContext()) + .createBranchWeights(MDWeights)); } return !DeadCases.empty(); @@ -3878,8 +4073,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, /// block and see if the incoming value is equal to CaseValue. If so, return /// the phi node, and set PhiIndex to BB's index in the phi node. 
static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, - BasicBlock *BB, - int *PhiIndex) { + BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) @@ -3897,7 +4091,8 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, assert(Idx >= 0 && "PHI has no entry for predecessor?"); Value *InValue = PHI->getIncomingValue(Idx); - if (InValue != CaseValue) continue; + if (InValue != CaseValue) + continue; *PhiIndex = Idx; return PHI; @@ -3911,17 +4106,19 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, /// blocks of the switch can be folded away. /// Returns true if a change is made. static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { - typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap; + typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap; ForwardingNodesMap ForwardingNodes; - for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; + ++I) { ConstantInt *CaseValue = I.getCaseValue(); BasicBlock *CaseDest = I.getCaseSuccessor(); int PhiIndex; - PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, - &PhiIndex); - if (!PHI) continue; + PHINode *PHI = + FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex); + if (!PHI) + continue; ForwardingNodes[PHI].push_back(PhiIndex); } @@ -3929,11 +4126,13 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { bool Changed = false; for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), - E = ForwardingNodes.end(); I != E; ++I) { + E = ForwardingNodes.end(); + I != E; ++I) { PHINode *Phi = I->first; SmallVectorImpl<int> &Indexes = I->second; - if (Indexes.size() < 2) continue; + if (Indexes.size() < 2) + continue; for (size_t I = 0, E = Indexes.size(); I != E; ++I) Phi->setIncomingValue(Indexes[I], SI->getCondition()); @@ -3954,17 +4153,16 @@ static bool ValidLookupTableConstant(Constant *C) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) return CE->isGEPWithNoNotionalOverIndexing(); - return isa<ConstantFP>(C) || - isa<ConstantInt>(C) || - isa<ConstantPointerNull>(C) || - isa<GlobalValue>(C) || - isa<UndefValue>(C); + return isa<ConstantFP>(C) || isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || isa<GlobalValue>(C) || + isa<UndefValue>(C); } /// If V is a Constant, return it. Otherwise, try to look up /// its constant value in ConstantPool, returning 0 if it's not there. -static Constant *LookupConstant(Value *V, - const SmallDenseMap<Value*, Constant*>& ConstantPool) { +static Constant * +LookupConstant(Value *V, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { if (Constant *C = dyn_cast<Constant>(V)) return C; return ConstantPool.lookup(V); @@ -4001,7 +4199,7 @@ ConstantFold(Instruction *I, const DataLayout &DL, COps[1], DL); } - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); + return ConstantFoldInstOperands(I, COps, DL); } /// Try to determine the resulting constant values in phi nodes @@ -4018,7 +4216,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, // If CaseDest is empty except for some side-effect free instructions through // which we can constant-propagate the CaseVal, continue to its successor. 
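ForwardSwitchConditionToPHI, shown above, relies on a simple observation: when the constant a case forwards into a phi equals the case value itself, the phi may as well receive the switch condition. At the source level (illustrative sketch):

    int before(int x) {
      int y;
      switch (x) {
      case 1: y = 1; break; // forwarded constant == case value
      case 7: y = 7; break; // likewise
      default: y = 0; break;
      }
      return y;
    }

    int after(int x) {
      // Both incoming values become the condition itself; the now-empty
      // case blocks can then be folded away by later cleanups.
      return (x == 1 || x == 7) ? x : 0;
    }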
- SmallDenseMap<Value*, Constant*> ConstantPool; + SmallDenseMap<Value *, Constant *> ConstantPool; ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; ++I) { @@ -4068,8 +4266,8 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, if (Idx == -1) continue; - Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), - ConstantPool); + Constant *ConstVal = + LookupConstant(PHI->getIncomingValue(Idx), ConstantPool); if (!ConstVal) return false; @@ -4086,16 +4284,16 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, // Helper function used to add CaseVal to the list of cases that generate // Result. static void MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); return; } } - UniqueResults.push_back(std::make_pair(Result, - SmallVector<ConstantInt*, 4>(1, CaseVal))); + UniqueResults.push_back( + std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); } // Helper function that initializes a map containing @@ -4137,7 +4335,7 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, DefaultResult = DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr; if ((!DefaultResult && - !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()))) + !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()))) return false; return true; @@ -4154,12 +4352,11 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, // default: // return 4; // } -static Value * -ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, - Constant *DefaultResult, Value *Condition, - IRBuilder<> &Builder) { +static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, + Constant *DefaultResult, Value *Condition, + IRBuilder<> &Builder) { assert(ResultVector.size() == 2 && - "We should have exactly two unique results at this point"); + "We should have exactly two unique results at this point"); // If we are selecting between only two cases transform into a simple // select or a two-way select if default is possible. if (ResultVector[0].second.size() == 1 && @@ -4177,8 +4374,8 @@ ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, } Value *const ValueCompare = Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp"); - return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue, - "switch.select"); + return Builder.CreateSelect(ValueCompare, ResultVector[0].first, + SelectValue, "switch.select"); } return nullptr; @@ -4227,9 +4424,8 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, assert(PHI != nullptr && "PHI for value select not found"); Builder.SetInsertPoint(SI); - Value *SelectValue = ConvertTwoCaseSwitch( - UniqueResults, - DefaultResult, Cond, Builder); + Value *SelectValue = + ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); if (SelectValue) { RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder); return true; @@ -4239,62 +4435,62 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, } namespace { - /// This class represents a lookup table that can be used to replace a switch. 
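ConvertTwoCaseSwitch, defined above, handles the degenerate switch with exactly two distinct results. A sketch of the effect, assuming one case per result and a reachable default:

    int before(int x, int a, int b, int c) {
      switch (x) {
      case 10: return a;
      case 20: return b;
      default: return c;
      }
    }

    int after(int x, int a, int b, int c) {
      int tmp = (x == 20) ? b : c; // default arm folded into a select
      return (x == 10) ? a : tmp;  // the "switch.select" result
    }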
- class SwitchLookupTable { - public: - /// Create a lookup table to use as a switch replacement with the contents - /// of Values, using DefaultValue to fill any holes in the table. - SwitchLookupTable( - Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL); - - /// Build instructions with Builder to retrieve the value at - /// the position given by Index in the lookup table. - Value *BuildLookup(Value *Index, IRBuilder<> &Builder); - - /// Return true if a table with TableSize elements of - /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - Type *ElementType); - - private: - // Depending on the contents of the table, it can be represented in - // different ways. - enum { - // For tables where each element contains the same value, we just have to - // store that single value and return it for each lookup. - SingleValueKind, - - // For tables where there is a linear relationship between table index - // and values. We calculate the result with a simple multiplication - // and addition instead of a table lookup. - LinearMapKind, - - // For small tables with integer elements, we can pack them into a bitmap - // that fits into a target-legal register. Values are retrieved by - // shift and mask operations. - BitMapKind, - - // The table is stored as an array of values. Values are retrieved by load - // instructions from the table. - ArrayKind - } Kind; - - // For SingleValueKind, this is the single value. - Constant *SingleValue; - - // For BitMapKind, this is the bitmap. - ConstantInt *BitMap; - IntegerType *BitMapElementTy; - - // For LinearMapKind, these are the constants used to derive the value. - ConstantInt *LinearOffset; - ConstantInt *LinearMultiplier; - - // For ArrayKind, this is the array. - GlobalVariable *Array; - }; +/// This class represents a lookup table that can be used to replace a switch. +class SwitchLookupTable { +public: + /// Create a lookup table to use as a switch replacement with the contents + /// of Values, using DefaultValue to fill any holes in the table. + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL); + + /// Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, + Type *ElementType); + +private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For tables where there is a linear relationship between table index + // and values. We calculate the result with a simple multiplication + // and addition instead of a table lookup. + LinearMapKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. 
Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For LinearMapKind, these are the constants used to derive the value. + ConstantInt *LinearOffset; + ConstantInt *LinearMultiplier; + + // For ArrayKind, this is the array. + GlobalVariable *Array; +}; } SwitchLookupTable::SwitchLookupTable( @@ -4312,14 +4508,13 @@ SwitchLookupTable::SwitchLookupTable( Type *ValueType = Values.begin()->second->getType(); // Build up the table contents. - SmallVector<Constant*, 64> TableContents(TableSize); + SmallVector<Constant *, 64> TableContents(TableSize); for (size_t I = 0, E = Values.size(); I != E; ++I) { ConstantInt *CaseVal = Values[I].first; Constant *CaseRes = Values[I].second; assert(CaseRes->getType() == ValueType); - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) - .getLimitedValue(); + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) @@ -4407,65 +4602,62 @@ SwitchLookupTable::SwitchLookupTable( ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, + Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, + GlobalVariable::PrivateLinkage, Initializer, "switch.table"); - Array->setUnnamedAddr(true); + Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); Kind = ArrayKind; } Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { switch (Kind) { - case SingleValueKind: - return SingleValue; - case LinearMapKind: { - // Derive the result value from the input value. - Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), - false, "switch.idx.cast"); - if (!LinearMultiplier->isOne()) - Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); - if (!LinearOffset->isZero()) - Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); - return Result; - } - case BitMapKind: { - // Type of the bitmap (e.g. i59). - IntegerType *MapTy = BitMap->getType(); - - // Cast Index to the same type as the bitmap. - // Note: The Index is <= the number of elements in the table, so - // truncating it to the width of the bitmask is safe. - Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); - - // Multiply the shift amount by the element width. - ShiftAmt = Builder.CreateMul(ShiftAmt, - ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), - "switch.shiftamt"); - - // Shift down. - Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, - "switch.downshift"); - // Mask off. - return Builder.CreateTrunc(DownShifted, BitMapElementTy, - "switch.masked"); - } - case ArrayKind: { - // Make sure the table index will not overflow when treated as signed. 
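Of the four table kinds declared above, LinearMapKind and BitMapKind never touch memory. A standalone model of the arithmetic each one emits (element width and signatures are illustrative; assumes ElemBits < 64):

    #include <cstdint>

    // LinearMapKind: results are Offset + Multiplier * index, so no table.
    static int64_t linearLookup(int64_t Offset, int64_t Mult, uint64_t Index) {
      return Offset + Mult * (int64_t)Index;
    }

    // BitMapKind: the whole table packed into one register; element i is
    // fetched by shift and mask ("switch.shiftamt" / "switch.masked").
    static uint64_t bitmapLookup(uint64_t BitMap, unsigned ElemBits,
                                 uint64_t Index) {
      uint64_t Shifted = BitMap >> (Index * ElemBits);
      return Shifted & ((1ULL << ElemBits) - 1);
    }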
- IntegerType *IT = cast<IntegerType>(Index->getType()); - uint64_t TableSize = Array->getInitializer()->getType() - ->getArrayNumElements(); - if (TableSize > (1ULL << (IT->getBitWidth() - 1))) - Index = Builder.CreateZExt(Index, - IntegerType::get(IT->getContext(), - IT->getBitWidth() + 1), - "switch.tableidx.zext"); - - Value *GEPIndices[] = { Builder.getInt32(0), Index }; - Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, - GEPIndices, "switch.gep"); - return Builder.CreateLoad(GEP, "switch.load"); - } + case SingleValueKind: + return SingleValue; + case LinearMapKind: { + // Derive the result value from the input value. + Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), + false, "switch.idx.cast"); + if (!LinearMultiplier->isOne()) + Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); + if (!LinearOffset->isZero()) + Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); + return Result; + } + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul( + ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = + Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); + } + case ArrayKind: { + // Make sure the table index will not overflow when treated as signed. + IntegerType *IT = cast<IntegerType>(Index->getType()); + uint64_t TableSize = + Array->getInitializer()->getType()->getArrayNumElements(); + if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + Index = Builder.CreateZExt( + Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), + "switch.tableidx.zext"); + + Value *GEPIndices[] = {Builder.getInt32(0), Index}; + Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, + GEPIndices, "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } } llvm_unreachable("Unknown lookup table kind!"); } @@ -4480,7 +4672,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, // are <= 15, we could try to narrow the type. // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. - if (TableSize >= UINT_MAX/IT->getBitWidth()) + if (TableSize >= UINT_MAX / IT->getBitWidth()) return false; return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } @@ -4503,8 +4695,9 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); // Saturate this flag to false. - AllTablesFitInRegister = AllTablesFitInRegister && - SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); + AllTablesFitInRegister = + AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); // If both flags saturate, we're done. NOTE: This *only* works with // saturating flags, and all flags have to saturate first due to the @@ -4547,9 +4740,10 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, /// ... /// \endcode /// Jump threading will then eliminate the second if(cond). 
-static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, - BranchInst *RangeCheckBranch, Constant *DefaultValue, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values) { +static void reuseTableCompare( + User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, + Constant *DefaultValue, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); if (!CmpInst) @@ -4578,13 +4772,13 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, // compare result. for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), - ValuePair.second, CmpOp1, true); + ValuePair.second, CmpOp1, true); if (!CaseConst || CaseConst == DefaultConst) return; assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); } - + // Check if the branch instruction dominates the phi node. It's a simple // dominance check, but sufficient for our needs. // Although this check is invariant in the calling loops, it's better to do it @@ -4602,9 +4796,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, ++NumTableCmpReuses; } else { // The compare yields the same result, just inverted. We can replace it. - Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp, - ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", - RangeCheckBranch); + Value *InvertedTableCmp = BinaryOperator::CreateXor( + RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", + RangeCheckBranch); CmpInst->replaceAllUsesWith(InvertedTableCmp); ++NumTableCmpReuses; } @@ -4629,7 +4823,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore switches with less than three cases. Lookup tables will not make them + // Ignore switches with less than three cases. Lookup tables will not make + // them // faster, so we don't analyze them. if (SI->getNumCases() < 3) return false; @@ -4642,11 +4837,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, ConstantInt *MaxCaseVal = CI.getCaseValue(); BasicBlock *CommonDest = nullptr; - typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; - SmallDenseMap<PHINode*, ResultListTy> ResultLists; - SmallDenseMap<PHINode*, Constant*> DefaultResults; - SmallDenseMap<PHINode*, Type*> ResultTypes; - SmallVector<PHINode*, 4> PHIs; + typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy; + SmallDenseMap<PHINode *, ResultListTy> ResultLists; + SmallDenseMap<PHINode *, Constant *> DefaultResults; + SmallDenseMap<PHINode *, Type *> ResultTypes; + SmallVector<PHINode *, 4> PHIs; for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { ConstantInt *CaseVal = CI.getCaseValue(); @@ -4656,7 +4851,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. - typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, Results, DL)) @@ -4684,14 +4879,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. 
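reuseTableCompare, reformatted above, is clearest with concrete values: when every table entry compares the same way against the user's constant, the compare after the lookup collapses to the range check that already guards it. A sketch under that assumption:

    static const int Table[3] = {4, 5, 6}; // no entry equals DefaultVal

    int lookup(int x) {
      const int MinCase = 10, DefaultVal = 0;
      unsigned Idx = (unsigned)(x - MinCase);
      bool InRange = Idx < 3;                // the existing range check
      int R = InRange ? Table[Idx] : DefaultVal;
      // User code tests (R != DefaultVal). Because Table[i] != DefaultVal
      // for every i, that compare is exactly InRange, so it is rewritten
      // to reuse it (or its XOR'd inversion for the opposite predicate).
      bool Cmp = InRange;                    // instead of: R != DefaultVal
      return Cmp ? R : -1;
    }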
- SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList; bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. - if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). + if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). return false; if (!DL.fitsInLegalInteger(TableSize)) return false; @@ -4708,15 +4903,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); - BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), - "switch.lookup", - CommonDest->getParent(), - CommonDest); + BasicBlock *LookupBB = BasicBlock::Create( + Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); // Compute the table index value. Builder.SetInsertPoint(SI); - Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, - "switch.tableidx"); + Value *TableIndex = + Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -4739,9 +4932,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. } else { - Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); - RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + Value *Cmp = Builder.CreateICmpULT( + TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); + RangeCheckBranch = + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); } // Populate the BB that does the lookups. @@ -4753,10 +4947,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // and we create a new LookupBB. BasicBlock *MaskBB = LookupBB; MaskBB->setName("switch.hole_check"); - LookupBB = BasicBlock::Create(Mod.getContext(), - "switch.lookup", - CommonDest->getParent(), - CommonDest); + LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", + CommonDest->getParent(), CommonDest); // Make the mask's bitwidth at least 8bit and a power-of-2 to avoid // unnecessary illegal types. @@ -4766,8 +4958,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Build bitmask; fill in a 1 bit for every case. const ResultListTy &ResultList = ResultLists[PHIs[0]]; for (size_t I = 0, E = ResultList.size(); I != E; ++I) { - uint64_t Idx = (ResultList[I].first->getValue() - - MinCaseVal->getValue()).getLimitedValue(); + uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue()) + .getLimitedValue(); MaskInt |= One << Idx; } ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); @@ -4776,13 +4968,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If this bit is 0 (meaning hole) jump to the default destination, // else continue with table lookup. 
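When the table has holes and no usable default result, the surrounding hunk builds a bitmap of valid slots and branches on one bit of it before the lookup. A minimal model of that mask, assuming at most 64 slots:

    #include <cstdint>

    // One bit per table slot, set when the slot holds a real case result.
    static uint64_t buildHoleMask(const uint64_t *CaseVals, unsigned NumCases,
                                  uint64_t MinCaseVal) {
      uint64_t Mask = 0;
      for (unsigned i = 0; i != NumCases; ++i)
        Mask |= 1ULL << (CaseVals[i] - MinCaseVal);
      return Mask;
    }

    // The "switch.lobit" test: a cleared bit is a hole, so take the default.
    static bool hasEntry(uint64_t Mask, uint64_t Index) {
      return (Mask >> Index) & 1;
    }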
IntegerType *MapTy = TableMask->getType(); - Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, - "switch.maskindex"); - Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, - "switch.shifted"); - Value *LoBit = Builder.CreateTrunc(Shifted, - Type::getInt1Ty(Mod.getContext()), - "switch.lobit"); + Value *MaskIndex = + Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); + Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); + Value *LoBit = Builder.CreateTrunc( + Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); Builder.SetInsertPoint(LookupBB); @@ -4905,7 +5095,8 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { Dest->removePredecessor(BB); IBI->removeDestination(i); - --i; --e; + --i; + --e; Changed = true; } } @@ -4968,27 +5159,28 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); if (!LPad2 || !LPad2->isIdenticalTo(LPad)) continue; - for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + for (++I; isa<DbgInfoIntrinsic>(I); ++I) { + } BranchInst *BI2 = dyn_cast<BranchInst>(I); if (!BI2 || !BI2->isIdenticalTo(BI)) continue; - // We've found an identical block. Update our predeccessors to take that + // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. SmallSet<BasicBlock *, 16> Preds; Preds.insert(pred_begin(BB), pred_end(BB)); for (BasicBlock *Pred : Preds) { InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); - assert(II->getNormalDest() != BB && - II->getUnwindDest() == BB && "unexpected successor"); + assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && + "unexpected successor"); II->setUnwindDest(OtherPred); } // The debug info in OtherPred doesn't cover the merged control flow that // used to go through BB. We need to delete it or update it. - for (auto I = OtherPred->begin(), E = OtherPred->end(); - I != E;) { - Instruction &Inst = *I; I++; + for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) { + Instruction &Inst = *I; + I++; if (isa<DbgInfoIntrinsic>(Inst)) Inst.eraseFromParent(); } @@ -5007,15 +5199,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, return false; } -bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, + IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. + // If LoopHeaders is provided, check whether the block is a loop header. + // (This is for early invocations, before loop simplify and vectorization, + // to keep canonical loop forms for nested loops. These blocks can be + // eliminated when the pass is invoked later in the back end.) BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + (!LoopHeaders || !LoopHeaders->count(BB)) && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -5034,9 +5233,9 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // See if we can merge an empty landing pad block with another which is // equivalent.
if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { - for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} - if (I->isTerminator() && - TryToMergeLandingPad(LPad, BI, BB)) + for (++I; isa<DbgInfoIntrinsic>(I); ++I) { + } + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) return true; } @@ -5081,7 +5280,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - } else if (&*I == cast<Instruction>(BI->getCondition())){ + } else if (&*I == cast<Instruction>(BI->getCondition())) { ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) @@ -5095,6 +5294,30 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SimplifyBranchOnICmpChain(BI, Builder, DL)) return true; + // If this basic block has a single dominating predecessor block and the + // dominating block's condition implies BI's condition, we know the direction + // of the BI branch. + if (BasicBlock *Dom = BB->getSinglePredecessor()) { + auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator()); + if (PBI && PBI->isConditional() && + PBI->getSuccessor(0) != PBI->getSuccessor(1) && + (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) { + bool CondIsFalse = PBI->getSuccessor(1) == BB; + Optional<bool> Implication = isImpliedCondition( + PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); + if (Implication) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + ConstantInt *CI = *Implication + ? ConstantInt::getTrue(BB->getContext()) + : ConstantInt::getFalse(BB->getContext()); + BI->setCondition(CI); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + } + } + } + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. @@ -5149,7 +5372,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PBI != BI && PBI->isConditional()) if (mergeConditionalStores(PBI, BI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - + return false; } @@ -5162,7 +5385,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (I->use_empty()) return false; - if (C->isNullValue()) { + if (C->isNullValue() || isa<UndefValue>(C)) { // Only look at the first use, avoid hurting compile time with long uselists User *Use = *I->user_begin(); @@ -5189,7 +5412,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Store to null is undefined. if (StoreInst *SI = dyn_cast<StoreInst>(Use)) if (!SI->isVolatile()) - return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + return SI->getPointerAddressSpace() == 0 && + SI->getPointerOperand() == I; + + // A call to null is undefined. + if (auto CS = CallSite(Use)) + return CS.getCalledValue() == I; } return false; } @@ -5210,8 +5438,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { if (BI->isUnconditional()) Builder.CreateUnreachable(); else - Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : - BI->getSuccessor(0)); + Builder.CreateBr(BI->getSuccessor(0) == BB ? 
BI->getSuccessor(1) + : BI->getSuccessor(0)); BI->eraseFromParent(); return true; } @@ -5229,8 +5457,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Remove basic blocks that have no predecessors (except the entry block)... // or that just have themself as a predecessor. These are unreachable. - if ((pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) || + if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); @@ -5265,25 +5492,33 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { if (BI->isUnconditional()) { - if (SimplifyUncondBranch(BI, Builder)) return true; + if (SimplifyUncondBranch(BI, Builder)) + return true; } else { - if (SimplifyCondBranch(BI, Builder)) return true; + if (SimplifyCondBranch(BI, Builder)) + return true; } } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - if (SimplifyReturn(RI, Builder)) return true; + if (SimplifyReturn(RI, Builder)) + return true; } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { - if (SimplifyResume(RI, Builder)) return true; + if (SimplifyResume(RI, Builder)) + return true; } else if (CleanupReturnInst *RI = - dyn_cast<CleanupReturnInst>(BB->getTerminator())) { - if (SimplifyCleanupReturn(RI)) return true; + dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (SimplifyCleanupReturn(RI)) + return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (SimplifySwitch(SI, Builder)) return true; + if (SimplifySwitch(SI, Builder)) + return true; } else if (UnreachableInst *UI = - dyn_cast<UnreachableInst>(BB->getTerminator())) { - if (SimplifyUnreachable(UI)) return true; + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) + return true; } else if (IndirectBrInst *IBI = - dyn_cast<IndirectBrInst>(BB->getTerminator())) { - if (SimplifyIndirectBr(IBI)) return true; + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if (SimplifyIndirectBr(IBI)) + return true; } return Changed; @@ -5295,7 +5530,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. 
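One addition above deserves a concrete picture: the new isImpliedCondition check in SimplifyCondBranch folds a conditional branch whose direction is fixed by a dominating branch. In C terms (sketch):

    void took_true();
    void took_false();

    void f(int x) {
      if (x > 10) {
        // The only path here is through (x > 10), which implies (x > 5),
        // so the inner branch becomes a branch on constant true and the
        // dead arm falls away.
        if (x > 5)
          took_true();
        else
          took_false(); // provably unreachable
      }
    }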
/// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC) { + unsigned BonusInstThreshold, AssumptionCache *AC, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC).run(BB); + BonusInstThreshold, AC, LoopHeaders) + .run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ddd8775a8431d..6b1d3dc413305 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -25,7 +25,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -71,14 +70,12 @@ namespace { bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); - - Instruction *splitOverflowIntrinsic(Instruction *IVUser, - const DominatorTree *DT); }; } @@ -183,9 +180,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { DeadInsts.emplace_back(ICmp); DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (isa<PHINode>(IVOperand) && - SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop, - InvariantPredicate, InvariantLHS, - InvariantRHS)) { + SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, + InvariantLHS, InvariantRHS)) { // Rewrite the comparison to a loop invariant comparison if it can be done // cheaply, where cheaply means "we don't need to emit any new @@ -201,9 +197,48 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { NewRHS = ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); - for (Value *Incoming : cast<PHINode>(IVOperand)->incoming_values()) { - if (NewLHS && NewRHS) - break; + auto *PN = cast<PHINode>(IVOperand); + for (unsigned i = 0, e = PN->getNumIncomingValues(); + i != e && (!NewLHS || !NewRHS); + ++i) { + + // If this is a value incoming from the backedge, then it cannot be a loop + // invariant value (since we know that IVOperand is an induction variable). + if (L->contains(PN->getIncomingBlock(i))) + continue; + + // NB! This following assert does not fundamentally have to be true, but + // it is true today given how SCEV analyzes induction variables. + // Specifically, today SCEV will *not* recognize %iv as an induction + // variable in the following case: + // + // define void @f(i32 %k) { + // entry: + // br i1 undef, label %r, label %l + // + // l: + // %k.inc.l = add i32 %k, 1 + // br label %loop + // + // r: + // %k.inc.r = add i32 %k, 1 + // br label %loop + // + // loop: + // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ] + // %iv.inc = add i32 %iv, 1 + // br label %loop + // } + // + // but if it starts to, at some point, then the assertion below will have + // to be changed to a runtime check. 
+ + Value *Incoming = PN->getIncomingValue(i); + +#ifndef NDEBUG + if (auto *I = dyn_cast<Instruction>(Incoming)) + assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!"); +#endif const SCEV *IncomingS = SE->getSCEV(Incoming); @@ -280,6 +315,108 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DeadInsts.emplace_back(Rem); } +bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { + auto *F = CI->getCalledFunction(); + if (!F) + return false; + + typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( + const SCEV *, const SCEV *, SCEV::NoWrapFlags); + typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( + const SCEV *, Type *); + + OperationFunctionTy Operation; + ExtensionFunctionTy Extension; + + Instruction::BinaryOps RawOp; + + // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we + // have nuw. + bool NoSignedOverflow; + + switch (F->getIntrinsicID()) { + default: + return false; + + case Intrinsic::sadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = true; + break; + + case Intrinsic::uadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = false; + break; + + case Intrinsic::ssub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = true; + break; + + case Intrinsic::usub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = false; + break; + } + + const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0)); + const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1)); + + auto *NarrowTy = cast<IntegerType>(LHS->getType()); + auto *WideTy = + IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); + + const SCEV *A = + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + const SCEV *B = + (SE->*Operation)((SE->*Extension)(LHS, WideTy), + (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + + if (A != B) + return false; + + // Proved no overflow, nuke the overflow check and, if possible, the overflow + // intrinsic as well. + + BinaryOperator *NewResult = BinaryOperator::Create( + RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI); + + if (NoSignedOverflow) + NewResult->setHasNoSignedWrap(true); + else + NewResult->setHasNoUnsignedWrap(true); + + SmallVector<ExtractValueInst *, 4> ToDelete; + + for (auto *U : CI->users()) { + if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + if (EVI->getIndices()[0] == 1) + EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext())); + else { + assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); + EVI->replaceAllUsesWith(NewResult); + } + ToDelete.push_back(EVI); + } + } + + for (auto *EVI : ToDelete) + EVI->eraseFromParent(); + + if (CI->use_empty()) + CI->eraseFromParent(); + + return true; +} + /// Eliminate an operation that consumes a simple IV and has no observable /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, /// but UseInst may not be. 
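The proof strategy in eliminateOverflowIntrinsic above bears restating: evaluate the operation in a type twice as wide, and compare that against the narrow result re-extended; the two agree exactly when the narrow operation cannot wrap. SCEV performs this comparison symbolically, so it holds for every value the operands take. A concrete 32-bit model with plain integers standing in for SCEVs:

    #include <cstdint>

    // Returns true iff a + b wraps in 32-bit signed arithmetic, mirroring
    // the A != B test above for Intrinsic::sadd_with_overflow.
    static bool saddOverflows(int32_t a, int32_t b) {
      int64_t Wide = (int64_t)a + (int64_t)b;                // op(ext(a), ext(b))
      int32_t Narrow = (int32_t)((uint32_t)a + (uint32_t)b); // wrapping narrow add
      return Wide != (int64_t)Narrow;                        // vs. ext(op(a, b))
    }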
@@ -297,6 +434,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, } } + if (auto *CI = dyn_cast<CallInst>(UseInst)) + if (eliminateOverflowIntrinsic(CI)) + return true; + if (eliminateIdentitySCEV(UseInst, IVOperand)) return true; @@ -408,69 +549,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return Changed; } -/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow -/// analysis and optimization. -/// -/// \return A new value representing the non-overflowing add if possible, -/// otherwise return the original value. -Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, - const DominatorTree *DT) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser); - if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow) - return IVUser; - - // Find a branch guarded by the overflow check. - BranchInst *Branch = nullptr; - Instruction *AddVal = nullptr; - for (User *U : II->users()) { - if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) { - if (ExtractInst->getNumIndices() != 1) - continue; - if (ExtractInst->getIndices()[0] == 0) - AddVal = ExtractInst; - else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse()) - Branch = dyn_cast<BranchInst>(ExtractInst->user_back()); - } - } - if (!AddVal || !Branch) - return IVUser; - - BasicBlock *ContinueBB = Branch->getSuccessor(1); - if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB)) - return IVUser; - - // Check if all users of the add are provably NSW. - bool AllNSW = true; - for (Use &U : AddVal->uses()) { - if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) { - BasicBlock *UseBB = UseInst->getParent(); - if (PHINode *PHI = dyn_cast<PHINode>(UseInst)) - UseBB = PHI->getIncomingBlock(U); - if (!DT->dominates(ContinueBB, UseBB)) { - AllNSW = false; - break; - } - } - } - if (!AllNSW) - return IVUser; - - // Go for it... - IRBuilder<> Builder(IVUser); - Instruction *AddInst = dyn_cast<Instruction>( - Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1))); - - // The caller expects the new add to have the same form as the intrinsic. The - // IV operand position must be the same. - assert((AddInst->getOpcode() == Instruction::Add && - AddInst->getOperand(0) == II->getOperand(0)) && - "Bad add instruction created from overflow intrinsic."); - - AddVal->replaceAllUsesWith(AddInst); - DeadInsts.emplace_back(AddVal); - return AddInst; -} - /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( Instruction *Def, @@ -545,12 +623,6 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Bypass back edges to avoid extra work. 
if (UseInst == CurrIV) continue; - if (V && V->shouldSplitOverflowInstrinsics()) { - UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree()); - if (!UseInst) - continue; - } - Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { assert(N <= Simplified.size() && "runaway iteration"); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index d5377f9a4c1ff..df299067094f6 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -14,7 +14,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SimplifyInstructions.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -27,12 +27,60 @@ #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "instsimplify" STATISTIC(NumSimplified, "Number of redundant instructions removed"); +static bool runImpl(Function &F, const DominatorTree *DT, const TargetLibraryInfo *TLI, + AssumptionCache *AC) { + const DataLayout &DL = F.getParent()->getDataLayout(); + SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + bool Changed = false; + + do { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + Instruction *I = &*BI++; + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + // Don't waste time simplifying unused instructions. + if (!I->use_empty()) + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + // Mark all uses for resimplification next time round the loop. + for (User *U : I->users()) + Next->insert(cast<Instruction>(U)); + I->replaceAllUsesWith(V); + ++NumSimplified; + Changed = true; + } + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + Changed |= res; + } + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. + std::swap(ToSimplify, Next); + Next->clear(); + } while (!ToSimplify->empty()); + + return Changed; +} + namespace { struct InstSimplifier : public FunctionPass { static char ID; // Pass identification, replacement for typeid @@ -48,56 +96,17 @@ namespace { /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); const DominatorTree *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; - bool Changed = false; - - do { - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) - // Here be subtlety: the iterator must be incremented before the loop - // body (not sure why), so a range-for loop won't work here. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = &*BI++; - // The first time through the loop ToSimplify is empty and we try to - // simplify all instructions. On later iterations ToSimplify is not - // empty and we only bother simplifying instructions that are in it. - if (!ToSimplify->empty() && !ToSimplify->count(I)) - continue; - // Don't waste time simplifying unused instructions. - if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { - // Mark all uses for resimplification next time round the loop. - for (User *U : I->users()) - Next->insert(cast<Instruction>(U)); - I->replaceAllUsesWith(V); - ++NumSimplified; - Changed = true; - } - bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); - if (res) { - // RecursivelyDeleteTriviallyDeadInstruction can remove - // more than one instruction, so simply incrementing the - // iterator does not work. When instructions get deleted - // re-iterate instead. - BI = BB->begin(); BE = BB->end(); - Changed |= res; - } - } - - // Place the list of instructions to simplify on the next loop iteration - // into ToSimplify. - std::swap(ToSimplify, Next); - Next->clear(); - } while (!ToSimplify->empty()); - - return Changed; + return runImpl(F, DT, TLI, AC); } }; } @@ -115,3 +124,15 @@ char &llvm::InstructionSimplifierID = InstSimplifier::ID; FunctionPass *llvm::createInstructionSimplifierPass() { return new InstSimplifier(); } + +PreservedAnalyses InstSimplifierPass::run(Function &F, + AnalysisManager<Function> &AM) { + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + bool Changed = runImpl(F, DT, &TLI, &AC); + if (!Changed) + return PreservedAnalyses::all(); + // FIXME: This should also 'preserve the CFG'. + return PreservedAnalyses::none(); +} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2f3c31128cf03..c2986951e48fd 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -29,7 +29,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -104,101 +103,11 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } -/// \brief Check whether we can use unsafe floating point math for -/// the function passed as input. -static bool canUseUnsafeFPMath(Function *F) { - - // FIXME: For finer-grain optimization, we need intrinsics to have the same - // fast-math flag decorations that are applied to FP instructions. 
For now, - // we have to rely on the function-level unsafe-fp-math attribute to do this - // optimization because there's no other way to express that the call can be - // relaxed. - if (F->hasFnAttribute("unsafe-fp-math")) { - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() == "true") - return true; - } - return false; -} - -/// \brief Returns whether \p F matches the signature expected for the -/// string/memory copying library function \p Func. -/// Acceptable functions are st[rp][n]?cpy, memove, memcpy, and memset. -/// Their fortified (_chk) counterparts are also accepted. -static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) { - const DataLayout &DL = F->getParent()->getDataLayout(); - FunctionType *FT = F->getFunctionType(); - LLVMContext &Context = F->getContext(); - Type *PCharTy = Type::getInt8PtrTy(Context); - Type *SizeTTy = DL.getIntPtrType(Context); - unsigned NumParams = FT->getNumParams(); - - // All string libfuncs return the same type as the first parameter. - if (FT->getReturnType() != FT->getParamType(0)) - return false; - - switch (Func) { - default: - llvm_unreachable("Can't check signature for non-string-copy libfunc."); - case LibFunc::stpncpy_chk: - case LibFunc::strncpy_chk: - --NumParams; // fallthrough - case LibFunc::stpncpy: - case LibFunc::strncpy: { - if (NumParams != 3 || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PCharTy || !FT->getParamType(2)->isIntegerTy()) - return false; - break; - } - case LibFunc::strcpy_chk: - case LibFunc::stpcpy_chk: - --NumParams; // fallthrough - case LibFunc::stpcpy: - case LibFunc::strcpy: { - if (NumParams != 2 || FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != PCharTy) - return false; - break; - } - case LibFunc::memmove_chk: - case LibFunc::memcpy_chk: - --NumParams; // fallthrough - case LibFunc::memmove: - case LibFunc::memcpy: { - if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != SizeTTy) - return false; - break; - } - case LibFunc::memset_chk: - --NumParams; // fallthrough - case LibFunc::memset: { - if (NumParams != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != SizeTTy) - return false; - break; - } - } - // If this is a fortified libcall, the last parameter is a size_t. - if (NumParams == FT->getNumParams() - 1) - return FT->getParamType(FT->getNumParams() - 1) == SizeTTy; - return true; -} - //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Verify the "strcat" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2|| - FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - FT->getParamType(1) != FT->getReturnType()) - return nullptr; - // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -220,7 +129,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. 
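The helper being diffed here implements the classic strcat lowering: one strlen call finds the end of the destination, and a single memcpy of the known source length (plus the terminator) replaces the byte-by-byte append. A hedged source-level sketch, assuming the source length Len is a compile-time constant:

#include <cstring>

char *strcatKnownLen(char *Dst, const char *Src, size_t Len) {
  char *End = Dst + std::strlen(Dst); // where the appended bytes go
  std::memcpy(End, Src, Len + 1);     // +1 also copies the nul terminator
  return Dst;                         // strcat returns its first argument
}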
- Value *DstLen = EmitStrLen(Dst, B, DL, TLI); + Value *DstLen = emitStrLen(Dst, B, DL, TLI); if (!DstLen) return nullptr; @@ -238,15 +147,6 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, } Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Verify the "strncat" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - FT->getParamType(1) != FT->getReturnType() || - !FT->getParamType(2)->isIntegerTy()) - return nullptr; - // Extract some information from the instruction. Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -281,13 +181,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Verify the "strchr" function prototype. FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - !FT->getParamType(1)->isIntegerTy(32)) - return nullptr; - Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length @@ -298,7 +192,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; - return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B, DL, TLI); } @@ -308,7 +202,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) - return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), + return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI), "strchr"); return nullptr; } @@ -326,14 +220,6 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Verify the "strrchr" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || - FT->getParamType(0) != FT->getReturnType() || - !FT->getParamType(1)->isIntegerTy(32)) - return nullptr; - Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); @@ -345,7 +231,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TLI); + return emitStrChr(SrcStr, '\0', B, TLI); return nullptr; } @@ -361,14 +247,6 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Verify the "strcmp" function prototype. 
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) - return nullptr; - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -392,7 +270,7 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { - return EmitMemCmp(Str1P, Str2P, + return emitMemCmp(Str1P, Str2P, ConstantInt::get(DL.getIntPtrType(CI->getContext()), std::min(Len1, Len2)), B, DL, TLI); @@ -402,15 +280,6 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Verify the "strncmp" function prototype. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy() || - !FT->getParamType(2)->isIntegerTy()) - return nullptr; - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -426,7 +295,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { return ConstantInt::get(CI->getType(), 0); if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); + return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -450,11 +319,6 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy)) - return nullptr; - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; @@ -473,12 +337,9 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) - return nullptr; - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, DL, TLI); + Value *StrLen = emitStrLen(Src, B, DL, TLI); return StrLen ? 
B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } @@ -500,9 +361,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) - return nullptr; - Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); Value *LenOp = CI->getArgOperand(2); @@ -540,18 +398,63 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len - 1); + // If s is a constant pointer pointing to a string literal, we can fold + // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // [0, strlen(s)] or the string has a single null terminator '\0' at the end. + // We only try to simplify strlen when the pointer s points to an array + // of i8. Otherwise, we would need to scale the offset x before doing the + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is + // very uncommon. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { + if (!isGEPBasedOnPointerToString(GEP)) + return nullptr; + + StringRef Str; + if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { + size_t NullTermIdx = Str.find('\0'); + + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == StringRef::npos) + return nullptr; + + Value *Offset = GEP->getOperand(2); + unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, + nullptr); + KnownZero.flipAllBits(); + size_t ArrSize = + cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); + + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent + // bits unknown in Offset. Therefore, Offset is known to be in range + // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // unsigned-less-than NullTermIdx. + // + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) + // is a pointer to an object whose memory extent is NullTermIdx+1. + if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) && + NullTermIdx == ArrSize - 1)) + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + Offset); + } + + return nullptr; + } + // strlen(x?"foo":"bars") --> x ? 
3 : 4 if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { uint64_t LenTrue = GetStringLength(SI->getTrueValue()); @@ -576,13 +479,6 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - FT->getReturnType() != FT->getParamType(0)) - return nullptr; - StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); @@ -604,19 +500,12 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { // strpbrk(s, "a") -> strchr(s, 'a') if (HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI); + return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI); return nullptr; } Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy()) - return nullptr; - Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. @@ -628,13 +517,6 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); @@ -656,13 +538,6 @@ Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); @@ -681,29 +556,22 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { // strcspn(s, "") -> strlen(s) if (HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); + return emitStrLen(CI->getArgOperand(0), B, DL, TLI); return nullptr; } Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isPointerTy()) - return nullptr; - // fold strstr(x, x) -> x. 
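optimizeStrStr, continued below, applies a small family of rewrites: strstr(x, x) is x itself, a strstr compared for equality against its haystack is really a prefix test, a constant haystack and needle fold to a pointer offset, and a one-character needle becomes strchr. The prefix-test rewrite in plain C++ (illustrative, not the IR-building form):

#include <cstring>

// strstr(A, B) == A holds exactly when B occurs at offset 0, i.e. A starts
// with B; strncmp stops at the first mismatch instead of scanning all of A.
bool startsWith(const char *A, const char *B) {
  return std::strncmp(A, B, std::strlen(B)) == 0;
}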
if (CI->getArgOperand(0) == CI->getArgOperand(1)) return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); + Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) return nullptr; - Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), StrLen, B, DL, TLI); if (!StrNCmp) return nullptr; @@ -734,28 +602,20 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getArgOperand(0), B); + Value *Result = castToCStr(CI->getArgOperand(0), B); Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); + Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } return nullptr; } Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy(32) || - !FT->getParamType(2)->isIntegerTy() || - !FT->getReturnType()->isPointerTy()) - return nullptr; - Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); @@ -834,13 +694,6 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy(32)) - return nullptr; - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); if (LHS == RHS) // memcmp(s,s,x) -> 0 @@ -857,9 +710,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"), CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"), CI->getType(), "rhsv"); return B.CreateSub(LHSV, RHSV, "chardiff"); } @@ -909,11 +762,6 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy)) - return nullptr; - // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); @@ -921,23 +769,81 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, 
IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove)) - return nullptr; - // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } -Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); +// TODO: Does this belong in BuildLibCalls or should all of those similar +// functions be moved here? +static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs, + IRBuilder<> &B, const TargetLibraryInfo &TLI) { + LibFunc::Func Func; + if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + const DataLayout &DL = M->getDataLayout(); + IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); + Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), + PtrType, PtrType, nullptr); + CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc"); + + if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset)) +/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n). +static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, + const TargetLibraryInfo &TLI) { + // This has to be a memset of zeros (bzero). + auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1)); + if (!FillValue || FillValue->getZExtValue() != 0) return nullptr; + // TODO: We should handle the case where the malloc has more than one use. + // This is necessary to optimize common patterns such as when the result of + // the malloc is checked against null or when a memset intrinsic is used in + // place of a memset library call. + auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0)); + if (!Malloc || !Malloc->hasOneUse()) + return nullptr; + + // Is the inner call really malloc()? + Function *InnerCallee = Malloc->getCalledFunction(); + LibFunc::Func Func; + if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || + Func != LibFunc::malloc) + return nullptr; + + // The memset must cover the same number of bytes that are malloc'd. + if (Memset->getArgOperand(2) != Malloc->getArgOperand(0)) + return nullptr; + + // Replace the malloc with a calloc. We need the data layout to know what the + // actual size of a 'size_t' parameter is. 
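foldMallocMemset, whose body continues below, rewrites the zero-initialization idiom at the allocation site: a malloc whose only use is a full-length memset of zero becomes calloc, which can hand back pages the OS has already zeroed. Source-level before and after (a sketch; per the TODO, the pass currently insists the malloc has a single use, so no intervening null check):

#include <cstdlib>
#include <cstring>

void *zeroedBefore(size_t N) {
  void *P = std::malloc(N);
  std::memset(P, 0, N); // fill value 0, size identical to the malloc'd size
  return P;
}

void *zeroedAfter(size_t N) {
  return std::calloc(1, N); // same bytes, potentially no explicit zeroing
}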
+ B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); + const DataLayout &DL = Malloc->getModule()->getDataLayout(); + IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); + Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), + Malloc->getArgOperand(0), Malloc->getAttributes(), + B, TLI); + if (!Calloc) + return nullptr; + + Malloc->replaceAllUsesWith(Calloc); + Malloc->eraseFromParent(); + + return Calloc; +} + +Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { + if (auto *Calloc = foldMallocMemset(CI, B, *TLI)) + return Calloc; + // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); @@ -970,34 +876,12 @@ static Value *valueHasFloatPrecision(Value *Val) { return nullptr; } -/// Any floating-point library function that we're trying to simplify will have -/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). -/// CheckDoubleTy indicates that 'fptype' must be 'double'. -static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, - bool CheckDoubleTy) { - FunctionType *FT = F->getFunctionType(); - if (FT->getNumParams() != NumParams) - return false; - - // The return type must match what we're looking for. - Type *RetTy = FT->getReturnType(); - if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) - return false; - - // Each parameter must match the return type, and therefore, match every other - // parameter too. - for (const Type *ParamTy : FT->params()) - if (ParamTy != RetTy) - return false; - - return true; -} - /// Shrink double -> float for unary functions like 'floor'. static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType) { Function *Callee = CI->getCalledFunction(); - if (!matchesFPLibFunctionSignature(Callee, 1, true)) + // We know this libcall has a valid prototype, but we don't know which. + if (!CI->getType()->isDoubleTy()) return nullptr; if (CheckRetType) { @@ -1026,7 +910,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, V = B.CreateCall(F, V); } else { // The call is a library call rather than an intrinsic. - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); } return B.CreateFPExt(V, B.getDoubleTy()); @@ -1035,7 +919,8 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, /// Shrink double -> float for binary functions like 'fmin/fmax'. static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!matchesFPLibFunctionSignature(Callee, 2, true)) + // We know this libcall has a valid prototype, but we don't know which. + if (!CI->getType()->isDoubleTy()) return nullptr; // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1054,7 +939,7 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP(). 
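The shrink performed by optimizeBinaryDoubleFP is lossless because fmin and fmax return one of their operands unchanged: if both inputs are floats that were merely widened, fminf on the originals picks the same value, and extending it back to double reproduces the wide result bit for bit. In plain C++:

#include <cmath>

double shrunkFMin(float A, float B) {
  // Equals std::fmin(double(A), double(B)) for every input, including NaNs
  // (up to NaN payload), because fmin never rounds: it returns an operand.
  return double(std::fminf(A, B));
}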
- Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B, Callee->getAttributes()); return B.CreateFPExt(V, B.getDoubleTy()); } @@ -1066,13 +951,6 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 1 argument of FP type, which matches the - // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); if (BinaryOperator::isFNeg(Op1)) { @@ -1114,14 +992,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { // pow(1.0, x) -> 1.0 @@ -1131,19 +1001,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op1C->isExactlyValue(2.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) - return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B, + return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B, Callee->getAttributes()); // pow(10.0, x) -> exp10(x) if (Op1C->isExactlyValue(10.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, LibFunc::exp10l)) - return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, Callee->getAttributes()); } - // FIXME: Use instruction-level FMF. - bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); - // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) // We enable these only with fast-math. Besides rounding differences, the @@ -1159,7 +1026,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); - return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, + return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, OpCCallee->getAttributes()); } } @@ -1181,7 +1048,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (CI->hasUnsafeAlgebra()) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); - return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); } @@ -1191,9 +1058,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
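The select built below exists because pow(x, 0.5) and sqrt(x) disagree on exactly two inputs, negative zero and negative infinity; fabs repairs the first and the select on -inf repairs the second. A checkable summary of the IEEE behavior this relies on:

#include <cassert>
#include <cmath>

int main() {
  assert(!std::signbit(std::pow(-0.0, 0.5)));   // pow(-0.0, 0.5) == +0.0
  assert(std::signbit(std::sqrt(-0.0)));        // but sqrt(-0.0) == -0.0
  assert(std::pow(-INFINITY, 0.5) == INFINITY); // pow(-inf, 0.5) == +inf
  assert(std::isnan(std::sqrt(-INFINITY)));     // while sqrt(-inf) is NaN
}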
Value *Inf = ConstantFP::getInfinity(CI->getType()); Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); - Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); + Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); Value *FAbs = - EmitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); + emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); return Sel; @@ -1207,7 +1074,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (UnsafeFPMath) { + if (CI->hasUnsafeAlgebra()) { APFloat V = abs(Op2C->getValueAPF()); // We limit to a max of 7 fmul(s). Thus max exponent is 32. // This transformation applies to integer exponents only. @@ -1224,6 +1091,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // So we first convert V to something which could be converted to double. bool ignored; V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + + // TODO: Should the new instructions propagate the 'fast' flag of the pow()? Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. if (Op2C->isNegative()) @@ -1236,19 +1105,11 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Function *Caller = CI->getParent()->getParent(); Value *Ret = nullptr; StringRef Name = Callee->getName(); if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 1 argument of FP type, which matches the - // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 @@ -1273,11 +1134,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { if (!Op->getType()->isFloatTy()) One = ConstantExpr::getFPExtend(One, Op->getType()); - Module *M = Caller->getParent(); - Value *Callee = + Module *M = CI->getModule(); + Value *NewCallee = M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty(), nullptr); - CallInst *CI = B.CreateCall(Callee, {One, LdExpArg}); + CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1294,12 +1155,6 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { if (Name == "fabs" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, false); - FunctionType *FT = Callee->getFunctionType(); - // Make sure this has 1 argument of FP type which matches the result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - Value *Op = CI->getArgOperand(0); if (Instruction *I = dyn_cast<Instruction>(Op)) { // Fold fabs(x * x) -> x * x; any squared FP value must already be positive. 
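The repeated-fmul fold above ("max of 7 fmul(s), thus max exponent is 32") is a budgeted form of square-and-multiply, which needs only O(log n) multiplies; getPow builds a memoized chain of this shape. The textbook variant as a sketch (fast-math is what licenses reassociating pow into explicit products):

double powByMul(double X, unsigned N) {
  double Result = 1.0;
  while (N) {
    if (N & 1)
      Result *= X; // fold in the current power of two when the bit is set
    X *= X;        // X, X^2, X^4, X^8, ...
    N >>= 1;
  }
  return Result;
}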
@@ -1311,21 +1166,14 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double // function, do that first. - Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) return Ret; - // Make sure this has 2 arguments of FP type which match the result type. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) - return nullptr; - IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; if (CI->hasUnsafeAlgebra()) { @@ -1360,13 +1208,6 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { StringRef Name = Callee->getName(); if (UnsafeFPShrink && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - FunctionType *FT = Callee->getFunctionType(); - - // Just make sure this has 1 argument of FP type, which matches the - // result type. - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; if (!CI->hasUnsafeAlgebra()) return Ret; @@ -1392,7 +1233,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow)) return B.CreateFMul(OpC->getArgOperand(1), - EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, + emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, Callee->getAttributes()), "mul"); // log(exp2(y)) -> y*log(2) @@ -1400,7 +1241,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { TLI->has(Func) && Func == LibFunc::exp2) return B.CreateFMul( OpC->getArgOperand(0), - EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), + emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), Callee->getName(), B, Callee->getAttributes()), "logmul"); return Ret; @@ -1408,21 +1249,11 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - // FIXME: Refactor - this check is repeated all over this file and even in the - // preceding call to shrink double -> float. - - // Make sure this has 1 argument of FP type, which matches the result type. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; - if (!CI->hasUnsafeAlgebra()) return Ret; @@ -1489,13 +1320,6 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { StringRef Name = Callee->getName(); if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - FunctionType *FT = Callee->getFunctionType(); - - // Just make sure this has 1 argument of FP type, which matches the - // result type. 
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPointTy()) - return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); @@ -1519,13 +1343,65 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { return Ret; } -static bool isTrigLibCall(CallInst *CI); +static bool isTrigLibCall(CallInst *CI) { + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + // We already checked the prototype. + return CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); +} + static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, bool UseFloat, Value *&Sin, Value *&Cos, - Value *&SinCos); + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; -Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospif_stret"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, nullptr); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, nullptr); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. + BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } +} +Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) @@ -1541,9 +1417,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { // Look for all compatible sinpi, cospi and sincospi calls with the same // argument. If there are enough (in some sense) we can make the // substitution. + Function *F = CI->getFunction(); for (User *U : Arg->users()) - classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls, - SinCosCalls); + classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); // It's only worthwhile if both sinpi and cospi are actually used. 
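Once a sinpi and a cospi of the same argument are both known to exist, insertSinCosCall above replaces the pair with one __sincospi_stret call that returns both results at once; the check that follows makes the rewrite fire only when it actually saves a call. A hedged sketch of the caller-side effect (the aggregate layout here is illustrative; as the code notes, the float variant on x86-64 really returns a two-element vector):

struct SinCosPi { double Sin, Cos; };

// Assumed prototype for the combined Darwin runtime entry point.
extern "C" SinCosPi __sincospi_stret(double X);

double sumSinCosPi(double X) {
  SinCosPi R = __sincospi_stret(X); // one call instead of sinpi(X) + cospi(X)
  return R.Sin + R.Cos;
}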
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) @@ -1559,35 +1435,23 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { return nullptr; } -static bool isTrigLibCall(CallInst *CI) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - - // We can only hope to do anything useful if we can ignore things like errno - // and floating-point exceptions. - bool AttributesSafe = - CI->hasFnAttr(Attribute::NoUnwind) && CI->hasFnAttr(Attribute::ReadNone); - - // Other than that we need float(float) or double(double) - return AttributesSafe && FT->getNumParams() == 1 && - FT->getReturnType() == FT->getParamType(0) && - (FT->getParamType(0)->isFloatTy() || - FT->getParamType(0)->isDoubleTy()); -} - -void -LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, - SmallVectorImpl<CallInst *> &SinCalls, - SmallVectorImpl<CallInst *> &CosCalls, - SmallVectorImpl<CallInst *> &SinCosCalls) { +void LibCallSimplifier::classifyArgUse( + Value *Val, Function *F, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { CallInst *CI = dyn_cast<CallInst>(Val); if (!CI) return; + // Don't consider calls in other functions. + if (CI->getFunction() != F) + return; + Function *Callee = CI->getCalledFunction(); LibFunc::Func Func; - if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) || + if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) || !isTrigLibCall(CI)) return; @@ -1614,69 +1478,12 @@ void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls, replaceAllUsesWith(C, Res); } -void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, - bool UseFloat, Value *&Sin, Value *&Cos, Value *&SinCos) { - Type *ArgTy = Arg->getType(); - Type *ResTy; - StringRef Name; - - Triple T(OrigCallee->getParent()->getTargetTriple()); - if (UseFloat) { - Name = "__sincospif_stret"; - - assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); - // x86_64 can't use {float, float} since that would be returned in both - // xmm0 and xmm1, which isn't what a real struct would do. - ResTy = T.getArch() == Triple::x86_64 - ? static_cast<Type *>(VectorType::get(ArgTy, 2)) - : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr)); - } else { - Name = "__sincospi_stret"; - ResTy = StructType::get(ArgTy, ArgTy, nullptr); - } - - Module *M = OrigCallee->getParent(); - Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), - ResTy, ArgTy, nullptr); - - if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { - // If the argument is an instruction, it must dominate all uses so put our - // sincos call there. - B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); - } else { - // Otherwise (e.g. for a constant) the beginning of the function is as - // good a place as any. 
- BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); - B.SetInsertPoint(&EntryBB, EntryBB.begin()); - } - - SinCos = B.CreateCall(Callee, Arg, "sincospi"); - - if (SinCos->getType()->isStructTy()) { - Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); - Cos = B.CreateExtractValue(SinCos, 1, "cospi"); - } else { - Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), - "sinpi"); - Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), - "cospi"); - } -} - //===----------------------------------------------------------------------===// // Integer Library Call Optimizations //===----------------------------------------------------------------------===// -static bool checkIntUnaryReturnAndParam(Function *Callee) { - FunctionType *FT = Callee->getFunctionType(); - return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0)->isIntegerTy(); -} - Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkIntUnaryReturnAndParam(Callee)) - return nullptr; Value *Op = CI->getArgOperand(0); // Constant fold. @@ -1700,13 +1507,6 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(integer) where the types agree. - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - FT->getParamType(0) != FT->getReturnType()) - return nullptr; - // abs(x) -> x >s -1 ? x : -x Value *Op = CI->getArgOperand(0); Value *Pos = @@ -1716,9 +1516,6 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { - if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) - return nullptr; - // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getArgOperand(0); Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); @@ -1727,9 +1524,6 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { - if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) - return nullptr; - // isascii(c) -> c <u 128 Value *Op = CI->getArgOperand(0); Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); @@ -1737,9 +1531,6 @@ Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { - if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) - return nullptr; - // toascii(c) -> c & 0x7f return B.CreateAnd(CI->getArgOperand(0), ConstantInt::get(CI->getType(), 0x7F)); @@ -1753,6 +1544,7 @@ static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg); Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, int StreamArg) { + Function *Callee = CI->getCalledFunction(); // Error reporting calls should be cold, mark them as such. // This applies even to non-builtin calls: it is only a hint and applies to // functions that the frontend might not understand as builtins. @@ -1761,8 +1553,6 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, // Improving Static Branch Prediction in a Compiler // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu // Proceedings of PACT'98, Oct. 
1998, IEEE - Function *Callee = CI->getCalledFunction(); - if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI, StreamArg)) { CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); @@ -1808,12 +1598,18 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { if (!CI->use_empty()) return nullptr; - // printf("x") -> putchar('x'), even for '%'. - if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI); - if (CI->use_empty() || !Res) - return Res; - return B.CreateIntCast(Res, CI->getType(), true); + // printf("x") -> putchar('x'), even for "%" and "%%". + if (FormatStr.size() == 1 || FormatStr == "%%") + return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); + + // printf("%s", "a") --> putchar('a') + if (FormatStr == "%s" && CI->getNumArgOperands() > 1) { + StringRef ChrStr; + if (!getConstantStringInfo(CI->getOperand(1), ChrStr)) + return nullptr; + if (ChrStr.size() != 1) + return nullptr; + return emitPutChar(B.getInt32(ChrStr[0]), B, TLI); } // printf("foo\n") --> puts("foo") @@ -1823,40 +1619,26 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, TLI); - return (CI->use_empty() || !NewCI) - ? NewCI - : ConstantInt::get(CI->getType(), FormatStr.size() + 1); + return emitPutS(GV, B, TLI); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI); - - if (CI->use_empty() || !Res) - return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } + CI->getArgOperand(1)->getType()->isIntegerTy()) + return emitPutChar(CI->getArgOperand(1), B, TLI); // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, TLI); - } + CI->getArgOperand(1)->getType()->isPointerTy()) + return emitPutS(CI->getArgOperand(1), B, TLI); return nullptr; } Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Require one fixed pointer argument and an integer/void result. 
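Collected, the printf rewrites above all trade a format-string interpreter for a direct libc call, and all are gated on printf's result being unused, since putchar and puts do not return the character count printf would. At the source level:

#include <cstdio>

void folds(int C, const char *S) {
  std::printf("\n");      // -> putchar('\n'); likewise "%" and "%%"
  std::printf("%c", C);   // -> putchar(C)
  std::printf("%s\n", S); // -> puts(S)
  std::printf("ok\n");    // -> puts("ok"); no '%' in the format body
}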
FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return nullptr; - if (Value *V = optimizePrintFString(CI, B)) { return V; } @@ -1909,7 +1691,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + Value *Ptr = castToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); @@ -1922,7 +1704,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI); + Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) return nullptr; Value *IncLen = @@ -1937,13 +1719,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Require two fixed pointer arguments and an integer result. FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - if (Value *V = optimizeSPrintFString(CI, B)) { return V; } @@ -1982,7 +1758,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return nullptr; // We found a format specifier. - return EmitFWrite( + return emitFWrite( CI->getArgOperand(1), ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), CI->getArgOperand(0), B, DL, TLI); @@ -1999,27 +1775,21 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; - return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); + return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } return nullptr; } Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Require two fixed paramters as pointers and integer result. FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - if (Value *V = optimizeFPrintFString(CI, B)) { return V; } @@ -2041,16 +1811,6 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { optimizeErrorReporting(CI, B, 3); - Function *Callee = CI->getCalledFunction(); - // Require a pointer, an integer, an integer, a pointer, returning integer. 
- FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - !FT->getParamType(2)->isIntegerTy() || - !FT->getParamType(3)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return nullptr; - // Get the element size and count. ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); @@ -2065,8 +1825,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { // If this is writing one byte, turn it into fputc. // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI); + Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } @@ -2076,12 +1836,13 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { optimizeErrorReporting(CI, B, 1); - Function *Callee = CI->getCalledFunction(); + // Don't rewrite fputs to fwrite when optimising for size because fwrite + // requires more arguments and thus extra MOVs are required. + if (CI->getParent()->getParent()->optForSize()) + return nullptr; - // Require two pointers. Also, we can't optimize if return value is used. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) + // We can't optimize if return value is used. + if (!CI->use_empty()) return nullptr; // fputs(s,F) --> fwrite(s,1,strlen(s),F) @@ -2090,20 +1851,13 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { return nullptr; // Known to have no uses (see above). - return EmitFWrite( + return emitFWrite( CI->getArgOperand(0), ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), CI->getArgOperand(1), B, DL, TLI); } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - // Require one fixed pointer argument and an integer/void result. - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return nullptr; - // Check for a constant string. StringRef Str; if (!getConstantStringInfo(CI->getArgOperand(0), Str)) @@ -2111,7 +1865,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI); + Value *Res = emitPutChar(B.getInt32('\n'), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -2133,10 +1887,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &Builder) { LibFunc::Func Func; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); - // Check for string/memory library functions. - if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { // Make sure we never change the calling convention. 
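The fwrite and fputs hunks above fold a one-byte fwrite into fputc, and fputs of a known-length string into fwrite, except under optForSize(), where fwrite's extra arguments would cost code size. A standalone model of both folds (illustrative names; the OptForSize flag stands in for the function attribute the real code queries):

```cpp
#include <cstdio>
#include <cstring>

// fwrite(S, 1, 1, F) -> fputc(S[0], F): only when size*count is one byte
// and the call's result is unused (fputc returns something different).
void fwriteOneByte(const char *S, FILE *F) { std::fputc(S[0], F); }

// fputs(S, F) -> fwrite(S, 1, strlen(S), F): profitable when the length is
// a known constant, but skipped when optimizing for size because fwrite
// needs more argument-setup instructions than fputs.
void fputsAsFwrite(const char *S, FILE *F, bool OptForSize) {
  std::size_t Len = std::strlen(S); // constant-folded for known literals
  if (OptForSize || Len == 0)
    std::fputs(S, F); // keep the original, smaller call
  else
    std::fwrite(S, 1, Len, F);
}

int main() {
  fwriteOneByte("x", stdout);
  fputsAsFwrite("hello\n", stdout, false);
}
```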
assert((ignoreCallingConv(Func) || CI->getCallingConv() == llvm::CallingConv::C) && @@ -2208,10 +1960,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles); bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C; - // Command-line parameter overrides function attribute. + // Command-line parameter overrides instruction attribute. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (canUseUnsafeFPMath(Callee)) + else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra()) UnsafeFPShrink = true; // First, check for intrinsics. @@ -2229,6 +1981,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); + // TODO: Use foldMallocMemset() with memset intrinsic. default: return nullptr; } @@ -2253,7 +2006,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { } // Then check for known library functions. - if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { // We never change the calling convention. if (!ignoreCallingConv(Func) && !isCallingConvC) return nullptr; @@ -2457,11 +2210,6 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk)) - return nullptr; - if (isFortifiedCallFoldable(CI, 3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); @@ -2472,11 +2220,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk)) - return nullptr; - if (isFortifiedCallFoldable(CI, 3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); @@ -2487,10 +2230,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk)) - return nullptr; + // TODO: Try foldMallocMemset() here. if (isFortifiedCallFoldable(CI, 3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); @@ -2506,16 +2246,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); const DataLayout &DL = CI->getModule()->getDataLayout(); - - if (!checkStringCopyLibFuncSignature(Callee, Func)) - return nullptr; - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), *ObjSize = CI->getArgOperand(2); // __stpcpy_chk(x,x,...) -> x+strlen(x) if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { - Value *StrLen = EmitStrLen(Src, B, DL, TLI); + Value *StrLen = emitStrLen(Src, B, DL, TLI); return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } @@ -2525,7 +2261,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
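The fortified _chk folds above all gate on isFortifiedCallFoldable, which proves the guarded write cannot exceed the destination's known object size. A simplified standalone model of that test (not the real helper; the all-ones "unknown size" convention matches the fortified-call ABI, the rest is an assumption for illustration):

```cpp
#include <cassert>
#include <cstdint>

// Model of the core foldability test: a __memcpy_chk-style call can be
// lowered to the plain libcall when the object-size operand shows the
// runtime check could never fire.
bool isFoldable(std::uint64_t ObjSize, std::uint64_t Len, bool ObjSizeKnown,
                bool LenKnown) {
  // (size_t)-1 as the object size means "unknown"; the check is a no-op,
  // so lowering is always safe.
  if (ObjSizeKnown && ObjSize == UINT64_MAX)
    return true;
  // Otherwise both operands must be constants with Len <= ObjSize.
  return ObjSizeKnown && LenKnown && Len <= ObjSize;
}

int main() {
  assert(isFoldable(UINT64_MAX, 0, true, false)); // check disabled
  assert(isFoldable(64, 16, true, true));         // provably in bounds
  assert(!isFoldable(8, 16, true, true));         // possible overflow
}
```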
if (isFortifiedCallFoldable(CI, 2, 1, true))
-    return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+    return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
 
   if (OnlyLowerUnknownSize)
     return nullptr;
@@ -2537,7 +2273,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   Type *SizeTTy = DL.getIntPtrType(CI->getContext());
   Value *LenV = ConstantInt::get(SizeTTy, Len);
-  Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+  Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
   // If the function was an __stpcpy_chk, and we were able to fold it into
   // a __memcpy_chk, we still need to return the correct end pointer.
   if (Ret && Func == LibFunc::stpcpy_chk)
@@ -2550,11 +2286,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
                                                        LibFunc::Func Func) {
   Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
-
-  if (!checkStringCopyLibFuncSignature(Callee, Func))
-    return nullptr;
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
-    Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+    Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
                              CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
     return Ret;
   }
@@ -2577,15 +2310,15 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
-  StringRef FuncName = Callee->getName();
   SmallVector<OperandBundleDef, 2> OpBundles;
   CI->getOperandBundlesAsDefs(OpBundles);
   IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
-  // First, check that this is a known library functions.
+  // First, check that this is a known library function and that the prototype
+  // is correct.
   if (!TLI->getLibFunc(*Callee, Func))
     return nullptr;
 
   // We never change the calling convention.
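A pattern repeated throughout this file's diff: per-optimizer prototype checks are deleted because the Function overload of getLibFunc now both recognizes the callee and validates its signature once, before dispatch. A toy sketch of that structure (invented types and names, not the TargetLibraryInfo API):

```cpp
#include <string>

enum class LibFn { Printf, FPuts };

struct Callee {
  std::string Name;
  int NumParams;
};

// Stand-in for the Function overload of getLibFunc: reject a call whose
// prototype does not match the library function it is named after.
bool getLibFn(const Callee &C, LibFn &Fn) {
  if (C.Name == "printf" && C.NumParams >= 1)
    Fn = LibFn::Printf;
  else if (C.Name == "fputs" && C.NumParams == 2)
    Fn = LibFn::FPuts;
  else
    return false; // unknown name or wrong prototype
  return true;
}

const char *optimizeCall(const Callee &C) {
  LibFn Fn;
  if (!getLibFn(C, Fn)) // signature already proven correct past this point
    return "<not a known libcall>";
  switch (Fn) { // the per-optimizer re-checks are no longer needed here
  case LibFn::Printf:
    return "optimizePrintF";
  case LibFn::FPuts:
    return "optimizeFPuts";
  }
  return "<unreachable>";
}

int main() {
  // A well-typed fputs is recognized; a three-argument "fputs" is not.
  Callee Good{"fputs", 2}, Bad{"fputs", 3};
  return (optimizeCall(Good) == std::string("optimizeFPuts") &&
          optimizeCall(Bad) == std::string("<not a known libcall>"))
             ? 0
             : 1;
}
```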
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index ad6b782caf8b5..e9a368f4faa4e 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -13,19 +13,184 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "split-module" + #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include <queue> using namespace llvm; +namespace { +typedef EquivalenceClasses<const GlobalValue *> ClusterMapType; +typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType; +typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType; +} + +static void addNonConstUser(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const User *U) { + assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); + + if (const Instruction *I = dyn_cast<Instruction>(U)) { + const GlobalValue *F = I->getParent()->getParent(); + GVtoClusterMap.unionSets(GV, F); + } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) || + isa<GlobalVariable>(U)) { + GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U)); + } else { + llvm_unreachable("Underimplemented use case"); + } +} + +// Adds all GlobalValue users of V to the same cluster as GV. +static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, + const GlobalValue *GV, const Value *V) { + for (auto *U : V->users()) { + SmallVector<const User *, 4> Worklist; + Worklist.push_back(U); + while (!Worklist.empty()) { + const User *UU = Worklist.pop_back_val(); + // For each constant that is not a GV (a pure const) recurse. + if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { + Worklist.append(UU->user_begin(), UU->user_end()); + continue; + } + addNonConstUser(GVtoClusterMap, GV, UU); + } + } +} + +// Find partitions for module in the way that no locals need to be +// globalized. +// Try to balance pack those partitions into N files since this roughly equals +// thread balancing for the backend codegen step. +static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, + unsigned N) { + // At this point module should have the proper mix of globals and locals. + // As we attempt to partition this module, we must not change any + // locals to globals. + DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); + ClusterMapType GVtoClusterMap; + ComdatMembersType ComdatMembers; + + auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { + if (GV.isDeclaration()) + return; + + if (!GV.hasName()) + GV.setName("__llvmsplit_unnamed"); + + // Comdat groups must not be partitioned. For comdat groups that contain + // locals, record all their members here so we can keep them together. + // Comdat groups that only contain external globals are already handled by + // the MD5-based partitioning. 
+    if (const Comdat *C = GV.getComdat()) {
+      auto &Member = ComdatMembers[C];
+      if (Member)
+        GVtoClusterMap.unionSets(Member, &GV);
+      else
+        Member = &GV;
+    }
+
+    // For aliases we should not separate them from their aliasees regardless
+    // of linkage.
+    if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
+      if (const GlobalObject *Base = GIS->getBaseObject())
+        GVtoClusterMap.unionSets(&GV, Base);
+    }
+
+    if (const Function *F = dyn_cast<Function>(&GV)) {
+      for (const BasicBlock &BB : *F) {
+        BlockAddress *BA = BlockAddress::lookup(&BB);
+        if (!BA || !BA->isConstantUsed())
+          continue;
+        addAllGlobalValueUsers(GVtoClusterMap, F, BA);
+      }
+    }
+
+    if (GV.hasLocalLinkage())
+      addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
+  };
+
+  std::for_each(M->begin(), M->end(), recordGVSet);
+  std::for_each(M->global_begin(), M->global_end(), recordGVSet);
+  std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
+
+  // Assign all GVs to merged clusters while balancing the number of objects
+  // in each.
+  auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
+                            const std::pair<unsigned, unsigned> &b) {
+    if (a.second || b.second)
+      return a.second > b.second;
+    else
+      return a.first > b.first;
+  };
+
+  std::priority_queue<std::pair<unsigned, unsigned>,
+                      std::vector<std::pair<unsigned, unsigned>>,
+                      decltype(CompareClusters)>
+      BalancinQueue(CompareClusters);
+  // Pre-populate priority queue with N slot blanks.
+  for (unsigned i = 0; i < N; ++i)
+    BalancinQueue.push(std::make_pair(i, 0));
+
+  typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+  SmallVector<SortType, 64> Sets;
+  SmallPtrSet<const GlobalValue *, 32> Visited;
+
+  // To guarantee determinism, we have to sort SCC according to size.
+  // When size is the same, use leader's name.
+  for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
+                                E = GVtoClusterMap.end(); I != E; ++I)
+    if (I->isLeader())
+      Sets.push_back(
+          std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
+                                       GVtoClusterMap.member_end()), I));
+
+  std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
+    if (a.first == b.first)
+      return a.second->getData()->getName() > b.second->getData()->getName();
+    else
+      return a.first > b.first;
+  });
+
+  for (auto &I : Sets) {
+    unsigned CurrentClusterID = BalancinQueue.top().first;
+    unsigned CurrentClusterSize = BalancinQueue.top().second;
+    BalancinQueue.pop();
+
+    DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
+                 << ") ----> " << I.second->getData()->getName() << "\n");
+
+    for (ClusterMapType::member_iterator MI =
+             GVtoClusterMap.findLeader(I.second);
+         MI != GVtoClusterMap.member_end(); ++MI) {
+      if (!Visited.insert(*MI).second)
+        continue;
+      DEBUG(dbgs() << "----> " << (*MI)->getName()
+                   << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+      Visited.insert(*MI);
+      ClusterIDMap[*MI] = CurrentClusterID;
+      CurrentClusterSize++;
+    }
+    // Add this set size to the number of entries in this cluster.
+    BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
+  }
+}
+
 static void externalize(GlobalValue *GV) {
   if (GV->hasLocalLinkage()) {
     GV->setLinkage(GlobalValue::ExternalLinkage);
@@ -40,8 +205,8 @@ static void externalize(GlobalValue *GV) {
 // Returns whether GV should be in partition (0-based) I of N.
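findPartitions above assigns the union-find clusters greedily: sort clusters by descending size (name as tie-break for determinism), then repeatedly hand the next cluster to the least-loaded partition, tracked with a priority queue. A standalone model of that balancing step, which precedes the isInPartition fallback below (plain C++; the comparator is simplified relative to CompareClusters above):

```cpp
#include <algorithm>
#include <cstdio>
#include <queue>
#include <utility>
#include <vector>

// Given cluster sizes, return the partition index chosen for each cluster.
std::vector<unsigned> assignClusters(const std::vector<unsigned> &Sizes,
                                     unsigned N) {
  typedef std::pair<unsigned, unsigned> IDAndLoad;
  auto Cmp = [](const IDAndLoad &A, const IDAndLoad &B) {
    return A.second > B.second; // least-loaded partition on top
  };
  std::priority_queue<IDAndLoad, std::vector<IDAndLoad>, decltype(Cmp)>
      Queue(Cmp);
  for (unsigned I = 0; I < N; ++I)
    Queue.push({I, 0}); // pre-populate with N empty slots

  // Largest clusters first, so the small late ones even out the loads.
  std::vector<unsigned> Order(Sizes.size());
  for (unsigned I = 0; I < Order.size(); ++I)
    Order[I] = I;
  std::sort(Order.begin(), Order.end(),
            [&](unsigned A, unsigned B) { return Sizes[A] > Sizes[B]; });

  std::vector<unsigned> PartOf(Sizes.size());
  for (unsigned C : Order) {
    IDAndLoad Slot = Queue.top();
    Queue.pop();
    PartOf[C] = Slot.first;
    Queue.push({Slot.first, Slot.second + Sizes[C]});
  }
  return PartOf;
}

int main() {
  // Clusters of size 9, 4, 4, 1 over two partitions: loads end up 9 vs 9.
  auto P = assignClusters({9, 4, 4, 1}, 2);
  for (unsigned X : P)
    std::printf("%u ", X); // prints "0 1 1 1"
}
```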
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { - if (auto GA = dyn_cast<GlobalAlias>(GV)) - if (const GlobalObject *Base = GA->getBaseObject()) + if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV)) + if (const GlobalObject *Base = GIS->getBaseObject()) GV = Base; StringRef Name; @@ -62,21 +227,34 @@ static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { void llvm::SplitModule( std::unique_ptr<Module> M, unsigned N, - std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) { - for (Function &F : *M) - externalize(&F); - for (GlobalVariable &GV : M->globals()) - externalize(&GV); - for (GlobalAlias &GA : M->aliases()) - externalize(&GA); + function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, + bool PreserveLocals) { + if (!PreserveLocals) { + for (Function &F : *M) + externalize(&F); + for (GlobalVariable &GV : M->globals()) + externalize(&GV); + for (GlobalAlias &GA : M->aliases()) + externalize(&GA); + for (GlobalIFunc &GIF : M->ifuncs()) + externalize(&GIF); + } + + // This performs splitting without a need for externalization, which might not + // always be possible. + ClusterIDMapType ClusterIDMap; + findPartitions(M.get(), ClusterIDMap, N); // FIXME: We should be able to reuse M as the last partition instead of // cloning it. - for (unsigned I = 0; I != N; ++I) { + for (unsigned I = 0; I < N; ++I) { ValueToValueMapTy VMap; std::unique_ptr<Module> MPart( - CloneModule(M.get(), VMap, [=](const GlobalValue *GV) { - return isInPartition(GV, I, N); + CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { + if (ClusterIDMap.count(GV)) + return (ClusterIDMap[GV] == I); + else + return isInPartition(GV, I, N); })); if (I != 0) MPart->setModuleInlineAsm(""); diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 1d1f602b041dc..7523ca527b680 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -58,7 +58,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "symbol-rewriter" -#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 6b1d1dae5f01b..9385f825523cd 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -66,9 +66,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); - for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), - E = UnreachableBlocks.end(); I != E; ++I) { - BasicBlock *BB = *I; + for (BasicBlock *BB : UnreachableBlocks) { BB->getInstList().pop_back(); // Remove the unreachable inst. BranchInst::Create(UnreachableBlock, BB); } @@ -104,10 +102,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Loop over all of the blocks, replacing the return instruction with an // unconditional branch. // - for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), - E = ReturningBlocks.end(); I != E; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : ReturningBlocks) { // Add an incoming element to the PHI node for every return instruction that // is merging into this new block... 
if (PN) diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index ed4f45c6a615d..8f85f19efe38b 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -21,17 +21,20 @@ using namespace llvm; /// initializeTransformUtils - Initialize all passes in the TransformUtils /// library. void llvm::initializeTransformUtils(PassRegistry &Registry) { - initializeAddDiscriminatorsPass(Registry); + initializeAddDiscriminatorsLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); initializeInstNamerPass(Registry); - initializeLCSSAPass(Registry); + initializeLCSSAWrapperPassPass(Registry); initializeLoopSimplifyPass(Registry); initializeLowerInvokePass(Registry); initializeLowerSwitchPass(Registry); - initializePromotePassPass(Registry); + initializeNameAnonFunctionPass(Registry); + initializePromoteLegacyPassPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); initializeInstSimplifierPass(Registry); initializeMetaRenamerPass(Registry); + initializeMemorySSAWrapperPassPass(Registry); + initializeMemorySSAPrinterLegacyPassPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index f47ddb9f064f5..2eade8cbe8efd 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,9 +13,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" @@ -25,25 +29,326 @@ using namespace llvm; // Out of line method to get vtable etc for class. void ValueMapTypeRemapper::anchor() {} void ValueMaterializer::anchor() {} -void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) { -} -Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - ValueToValueMapTy::iterator I = VM.find(V); - +namespace { + +/// A basic block used in a BlockAddress whose function body is not yet +/// materialized. +struct DelayedBasicBlock { + BasicBlock *OldBB; + std::unique_ptr<BasicBlock> TempBB; + + // Explicit move for MSVC. 
+ DelayedBasicBlock(DelayedBasicBlock &&X) + : OldBB(std::move(X.OldBB)), TempBB(std::move(X.TempBB)) {} + DelayedBasicBlock &operator=(DelayedBasicBlock &&X) { + OldBB = std::move(X.OldBB); + TempBB = std::move(X.TempBB); + return *this; + } + + DelayedBasicBlock(const BlockAddress &Old) + : OldBB(Old.getBasicBlock()), + TempBB(BasicBlock::Create(Old.getContext())) {} +}; + +struct WorklistEntry { + enum EntryKind { + MapGlobalInit, + MapAppendingVar, + MapGlobalAliasee, + RemapFunction + }; + struct GVInitTy { + GlobalVariable *GV; + Constant *Init; + }; + struct AppendingGVTy { + GlobalVariable *GV; + Constant *InitPrefix; + }; + struct GlobalAliaseeTy { + GlobalAlias *GA; + Constant *Aliasee; + }; + + unsigned Kind : 2; + unsigned MCID : 29; + unsigned AppendingGVIsOldCtorDtor : 1; + unsigned AppendingGVNumNewMembers; + union { + GVInitTy GVInit; + AppendingGVTy AppendingGV; + GlobalAliaseeTy GlobalAliasee; + Function *RemapF; + } Data; +}; + +struct MappingContext { + ValueToValueMapTy *VM; + ValueMaterializer *Materializer = nullptr; + + /// Construct a MappingContext with a value map and materializer. + explicit MappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) + : VM(&VM), Materializer(Materializer) {} +}; + +class MDNodeMapper; +class Mapper { + friend class MDNodeMapper; + +#ifndef NDEBUG + DenseSet<GlobalValue *> AlreadyScheduled; +#endif + + RemapFlags Flags; + ValueMapTypeRemapper *TypeMapper; + unsigned CurrentMCID = 0; + SmallVector<MappingContext, 2> MCs; + SmallVector<WorklistEntry, 4> Worklist; + SmallVector<DelayedBasicBlock, 1> DelayedBBs; + SmallVector<Constant *, 16> AppendingInits; + +public: + Mapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) + : Flags(Flags), TypeMapper(TypeMapper), + MCs(1, MappingContext(VM, Materializer)) {} + + /// ValueMapper should explicitly call \a flush() before destruction. + ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); } + + bool hasWorkToDo() const { return !Worklist.empty(); } + + unsigned + registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer = nullptr) { + MCs.push_back(MappingContext(VM, Materializer)); + return MCs.size() - 1; + } + + void addFlags(RemapFlags Flags); + + Value *mapValue(const Value *V); + void remapInstruction(Instruction *I); + void remapFunction(Function &F); + + Constant *mapConstant(const Constant *C) { + return cast_or_null<Constant>(mapValue(C)); + } + + /// Map metadata. + /// + /// Find the mapping for MD. Guarantees that the return will be resolved + /// (not an MDNode, or MDNode::isResolved() returns true). 
+ Metadata *mapMetadata(const Metadata *MD); + + void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID); + void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID); + void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID); + void scheduleRemapFunction(Function &F, unsigned MCID); + + void flush(); + +private: + void mapGlobalInitializer(GlobalVariable &GV, Constant &Init); + void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers); + void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee); + void remapFunction(Function &F, ValueToValueMapTy &VM); + + ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } + ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; } + + Value *mapBlockAddress(const BlockAddress &BA); + + /// Map metadata that doesn't require visiting operands. + Optional<Metadata *> mapSimpleMetadata(const Metadata *MD); + + Metadata *mapToMetadata(const Metadata *Key, Metadata *Val); + Metadata *mapToSelf(const Metadata *MD); +}; + +class MDNodeMapper { + Mapper &M; + + /// Data about a node in \a UniquedGraph. + struct Data { + bool HasChanged = false; + unsigned ID = ~0u; + TempMDNode Placeholder; + + Data() {} + Data(Data &&X) + : HasChanged(std::move(X.HasChanged)), ID(std::move(X.ID)), + Placeholder(std::move(X.Placeholder)) {} + Data &operator=(Data &&X) { + HasChanged = std::move(X.HasChanged); + ID = std::move(X.ID); + Placeholder = std::move(X.Placeholder); + return *this; + } + }; + + /// A graph of uniqued nodes. + struct UniquedGraph { + SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties. + SmallVector<MDNode *, 16> POT; // Post-order traversal. + + /// Propagate changed operands through the post-order traversal. + /// + /// Iteratively update \a Data::HasChanged for each node based on \a + /// Data::HasChanged of its operands, until fixed point. + void propagateChanges(); + + /// Get a forward reference to a node to use as an operand. + Metadata &getFwdReference(MDNode &Op); + }; + + /// Worklist of distinct nodes whose operands need to be remapped. + SmallVector<MDNode *, 16> DistinctWorklist; + + // Storage for a UniquedGraph. + SmallDenseMap<const Metadata *, Data, 32> InfoStorage; + SmallVector<MDNode *, 16> POTStorage; + +public: + MDNodeMapper(Mapper &M) : M(M) {} + + /// Map a metadata node (and its transitive operands). + /// + /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative + /// algorithm handles distinct nodes and uniqued node subgraphs using + /// different strategies. + /// + /// Distinct nodes are immediately mapped and added to \a DistinctWorklist + /// using \a mapDistinctNode(). Their mapping can always be computed + /// immediately without visiting operands, even if their operands change. + /// + /// The mapping for uniqued nodes depends on whether their operands change. + /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of + /// a node to calculate uniqued node mappings in bulk. Distinct leafs are + /// added to \a DistinctWorklist with \a mapDistinctNode(). + /// + /// After mapping \c N itself, this function remaps the operands of the + /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c + /// N has been mapped. 
+ Metadata *map(const MDNode &N); + +private: + /// Map a top-level uniqued node and the uniqued subgraph underneath it. + /// + /// This builds up a post-order traversal of the (unmapped) uniqued subgraph + /// underneath \c FirstN and calculates the nodes' mapping. Each node uses + /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its + /// operands uses the identity mapping. + /// + /// The algorithm works as follows: + /// + /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and + /// save the post-order traversal in the given \a UniquedGraph, tracking + /// nodes' operands change. + /// + /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands + /// through the \a UniquedGraph until fixed point, following the rule + /// that if a node changes, any node that references must also change. + /// + /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes + /// (referencing new operands) where necessary. + Metadata *mapTopLevelUniquedNode(const MDNode &FirstN); + + /// Try to map the operand of an \a MDNode. + /// + /// If \c Op is already mapped, return the mapping. If it's not an \a + /// MDNode, compute and return the mapping. If it's a distinct \a MDNode, + /// return the result of \a mapDistinctNode(). + /// + /// \return None if \c Op is an unmapped uniqued \a MDNode. + /// \post getMappedOp(Op) only returns None if this returns None. + Optional<Metadata *> tryToMapOperand(const Metadata *Op); + + /// Map a distinct node. + /// + /// Return the mapping for the distinct node \c N, saving the result in \a + /// DistinctWorklist for later remapping. + /// + /// \pre \c N is not yet mapped. + /// \pre \c N.isDistinct(). + MDNode *mapDistinctNode(const MDNode &N); + + /// Get a previously mapped node. + Optional<Metadata *> getMappedOp(const Metadata *Op) const; + + /// Create a post-order traversal of an unmapped uniqued node subgraph. + /// + /// This traverses the metadata graph deeply enough to map \c FirstN. It + /// uses \a tryToMapOperand() (via \a Mapper::mapSimplifiedNode()), so any + /// metadata that has already been mapped will not be part of the POT. + /// + /// Each node that has a changed operand from outside the graph (e.g., a + /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata) + /// is marked with \a Data::HasChanged. + /// + /// \return \c true if any nodes in \c G have \a Data::HasChanged. + /// \post \c G.POT is a post-order traversal ending with \c FirstN. + /// \post \a Data::hasChanged in \c G.Info indicates whether any node needs + /// to change because of operands outside the graph. + bool createPOT(UniquedGraph &G, const MDNode &FirstN); + + /// Visit the operands of a uniqued node in the POT. + /// + /// Visit the operands in the range from \c I to \c E, returning the first + /// uniqued node we find that isn't yet in \c G. \c I is always advanced to + /// where to continue the loop through the operands. + /// + /// This sets \c HasChanged if any of the visited operands change. + MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged); + + /// Map all the nodes in the given uniqued graph. + /// + /// This visits all the nodes in \c G in post-order, using the identity + /// mapping or creating a new node depending on \a Data::HasChanged. + /// + /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of + /// their operands outside of \c G. 
+ /// \pre \a Data::HasChanged is true for a node in \c G iff any of its + /// operands have changed. + /// \post \a getMappedOp() returns the mapped node for every node in \c G. + void mapNodesInPOT(UniquedGraph &G); + + /// Remap a node's operands using the given functor. + /// + /// Iterate through the operands of \c N and update them in place using \c + /// mapOperand. + /// + /// \pre N.isDistinct() or N.isTemporary(). + template <class OperandMapper> + void remapOperands(MDNode &N, OperandMapper mapOperand); +}; + +} // end namespace + +Value *Mapper::mapValue(const Value *V) { + ValueToValueMapTy::iterator I = getVM().find(V); + // If the value already exists in the map, use it. - if (I != VM.end() && I->second) return I->second; - + if (I != getVM().end()) { + assert(I->second && "Unexpected null mapping"); + return I->second; + } + // If we have a materializer and it can materialize a value, use that. - if (Materializer) { - if (Value *NewV = - Materializer->materializeDeclFor(const_cast<Value *>(V))) { - VM[V] = NewV; - if (auto *NewGV = dyn_cast<GlobalValue>(NewV)) - Materializer->materializeInitFor( - NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V))); + if (auto *Materializer = getMaterializer()) { + if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) { + getVM()[V] = NewV; return NewV; } } @@ -51,13 +356,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Global values do not need to be seeded into the VM if they // are using the identity mapping. if (isa<GlobalValue>(V)) { - if (Flags & RF_NullMapMissingGlobalValues) { - assert(!(Flags & RF_IgnoreMissingEntries) && - "Illegal to specify both RF_NullMapMissingGlobalValues and " - "RF_IgnoreMissingEntries"); + if (Flags & RF_NullMapMissingGlobalValues) return nullptr; - } - return VM[V] = const_cast<Value*>(V); + return getVM()[V] = const_cast<Value *>(V); } if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { @@ -70,28 +371,39 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), IA->hasSideEffects(), IA->isAlignStack()); } - - return VM[V] = const_cast<Value*>(V); + + return getVM()[V] = const_cast<Value *>(V); } if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) { const Metadata *MD = MDV->getMetadata(); + + if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) { + // Look through to grab the local value. + if (Value *LV = mapValue(LAM->getValue())) { + if (V == LAM->getValue()) + return const_cast<Value *>(V); + return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV)); + } + + // FIXME: always return nullptr once Verifier::verifyDominatesUse() + // ensures metadata operands only reference defined SSA values. + return (Flags & RF_IgnoreMissingLocals) + ? nullptr + : MetadataAsValue::get(V->getContext(), + MDTuple::get(V->getContext(), None)); + } + // If this is a module-level metadata and we know that nothing at the module // level is changing, then use an identity mapping. - if (!isa<LocalAsMetadata>(MD) && (Flags & RF_NoModuleLevelChanges)) - return VM[V] = const_cast<Value *>(V); - - auto *MappedMD = MapMetadata(MD, VM, Flags, TypeMapper, Materializer); - if (MD == MappedMD || (!MappedMD && (Flags & RF_IgnoreMissingEntries))) - return VM[V] = const_cast<Value *>(V); - - // FIXME: This assert crashes during bootstrap, but I think it should be - // correct. For now, just match behaviour from before the metadata/value - // split. 
- // - // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) && - // "Referenced metadata value not in value map"); - return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD); + if (Flags & RF_NoModuleLevelChanges) + return getVM()[V] = const_cast<Value *>(V); + + // Map the metadata and turn it into a value. + auto *MappedMD = mapMetadata(MD); + if (MD == MappedMD) + return getVM()[V] = const_cast<Value *>(V); + return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD); } // Okay, this either must be a constant (which may or may not be mappable) or @@ -99,25 +411,31 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); if (!C) return nullptr; - - if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { - Function *F = - cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer)); - BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, - Flags, TypeMapper, Materializer)); - return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); - } - + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return mapBlockAddress(*BA); + + auto mapValueOrNull = [this](Value *V) { + auto Mapped = mapValue(V); + assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) && + "Unexpected null mapping for constant operand without " + "NullMapMissingGlobalValues flag"); + return Mapped; + }; + // Otherwise, we have some other constant to remap. Start by checking to see // if all operands have an identity remapping. unsigned OpNo = 0, NumOperands = C->getNumOperands(); Value *Mapped = nullptr; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); - Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); - if (Mapped != C) break; + Mapped = mapValueOrNull(Op); + if (!Mapped) + return nullptr; + if (Mapped != Op) + break; } - + // See if the type mapper wants to remap the type as well. Type *NewTy = C->getType(); if (TypeMapper) @@ -126,23 +444,26 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // If the result type and all operands match up, then just insert an identity // mapping. if (OpNo == NumOperands && NewTy == C->getType()) - return VM[V] = C; - + return getVM()[V] = C; + // Okay, we need to create a new constant. We've already processed some or // all of the operands, set them all up now. SmallVector<Constant*, 8> Ops; Ops.reserve(NumOperands); for (unsigned j = 0; j != OpNo; ++j) Ops.push_back(cast<Constant>(C->getOperand(j))); - + // If one of the operands mismatch, push it and the other mapped operands. if (OpNo != NumOperands) { Ops.push_back(cast<Constant>(Mapped)); - + // Map the rest of the operands that aren't processed yet. 
- for (++OpNo; OpNo != NumOperands; ++OpNo) - Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, - Flags, TypeMapper, Materializer)); + for (++OpNo; OpNo != NumOperands; ++OpNo) { + Mapped = mapValueOrNull(C->getOperand(OpNo)); + if (!Mapped) + return nullptr; + Ops.push_back(cast<Constant>(Mapped)); + } } Type *NewSrcTy = nullptr; if (TypeMapper) @@ -150,309 +471,407 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); + return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); if (isa<ConstantArray>(C)) - return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); if (isa<ConstantStruct>(C)) - return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); if (isa<ConstantVector>(C)) - return VM[V] = ConstantVector::get(Ops); + return getVM()[V] = ConstantVector::get(Ops); // If this is a no-operand constant, it must be because the type was remapped. if (isa<UndefValue>(C)) - return VM[V] = UndefValue::get(NewTy); + return getVM()[V] = UndefValue::get(NewTy); if (isa<ConstantAggregateZero>(C)) - return VM[V] = ConstantAggregateZero::get(NewTy); + return getVM()[V] = ConstantAggregateZero::get(NewTy); assert(isa<ConstantPointerNull>(C)); - return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); -} - -static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key, - Metadata *Val, ValueMaterializer *Materializer, - RemapFlags Flags) { - VM.MD()[Key].reset(Val); - if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) { - auto *N = dyn_cast_or_null<MDNode>(Val); - // Need to invoke this once we have non-temporary MD. - if (!N || !N->isTemporary()) - Materializer->replaceTemporaryMetadata(Key, Val); + return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +Value *Mapper::mapBlockAddress(const BlockAddress &BA) { + Function *F = cast<Function>(mapValue(BA.getFunction())); + + // F may not have materialized its initializer. In that case, create a + // dummy basic block for now, and replace it once we've materialized all + // the initializers. + BasicBlock *BB; + if (F->empty()) { + DelayedBBs.push_back(DelayedBasicBlock(BA)); + BB = DelayedBBs.back().TempBB.get(); + } else { + BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock())); } - return Val; + + return getVM()[&BA] = BlockAddress::get(F, BB ? 
BB : BA.getBasicBlock()); } -static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD, - ValueMaterializer *Materializer, RemapFlags Flags) { - return mapToMetadata(VM, MD, const_cast<Metadata *>(MD), Materializer, Flags); +Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) { + getVM().MD()[Key].reset(Val); + return Val; } -static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer); +Metadata *Mapper::mapToSelf(const Metadata *MD) { + return mapToMetadata(MD, const_cast<Metadata *>(MD)); +} -static Metadata *mapMetadataOp(Metadata *Op, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { +Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { if (!Op) return nullptr; - if (Materializer && !Materializer->isMetadataNeeded(Op)) + if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) { +#ifndef NDEBUG + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + assert((!*MappedOp || M.getVM().count(CMD->getValue()) || + M.getVM().getMappedMD(Op)) && + "Expected Value to be memoized"); + else + assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) && + "Expected result to be memoized"); +#endif + return *MappedOp; + } + + const MDNode &N = *cast<MDNode>(Op); + if (N.isDistinct()) + return mapDistinctNode(N); + return None; +} + +MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { + assert(N.isDistinct() && "Expected a distinct node"); + assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); + DistinctWorklist.push_back(cast<MDNode>( + (M.Flags & RF_MoveDistinctMDs) + ? M.mapToSelf(&N) + : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone())))); + return DistinctWorklist.back(); +} + +static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD, + Value *MappedV) { + if (CMD.getValue() == MappedV) + return const_cast<ConstantAsMetadata *>(&CMD); + return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr; +} + +Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const { + if (!Op) return nullptr; - if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags, - TypeMapper, Materializer)) - return MappedOp; - // Use identity map if MappedOp is null and we can ignore missing entries. - if (Flags & RF_IgnoreMissingEntries) + if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op)) + return *MappedOp; + + if (isa<MDString>(Op)) + return const_cast<Metadata *>(Op); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) + return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue())); + + return None; +} + +Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) { + auto Where = Info.find(&Op); + assert(Where != Info.end() && "Expected a valid reference"); + + auto &OpD = Where->second; + if (!OpD.HasChanged) return Op; - // FIXME: This assert crashes during bootstrap, but I think it should be - // correct. For now, just match behaviour from before the metadata/value - // split. - // - // assert((Flags & RF_NullMapMissingGlobalValues) && - // "Referenced metadata not in value map!"); - return nullptr; + // Lazily construct a temporary node. + if (!OpD.Placeholder) + OpD.Placeholder = Op.clone(); + + return *OpD.Placeholder; } -/// Resolve uniquing cycles involving the given metadata. 
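getFwdReference above hands out a lazily created temporary when an operand inside a uniquing cycle has not been rebuilt yet; once the real replacement exists, every use of the placeholder is patched. A standalone model of that placeholder-then-patch technique (toy Node type, not LLVM metadata; resolve() stands in for replaceAllUsesWith):

```cpp
#include <cassert>
#include <map>
#include <memory>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
};

struct Remapper {
  std::map<Node *, Node *> Mapped;                       // old -> new
  std::map<Node *, std::unique_ptr<Node>> Placeholders;  // old -> temp

  Node *getFwdReference(Node *Old) {
    if (Node *New = Mapped[Old])
      return New; // already rebuilt
    auto &Ph = Placeholders[Old];
    if (!Ph)
      Ph.reset(new Node()); // lazily construct the temporary stand-in
    return Ph.get();
  }

  // Once Old's replacement is built, redirect every use of the placeholder.
  void resolve(Node *Old, Node *New, std::vector<Node *> &AllNodes) {
    Mapped[Old] = New;
    auto It = Placeholders.find(Old);
    if (It == Placeholders.end())
      return;
    for (Node *N : AllNodes)
      for (Node *&Op : N->Ops)
        if (Op == It->second.get())
          Op = New;
    Placeholders.erase(It);
  }
};

int main() {
  Node A, B; // old cycle: A -> B -> A
  A.Ops = {&B};
  B.Ops = {&A};
  Remapper R;
  Node NewA;
  NewA.Ops = {R.getFwdReference(&B)}; // B not rebuilt yet: placeholder
  Node NewB;
  NewB.Ops = {&NewA};
  std::vector<Node *> All = {&NewA, &NewB};
  R.resolve(&B, &NewB, All);
  assert(NewA.Ops[0] == &NewB); // placeholder patched to the real node
}
```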
-static void resolveCycles(Metadata *MD, bool AllowTemps) { - if (auto *N = dyn_cast_or_null<MDNode>(MD)) { - if (AllowTemps && N->isTemporary()) - return; - if (!N->isResolved()) { - if (AllowTemps) - // Note that this will drop RAUW support on any temporaries, which - // blocks uniquing. If this ends up being an issue, in the future - // we can experiment with delaying resolving these nodes until - // after metadata is fully materialized (i.e. when linking metadata - // as a postpass after function importing). - N->resolveNonTemporaries(); - else - N->resolveCycles(); - } +template <class OperandMapper> +void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { + assert(!N.isUniqued() && "Expected distinct or temporary nodes"); + for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) { + Metadata *Old = N.getOperand(I); + Metadata *New = mapOperand(Old); + + if (Old != New) + N.replaceOperandWith(I, New); } } -/// Remap the operands of an MDNode. -/// -/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is -/// distinct, uniquing cycles are resolved as they're found. -/// -/// \pre \c Node.isDistinct() or \c Node.isTemporary(). -static bool remapOperands(MDNode &Node, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(!Node.isUniqued() && "Expected temporary or distinct node"); - const bool IsDistinct = Node.isDistinct(); - - bool AnyChanged = false; - for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) { - Metadata *Old = Node.getOperand(I); - Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper, - Materializer); - if (Old != New) { - AnyChanged = true; - Node.replaceOperandWith(I, New); - - // Resolve uniquing cycles underneath distinct nodes on the fly so they - // don't infect later operands. - if (IsDistinct) - resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata); +namespace { +/// An entry in the worklist for the post-order traversal. +struct POTWorklistEntry { + MDNode *N; ///< Current node. + MDNode::op_iterator Op; ///< Current operand of \c N. + + /// Keep a flag of whether operands have changed in the worklist to avoid + /// hitting the map in \a UniquedGraph. + bool HasChanged = false; + + POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {} +}; +} // end namespace + +bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) { + assert(G.Info.empty() && "Expected a fresh traversal"); + assert(FirstN.isUniqued() && "Expected uniqued node in POT"); + + // Construct a post-order traversal of the uniqued subgraph under FirstN. + bool AnyChanges = false; + SmallVector<POTWorklistEntry, 16> Worklist; + Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN))); + (void)G.Info[&FirstN]; + while (!Worklist.empty()) { + // Start or continue the traversal through the this node's operands. + auto &WE = Worklist.back(); + if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) { + // Push a new node to traverse first. + Worklist.push_back(POTWorklistEntry(*N)); + continue; } + + // Push the node onto the POT. + assert(WE.N->isUniqued() && "Expected only uniqued nodes"); + assert(WE.Op == WE.N->op_end() && "Expected to visit all operands"); + auto &D = G.Info[WE.N]; + AnyChanges |= D.HasChanged = WE.HasChanged; + D.ID = G.POT.size(); + G.POT.push_back(WE.N); + + // Pop the node off the worklist. 
+ Worklist.pop_back(); } + return AnyChanges; +} - return AnyChanged; -} - -/// Map a distinct MDNode. -/// -/// Whether distinct nodes change is independent of their operands. If \a -/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in -/// place; effectively, they're moved from one graph to another. Otherwise, -/// they're cloned/duplicated, and the new copy's operands are remapped. -static Metadata *mapDistinctNode(const MDNode *Node, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(Node->isDistinct() && "Expected distinct node"); - - MDNode *NewMD; - if (Flags & RF_MoveDistinctMDs) - NewMD = const_cast<MDNode *>(Node); - else - NewMD = MDNode::replaceWithDistinct(Node->clone()); - - // Remap operands later. - DistinctWorklist.push_back(NewMD); - return mapToMetadata(VM, Node, NewMD, Materializer, Flags); -} - -/// \brief Map a uniqued MDNode. -/// -/// Uniqued nodes may not need to be recreated (they may map to themselves). -static Metadata *mapUniquedNode(const MDNode *Node, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) && - "Expected uniqued node"); - - // Create a temporary node and map it upfront in case we have a uniquing - // cycle. If necessary, this mapping will get updated by RAUW logic before - // returning. - auto ClonedMD = Node->clone(); - mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags); - if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper, - Materializer)) { - // No operands changed, so use the original. - ClonedMD->replaceAllUsesWith(const_cast<MDNode *>(Node)); - // Even though replaceAllUsesWith would have replaced the value map - // entry, we need to explictly map with the final non-temporary node - // to replace any temporary metadata via the callback. - return mapToSelf(VM, Node, Materializer, Flags); +MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I, + MDNode::op_iterator E, bool &HasChanged) { + while (I != E) { + Metadata *Op = *I++; // Increment even on early return. + if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) { + // Check if the operand changes. + HasChanged |= Op != *MappedOp; + continue; + } + + // A uniqued metadata node. + MDNode &OpN = *cast<MDNode>(Op); + assert(OpN.isUniqued() && + "Only uniqued operands cannot be mapped immediately"); + if (G.Info.insert(std::make_pair(&OpN, Data())).second) + return &OpN; // This is a new one. Return it. } + return nullptr; +} - // Uniquify the cloned node. Explicitly map it with the final non-temporary - // node so that replacement of temporary metadata via the callback occurs. 
- return mapToMetadata(VM, Node, - MDNode::replaceWithUniqued(std::move(ClonedMD)), - Materializer, Flags); +void MDNodeMapper::UniquedGraph::propagateChanges() { + bool AnyChanges; + do { + AnyChanges = false; + for (MDNode *N : POT) { + auto &D = Info[N]; + if (D.HasChanged) + continue; + + if (!llvm::any_of(N->operands(), [&](const Metadata *Op) { + auto Where = Info.find(Op); + return Where != Info.end() && Where->second.HasChanged; + })) + continue; + + AnyChanges = D.HasChanged = true; + } + } while (AnyChanges); } -static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<MDNode *> &DistinctWorklist, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - // If the value already exists in the map, use it. - if (Metadata *NewMD = VM.MD().lookup(MD).get()) - return NewMD; +void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { + // Construct uniqued nodes, building forward references as necessary. + SmallVector<MDNode *, 16> CyclicNodes; + for (auto *N : G.POT) { + auto &D = G.Info[N]; + if (!D.HasChanged) { + // The node hasn't changed. + M.mapToSelf(N); + continue; + } - if (isa<MDString>(MD)) - return mapToSelf(VM, MD, Materializer, Flags); - - if (isa<ConstantAsMetadata>(MD)) - if ((Flags & RF_NoModuleLevelChanges)) - return mapToSelf(VM, MD, Materializer, Flags); - - if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) { - Value *MappedV = - MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer); - if (VMD->getValue() == MappedV || - (!MappedV && (Flags & RF_IgnoreMissingEntries))) - return mapToSelf(VM, MD, Materializer, Flags); - - // FIXME: This assert crashes during bootstrap, but I think it should be - // correct. For now, just match behaviour from before the metadata/value - // split. - // - // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) && - // "Referenced metadata not in value map!"); - if (MappedV) - return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer, - Flags); - return nullptr; + // Remember whether this node had a placeholder. + bool HadPlaceholder(D.Placeholder); + + // Clone the uniqued node and remap the operands. + TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone(); + remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) { + if (Optional<Metadata *> MappedOp = getMappedOp(Old)) + return *MappedOp; + assert(G.Info[Old].ID > D.ID && "Expected a forward reference"); + return &G.getFwdReference(*cast<MDNode>(Old)); + }); + + auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN)); + M.mapToMetadata(N, NewN); + + // Nodes that were referenced out of order in the POT are involved in a + // uniquing cycle. + if (HadPlaceholder) + CyclicNodes.push_back(NewN); } - // Note: this cast precedes the Flags check so we always get its associated - // assertion. - const MDNode *Node = cast<MDNode>(MD); + // Resolve cycles. + for (auto *N : CyclicNodes) + if (!N->isResolved()) + N->resolveCycles(); +} - // If this is a module-level metadata and we know that nothing at the - // module level is changing, then use an identity mapping. - if (Flags & RF_NoModuleLevelChanges) - return mapToSelf(VM, MD, Materializer, Flags); +Metadata *MDNodeMapper::map(const MDNode &N) { + assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive"); + assert(!(M.Flags & RF_NoModuleLevelChanges) && + "MDNodeMapper::map assumes module-level changes"); // Require resolved nodes whenever metadata might be remapped. 
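propagateChanges above iterates the post-order list to a fixed point so that a uniqued node is rebuilt iff anything it transitively references changed. A standalone model of that loop (indices stand in for metadata operands; with a true post-order one pass usually suffices, and the outer loop guards the general case):

```cpp
#include <cassert>
#include <vector>

struct POTNode {
  std::vector<int> Ops;    // indices of operands within the POT
  bool HasChanged = false; // does this node need to be rebuilt?
};

void propagateChanges(std::vector<POTNode> &POT) {
  bool AnyChanges;
  do {
    AnyChanges = false;
    for (POTNode &N : POT) {
      if (N.HasChanged)
        continue;
      for (int Op : N.Ops)
        if (POT[Op].HasChanged) { // a changed operand taints this node
          AnyChanges = N.HasChanged = true;
          break;
        }
    }
  } while (AnyChanges); // stop at the fixed point
}

int main() {
  // Chain 0 <- 1 <- 2: marking node 0 changed must propagate to 1 and 2.
  std::vector<POTNode> POT(3);
  POT[1].Ops = {0};
  POT[2].Ops = {1};
  POT[0].HasChanged = true;
  propagateChanges(POT);
  assert(POT[1].HasChanged && POT[2].HasChanged);
}
```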
- assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) && - "Unexpected unresolved node"); - - if (Materializer && Node->isTemporary()) { - assert(Flags & RF_HaveUnmaterializedMetadata); - Metadata *TempMD = - Materializer->mapTemporaryMetadata(const_cast<Metadata *>(MD)); - // If the above callback returned an existing temporary node, use it - // instead of the current temporary node. This happens when earlier - // function importing passes already created and saved a temporary - // metadata node for the same value id. - if (TempMD) { - mapToMetadata(VM, MD, TempMD, Materializer, Flags); - return TempMD; - } + assert(N.isResolved() && "Unexpected unresolved node"); + + Metadata *MappedN = + N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N); + while (!DistinctWorklist.empty()) + remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) { + if (Optional<Metadata *> MappedOp = tryToMapOperand(Old)) + return *MappedOp; + return mapTopLevelUniquedNode(*cast<MDNode>(Old)); + }); + return MappedN; +} + +Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { + assert(FirstN.isUniqued() && "Expected uniqued node"); + + // Create a post-order traversal of uniqued nodes under FirstN. + UniquedGraph G; + if (!createPOT(G, FirstN)) { + // Return early if no nodes have changed. + for (const MDNode *N : G.POT) + M.mapToSelf(N); + return &const_cast<MDNode &>(FirstN); } - if (Node->isDistinct()) - return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper, - Materializer); + // Update graph with all nodes that have changed. + G.propagateChanges(); - return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper, - Materializer); + // Map all the nodes in the graph. + mapNodesInPOT(G); + + // Return the original node, remapped. + return *getMappedOp(&FirstN); } -Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - SmallVector<MDNode *, 8> DistinctWorklist; - Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper, - Materializer); +namespace { - // When there are no module-level changes, it's possible that the metadata - // graph has temporaries. Skip the logic to resolve cycles, since it's - // unnecessary (and invalid) in that case. - if (Flags & RF_NoModuleLevelChanges) - return NewMD; +struct MapMetadataDisabler { + ValueToValueMapTy &VM; - // Resolve cycles involving the entry metadata. - resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata); + MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { + VM.disableMapMetadata(); + } + ~MapMetadataDisabler() { VM.enableMapMetadata(); } +}; - // Remap the operands of distinct MDNodes. - while (!DistinctWorklist.empty()) - remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags, - TypeMapper, Materializer); +} // end namespace - return NewMD; +Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) { + // If the value already exists in the map, use it. + if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD)) + return *NewMD; + + if (isa<MDString>(MD)) + return const_cast<Metadata *>(MD); + + // This is a module-level metadata. If nothing at the module level is + // changing, use an identity mapping. + if ((Flags & RF_NoModuleLevelChanges)) + return const_cast<Metadata *>(MD); + + if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) { + // Disallow recursion into metadata mapping through mapValue. 
+ MapMetadataDisabler MMD(getVM()); + + // Don't memoize ConstantAsMetadata. Instead of lasting until the + // LLVMContext is destroyed, they can be deleted when the GlobalValue they + // reference is destructed. These aren't super common, so the extra + // indirection isn't that expensive. + return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue())); + } + + assert(isa<MDNode>(MD) && "Expected a metadata node"); + + return None; } -MDNode *llvm::MapMetadata(const MDNode *MD, ValueToValueMapTy &VM, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - return cast<MDNode>(MapMetadata(static_cast<const Metadata *>(MD), VM, Flags, - TypeMapper, Materializer)); +Metadata *Mapper::mapMetadata(const Metadata *MD) { + assert(MD && "Expected valid metadata"); + assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata"); + + if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD)) + return *NewMD; + + return MDNodeMapper(*this).map(*cast<MDNode>(MD)); +} + +void Mapper::flush() { + // Flush out the worklist of global values. + while (!Worklist.empty()) { + WorklistEntry E = Worklist.pop_back_val(); + CurrentMCID = E.MCID; + switch (E.Kind) { + case WorklistEntry::MapGlobalInit: + E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); + break; + case WorklistEntry::MapAppendingVar: { + unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; + mapAppendingVariable(*E.Data.AppendingGV.GV, + E.Data.AppendingGV.InitPrefix, + E.AppendingGVIsOldCtorDtor, + makeArrayRef(AppendingInits).slice(PrefixSize)); + AppendingInits.resize(PrefixSize); + break; + } + case WorklistEntry::MapGlobalAliasee: + E.Data.GlobalAliasee.GA->setAliasee( + mapConstant(E.Data.GlobalAliasee.Aliasee)); + break; + case WorklistEntry::RemapFunction: + remapFunction(*E.Data.RemapF); + break; + } + } + CurrentMCID = 0; + + // Finish logic for block addresses now that all global values have been + // handled. + while (!DelayedBBs.empty()) { + DelayedBasicBlock DBB = DelayedBBs.pop_back_val(); + BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB)); + DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB); + } } -/// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by VMap. -/// -void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer){ +void Mapper::remapInstruction(Instruction *I) { // Remap operands. - for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); + for (Use &Op : I->operands()) { + Value *V = mapValue(Op); // If we aren't ignoring missing entries, assert that something happened. if (V) - *op = V; + Op = V; else - assert((Flags & RF_IgnoreMissingEntries) && + assert((Flags & RF_IgnoreMissingLocals) && "Referenced value not in value map!"); } // Remap phi nodes' incoming blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); + Value *V = mapValue(PN->getIncomingBlock(i)); // If we aren't ignoring missing entries, assert that something happened. 
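Mapper::flush(), added above, drains a LIFO worklist of deferred jobs (global initializers, appending variables, aliasees, whole-function remaps) and only afterwards patches up the delayed block addresses. Reduced to a standalone shape (hypothetical names; not the patch's types):

#include <functional>
#include <vector>

// Deferred-work pattern: record cheap job descriptions now, drain them LIFO
// later, so no job starts in the middle of another mapping step.
struct DeferredWork {
  std::vector<std::function<void()>> Worklist;
  std::vector<std::function<void()>> DelayedFixups;

  void flush() {
    // Jobs may schedule further jobs; keep draining until the list is empty.
    while (!Worklist.empty()) {
      auto Job = std::move(Worklist.back());
      Worklist.pop_back();
      Job();
    }
    // Fixups (like the delayed basic blocks above) run only once every
    // scheduled job has finished.
    for (auto &Fix : DelayedFixups)
      Fix();
    DelayedFixups.clear();
  }
};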
if (V) PN->setIncomingBlock(i, cast<BasicBlock>(V)); else - assert((Flags & RF_IgnoreMissingEntries) && + assert((Flags & RF_IgnoreMissingLocals) && "Referenced block not in value map!"); } } @@ -462,11 +881,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, I->getAllMetadata(MDs); for (const auto &MI : MDs) { MDNode *Old = MI.second; - MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer); + MDNode *New = cast_or_null<MDNode>(mapMetadata(Old)); if (New != Old) I->setMetadata(MI.first, New); } - + if (!TypeMapper) return; @@ -491,3 +910,213 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, } I->mutateType(TypeMapper->remapType(I->getType())); } + +void Mapper::remapFunction(Function &F) { + // Remap the operands. + for (Use &Op : F.operands()) + if (Op) + Op = mapValue(Op); + + // Remap the metadata attachments. + SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; + F.getAllMetadata(MDs); + F.clearMetadata(); + for (const auto &I : MDs) + F.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); + + // Remap the argument types. + if (TypeMapper) + for (Argument &A : F.args()) + A.mutateType(TypeMapper->remapType(A.getType())); + + // Remap the instructions. + for (BasicBlock &BB : F) + for (Instruction &I : BB) + remapInstruction(&I); +} + +void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers) { + SmallVector<Constant *, 16> Elements; + if (InitPrefix) { + unsigned NumElements = + cast<ArrayType>(InitPrefix->getType())->getNumElements(); + for (unsigned I = 0; I != NumElements; ++I) + Elements.push_back(InitPrefix->getAggregateElement(I)); + } + + PointerType *VoidPtrTy; + Type *EltTy; + if (IsOldCtorDtor) { + // FIXME: This upgrade is done during linking to support the C API. See + // also IRLinker::linkAppendingVarProto() in IRMover.cpp. 
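mapAppendingVariable(), which this hunk continues, rebuilds an appending global's initializer as the old prefix elements followed by the remapped new members. Stripped of the LLVM constant types, the concatenation step is (hypothetical helper, for illustration only):

#include <vector>

// Copy the prefix, then append each new member after running it through the
// mapping function; the caller installs the result as the new initializer.
template <typename T, typename MapFn>
std::vector<T> rebuildAppendingInit(const std::vector<T> &Prefix,
                                    const std::vector<T> &NewMembers,
                                    MapFn MapMember) {
  std::vector<T> Elements(Prefix.begin(), Prefix.end());
  for (const T &M : NewMembers)
    Elements.push_back(MapMember(M));
  return Elements;
}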
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo(); + auto &ST = *cast<StructType>(NewMembers.front()->getType()); + Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; + EltTy = StructType::get(GV.getContext(), Tys, false); + } + + for (auto *V : NewMembers) { + Constant *NewV; + if (IsOldCtorDtor) { + auto *S = cast<ConstantStruct>(V); + auto *E1 = mapValue(S->getOperand(0)); + auto *E2 = mapValue(S->getOperand(1)); + Value *Null = Constant::getNullValue(VoidPtrTy); + NewV = + ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr); + } else { + NewV = cast_or_null<Constant>(mapValue(V)); + } + Elements.push_back(NewV); + } + + GV.setInitializer(ConstantArray::get( + cast<ArrayType>(GV.getType()->getElementType()), Elements)); +} + +void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapGlobalInit; + WE.MCID = MCID; + WE.Data.GVInit.GV = &GV; + WE.Data.GVInit.Init = &Init; + Worklist.push_back(WE); +} + +void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, + Constant *InitPrefix, + bool IsOldCtorDtor, + ArrayRef<Constant *> NewMembers, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapAppendingVar; + WE.MCID = MCID; + WE.Data.AppendingGV.GV = &GV; + WE.Data.AppendingGV.InitPrefix = InitPrefix; + WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; + WE.AppendingGVNumNewMembers = NewMembers.size(); + Worklist.push_back(WE); + AppendingInits.append(NewMembers.begin(), NewMembers.end()); +} + +void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, + unsigned MCID) { + assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::MapGlobalAliasee; + WE.MCID = MCID; + WE.Data.GlobalAliasee.GA = &GA; + WE.Data.GlobalAliasee.Aliasee = &Aliasee; + Worklist.push_back(WE); +} + +void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) { + assert(AlreadyScheduled.insert(&F).second && "Should not reschedule"); + assert(MCID < MCs.size() && "Invalid mapping context"); + + WorklistEntry WE; + WE.Kind = WorklistEntry::RemapFunction; + WE.MCID = MCID; + WE.Data.RemapF = &F; + Worklist.push_back(WE); +} + +void Mapper::addFlags(RemapFlags Flags) { + assert(!hasWorkToDo() && "Expected to have flushed the worklist"); + this->Flags = this->Flags | Flags; +} + +static Mapper *getAsMapper(void *pImpl) { + return reinterpret_cast<Mapper *>(pImpl); +} + +namespace { + +class FlushingMapper { + Mapper &M; + +public: + explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) { + assert(!M.hasWorkToDo() && "Expected to be flushed"); + } + ~FlushingMapper() { M.flush(); } + Mapper *operator->() const { return &M; } +}; + +} // end namespace + +ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) + : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {} + +ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); } + +unsigned +ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM, + ValueMaterializer *Materializer) { + return 
getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer);
+}
+
+void ValueMapper::addFlags(RemapFlags Flags) {
+  FlushingMapper(pImpl)->addFlags(Flags);
+}
+
+Value *ValueMapper::mapValue(const Value &V) {
+  return FlushingMapper(pImpl)->mapValue(&V);
+}
+
+Constant *ValueMapper::mapConstant(const Constant &C) {
+  return cast_or_null<Constant>(mapValue(C));
+}
+
+Metadata *ValueMapper::mapMetadata(const Metadata &MD) {
+  return FlushingMapper(pImpl)->mapMetadata(&MD);
+}
+
+MDNode *ValueMapper::mapMDNode(const MDNode &N) {
+  return cast_or_null<MDNode>(mapMetadata(N));
+}
+
+void ValueMapper::remapInstruction(Instruction &I) {
+  FlushingMapper(pImpl)->remapInstruction(&I);
+}
+
+void ValueMapper::remapFunction(Function &F) {
+  FlushingMapper(pImpl)->remapFunction(F);
+}
+
+void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
+                                               Constant &Init,
+                                               unsigned MCID) {
+  getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID);
+}
+
+void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+                                               Constant *InitPrefix,
+                                               bool IsOldCtorDtor,
+                                               ArrayRef<Constant *> NewMembers,
+                                               unsigned MCID) {
+  getAsMapper(pImpl)->scheduleMapAppendingVariable(
+      GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+                                           unsigned MCID) {
+  getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+  getAsMapper(pImpl)->scheduleRemapFunction(F, MCID);
+}
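Taken together, the facade above gives two modes of use: the map*/remap* entry points flush all pending work before returning (FlushingMapper guarantees this in its destructor), while the schedule* entry points defer work until the next flushing call. A plausible client-side sketch, assuming only the interface shown in this patch (remapClone and its setup are invented for illustration):

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Remap a cloned function's body through the new ValueMapper facade.  VMap
// is assumed to already map the original values to their clones.
static void remapClone(Function &NewF, ValueToValueMapTy &VMap) {
  ValueMapper VM(VMap, RF_IgnoreMissingLocals, /*TypeMapper=*/nullptr,
                 /*Materializer=*/nullptr);

  // One call remaps operands, metadata attachments, and instructions; any
  // work it schedules internally is drained before the call returns.
  VM.remapFunction(NewF);
}

Clients that batch many globals (as the IRMover referenced in the FIXME above does) would instead call the schedule* methods, possibly with a mapping-context ID from registerAlternateMappingContext(), and rely on the next mapping call to flush the worklist.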