diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
55 files changed, 6045 insertions, 3153 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp index 84a66e1e96d2..ccdcf7cbce38 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -17,9 +17,6 @@ #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IRBuilder.h" - -#include <iostream> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index 7ff73fcdada7..3daff3b4430b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -52,6 +52,7 @@ namespace { bool isUsefullToPreserve(Attribute::AttrKind Kind) { switch (Kind) { case Attribute::NonNull: + case Attribute::NoUndef: case Attribute::Alignment: case Attribute::Dereferenceable: case Attribute::DereferenceableOrNull: @@ -69,7 +70,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) { default: return RK; case Attribute::NonNull: - RK.WasOn = GetUnderlyingObject(RK.WasOn, M->getDataLayout()); + RK.WasOn = getUnderlyingObject(RK.WasOn); return RK; case Attribute::Alignment: { Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) { @@ -145,7 +146,7 @@ struct AssumeBuilderState { if (!RK.WasOn) return true; if (RK.WasOn->getType()->isPointerTy()) { - Value *UnderlyingPtr = GetUnderlyingObject(RK.WasOn, M->getDataLayout()); + Value *UnderlyingPtr = getUnderlyingObject(RK.WasOn); if (isa<AllocaInst>(UnderlyingPtr) || isa<GlobalValue>(UnderlyingPtr)) return false; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 085d91031cf9..6bcd42c4c6d8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -105,7 +105,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU, DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs); if (DTU) - DTU->applyUpdatesPermissive(Updates); + DTU->applyUpdates(Updates); for (BasicBlock *BB : BBs) if (DTU) @@ -136,9 +136,10 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU, return !DeadBlocks.empty(); } -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, +bool llvm::FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep) { - if (!isa<PHINode>(BB->begin())) return; + if (!isa<PHINode>(BB->begin())) + return false; while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { if (PN->getIncomingValue(0) != PN) @@ -151,6 +152,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, PN->eraseFromParent(); } + return true; } bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI, @@ -228,19 +230,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // These dominator edges will be redirected from Pred. std::vector<DominatorTree::UpdateType> Updates; if (DTU) { - Updates.reserve(1 + (2 * succ_size(BB))); + SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB), + succ_end(BB)); + Updates.reserve(1 + (2 * UniqueSuccessors.size())); // Add insert edges first. Experimentally, for the particular case of two // blocks that can be merged, with a single successor and single predecessor // respectively, it is beneficial to have all insert updates first. Deleting // edges first may lead to unreachable blocks, followed by inserting edges // making the blocks reachable again. Such DT updates lead to high compile // times. We add inserts before deletes here to reduce compile time. - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + for (BasicBlock *UniqueSuccessor : UniqueSuccessors) // This successor of BB may already have PredBB as a predecessor. - if (llvm::find(successors(PredBB), *I) == succ_end(PredBB)) - Updates.push_back({DominatorTree::Insert, PredBB, *I}); - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - Updates.push_back({DominatorTree::Delete, BB, *I}); + if (!llvm::is_contained(successors(PredBB), UniqueSuccessor)) + Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor}); + for (BasicBlock *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); } @@ -285,11 +289,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Add unreachable to now empty BB. new UnreachableInst(BB->getContext(), BB); - // Eliminate duplicate/redundant dbg.values. This seems to be a good place to - // do that since we might end up with redundant dbg.values describing the - // entry PHI node post-splice. - RemoveRedundantDbgInstrs(PredBB); - // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); @@ -306,7 +305,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, isa<UnreachableInst>(BB->getTerminator()) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); - DTU->applyUpdatesPermissive(Updates); + DTU->applyUpdates(Updates); DTU->deleteBB(BB); } else { BB->eraseFromParent(); // Nuke BB if DTU is nullptr. @@ -498,14 +497,16 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { } BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, - LoopInfo *LI, MemorySSAUpdater *MSSAU) { + LoopInfo *LI, MemorySSAUpdater *MSSAU, + const Twine &BBName) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. Instruction *LatchTerm = BB->getTerminator(); if (SplitCriticalEdge( LatchTerm, SuccNum, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA())) + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(), + BBName)) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a @@ -515,14 +516,15 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU); + return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName, + /*Before=*/true); } // Otherwise, if BB has a single successor, split it at the bottom of the // block. assert(BB->getTerminator()->getNumSuccessors() == 1 && "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU); + return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName); } unsigned @@ -540,9 +542,16 @@ llvm::SplitAllCriticalEdges(Function &F, return NumBroken; } -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, const Twine &BBName) { +static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + const Twine &BBName, bool Before) { + if (Before) { + DomTreeUpdater LocalDTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + return splitBlockBefore(Old, SplitPt, + DTU ? DTU : (DT ? &LocalDTU : nullptr), LI, MSSAU, + BBName); + } BasicBlock::iterator SplitIt = SplitPt->getIterator(); while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) ++SplitIt; @@ -556,7 +565,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, *LI); - if (DT) + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + // Old dominates New. New node dominates all other nodes dominated by Old. + SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New), + succ_end(New)); + Updates.push_back({DominatorTree::Insert, Old, New}); + Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size()); + for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) { + Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld}); + Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld}); + } + + DTU->applyUpdates(Updates); + } else if (DT) // Old dominates New. New node dominates all other nodes dominated by Old. if (DomTreeNode *OldNode = DT->getNode(Old)) { std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); @@ -574,14 +596,94 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, return New; } +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, const Twine &BBName, + bool Before) { + return SplitBlockImpl(Old, SplitPt, /*DTU=*/nullptr, DT, LI, MSSAU, BBName, + Before); +} +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, const Twine &BBName, + bool Before) { + return SplitBlockImpl(Old, SplitPt, DTU, /*DT=*/nullptr, LI, MSSAU, BBName, + Before); +} + +BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + const Twine &BBName) { + + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) + ++SplitIt; + std::string Name = BBName.str(); + BasicBlock *New = Old->splitBasicBlock( + SplitIt, Name.empty() ? Old->getName() + ".split" : Name, + /* Before=*/true); + + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. + if (LI) + if (Loop *L = LI->getLoopFor(Old)) + L->addBasicBlockToLoop(New, *LI); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> DTUpdates; + // New dominates Old. The predecessor nodes of the Old node dominate + // New node. + SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New), + pred_end(New)); + DTUpdates.push_back({DominatorTree::Insert, New, Old}); + DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size()); + for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) { + DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New}); + DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old}); + } + + DTU->applyUpdates(DTUpdates); + + // Move MemoryAccesses still tracked in Old, but part of New now. + // Update accesses in successor blocks accordingly. + if (MSSAU) { + MSSAU->applyUpdates(DTUpdates, DTU->getDomTree()); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } + } + return New; +} + /// Update DominatorTree, LoopInfo, and LCCSA analysis information. static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA, bool &HasLoopExit) { // Update dominator tree if available. - if (DT) { + if (DTU) { + // Recalculation of DomTree is needed when updating a forward DomTree and + // the Entry BB is replaced. + if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) { + // The entry block was removed and there is no external interface for + // the dominator tree to be notified of this change. In this corner-case + // we recalculate the entire tree. + DTU->recalculate(*NewBB->getParent()); + } else { + // Split block expects NewBB to have a non-empty set of predecessors. + SmallVector<DominatorTree::UpdateType, 8> Updates; + SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end()); + Updates.push_back({DominatorTree::Insert, NewBB, OldBB}); + Updates.reserve(Updates.size() + 2 * UniquePreds.size()); + for (auto *UniquePred : UniquePreds) { + Updates.push_back({DominatorTree::Insert, UniquePred, NewBB}); + Updates.push_back({DominatorTree::Delete, UniquePred, OldBB}); + } + DTU->applyUpdates(Updates); + } + } else if (DT) { if (OldBB == DT->getRootNode()->getBlock()) { assert(NewBB == &NewBB->getParent()->getEntryBlock()); DT->setNewRoot(NewBB); @@ -599,6 +701,8 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!LI) return; + if (DTU && DTU->hasDomTree()) + DT = &DTU->getDomTree(); assert(DT && "DT should be available to update LoopInfo!"); Loop *L = LI->getLoopFor(OldBB); @@ -732,11 +836,17 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, } } -BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - ArrayRef<BasicBlock *> Preds, - const char *Suffix, DominatorTree *DT, - LoopInfo *LI, MemorySSAUpdater *MSSAU, - bool PreserveLCSSA) { +static void SplitLandingPadPredecessorsImpl( + BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, + const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA); + +static BasicBlock * +SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + const char *Suffix, DomTreeUpdater *DTU, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { // Do not attempt to split that which cannot be split. if (!BB->canSplitPredecessors()) return nullptr; @@ -747,8 +857,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, SmallVector<BasicBlock*, 2> NewBBs; std::string NewName = std::string(Suffix) + ".split-lp"; - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, - LI, MSSAU, PreserveLCSSA); + SplitLandingPadPredecessorsImpl(BB, Preds, Suffix, NewName.c_str(), NewBBs, + DTU, DT, LI, MSSAU, PreserveLCSSA); return NewBBs[0]; } @@ -758,12 +868,22 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); + + Loop *L = nullptr; + BasicBlock *OldLatch = nullptr; // Splitting the predecessors of a loop header creates a preheader block. - if (LI && LI->isLoopHeader(BB)) + if (LI && LI->isLoopHeader(BB)) { + L = LI->getLoopFor(BB); // Using the loop start line number prevents debuggers stepping into the // loop body for this instruction. - BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); - else + BI->setDebugLoc(L->getStartLoc()); + + // If BB is the header of the Loop, it is possible that the loop is + // modified, such that the current latch does not remain the latch of the + // loop. If that is the case, the loop metadata from the current latch needs + // to be applied to the new latch. + OldLatch = L->getLoopLatch(); + } else BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); // Move the edges from Preds to point to NewBB instead of BB. @@ -790,7 +910,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA, + UpdateAnalysisInformation(BB, NewBB, Preds, DTU, DT, LI, MSSAU, PreserveLCSSA, HasLoopExit); if (!Preds.empty()) { @@ -798,16 +918,41 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); } + if (OldLatch) { + BasicBlock *NewLatch = L->getLoopLatch(); + if (NewLatch != OldLatch) { + MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop"); + NewLatch->getTerminator()->setMetadata("llvm.loop", MD); + OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr); + } + } + return NewBB; } -void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, - ArrayRef<BasicBlock *> Preds, - const char *Suffix1, const char *Suffix2, - SmallVectorImpl<BasicBlock *> &NewBBs, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, - bool PreserveLCSSA) { +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitBlockPredecessorsImpl(BB, Preds, Suffix, /*DTU=*/nullptr, DT, LI, + MSSAU, PreserveLCSSA); +} +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitBlockPredecessorsImpl(BB, Preds, Suffix, DTU, + /*DT=*/nullptr, LI, MSSAU, PreserveLCSSA); +} + +static void SplitLandingPadPredecessorsImpl( + BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, + const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -832,8 +977,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, } bool HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA, - HasLoopExit); + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DTU, DT, LI, MSSAU, + PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); @@ -868,7 +1013,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU, + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DTU, DT, LI, MSSAU, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. @@ -905,6 +1050,29 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, } } +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix1, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitLandingPadPredecessorsImpl( + OrigBB, Preds, Suffix1, Suffix2, NewBBs, + /*DTU=*/nullptr, DT, LI, MSSAU, PreserveLCSSA); +} +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix1, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + return SplitLandingPadPredecessorsImpl(OrigBB, Preds, Suffix1, Suffix2, + NewBBs, DTU, /*DT=*/nullptr, LI, MSSAU, + PreserveLCSSA); +} + ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU) { @@ -964,14 +1132,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, return cast<ReturnInst>(NewRet); } -Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, - Instruction *SplitBefore, - bool Unreachable, - MDNode *BranchWeights, - DominatorTree *DT, LoopInfo *LI, - BasicBlock *ThenBlock) { +static Instruction * +SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore, + bool Unreachable, MDNode *BranchWeights, + DomTreeUpdater *DTU, DominatorTree *DT, + LoopInfo *LI, BasicBlock *ThenBlock) { + SmallVector<DominatorTree::UpdateType, 8> Updates; BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); + if (DTU) { + SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail), + succ_end(Tail)); + Updates.push_back({DominatorTree::Insert, Head, Tail}); + Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size()); + for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) { + Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead}); + Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead}); + } + } Instruction *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); Instruction *CheckTerm; @@ -980,17 +1158,24 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); if (Unreachable) CheckTerm = new UnreachableInst(C, ThenBlock); - else + else { CheckTerm = BranchInst::Create(Tail, ThenBlock); + if (DTU) + Updates.push_back({DominatorTree::Insert, ThenBlock, Tail}); + } CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); } else CheckTerm = ThenBlock->getTerminator(); BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); + BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond); + if (DTU) + Updates.push_back({DominatorTree::Insert, Head, ThenBlock}); HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - if (DT) { + if (DTU) + DTU->applyUpdates(Updates); + else if (DT) { if (DomTreeNode *OldNode = DT->getNode(Head)) { std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); @@ -1016,6 +1201,27 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, return CheckTerm; } +Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DominatorTree *DT, LoopInfo *LI, + BasicBlock *ThenBlock) { + return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable, + BranchWeights, + /*DTU=*/nullptr, DT, LI, ThenBlock); +} +Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DomTreeUpdater *DTU, LoopInfo *LI, + BasicBlock *ThenBlock) { + return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable, + BranchWeights, DTU, /*DT=*/nullptr, LI, + ThenBlock); +} + void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, @@ -1326,11 +1532,11 @@ BasicBlock *llvm::CreateControlFlowHub( SmallVector<DominatorTree::UpdateType, 16> Updates; if (DTU) { for (auto In : Incoming) { + Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock}); for (auto Succ : successors(In)) { if (Outgoing.count(Succ)) Updates.push_back({DominatorTree::Delete, In, Succ}); } - Updates.push_back({DominatorTree::Insert, In, FirstGuardBlock}); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 39fb504cf7b7..939a1a3a868d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -134,9 +134,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, } } -BasicBlock * -llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, - const CriticalEdgeSplittingOptions &Options) { +BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, + const CriticalEdgeSplittingOptions &Options, + const Twine &BBName) { if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) return nullptr; @@ -158,22 +158,21 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, SmallVector<BasicBlock *, 4> LoopPreds; // Check if extra modifications will be required to preserve loop-simplify // form after splitting. If it would require splitting blocks with IndirectBr - // terminators, bail out if preserving loop-simplify form is requested. + // or CallBr terminators, bail out if preserving loop-simplify form is + // requested. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { - // The only that we can break LoopSimplify form by splitting a critical - // edge is if after the split there exists some edge from TIL to DestBB - // *and* the only edge into DestBB from outside of TIL is that of + // The only way that we can break LoopSimplify form by splitting a + // critical edge is if after the split there exists some edge from TIL to + // DestBB *and* the only edge into DestBB from outside of TIL is that of // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB // is the new exit block and it has no non-loop predecessors. If the // second isn't true, then DestBB was not in LoopSimplify form prior to // the split as it had a non-loop predecessor. In both of these cases, // the predecessor must be directly in TIL, not in a subloop, or again // LoopSimplify doesn't hold. - for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; - ++I) { - BasicBlock *P = *I; + for (BasicBlock *P : predecessors(DestBB)) { if (P == TIBB) continue; // The new block is known. if (LI->getLoopFor(P) != TIL) { @@ -186,7 +185,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, // Loop-simplify form can be preserved, if we can split all in-loop // predecessors. if (any_of(LoopPreds, [](BasicBlock *Pred) { - return isa<IndirectBrInst>(Pred->getTerminator()); + const Instruction *T = Pred->getTerminator(); + if (const auto *CBR = dyn_cast<CallBrInst>(T)) + return CBR->getDefaultDest() != Pred; + return isa<IndirectBrInst>(T); })) { if (Options.PreserveLoopSimplify) return nullptr; @@ -196,8 +198,13 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, } // Create a new basic block, linking it into the CFG. - BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), - TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); + BasicBlock *NewBB = nullptr; + if (BBName.str() != "") + NewBB = BasicBlock::Create(TI->getContext(), BBName); + else + NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + + DestBB->getName() + + "_crit_edge"); // Create our unconditional branch. BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); NewBI->setDebugLoc(TI->getDebugLoc()); @@ -270,7 +277,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, SmallVector<DominatorTree::UpdateType, 3> Updates; Updates.push_back({DominatorTree::Insert, TIBB, NewBB}); Updates.push_back({DominatorTree::Insert, NewBB, DestBB}); - if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB)) + if (!llvm::is_contained(successors(TIBB), DestBB)) Updates.push_back({DominatorTree::Delete, TIBB, DestBB}); if (DT) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index c64ad147fdfe..f4afa3ad4623 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -31,14 +31,22 @@ using namespace llvm; //- Infer Attributes ---------------------------------------------------------// STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumInaccessibleMemOnly, + "Number of functions inferred as inaccessiblememonly"); STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); +STATISTIC(NumInaccessibleMemOrArgMemOnly, + "Number of functions inferred as inaccessiblemem_or_argmemonly"); STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly"); +STATISTIC(NumSExtArg, "Number of arguments inferred as signext"); STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); +STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns"); STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); STATISTIC(NumReturnedArg, "Number of arguments inferred as returned"); +STATISTIC(NumWillReturn, "Number of functions inferred as willreturn"); static bool setDoesNotAccessMemory(Function &F) { if (F.doesNotAccessMemory()) @@ -48,6 +56,14 @@ static bool setDoesNotAccessMemory(Function &F) { return true; } +static bool setOnlyAccessesInaccessibleMemory(Function &F) { + if (F.onlyAccessesInaccessibleMemory()) + return false; + F.setOnlyAccessesInaccessibleMemory(); + ++NumInaccessibleMemOnly; + return true; +} + static bool setOnlyReadsMemory(Function &F) { if (F.onlyReadsMemory()) return false; @@ -64,6 +80,14 @@ static bool setOnlyAccessesArgMemory(Function &F) { return true; } +static bool setOnlyAccessesInaccessibleMemOrArgMem(Function &F) { + if (F.onlyAccessesInaccessibleMemOrArgMem()) + return false; + F.setOnlyAccessesInaccessibleMemOrArgMem(); + ++NumInaccessibleMemOrArgMemOnly; + return true; +} + static bool setDoesNotThrow(Function &F) { if (F.doesNotThrow()) return false; @@ -104,6 +128,48 @@ static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { return true; } +static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::WriteOnly)) + return false; + F.addParamAttr(ArgNo, Attribute::WriteOnly); + ++NumWriteOnlyArg; + return true; +} + +static bool setSignExtendedArg(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::SExt)) + return false; + F.addParamAttr(ArgNo, Attribute::SExt); + ++NumSExtArg; + return true; +} + +static bool setRetNoUndef(Function &F) { + if (!F.getReturnType()->isVoidTy() && + !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) { + F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef); + ++NumNoUndef; + return true; + } + return false; +} + +static bool setArgsNoUndef(Function &F) { + bool Changed = false; + for (unsigned ArgNo = 0; ArgNo < F.arg_size(); ++ArgNo) { + if (!F.hasParamAttribute(ArgNo, Attribute::NoUndef)) { + F.addParamAttr(ArgNo, Attribute::NoUndef); + ++NumNoUndef; + Changed = true; + } + } + return Changed; +} + +static bool setRetAndArgsNoUndef(Function &F) { + return setRetNoUndef(F) | setArgsNoUndef(F); +} + static bool setRetNonNull(Function &F) { assert(F.getReturnType()->isPointerTy() && "nonnull applies only to pointers"); @@ -136,6 +202,14 @@ static bool setDoesNotFreeMemory(Function &F) { return true; } +static bool setWillReturn(Function &F) { + if (F.hasFnAttribute(Attribute::WillReturn)) + return false; + F.addFnAttr(Attribute::WillReturn); + ++NumWillReturn; + return true; +} + bool llvm::inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI) { Function *F = M->getFunction(Name); @@ -163,12 +237,15 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: case LibFunc_strrchr: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_strtol: case LibFunc_strtod: @@ -178,26 +255,31 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strtold: case LibFunc_strtoull: Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_strcpy: case LibFunc_strncpy: - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotAlias(F, 1); - LLVM_FALLTHROUGH; case LibFunc_strcat: case LibFunc_strncat: + Changed |= setWillReturn(F); Changed |= setReturnedArg(F, 0); LLVM_FALLTHROUGH; case LibFunc_stpcpy: case LibFunc_stpncpy: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); return Changed; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); @@ -206,51 +288,70 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strspn: // 0,1 case LibFunc_strncmp: // 0,1 case LibFunc_strcspn: // 0,1 - case LibFunc_strcoll: // 0,1 + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc_strcoll: case LibFunc_strcasecmp: // 0,1 case LibFunc_strncasecmp: // + // Those functions may depend on the locale, which may be accessed through + // global memory. Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_strstr: case LibFunc_strpbrk: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_scanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_setbuf: case LibFunc_setvbuf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strdup: case LibFunc_strndup: + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat: case LibFunc_statvfs: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_sscanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -258,70 +359,95 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_sprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_setitimer: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_malloc: + case LibFunc_vec_malloc: + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_memcmp: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memchr: case LibFunc_memrchr: - Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_modf: case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotAlias(F, 1); Changed |= setReturnedArg(F, 0); - Changed |= setDoesNotThrow(F); + Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memmove: - Changed |= setReturnedArg(F, 0); Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setReturnedArg(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_mempcpy: case LibFunc_memccpy: + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotAlias(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotAlias(F, 1); - Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; @@ -329,38 +455,57 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotThrow(F); return Changed; case LibFunc_memalign: + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); + Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_mkdir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_mktime: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: + case LibFunc_vec_realloc: + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); return Changed; + case LibFunc_reallocf: + Changed |= setRetNoUndef(F); + Changed |= setWillReturn(F); + return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_rewind: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_rename: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -368,6 +513,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_readlink: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -375,35 +521,52 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_aligned_alloc: + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); + Changed |= setOnlyWritesMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_bzero: Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyWritesMemory(F, 0); return Changed; case LibFunc_calloc: + case LibFunc_vec_calloc: + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_chmod: case LibFunc_chown: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -411,6 +574,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -420,14 +584,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_atoll: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_access: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fopen: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -436,13 +603,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fdopen: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_feof: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + return Changed; case LibFunc_free: + case LibFunc_vec_free: + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + return Changed; case LibFunc_fseek: case LibFunc_ftell: case LibFunc_fgetc: @@ -456,10 +635,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_flockfile: case LibFunc_funlockfile: case LibFunc_ftrylockfile: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_ferror: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); @@ -467,26 +648,38 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_fputc: case LibFunc_fputc_unlocked: case LibFunc_fstat: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; case LibFunc_frexp: case LibFunc_frexpf: case LibFunc_frexpl: + Changed |= setDoesNotThrow(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; case LibFunc_fstatvfs: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_fgets: case LibFunc_fgets_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_fread: case LibFunc_fread_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_fwrite: case LibFunc_fwrite_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); @@ -494,6 +687,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_fputs: case LibFunc_fputs_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -501,23 +695,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_fscanf: case LibFunc_fprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fgetpos: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getc: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + return Changed; case LibFunc_getlogin_r: + Changed |= setRetAndArgsNoUndef(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + return Changed; case LibFunc_getc_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getenv: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); @@ -525,37 +731,45 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_gets: case LibFunc_getchar: case LibFunc_getchar_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); return Changed; case LibFunc_getitimer: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getpwnam: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ungetc: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_uname: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_unlink: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_unsetenv: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_utime: case LibFunc_utimes: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -564,30 +778,36 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_putc: case LibFunc_putc_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: case LibFunc_putchar_unlocked: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -596,15 +816,18 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_pclose: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_vscanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vsscanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -612,28 +835,35 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vfscanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; case LibFunc_vprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vfprintf: case LibFunc_vsprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vsnprintf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); @@ -641,20 +871,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; @@ -666,24 +900,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotAccessMemory(F); return Changed; case LibFunc_lstat: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. + // Cannot give undef pointer/size + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: case LibFunc_dunder_strndup: Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; @@ -693,14 +932,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_under_IO_getc: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_under_IO_putc: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_dunder_isoc99_scanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); @@ -708,12 +950,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_isoc99_sscanf: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -721,6 +965,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fopen64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); @@ -730,20 +975,24 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_fseeko64: case LibFunc_ftello64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_fstat64: case LibFunc_fstatvfs64: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); return Changed; @@ -751,21 +1000,67 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; + case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow) + case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow) + case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow) + case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow) + Changed |= setDoesNotThrow(F); + LLVM_FALLTHROUGH; + case LibFunc_ZdlPv: // delete(void*) + case LibFunc_ZdlPvj: // delete(void*, unsigned int) + case LibFunc_ZdlPvm: // delete(void*, unsigned long) + case LibFunc_ZdaPv: // delete[](void*) + case LibFunc_ZdaPvj: // delete[](void*, unsigned int) + case LibFunc_ZdaPvm: // delete[](void*, unsigned long) + case LibFunc_ZdlPvSt11align_val_t: // delete(void*, align_val_t) + case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t) + case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t) + case LibFunc_ZdaPvSt11align_val_t: // delete[](void*, align_val_t) + case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t) + case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t); + Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); + Changed |= setArgsNoUndef(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotCapture(F, 0); + return Changed; + case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow) + case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow) + case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow) + case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow) + case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow) + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow) + case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow) + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow) + // Nothrow operator new may return null pointer + Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesInaccessibleMemory(F); + Changed |= setRetNoUndef(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); + return Changed; case LibFunc_Znwj: // new(unsigned int) case LibFunc_Znwm: // new(unsigned long) case LibFunc_Znaj: // new[](unsigned int) case LibFunc_Znam: // new[](unsigned long) + case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t) + case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t) + case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t) + case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t) case LibFunc_msvc_new_int: // new(unsigned int) case LibFunc_msvc_new_longlong: // new(unsigned long long) case LibFunc_msvc_new_array_int: // new[](unsigned int) case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) + Changed |= setOnlyAccessesInaccessibleMemory(F); // Operator new always returns a nonnull noalias pointer + Changed |= setRetNoUndef(F); Changed |= setRetNonNull(F); Changed |= setRetDoesNotAlias(F); + Changed |= setWillReturn(F); return Changed; // TODO: add LibFunc entries for: // case LibFunc_memset_pattern4: @@ -773,15 +1068,155 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; + case LibFunc_memset: + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setWillReturn(F); + Changed |= setDoesNotThrow(F); + Changed |= setOnlyWritesMemory(F, 0); + return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: + Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); return Changed; - + case LibFunc_ldexp: + case LibFunc_ldexpf: + case LibFunc_ldexpl: + Changed |= setSignExtendedArg(F, 1); + Changed |= setWillReturn(F); + return Changed; + case LibFunc_abs: + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosh: + case LibFunc_acoshf: + case LibFunc_acoshl: + case LibFunc_acosl: + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinh: + case LibFunc_asinhf: + case LibFunc_asinhl: + case LibFunc_asinl: + case LibFunc_atan: + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + case LibFunc_atanf: + case LibFunc_atanh: + case LibFunc_atanhf: + case LibFunc_atanhl: + case LibFunc_atanl: + case LibFunc_cbrt: + case LibFunc_cbrtf: + case LibFunc_cbrtl: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + case LibFunc_cos: + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_cosf: + case LibFunc_cosl: + case LibFunc_cospi: + case LibFunc_cospif: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_expm1: + case LibFunc_expm1f: + case LibFunc_expm1l: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + case LibFunc_fmod: + case LibFunc_fmodf: + case LibFunc_fmodl: + case LibFunc_isascii: + case LibFunc_isdigit: + case LibFunc_labs: + case LibFunc_llabs: + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10l: + case LibFunc_log1p: + case LibFunc_log1pf: + case LibFunc_log1pl: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: + case LibFunc_logb: + case LibFunc_logbf: + case LibFunc_logbl: + case LibFunc_logf: + case LibFunc_logl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_powl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + case LibFunc_sin: + case LibFunc_sincospif_stret: + case LibFunc_sinf: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + case LibFunc_sinl: + case LibFunc_sinpi: + case LibFunc_sinpif: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_strnlen: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + case LibFunc_tanl: + case LibFunc_toascii: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotFreeMemory(F); + Changed |= setWillReturn(F); + return Changed; default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. @@ -930,6 +1365,15 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return CI; } +Value *llvm::emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { + LLVMContext &Context = B.GetInsertBlock()->getContext(); + return emitLibCall( + LibFunc_mempcpy, B.getInt8PtrTy(), + {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, + {Dst, Src, Len}, B, TLI); +} + Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVMContext &Context = B.GetInsertBlock()->getContext(); @@ -969,7 +1413,7 @@ Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)}; - Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end()); + llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_snprintf, B.getInt32Ty(), {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()}, Args, B, TLI, /*IsVaArgs=*/true); @@ -979,7 +1423,7 @@ Value *llvm::emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs, IRBuilderBase &B, const TargetLibraryInfo *TLI) { SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)}; - Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end()); + llvm::append_range(Args, VariadicArgs); return emitLibCall(LibFunc_sprintf, B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI, /*IsVaArgs=*/true); @@ -1087,12 +1531,15 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, StringRef Name, IRBuilderBase &B, - const AttributeList &Attrs) { + const AttributeList &Attrs, + const TargetLibraryInfo *TLI = nullptr) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); Module *M = B.GetInsertBlock()->getModule(); FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), Op2->getType()); + if (TLI != nullptr) + inferLibFuncAttributes(M, Name, *TLI); CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); // The incoming attribute set may have come from a speculatable intrinsic, but @@ -1128,7 +1575,7 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name = getFloatFnName(TLI, Op1->getType(), DoubleFn, FloatFn, LongDoubleFn); - return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI); } Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index 52e859361c59..b2763900e154 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -96,11 +96,12 @@ void CallGraphUpdater::reanalyzeFunction(Function &Fn) { } } -void CallGraphUpdater::registerOutlinedFunction(Function &NewFn) { +void CallGraphUpdater::registerOutlinedFunction(Function &OriginalFn, + Function &NewFn) { if (CG) CG->addToCallGraph(&NewFn); else if (LCG) - LCG->addNewFunctionIntoSCC(NewFn, *SCC); + LCG->addSplitFunction(OriginalFn, NewFn); } void CallGraphUpdater::removeFunction(Function &DeadFn) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 5a47c1fd0b6c..bf08bf274737 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -112,9 +112,7 @@ static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, Builder.SetInsertPoint(&MergeBlock->front()); PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); - SmallVector<User *, 16> UsersToUpdate; - for (User *U : OrigInst->users()) - UsersToUpdate.push_back(U); + SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); for (User *U : UsersToUpdate) U->replaceUsesOfWith(OrigInst, Phi); Phi->addIncoming(OrigInst, OrigInst->getParent()); @@ -165,9 +163,7 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { // Save the users of the calling instruction. These uses will be changed to // use the bitcast after we create it. - SmallVector<User *, 16> UsersToUpdate; - for (User *U : CB.users()) - UsersToUpdate.push_back(U); + SmallVector<User *, 16> UsersToUpdate(CB.users()); // Determine an appropriate location to create the bitcast for the return // value. The location depends on if we have a call or invoke instruction. @@ -430,10 +426,11 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, } } for (; I < NumArgs; I++) { - // Vararg functions can have more arguments than paramters. + // Vararg functions can have more arguments than parameters. assert(Callee->isVarArg()); if (CB.paramHasAttr(I, Attribute::StructRet)) { - *FailureReason = "SRet arg to vararg function"; + if (FailureReason) + *FailureReason = "SRet arg to vararg function"; return false; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index 1ae17c64b8f6..1f649fe6c748 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -109,7 +109,7 @@ void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { auto *ValueToFr = U.get(); assert(L->contains(UserI->getParent()) && "Should not process an instruction that isn't inside the loop"); - if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, UserI, &DT)) + if (isGuaranteedNotToBeUndefOrPoison(ValueToFr, nullptr, UserI, &DT)) return; LLVM_DEBUG(dbgs() << "canonfr: inserting freeze:\n"); @@ -176,7 +176,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { assert(StepI && "Step instruction should have been found"); // Drop flags from the step instruction. - if (!isGuaranteedNotToBeUndefOrPoison(StepI, StepI, &DT)) { + if (!isGuaranteedNotToBeUndefOrPoison(StepI, nullptr, StepI, &DT)) { LLVM_DEBUG(dbgs() << "canonfr: drop flags: " << *StepI << "\n"); StepI->dropPoisonGeneratingFlags(); SE.forgetValue(StepI); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp index 788983c15690..51a49574e55d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -36,6 +37,8 @@ #include <map> using namespace llvm; +#define DEBUG_TYPE "clone-function" + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -137,15 +140,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, MD[SP].reset(SP); } - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) { - NewFunc->addMetadata( - MD.first, - *MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); - } + // Everything else beyond this point deals with function instructions, + // so if we are dealing with a function declaration, we're done. + if (OldFunc->isDeclaration()) + return; // When we remap instructions, we want to avoid duplicating inlined // DISubprograms, so record all subprograms we find as we duplicate @@ -157,7 +155,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. - // for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { const BasicBlock &BB = *BI; @@ -196,6 +193,19 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (DIType *Type : DIFinder.types()) VMap.MD()[Type].reset(Type); + // Duplicate the metadata that is attached to the cloned function. + // Subprograms/CUs/types that were already mapped to themselves won't be + // duplicated. + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) { + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = @@ -426,9 +436,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, CodeInfo->OperandBundleCallSites.push_back(NewInst); // Recursively clone any reachable successor blocks. - const Instruction *TI = BB->getTerminator(); - for (const BasicBlock *Succ : successors(TI)) - ToClone.push_back(Succ); + append_range(ToClone, successors(BB->getTerminator())); } if (CodeInfo) { @@ -668,8 +676,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. - if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || - I->getSinglePredecessor() == &*I)) { + if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) { BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; @@ -877,3 +884,108 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween( return NewBB; } + +void llvm::cloneNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, + DenseMap<MDNode *, MDNode *> &ClonedScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *ScopeList : NoAliasDeclScopes) { + for (auto &MDOperand : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { + AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) + Name = (Twine(ScopeName) + ":" + Ext).str(); + else + Name = std::string(Ext); + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast<MDNode *>(SNANode.getDomain()), Name); + ClonedScopes.insert(std::make_pair(MD, NewScope)); + } + } + } +} + +void llvm::adaptNoAliasScopes( + Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes, + LLVMContext &Context) { + auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { + bool NeedsReplacement = false; + SmallVector<Metadata *, 8> NewScopeList; + for (auto &MDOp : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { + if (auto *NewMD = ClonedScopes.lookup(MD)) { + NewScopeList.push_back(NewMD); + NeedsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (NeedsReplacement) + return MDNode::get(Context, NewScopeList); + return nullptr; + }; + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) + if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) + Decl->setScopeList(NewScopeList); + + auto replaceWhenNeeded = [&](unsigned MD_ID) { + if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) + if (auto *NewScopeList = CloneScopeList(CSNoAlias)) + I->setMetadata(MD_ID, NewScopeList); + }; + replaceWhenNeeded(LLVMContext::MD_noalias); + replaceWhenNeeded(LLVMContext::MD_alias_scope); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, + ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart, + Instruction *IEnd, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap<MDNode *, MDNode *> ClonedScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, Ext, Context); + // Identify instructions using metadata that needs adaptation + assert(IStart->getParent() == IEnd->getParent() && "different basic block ?"); + auto ItStart = IStart->getIterator(); + auto ItEnd = IEnd->getIterator(); + ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range + for (auto &I : llvm::make_range(ItStart, ItEnd)) + adaptNoAliasScopes(&I, ClonedScopes, Context); +} + +void llvm::identifyNoAliasScopesToClone( + ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes) { + for (BasicBlock *BB : BBs) + for (Instruction &I : *BB) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp index 2c8c3abb2922..a6327bbf21bc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -117,10 +117,17 @@ std::unique_ptr<Module> llvm::CloneModule( // for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); + + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I->getAllMetadata(MDs); + for (auto MD : MDs) + GV->addMetadata(MD.first, + *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); + if (I->isDeclaration()) continue; - GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); if (!ShouldCloneDefinition(&*I)) { // Skip after setting the correct linkage for an external reference. GV->setLinkage(GlobalValue::ExternalLinkage); @@ -129,12 +136,6 @@ std::unique_ptr<Module> llvm::CloneModule( if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - I->getAllMetadata(MDs); - for (auto MD : MDs) - GV->addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); - copyComdat(GV, &*I); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8cdbb9d35652..390925a03b73 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -535,6 +535,46 @@ void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, continue; } + // Find bitcasts in the outlined region that have lifetime marker users + // outside that region. Replace the lifetime marker use with an + // outside region bitcast to avoid unnecessary alloca/reload instructions + // and extra lifetime markers. + SmallVector<Instruction *, 2> LifetimeBitcastUsers; + for (User *U : AI->users()) { + if (!definedInRegion(Blocks, U)) + continue; + + if (U->stripInBoundsConstantOffsets() != AI) + continue; + + Instruction *Bitcast = cast<Instruction>(U); + for (User *BU : Bitcast->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU); + if (!IntrInst) + continue; + + if (!IntrInst->isLifetimeStartOrEnd()) + continue; + + if (definedInRegion(Blocks, IntrInst)) + continue; + + LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast" + << *Bitcast << " in out-of-region lifetime marker " + << *IntrInst << "\n"); + LifetimeBitcastUsers.push_back(IntrInst); + } + } + + for (Instruction *I : LifetimeBitcastUsers) { + Module *M = AIFunc->getParent(); + LLVMContext &Ctx = M->getContext(); + auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); + CastInst *CastI = + CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I); + I->replaceUsesOfWith(I->getOperand(1), CastI); + } + // Follow any bitcasts. SmallVector<Instruction *, 2> Bitcasts; SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo; @@ -728,8 +768,7 @@ void CodeExtractor::severSplitPHINodesOfExits( NewBB = BasicBlock::Create(ExitBB->getContext(), ExitBB->getName() + ".split", ExitBB->getParent(), ExitBB); - SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB), - pred_end(ExitBB)); + SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB)); for (BasicBlock *PredBB : Preds) if (Blocks.count(PredBB)) PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB); @@ -895,6 +934,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::WriteOnly: case Attribute::ZExt: case Attribute::ImmArg: + case Attribute::ByRef: case Attribute::EndAttrKinds: case Attribute::EmptyKey: case Attribute::TombstoneKey: @@ -902,9 +942,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Those attributes should be safe to propagate to the extracted function. case Attribute::AlwaysInline: case Attribute::Cold: + case Attribute::Hot: case Attribute::NoRecurse: case Attribute::InlineHint: case Attribute::MinSize: + case Attribute::NoCallback: case Attribute::NoDuplicate: case Attribute::NoFree: case Attribute::NoImplicitFloat: @@ -930,6 +972,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StrictFP: case Attribute::UWTable: case Attribute::NoCfCheck: + case Attribute::MustProgress: + case Attribute::NoProfile: break; } @@ -1434,7 +1478,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, // function arguments, as the parameters don't correspond to anything at the // source level. assert(OldSP->getUnit() && "Missing compile unit for subprogram"); - DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolvedNodes=*/false, + DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, OldSP->getUnit()); auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | @@ -1505,7 +1549,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, // function. for (Instruction &I : instructions(NewFunc)) { if (const DebugLoc &DL = I.getDebugLoc()) - I.setDebugLoc(DebugLoc::get(DL.getLine(), DL.getCol(), NewSP)); + I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP)); // Loop info metadata may contain line locations. Fix them up. auto updateLoopInfoLoc = [&Ctx, @@ -1516,7 +1560,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); } if (!TheCall.getDebugLoc()) - TheCall.setDebugLoc(DebugLoc::get(0, 0, OldSP)); + TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP)); eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); } @@ -1739,7 +1783,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, const Function &NewFunc, AssumptionCache *AC) { for (auto AssumeVH : AC->assumptions()) { - CallInst *I = dyn_cast_or_null<CallInst>(AssumeVH); + auto *I = dyn_cast_or_null<CallInst>(AssumeVH); if (!I) continue; @@ -1751,12 +1795,12 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc, // that were previously in the old function, but that have now been moved // to the new function. for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) { - CallInst *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH); + auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH); if (!AffectedCI) continue; if (AffectedCI->getFunction() != &OldFunc) return true; - auto *AssumedInst = dyn_cast<Instruction>(AffectedCI->getOperand(0)); + auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0)); if (AssumedInst->getFunction() != &OldFunc) return true; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp index 08047dc0f96e..ce982c7403aa 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -355,35 +355,32 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, // Check if there exists instructions which may throw, may synchonize, or may // never return, from I to InsertPoint. if (!isSafeToSpeculativelyExecute(&I)) - if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(), - [](Instruction *I) { - if (I->mayThrow()) - return true; - - const CallBase *CB = dyn_cast<CallBase>(I); - if (!CB) - return false; - if (!CB->hasFnAttr(Attribute::WillReturn)) - return true; - if (!CB->hasFnAttr(Attribute::NoSync)) - return true; - - return false; - })) { + if (llvm::any_of(InstsToCheck, [](Instruction *I) { + if (I->mayThrow()) + return true; + + const CallBase *CB = dyn_cast<CallBase>(I); + if (!CB) + return false; + if (!CB->hasFnAttr(Attribute::WillReturn)) + return true; + if (!CB->hasFnAttr(Attribute::NoSync)) + return true; + + return false; + })) { return reportInvalidCandidate(I, MayThrowException); } // Check if I has any output/flow/anti dependences with instructions from \p // StartInst to \p EndInst. - if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(), - [&DI, &I](Instruction *CurInst) { - auto DepResult = DI->depends(&I, CurInst, true); - if (DepResult && - (DepResult->isOutput() || DepResult->isFlow() || - DepResult->isAnti())) - return true; - return false; - })) + if (llvm::any_of(InstsToCheck, [&DI, &I](Instruction *CurInst) { + auto DepResult = DI->depends(&I, CurInst, true); + if (DepResult && (DepResult->isOutput() || DepResult->isFlow() || + DepResult->isAnti())) + return true; + return false; + })) return reportInvalidCandidate(I, HasDependences); return true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp index 8f98d81a3d79..3e4d53c10dc9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -198,6 +199,18 @@ bool llvm::applyDebugifyMetadata( return true; } +static bool applyDebugify(Function &F) { + Module &M = *F.getParent(); + auto FuncIt = F.getIterator(); + return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), + "FunctionDebugify: ", /*ApplyToMF=*/nullptr); +} + +static bool applyDebugify(Module &M) { + return applyDebugifyMetadata(M, M.functions(), + "ModuleDebugify: ", /*ApplyToMF=*/nullptr); +} + bool llvm::stripDebugifyMetadata(Module &M) { bool Changed = false; @@ -226,9 +239,7 @@ bool llvm::stripDebugifyMetadata(Module &M) { NamedMDNode *NMD = M.getModuleFlagsMetadata(); if (!NMD) return Changed; - SmallVector<MDNode *, 4> Flags; - for (MDNode *Flag : NMD->operands()) - Flags.push_back(Flag); + SmallVector<MDNode *, 4> Flags(NMD->operands()); NMD->clearOperands(); for (MDNode *Flag : Flags) { MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1)); @@ -383,10 +394,7 @@ bool checkDebugifyMetadata(Module &M, /// ModulePass for attaching synthetic debug info to everything, used with the /// legacy module pass manager. struct DebugifyModulePass : public ModulePass { - bool runOnModule(Module &M) override { - return applyDebugifyMetadata(M, M.functions(), - "ModuleDebugify: ", /*ApplyToMF*/ nullptr); - } + bool runOnModule(Module &M) override { return applyDebugify(M); } DebugifyModulePass() : ModulePass(ID) {} @@ -400,12 +408,7 @@ struct DebugifyModulePass : public ModulePass { /// FunctionPass for attaching synthetic debug info to instructions within a /// single function, used with the legacy module pass manager. struct DebugifyFunctionPass : public FunctionPass { - bool runOnFunction(Function &F) override { - Module &M = *F.getParent(); - auto FuncIt = F.getIterator(); - return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)), - "FunctionDebugify: ", /*ApplyToMF*/ nullptr); - } + bool runOnFunction(Function &F) override { return applyDebugify(F); } DebugifyFunctionPass() : FunctionPass(ID) {} @@ -472,9 +475,32 @@ private: } // end anonymous namespace -ModulePass *createDebugifyModulePass() { return new DebugifyModulePass(); } +void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) { + std::error_code EC; + raw_fd_ostream OS{Path, EC}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " << Path << '\n'; + return; + } + + OS << "Pass Name" << ',' << "# of missing debug values" << ',' + << "# of missing locations" << ',' << "Missing/Expected value ratio" << ',' + << "Missing/Expected location ratio" << '\n'; + for (const auto &Entry : Map) { + StringRef Pass = Entry.first; + DebugifyStatistics Stats = Entry.second; + + OS << Pass << ',' << Stats.NumDbgValuesMissing << ',' + << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ',' + << Stats.getEmptyLocationRatio() << '\n'; + } +} + +ModulePass *llvm::createDebugifyModulePass() { + return new DebugifyModulePass(); +} -FunctionPass *createDebugifyFunctionPass() { +FunctionPass *llvm::createDebugifyFunctionPass() { return new DebugifyFunctionPass(); } @@ -484,15 +510,15 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); } -ModulePass *createCheckDebugifyModulePass(bool Strip, - StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { +ModulePass *llvm::createCheckDebugifyModulePass(bool Strip, + StringRef NameOfWrappedPass, + DebugifyStatsMap *StatsMap) { return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap); } -FunctionPass *createCheckDebugifyFunctionPass(bool Strip, - StringRef NameOfWrappedPass, - DebugifyStatsMap *StatsMap) { +FunctionPass * +llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass, + DebugifyStatsMap *StatsMap) { return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap); } @@ -503,6 +529,41 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M, return PreservedAnalyses::all(); } +static bool isIgnoredPass(StringRef PassID) { + return isSpecialPass(PassID, {"PassManager", "PassAdaptor", + "AnalysisManagerProxy", "PrintFunctionPass", + "PrintModulePass", "BitcodeWriterPass", + "ThinLTOBitcodeWriterPass", "VerifierPass"}); +} + +void DebugifyEachInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) { + if (isIgnoredPass(P)) + return; + if (any_isa<const Function *>(IR)) + applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR))); + else if (any_isa<const Module *>(IR)) + applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR))); + }); + PIC.registerAfterPassCallback([this](StringRef P, Any IR, + const PreservedAnalyses &PassPA) { + if (isIgnoredPass(P)) + return; + if (any_isa<const Function *>(IR)) { + auto &F = *const_cast<Function *>(any_cast<const Function *>(IR)); + Module &M = *F.getParent(); + auto It = F.getIterator(); + checkDebugifyMetadata(M, make_range(It, std::next(It)), P, + "CheckFunctionDebugify", /*Strip=*/true, &StatsMap); + } else if (any_isa<const Module *>(IR)) { + auto &M = *const_cast<Module *>(any_cast<const Module *>(IR)); + checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify", + /*Strip=*/true, &StatsMap); + } + }); +} + char DebugifyModulePass::ID = 0; static RegisterPass<DebugifyModulePass> DM("debugify", "Attach debug info to everything"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index f84ff9e5aad1..26f8e21952cc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -83,7 +83,7 @@ static bool runOnFunction(Function &F, bool PostInlining) { if (!EntryFunc.empty()) { DebugLoc DL; if (auto SP = F.getSubprogram()) - DL = DebugLoc::get(SP->getScopeLine(), 0, SP); + DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP); insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); Changed = true; @@ -97,19 +97,14 @@ static bool runOnFunction(Function &F, bool PostInlining) { continue; // If T is preceded by a musttail call, that's the real terminator. - Instruction *Prev = T->getPrevNode(); - if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev)) - Prev = BCI->getPrevNode(); - if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) { - if (CI->isMustTailCall()) - T = CI; - } + if (CallInst *CI = BB.getTerminatingMustTailCall()) + T = CI; DebugLoc DL; if (DebugLoc TerminatorDL = T->getDebugLoc()) DL = TerminatorDL; else if (auto SP = F.getSubprogram()) - DL = DebugLoc::get(0, 0, SP); + DL = DILocation::get(SP->getContext(), 0, 0, SP); insertCall(F, ExitFunc, T, DL); Changed = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index cae9d9ee6d70..accedd5b4ee0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -41,6 +41,8 @@ IRBuilder<> *EscapeEnumerator::Next() { if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; + if (CallInst *CI = CurBB->getTerminatingMustTailCall()) + TI = CI; Builder.SetInsertPoint(TI); return &Builder; } @@ -54,11 +56,12 @@ IRBuilder<> *EscapeEnumerator::Next() { return nullptr; // Find all 'call' instructions that may throw. + // We cannot tranform calls with musttail tag. SmallVector<Instruction *, 16> Calls; for (BasicBlock &BB : F) for (Instruction &II : BB) if (CallInst *CI = dyn_cast<CallInst>(&II)) - if (!CI->doesNotThrow()) + if (!CI->doesNotThrow() && !CI->isMustTailCall()) Calls.push_back(CI); if (Calls.empty()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp index c5dfbf9d92d1..732b00635e29 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -183,11 +183,11 @@ evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, std::function<Constant *(Constant *)> Func) { Constant *Val; while (!(Val = Func(Ptr))) { - // If Ty is a struct, we can convert the pointer to the struct + // If Ty is a non-opaque struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. Type *Ty = cast<PointerType>(Ptr->getType())->getElementType(); - if (!isa<StructType>(Ty)) + if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isOpaque()) break; IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); @@ -210,11 +210,7 @@ static Constant *getInitializer(Constant *C) { Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. - auto findMemLoc = [this](Constant *Ptr) { - DenseMap<Constant *, Constant *>::const_iterator I = - MutatedMemory.find(Ptr); - return I != MutatedMemory.end() ? I->second : nullptr; - }; + auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); }; if (Constant *Val = findMemLoc(P)) return Val; @@ -551,6 +547,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::pseudoprobe) { + LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n"); + ++CurInst; + continue; } LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 460ba9e97fc6..44af95eef67d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -66,6 +66,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/InitializePasses.h" @@ -104,7 +105,7 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible", "Convert irreducible control-flow into natural loops", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitch) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible", @@ -304,11 +305,9 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) { return Changed; } -bool FixIrreducible::runOnFunction(Function &F) { +static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " << F.getName() << "\n"); - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); bool Changed = false; SmallVector<Loop *, 8> WorkList; @@ -318,13 +317,10 @@ bool FixIrreducible::runOnFunction(Function &F) { // Any SCCs reduced are now already in the list of top-level loops, so simply // add them all to the worklist. - for (auto L : LI) { - WorkList.push_back(L); - } + append_range(WorkList, LI); while (!WorkList.empty()) { - auto L = WorkList.back(); - WorkList.pop_back(); + auto L = WorkList.pop_back_val(); LLVM_DEBUG(dbgs() << "visiting loop with header " << L->getHeader()->getName() << "\n"); Changed |= makeReducible(LI, DT, *L); @@ -335,3 +331,21 @@ bool FixIrreducible::runOnFunction(Function &F) { return Changed; } + +bool FixIrreducible::runOnFunction(Function &F) { + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return FixIrreducibleImpl(F, LI, DT); +} + +PreservedAnalyses FixIrreduciblePass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + if (!FixIrreducibleImpl(F, LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 101cb232d8ae..2696557a719f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -124,12 +124,17 @@ int FunctionComparator::cmpAttrs(const AttributeList L, Type *TyL = LA.getValueAsType(); Type *TyR = RA.getValueAsType(); - if (TyL && TyR) - return cmpTypes(TyL, TyR); + if (TyL && TyR) { + if (int Res = cmpTypes(TyL, TyR)) + return Res; + continue; + } // Two pointers, at least one null, so the comparison result is // independent of the value of a real pointer. - return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); + if (int Res = cmpNumbers((uint64_t)TyL, (uint64_t)TyR)) + return Res; + continue; } if (LA < RA) return -1; @@ -286,6 +291,7 @@ int FunctionComparator::cmpConstants(const Constant *L, switch (L->getValueID()) { case Value::UndefValueVal: + case Value::PoisonValueVal: case Value::ConstantTokenNoneVal: return TypesRes; case Value::ConstantIntVal: { @@ -488,12 +494,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::ScalableVectorTyID: { auto *STyL = cast<VectorType>(TyL); auto *STyR = cast<VectorType>(TyR); - if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable) - return cmpNumbers(STyL->getElementCount().Scalable, - STyR->getElementCount().Scalable); - if (STyL->getElementCount().Min != STyR->getElementCount().Min) - return cmpNumbers(STyL->getElementCount().Min, - STyR->getElementCount().Min); + if (STyL->getElementCount().isScalable() != + STyR->getElementCount().isScalable()) + return cmpNumbers(STyL->getElementCount().isScalable(), + STyR->getElementCount().isScalable()); + if (STyL->getElementCount() != STyR->getElementCount()) + return cmpNumbers(STyL->getElementCount().getKnownMinValue(), + STyR->getElementCount().getKnownMinValue()); return cmpTypes(STyL->getElementType(), STyR->getElementType()); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp index fe58f0e0fe40..f782396be7b6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -136,7 +136,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GS.StoredType = GlobalStatus::Stored; } } - } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) || + isa<AddrSpaceCastInst>(I)) { // Skip over bitcasts and GEPs; we don't care about the type or offset // of the pointer. if (analyzeGlobalAux(I, GS, VisitedUsers)) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp index 4cfc9358499a..4dbcbf80d3da 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp @@ -30,7 +30,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight( void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard, bool UseWC) { OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt)); - SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end()); + SmallVector<Value *, 4> Args(drop_begin(Guard->args())); auto *CheckBB = Guard->getParent(); auto *DeoptBlockTerm = diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp deleted file mode 100644 index ea93f99d69e3..000000000000 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ /dev/null @@ -1,202 +0,0 @@ -//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Generating inliner statistics for imported functions, mostly useful for -// ThinLTO. -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <iomanip> -#include <sstream> -using namespace llvm; - -ImportedFunctionsInliningStatistics::InlineGraphNode & -ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { - - auto &ValueLookup = NodesMap[F.getName()]; - if (!ValueLookup) { - ValueLookup = std::make_unique<InlineGraphNode>(); - ValueLookup->Imported = F.hasMetadata("thinlto_src_module"); - } - return *ValueLookup; -} - -void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, - const Function &Callee) { - - InlineGraphNode &CallerNode = createInlineGraphNode(Caller); - InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); - CalleeNode.NumberOfInlines++; - - if (!CallerNode.Imported && !CalleeNode.Imported) { - // Direct inline from not imported callee to not imported caller, so we - // don't have to add this to graph. It might be very helpful if you wanna - // get the inliner statistics in compile step where there are no imported - // functions. In this case the graph would be empty. - CalleeNode.NumberOfRealInlines++; - return; - } - - CallerNode.InlinedCallees.push_back(&CalleeNode); - if (!CallerNode.Imported) { - // We could avoid second lookup, but it would make the code ultra ugly. - auto It = NodesMap.find(Caller.getName()); - assert(It != NodesMap.end() && "The node should be already there."); - // Save Caller as a starting node for traversal. The string has to be one - // from map because Caller can disappear (and function name with it). - NonImportedCallers.push_back(It->first()); - } -} - -void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { - ModuleName = M.getName(); - for (const auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - AllFunctions++; - ImportedFunctions += int(F.hasMetadata("thinlto_src_module")); - } -} -static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, - const char *PercentageOfMsg, - bool LineEnd = true) { - double Result = 0; - if (All != 0) - Result = 100 * static_cast<double>(Fraction) / All; - - std::stringstream Str; - Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result - << "% of " << PercentageOfMsg << "]"; - if (LineEnd) - Str << "\n"; - return Str.str(); -} - -void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { - calculateRealInlines(); - NonImportedCallers.clear(); - - int32_t InlinedImportedFunctionsCount = 0; - int32_t InlinedNotImportedFunctionsCount = 0; - - int32_t InlinedImportedFunctionsToImportingModuleCount = 0; - int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; - - const auto SortedNodes = getSortedNodes(); - std::string Out; - Out.reserve(5000); - raw_string_ostream Ostream(Out); - - Ostream << "------- Dumping inliner stats for [" << ModuleName - << "] -------\n"; - - if (Verbose) - Ostream << "-- List of inlined functions:\n"; - - for (const auto &Node : SortedNodes) { - assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines); - if (Node->second->NumberOfInlines == 0) - continue; - - if (Node->second->Imported) { - InlinedImportedFunctionsCount++; - InlinedImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } else { - InlinedNotImportedFunctionsCount++; - InlinedNotImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } - - if (Verbose) - Ostream << "Inlined " - << (Node->second->Imported ? "imported " : "not imported ") - << "function [" << Node->first() << "]" - << ": #inlines = " << Node->second->NumberOfInlines - << ", #inlines_to_importing_module = " - << Node->second->NumberOfRealInlines << "\n"; - } - - auto InlinedFunctionsCount = - InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; - auto NotImportedFuncCount = AllFunctions - ImportedFunctions; - auto ImportedNotInlinedIntoModule = - ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; - - Ostream << "-- Summary:\n" - << "All functions: " << AllFunctions - << ", imported functions: " << ImportedFunctions << "\n" - << getStatString("inlined functions", InlinedFunctionsCount, - AllFunctions, "all functions") - << getStatString("imported functions inlined anywhere", - InlinedImportedFunctionsCount, ImportedFunctions, - "imported functions") - << getStatString("imported functions inlined into importing module", - InlinedImportedFunctionsToImportingModuleCount, - ImportedFunctions, "imported functions", - /*LineEnd=*/false) - << getStatString(", remaining", ImportedNotInlinedIntoModule, - ImportedFunctions, "imported functions") - << getStatString("non-imported functions inlined anywhere", - InlinedNotImportedFunctionsCount, - NotImportedFuncCount, "non-imported functions") - << getStatString( - "non-imported functions inlined into importing module", - InlinedNotImportedFunctionsToImportingModuleCount, - NotImportedFuncCount, "non-imported functions"); - Ostream.flush(); - dbgs() << Out; -} - -void ImportedFunctionsInliningStatistics::calculateRealInlines() { - // Removing duplicated Callers. - llvm::sort(NonImportedCallers); - NonImportedCallers.erase( - std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), - NonImportedCallers.end()); - - for (const auto &Name : NonImportedCallers) { - auto &Node = *NodesMap[Name]; - if (!Node.Visited) - dfs(Node); - } -} - -void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { - assert(!GraphNode.Visited); - GraphNode.Visited = true; - for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { - InlinedFunctionNode->NumberOfRealInlines++; - if (!InlinedFunctionNode->Visited) - dfs(*InlinedFunctionNode); - } -} - -ImportedFunctionsInliningStatistics::SortedNodesTy -ImportedFunctionsInliningStatistics::getSortedNodes() { - SortedNodesTy SortedNodes; - SortedNodes.reserve(NodesMap.size()); - for (const NodesMapTy::value_type& Node : NodesMap) - SortedNodes.push_back(&Node); - - llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs, - const SortedNodesTy::value_type &Rhs) { - if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) - return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; - if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) - return Lhs->second->NumberOfRealInlines > - Rhs->second->NumberOfRealInlines; - return Lhs->first() < Rhs->first(); - }); - return SortedNodes; -} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 9d8f59d62d6d..a2b72e4e7f03 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" @@ -77,7 +78,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { if (CI.isNoBuiltin() || !CI.getCalledFunction()) return; - const std::string ScalarName = std::string(CI.getCalledFunction()->getName()); + StringRef ScalarName = CI.getCalledFunction()->getName(); + // Nothing to be done if the TLI thinks the function is not // vectorizable. if (!TLI.isFunctionVectorizable(ScalarName)) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index b0b7ca484798..0ac8fa537f4e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -79,6 +79,12 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); +static cl::opt<bool> + UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Use the llvm.experimental.noalias.scope.decl " + "intrinsic during inlining.")); + // Disabled by default, because the added alignment assumptions may increase // compile-time and block optimizations. This option is not suitable for use // with frontends that emit comprehensive parameter alignment annotations. @@ -771,146 +777,158 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, UnwindDest->removePredecessor(InvokeBB); } -/// When inlining a call site that has !llvm.mem.parallel_loop_access or -/// llvm.access.group metadata, that metadata should be propagated to all -/// memory-accessing cloned instructions. -static void PropagateParallelLoopAccessMetadata(CallBase &CB, - ValueToValueMapTy &VMap) { - MDNode *M = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); - MDNode *CallAccessGroup = CB.getMetadata(LLVMContext::MD_access_group); - if (!M && !CallAccessGroup) +/// When inlining a call site that has !llvm.mem.parallel_loop_access, +/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should +/// be propagated to all memory-accessing cloned instructions. +static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { + MDNode *MemParallelLoopAccess = + CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); + MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); + MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); + MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); + if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { - if (!VMI->second) + // Check that key is an instruction, to skip the Argument mapping, which + // points to an instruction in the original function, not the inlined one. + if (!VMI->second || !isa<Instruction>(VMI->first)) continue; Instruction *NI = dyn_cast<Instruction>(VMI->second); if (!NI) continue; - if (M) { - if (MDNode *PM = - NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { - M = MDNode::concatenate(PM, M); - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); - } else if (NI->mayReadOrWriteMemory()) { - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); - } - } + // This metadata is only relevant for instructions that access memory. + if (!NI->mayReadOrWriteMemory()) + continue; - if (NI->mayReadOrWriteMemory()) { - MDNode *UnitedAccGroups = uniteAccessGroups( - NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup); - NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups); + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParalleLoopAccess. + MemParallelLoopAccess = MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, + MemParallelLoopAccess); } + + if (AccessGroup) + NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + NI->getMetadata(LLVMContext::MD_access_group), AccessGroup)); + + if (AliasScope) + NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + + if (NoAlias) + NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_noalias), NoAlias)); } } -/// When inlining a function that contains noalias scope metadata, -/// this metadata needs to be cloned so that the inlined blocks -/// have different "unique scopes" at every call site. Were this not done, then -/// aliasing scopes from a function inlined into a caller multiple times could -/// not be differentiated (and this would lead to miscompiles because the -/// non-aliasing property communicated by the metadata could have -/// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { - const Function *CalledFunc = CB.getCalledFunction(); +/// Utility for cloning !noalias and !alias.scope metadata. When a code region +/// using scoped alias metadata is inlined, the aliasing relationships may not +/// hold between the two version. It is necessary to create a deep clone of the +/// metadata, putting the two versions in separate scope domains. +class ScopedAliasMetadataDeepCloner { + using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; SetVector<const MDNode *> MD; - - // Note: We could only clone the metadata if it is already used in the - // caller. I'm omitting that check here because it might confuse - // inter-procedural alias analysis passes. We can revisit this if it becomes - // an efficiency or overhead problem. - - for (const BasicBlock &I : *CalledFunc) - for (const Instruction &J : I) { - if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) + MetadataMap MDMap; + void addRecursiveMetadataUses(); + +public: + ScopedAliasMetadataDeepCloner(const Function *F); + + /// Create a new clone of the scoped alias metadata, which will be used by + /// subsequent remap() calls. + void clone(); + + /// Remap instructions in the given VMap from the original to the cloned + /// metadata. + void remap(ValueToValueMapTy &VMap); +}; + +ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( + const Function *F) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); - if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) + if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); - } - if (MD.empty()) - return; + // We also need to clone the metadata in noalias intrinsics. + if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + MD.insert(Decl->getScopeList()); + } + } + addRecursiveMetadataUses(); +} - // Walk the existing metadata, adding the complete (perhaps cyclic) chain to - // the set. +void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); while (!Queue.empty()) { const MDNode *M = cast<MDNode>(Queue.pop_back_val()); - for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) - if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i))) - if (MD.insert(M1)) - Queue.push_back(M1); + for (const Metadata *Op : M->operands()) + if (const MDNode *OpMD = dyn_cast<MDNode>(Op)) + if (MD.insert(OpMD)) + Queue.push_back(OpMD); } +} + +void ScopedAliasMetadataDeepCloner::clone() { + assert(MDMap.empty() && "clone() already called ?"); - // Now we have a complete set of all metadata in the chains used to specify - // the noalias scopes and the lists of those scopes. SmallVector<TempMDTuple, 16> DummyNodes; - DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; for (const MDNode *I : MD) { - DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old // metadata references with either a dummy node or an already-created new // node. + SmallVector<Metadata *, 4> NewOps; for (const MDNode *I : MD) { - SmallVector<Metadata *, 4> NewOps; - for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { - const Metadata *V = I->getOperand(i); - if (const MDNode *M = dyn_cast<MDNode>(V)) + for (const Metadata *Op : I->operands()) { + if (const MDNode *M = dyn_cast<MDNode>(Op)) NewOps.push_back(MDMap[M]); else - NewOps.push_back(const_cast<Metadata *>(V)); + NewOps.push_back(const_cast<Metadata *>(Op)); } - MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); + MDNode *NewM = MDNode::get(I->getContext(), NewOps); MDTuple *TempM = cast<MDTuple>(MDMap[I]); assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); + NewOps.clear(); } +} - // Now replace the metadata in the new inlined instructions with the - // repacements from the map. - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - if (!VMI->second) +void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) { + if (MDMap.empty()) + return; // Nothing to do. + + for (auto Entry : VMap) { + // Check that key is an instruction, to skip the Argument mapping, which + // points to an instruction in the original function, not the inlined one. + if (!Entry->second || !isa<Instruction>(Entry->first)) continue; - Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) + Instruction *I = dyn_cast<Instruction>(Entry->second); + if (!I) continue; - if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had alias scope metadata (a list of scopes to - // which instructions inside it might belong), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); - } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope)) - NI->setMetadata(LLVMContext::MD_alias_scope, M); - } + if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope)) + I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]); - if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had noalias metadata (a list of scopes with - // which instructions inside it don't alias), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_noalias, NewMD); - } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias)) - NI->setMetadata(LLVMContext::MD_noalias, M); - } + if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias)) + I->setMetadata(LLVMContext::MD_noalias, MDMap[M]); + + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) + Decl->setScopeList(MDMap[Decl->getScopeList()]); } } @@ -967,6 +985,17 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, // property of the callee, but also all control dependencies in the caller. MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); NewScopes.insert(std::make_pair(A, NewScope)); + + if (UseNoAliasIntrinsic) { + // Introduce a llvm.experimental.noalias.scope.decl for the noalias + // argument. + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); + // Ignore the result for now. The result will be used when the + // llvm.noalias intrinsic is introduced. + (void)NoAliasDecl; + } } // Iterate over all new instructions in the map; for all memory-access @@ -1037,7 +1066,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, SmallSetVector<const Argument *, 4> NAPtrArgs; for (const Value *V : PtrArgs) { SmallVector<const Value *, 4> Objects; - GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr); + getUnderlyingObjects(V, Objects, /* LI = */ nullptr); for (const Value *O : Objects) ObjSet.insert(O); @@ -1245,7 +1274,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; - if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) { + if (Align && !Arg.hasPassPointeeByValueCopyAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { DT.recalculate(*CB.getCaller()); DTCalculated = true; @@ -1448,8 +1477,8 @@ static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap<const MDNode *, MDNode *> &IANodes) { auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); - return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(), - IA); + return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), + OrigDL.getScope(), IA); } /// Update inlined instructions' line numbers to @@ -1573,8 +1602,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, return; auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; int64_t CallCount = - std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, - CalleeEntryCount.getCount()); + std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount()); updateProfileCallee(Callee, -CallCount, &VMap); } @@ -1765,6 +1793,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector<std::pair<Value*, Value*>, 4> ByValInit; + // When inlining a function that contains noalias scope metadata, + // this metadata needs to be cloned so that the inlined blocks + // have different "unique scopes" at every call site. + // Track the metadata that must be cloned. Do this before other changes to + // the function, so that we do not get in trouble when inlining caller == + // callee. + ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); + auto &DL = Caller->getParent()->getDataLayout(); // Calculate the vector of arguments to pass into the function cloner, which @@ -1855,11 +1891,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + ChildOB.Inputs.size()); - MergedDeoptArgs.insert(MergedDeoptArgs.end(), - ParentDeopt->Inputs.begin(), - ParentDeopt->Inputs.end()); - MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), - ChildOB.Inputs.end()); + llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); + llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); } @@ -1885,8 +1918,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); - // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CB, VMap); + // Now clone the inlined noalias scope metadata. + SAMetadataCloner.clone(); + SAMetadataCloner.remap(VMap); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1895,8 +1929,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // function which feed into its return value. AddReturnAttributes(CB, VMap); - // Propagate llvm.mem.parallel_loop_access if necessary. - PropagateParallelLoopAccessMetadata(CB, VMap); + // Propagate metadata on the callsite if necessary. + PropagateCallSiteMetadata(CB, VMap); // Register any cloned assumptions. if (IFI.GetAssumptionCache) @@ -2061,7 +2095,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, dyn_cast<ConstantInt>(AI->getArraySize())) { auto &DL = Caller->getParent()->getDataLayout(); Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); // Don't add markers for zero-sized allocas. @@ -2070,9 +2104,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. - if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && + if (!AllocaTypeSize.isScalable() && + AllocaArraySize != std::numeric_limits<uint64_t>::max() && std::numeric_limits<uint64_t>::max() / AllocaArraySize >= - AllocaTypeSize) { + AllocaTypeSize.getFixedSize()) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } @@ -2198,10 +2233,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == CB.getType()) { - auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { + llvm::erase_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); - Returns.erase(NewEnd, Returns.end()); } else { SmallVector<ReturnInst *, 8> NormalReturns; Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( @@ -2225,8 +2259,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, auto *CurBB = RI->getParent(); RI->eraseFromParent(); - SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), - DeoptCall->arg_end()); + SmallVector<Value *, 4> CallArgs(DeoptCall->args()); SmallVector<OperandBundleDef, 1> OpBundles; DeoptCall->getOperandBundlesAsDefs(OpBundles); @@ -2463,7 +2496,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret. - if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) + if (InlinedMustTailCalls && pred_empty(AfterCallBB)) AfterCallBB->eraseFromParent(); // We should always be able to fold the entry block of the function into the diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 8e339fe46d45..f3499c9c8aed 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -13,43 +13,52 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/InstructionNamer.h" #include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" + using namespace llvm; namespace { - struct InstNamer : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - InstNamer() : FunctionPass(ID) { - initializeInstNamerPass(*PassRegistry::getPassRegistry()); - } +void nameInstructions(Function &F) { + for (auto &Arg : F.args()) { + if (!Arg.hasName()) + Arg.setName("arg"); + } - void getAnalysisUsage(AnalysisUsage &Info) const override { - Info.setPreservesAll(); + for (BasicBlock &BB : F) { + if (!BB.hasName()) + BB.setName("bb"); + + for (Instruction &I : BB) { + if (!I.hasName() && !I.getType()->isVoidTy()) + I.setName("i"); } + } +} - bool runOnFunction(Function &F) override { - for (auto &Arg : F.args()) - if (!Arg.hasName()) - Arg.setName("arg"); +struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } - for (BasicBlock &BB : F) { - if (!BB.hasName()) - BB.setName("bb"); + void getAnalysisUsage(AnalysisUsage &Info) const override { + Info.setPreservesAll(); + } - for (Instruction &I : BB) - if (!I.hasName() && !I.getType()->isVoidTy()) - I.setName("i"); - } - return true; - } - }; + bool runOnFunction(Function &F) override { + nameInstructions(F); + return true; + } +}; char InstNamer::ID = 0; -} + } // namespace INITIALIZE_PASS(InstNamer, "instnamer", "Assign names to anonymous instructions", false, false) @@ -61,3 +70,9 @@ char &llvm::InstructionNamerID = InstNamer::ID; FunctionPass *llvm::createInstructionNamerPass() { return new InstNamer(); } + +PreservedAnalyses InstructionNamerPass::run(Function &F, + FunctionAnalysisManager &FAM) { + nameInstructions(F); + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp index b1a1c564d217..7437701f5339 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PredIteratorCache.h" @@ -77,12 +78,15 @@ static bool isExitBlock(BasicBlock *BB, /// rewrite the uses. bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, const LoopInfo &LI, - ScalarEvolution *SE) { + ScalarEvolution *SE, IRBuilderBase &Builder, + SmallVectorImpl<PHINode *> *PHIsToRemove) { SmallVector<Use *, 16> UsesToRewrite; - SmallSetVector<PHINode *, 16> PHIsToRemove; + SmallSetVector<PHINode *, 16> LocalPHIsToRemove; PredIteratorCache PredCache; bool Changed = false; + IRBuilderBase::InsertPointGuard InsertPtGuard(Builder); + // Cache the Loop ExitBlocks across this loop. We expect to get a lot of // instructions within the same loops, computing the exit blocks is // expensive, and we're not mutating the loop structure. @@ -107,6 +111,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(U); @@ -151,12 +159,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - - PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), - I->getName() + ".lcssa", &ExitBB->front()); + Builder.SetInsertPoint(&ExitBB->front()); + PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB), + I->getName() + ".lcssa"); // Get the debug location from the original instruction. PN->setDebugLoc(I->getDebugLoc()); - // Add inputs from inside the loop for this PHI. + + // Add inputs from inside the loop for this PHI. This is valid + // because `I` dominates `ExitBB` (checked above). This implies + // that every incoming block/edge is dominated by `I` as well, + // i.e. we can add uses of `I` to those incoming edges/append to the incoming + // blocks without violating the SSA dominance property. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(I, Pred); @@ -190,15 +203,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (Use *UseToRewrite : UsesToRewrite) { - // If this use is in an exit block, rewrite to use the newly inserted PHI. - // This is required for correctness because SSAUpdate doesn't handle uses - // in the same block. It assumes the PHI we inserted is at the end of the - // block. Instruction *User = cast<Instruction>(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); + + // For practical purposes, we consider that the use in a PHI + // occurs in the respective predecessor block. For more info, + // see the `phi` doc in LangRef and the LCSSA doc. if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses + // in the same block. It assumes the PHI we inserted is at the end of the + // block. if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { UseToRewrite->set(&UserBB->front()); continue; @@ -248,27 +265,29 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have - // any uses rewritten. If the new PHI is used, store it so that we can - // try to propagate dbg.value intrinsics to it. - SmallVector<PHINode *, 2> NeedDbgValues; + // any uses rewritten. for (PHINode *PN : AddedPHIs) if (PN->use_empty()) - PHIsToRemove.insert(PN); - else - NeedDbgValues.push_back(PN); - insertDebugValuesForPHIs(InstBB, NeedDbgValues); + LocalPHIsToRemove.insert(PN); + Changed = true; } - // Remove PHI nodes that did not have any uses rewritten. We need to redo the - // use_empty() check here, because even if the PHI node wasn't used when added - // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is - // not guaranteed to handle trees/cycles of PHI nodes that only are used by - // each other. Such situations has only been noticed when the input IR - // contains unreachable code, and leaving some extra redundant PHI nodes in - // such situations is considered a minor problem. - for (PHINode *PN : PHIsToRemove) - if (PN->use_empty()) - PN->eraseFromParent(); + + // Remove PHI nodes that did not have any uses rewritten or add them to + // PHIsToRemove, so the caller can remove them after some additional cleanup. + // We need to redo the use_empty() check here, because even if the PHI node + // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be + // using it. This cleanup is not guaranteed to handle trees/cycles of PHI + // nodes that only are used by each other. Such situations has only been + // noticed when the input IR contains unreachable code, and leaving some extra + // redundant PHI nodes in such situations is considered a minor problem. + if (PHIsToRemove) { + PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end()); + } else { + for (PHINode *PN : LocalPHIsToRemove) + if (PN->use_empty()) + PN->eraseFromParent(); + } return Changed; } @@ -276,12 +295,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, static void computeBlocksDominatingExits( Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks, SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { - SmallVector<BasicBlock *, 8> BBWorklist; - // We start from the exit blocks, as every block trivially dominates itself // (not strictly). - for (BasicBlock *BB : ExitBlocks) - BBWorklist.push_back(BB); + SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks); while (!BBWorklist.empty()) { BasicBlock *BB = BBWorklist.pop_back_val(); @@ -369,7 +385,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, Worklist.push_back(&I); } } - Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE); + + IRBuilder<> Builder(L.getHeader()->getContext()); + Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder); // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index da40c342af3a..477ea458c763 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -91,6 +91,24 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "local" STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); +STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); + +static cl::opt<bool> PHICSEDebugHash( + "phicse-debug-hash", +#ifdef EXPENSIVE_CHECKS + cl::init(true), +#else + cl::init(false), +#endif + cl::Hidden, + cl::desc("Perform extra assertion checking to verify that PHINodes's hash " + "function is well-behaved w.r.t. its isEqual predicate")); + +static cl::opt<unsigned> PHICSENumPHISmallSize( + "phicse-num-phi-smallsize", cl::init(32), cl::Hidden, + cl::desc( + "When the basic block contains not more than this number of PHI nodes, " + "perform a (faster!) exhaustive search instead of set-driven one.")); // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms @@ -116,27 +134,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Branch - See if we are conditional jumping on constant if (auto *BI = dyn_cast<BranchInst>(T)) { if (BI->isUnconditional()) return false; // Can't optimize uncond branch + BasicBlock *Dest1 = BI->getSuccessor(0); BasicBlock *Dest2 = BI->getSuccessor(1); - if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { - // Are we branching on constant? - // YES. Change to unconditional branch... - BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; - BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; - - // Let the basic block know that we are letting go of it. Based on this, - // it will adjust it's PHI nodes. - OldDest->removePredecessor(BB); - - // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Destination); - BI->eraseFromParent(); - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}}); - return true; - } - if (Dest2 == Dest1) { // Conditional branch to same location? // This branch matches something like this: // br bool %cond, label %Dest, label %Dest @@ -154,6 +155,25 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } + + if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust it's PHI nodes. + OldDest->removePredecessor(BB); + + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}}); + return true; + } + return false; } @@ -170,6 +190,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, TheOnlyDest = SI->case_begin()->getCaseSuccessor(); } + bool Changed = false; + // Figure out which case it goes to. for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { // Found case matching a constant operand? @@ -208,9 +230,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); - if (DTU) - DTU->applyUpdatesPermissive( - {{DominatorTree::Delete, ParentBB, DefaultDest}}); + Changed = true; continue; } @@ -236,19 +256,19 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Insert the new branch. Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); - std::vector <DominatorTree::UpdateType> Updates; - if (DTU) - Updates.reserve(SI->getNumSuccessors() - 1); + + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; // Remove entries from PHI nodes which we no longer branch to... + BasicBlock *SuccToKeep = TheOnlyDest; for (BasicBlock *Succ : successors(SI)) { + if (DTU && Succ != TheOnlyDest) + RemovedSuccessors.insert(Succ); // Found case matching a constant operand? - if (Succ == TheOnlyDest) { - TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest + if (Succ == SuccToKeep) { + SuccToKeep = nullptr; // Don't modify the first branch to TheOnlyDest } else { Succ->removePredecessor(BB); - if (DTU) - Updates.push_back({DominatorTree::Delete, BB, Succ}); } } @@ -257,8 +277,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); - if (DTU) - DTU->applyUpdatesPermissive(Updates); + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } return true; } @@ -296,7 +321,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); return true; } - return false; + return Changed; } if (auto *IBI = dyn_cast<IndirectBrInst>(T)) { @@ -304,22 +329,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (auto *BA = dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); - std::vector <DominatorTree::UpdateType> Updates; - if (DTU) - Updates.reserve(IBI->getNumDestinations() - 1); + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; // Insert the new branch. Builder.CreateBr(TheOnlyDest); + BasicBlock *SuccToKeep = TheOnlyDest; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - if (IBI->getDestination(i) == TheOnlyDest) { - TheOnlyDest = nullptr; + BasicBlock *DestBB = IBI->getDestination(i); + if (DTU && DestBB != TheOnlyDest) + RemovedSuccessors.insert(DestBB); + if (IBI->getDestination(i) == SuccToKeep) { + SuccToKeep = nullptr; } else { - BasicBlock *ParentBB = IBI->getParent(); - BasicBlock *DestBB = IBI->getDestination(i); - DestBB->removePredecessor(ParentBB); - if (DTU) - Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); + DestBB->removePredecessor(BB); } } Value *Address = IBI->getAddress(); @@ -336,13 +359,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. - if (TheOnlyDest) { + if (SuccToKeep) { BB->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); } - if (DTU) - DTU->applyUpdatesPermissive(Updates); + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } return true; } } @@ -392,6 +420,14 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } + if (auto *CB = dyn_cast<CallBase>(I)) { + // Treat calls that may not return as alive. + // TODO: Remove the intrinsic escape hatch once all intrinsics set + // willreturn properly. + if (!CB->willReturn() && !isa<IntrinsicInst>(I)) + return false; + } + if (!I->mayHaveSideEffects()) return true; @@ -453,21 +489,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. bool llvm::RecursivelyDeleteTriviallyDeadInstructions( - Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) { + Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { Instruction *I = dyn_cast<Instruction>(V); if (!I || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<WeakTrackingVH, 16> DeadInsts; DeadInsts.push_back(I); - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { auto *I = cast<Instruction>(DeadInsts[S]); @@ -478,13 +517,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( } if (Alive == E) return false; - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, + AboutToDeleteCallback); return true; } void llvm::RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + std::function<void(Value *)> AboutToDeleteCallback) { // Process the dead instruction list until empty. while (!DeadInsts.empty()) { Value *V = DeadInsts.pop_back_val(); @@ -498,6 +539,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions( // Don't lose the debug info while deleting the instructions. salvageDebugInfo(*I); + if (AboutToDeleteCallback) + AboutToDeleteCallback(I); + // Null out all of the instruction's operands to see if any operand becomes // dead as we go. for (Use &OpU : I->operands()) { @@ -675,34 +719,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - DomTreeUpdater *DTU) { - // This only adjusts blocks with PHI nodes. - if (!isa<PHINode>(BB->begin())) - return; - - // Remove the entries for Pred from the PHI nodes in BB, but do not simplify - // them down. This will leave us with single entry phi nodes and other phis - // that can be removed. - BB->removePredecessor(Pred, true); - - WeakTrackingVH PhiIt = &BB->front(); - while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { - PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); - Value *OldPhiIt = PhiIt; - - if (!recursivelySimplifyInstruction(PN)) - continue; - - // If recursive simplification ended up deleting the next PHI node we would - // iterate to, then our iterator is invalid, restart scanning from the top - // of the block. - if (PhiIt != OldPhiIt) PhiIt = &BB->front(); - } - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}}); -} - void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DomTreeUpdater *DTU) { @@ -727,13 +743,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { - Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { - Updates.push_back({DominatorTree::Delete, *I, PredBB}); // This predecessor of PredBB may already have DestBB as a successor. - if (llvm::find(successors(*I), DestBB) == succ_end(*I)) + if (!llvm::is_contained(successors(*I), DestBB)) Updates.push_back({DominatorTree::Insert, *I, DestBB}); + Updates.push_back({DominatorTree::Delete, *I, PredBB}); } + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } // Zap anything that took the address of DestBB. Not doing this will give the @@ -1038,14 +1054,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { - Updates.push_back({DominatorTree::Delete, BB, Succ}); // All predecessors of BB will be moved to Succ. - for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - Updates.push_back({DominatorTree::Delete, *I, BB}); + SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB)); + Updates.reserve(Updates.size() + 2 * Predecessors.size()); + for (auto *Predecessor : Predecessors) { // This predecessor of BB may already have Succ as a successor. - if (llvm::find(successors(*I), Succ) == succ_end(*I)) - Updates.push_back({DominatorTree::Insert, *I, Succ}); + if (!llvm::is_contained(successors(Predecessor), Succ)) + Updates.push_back({DominatorTree::Insert, Predecessor, Succ}); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } + Updates.push_back({DominatorTree::Delete, BB, Succ}); } if (isa<PHINode>(Succ->begin())) { @@ -1101,7 +1119,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, "applying corresponding DTU updates."); if (DTU) { - DTU->applyUpdatesPermissive(Updates); + DTU->applyUpdates(Updates); DTU->deleteBB(BB); } else { BB->eraseFromParent(); // Delete the old basic block. @@ -1109,7 +1127,39 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { +static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + bool Changed = false; + + // Examine each PHI. + // Note that increment of I must *NOT* be in the iteration_expression, since + // we don't want to immediately advance when we restart from the beginning. + for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I);) { + ++I; + // Is there an identical PHI node in this basic block? + // Note that we only look in the upper square's triangle, + // we already checked that the lower triangle PHI's aren't identical. + for (auto J = I; PHINode *DuplicatePN = dyn_cast<PHINode>(J); ++J) { + if (!DuplicatePN->isIdenticalToWhenDefined(PN)) + continue; + // A duplicate. Replace this PHI with the base PHI. + ++NumPHICSEs; + DuplicatePN->replaceAllUsesWith(PN); + DuplicatePN->eraseFromParent(); + Changed = true; + + // The RAUW can change PHIs that we already visited. + I = BB->begin(); + break; // Start over from the beginning. + } + } + return Changed; +} + +static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1123,7 +1173,13 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { return DenseMapInfo<PHINode *>::getTombstoneKey(); } - static unsigned getHashValue(PHINode *PN) { + static bool isSentinel(PHINode *PN) { + return PN == getEmptyKey() || PN == getTombstoneKey(); + } + + // WARNING: this logic must be kept in sync with + // Instruction::isIdenticalToWhenDefined()! + static unsigned getHashValueImpl(PHINode *PN) { // Compute a hash value on the operands. Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all // the operands to be safe in case instcombine hasn't run. @@ -1132,16 +1188,37 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { hash_combine_range(PN->block_begin(), PN->block_end()))); } - static bool isEqual(PHINode *LHS, PHINode *RHS) { - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) + static unsigned getHashValue(PHINode *PN) { +#ifndef NDEBUG + // If -phicse-debug-hash was specified, return a constant -- this + // will force all hashing to collide, so we'll exhaustively search + // the table for a match, and the assertion in isEqual will fire if + // there's a bug causing equal keys to hash differently. + if (PHICSEDebugHash) + return 0; +#endif + return getHashValueImpl(PN); + } + + static bool isEqualImpl(PHINode *LHS, PHINode *RHS) { + if (isSentinel(LHS) || isSentinel(RHS)) return LHS == RHS; return LHS->isIdenticalTo(RHS); } + + static bool isEqual(PHINode *LHS, PHINode *RHS) { + // These comparisons are nontrivial, so assert that equality implies + // hash equality (DenseMap demands this as an invariant). + bool Result = isEqualImpl(LHS, RHS); + assert(!Result || (isSentinel(LHS) && LHS == RHS) || + getHashValueImpl(LHS) == getHashValueImpl(RHS)); + return Result; + } }; // Set of unique PHINodes. DenseSet<PHINode *, PHIDenseMapInfo> PHISet; + PHISet.reserve(4 * PHICSENumPHISmallSize); // Examine each PHI. bool Changed = false; @@ -1149,6 +1226,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { auto Inserted = PHISet.insert(PN); if (!Inserted.second) { // A duplicate. Replace this PHI with its duplicate. + ++NumPHICSEs; PN->replaceAllUsesWith(*Inserted.first); PN->eraseFromParent(); Changed = true; @@ -1163,54 +1241,63 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { return Changed; } -/// enforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -static Align enforceKnownAlignment(Value *V, Align Alignment, Align PrefAlign, - const DataLayout &DL) { - assert(PrefAlign > Alignment); +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + if ( +#ifndef NDEBUG + !PHICSEDebugHash && +#endif + hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) + return EliminateDuplicatePHINodesNaiveImpl(BB); + return EliminateDuplicatePHINodesSetBasedImpl(BB); +} +/// If the specified pointer points to an object that we control, try to modify +/// the object's alignment to PrefAlign. Returns a minimum known alignment of +/// the value after the operation, which may be lower than PrefAlign. +/// +/// Increating value alignment isn't often possible though. If alignment is +/// important, a more reliable approach is to simply align all global variables +/// and allocation instructions to their preferred alignment from the beginning. +static Align tryEnforceAlignment(Value *V, Align PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // TODO: ideally, computeKnownBits ought to have used - // AllocaInst::getAlignment() in its computation already, making - // the below max redundant. But, as it turns out, - // stripPointerCasts recurses through infinite layers of bitcasts, - // while computeKnownBits is not allowed to traverse more than 6 - // levels. - Alignment = std::max(AI->getAlign(), Alignment); - if (PrefAlign <= Alignment) - return Alignment; + // TODO: Ideally, this function would not be called if PrefAlign is smaller + // than the current alignment, as the known bits calculation should have + // already taken it into account. However, this is not always the case, + // as computeKnownBits() has a depth limit, while stripPointerCasts() + // doesn't. + Align CurrentAlign = AI->getAlign(); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. if (DL.exceedsNaturalStackAlignment(PrefAlign)) - return Alignment; + return CurrentAlign; AI->setAlignment(PrefAlign); return PrefAlign; } if (auto *GO = dyn_cast<GlobalObject>(V)) { // TODO: as above, this shouldn't be necessary. - Alignment = max(GO->getAlign(), Alignment); - if (PrefAlign <= Alignment) - return Alignment; + Align CurrentAlign = GO->getPointerAlignment(DL); + if (PrefAlign <= CurrentAlign) + return CurrentAlign; // If there is a large requested alignment and we can, bump up the alignment // of the global. If the memory we set aside for the global may not be the // memory used by the final program then it is impossible for us to reliably // enforce the preferred alignment. if (!GO->canIncreaseAlignment()) - return Alignment; + return CurrentAlign; GO->setAlignment(PrefAlign); return PrefAlign; } - return Alignment; + return Align(1); } Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, @@ -1232,7 +1319,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, Align Alignment = Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ)); if (PrefAlign && *PrefAlign > Alignment) - Alignment = enforceKnownAlignment(V, Alignment, *PrefAlign, DL); + Alignment = std::max(Alignment, tryEnforceAlignment(V, *PrefAlign, DL)); // We don't need to make any adjustment. return Alignment; @@ -1270,16 +1357,22 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, /// least n bits. static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { const DataLayout &DL = DII->getModule()->getDataLayout(); - uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy); - if (auto FragmentSize = DII->getFragmentSizeInBits()) - return ValueSize >= *FragmentSize; + TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy); + if (Optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) { + assert(!ValueSize.isScalable() && + "Fragments don't work on scalable types."); + return ValueSize.getFixedSize() >= *FragmentSize; + } // We can't always calculate the size of the DI variable (e.g. if it is a // VLA). Try to use the size of the alloca that the dbg intrinsic describes // intead. if (DII->isAddressOfVariable()) if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation())) - if (auto FragmentSize = AI->getAllocationSizeInBits(DL)) - return ValueSize >= *FragmentSize; + if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) { + assert(ValueSize.isScalable() == FragmentSize->isScalable() && + "Both sizes should agree on the scalable flag."); + return TypeSize::isKnownGE(ValueSize, *FragmentSize); + } // Could not determine size of variable. Conservatively return false. return false; } @@ -1294,7 +1387,7 @@ static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { MDNode *Scope = DeclareLoc.getScope(); DILocation *InlinedAt = DeclareLoc.getInlinedAt(); // Produce an unknown location with the correct scope / inlinedAt fields. - return DebugLoc::get(0, 0, Scope, InlinedAt); + return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); } /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value @@ -1911,8 +2004,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, return false; } -unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { +std::pair<unsigned, unsigned> +llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { unsigned NumDeadInst = 0; + unsigned NumDeadDbgInst = 0; // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. @@ -1925,30 +2020,31 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { EndInst = Inst; continue; } - if (!isa<DbgInfoIntrinsic>(Inst)) + if (isa<DbgInfoIntrinsic>(Inst)) + ++NumDeadDbgInst; + else ++NumDeadInst; Inst->eraseFromParent(); } - return NumDeadInst; + return {NumDeadInst, NumDeadDbgInst}; } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, bool PreserveLCSSA, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU) { BasicBlock *BB = I->getParent(); - std::vector <DominatorTree::UpdateType> Updates; if (MSSAU) MSSAU->changeToUnreachable(I); + SmallSetVector<BasicBlock *, 8> UniqueSuccessors; + // Loop over all of the successors, removing BB's entry from any PHI // nodes. - if (DTU) - Updates.reserve(BB->getTerminator()->getNumSuccessors()); for (BasicBlock *Successor : successors(BB)) { Successor->removePredecessor(BB, PreserveLCSSA); if (DTU) - Updates.push_back({DominatorTree::Delete, BB, Successor}); + UniqueSuccessors.insert(Successor); } // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. @@ -1970,13 +2066,18 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } - if (DTU) - DTU->applyUpdatesPermissive(Updates); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 8> Updates; + Updates.reserve(UniqueSuccessors.size()); + for (BasicBlock *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); + DTU->applyUpdates(Updates); + } return NumInstrsRemoved; } CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { - SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end()); + SmallVector<Value *, 8> Args(II->args()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); CallInst *NewCall = CallInst::Create(II->getFunctionType(), @@ -2017,7 +2118,7 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -2033,7 +2134,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, BB->getInstList().pop_back(); // Create the new invoke instruction. - SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end()); + SmallVector<Value *, 8> InvokeArgs(CI->args()); SmallVector<OperandBundleDef, 1> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -2164,8 +2265,7 @@ static bool markAliveBlocks(Function &F, UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); if (DTU) - DTU->applyUpdatesPermissive( - {{DominatorTree::Delete, BB, UnwindDestBB}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } else changeToCall(II, DTU); Changed = true; @@ -2194,6 +2294,7 @@ static bool markAliveBlocks(Function &F, } }; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; // Set of unique CatchPads. SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> @@ -2203,14 +2304,22 @@ static bool markAliveBlocks(Function &F, E = CatchSwitch->handler_end(); I != E; ++I) { BasicBlock *HandlerBB = *I; + ++NumPerSuccessorCases[HandlerBB]; auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); if (!HandlerSet.insert({CatchPad, Empty}).second) { + --NumPerSuccessorCases[HandlerBB]; CatchSwitch->removeHandler(I); --I; --E; Changed = true; } } + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, BB, I.first}); + if (DTU) + DTU->applyUpdates(Updates); } Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU); @@ -2254,7 +2363,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}}); + DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDest}}); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even @@ -2270,28 +2379,39 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, return Changed; assert(Reachable.size() < F.size()); - NumRemoved += F.size() - Reachable.size(); - SmallSetVector<BasicBlock *, 8> DeadBlockSet; + // Are there any blocks left to actually delete? + SmallSetVector<BasicBlock *, 8> BlocksToRemove; for (BasicBlock &BB : F) { // Skip reachable basic blocks if (Reachable.count(&BB)) continue; - DeadBlockSet.insert(&BB); + // Skip already-deleted blocks + if (DTU && DTU->isBBPendingDeletion(&BB)) + continue; + BlocksToRemove.insert(&BB); } + if (BlocksToRemove.empty()) + return Changed; + + Changed = true; + NumRemoved += BlocksToRemove.size(); + if (MSSAU) - MSSAU->removeBlocks(DeadBlockSet); + MSSAU->removeBlocks(BlocksToRemove); - // Loop over all of the basic blocks that are not reachable, dropping all of + // Loop over all of the basic blocks that are up for removal, dropping all of // their internal references. Update DTU if available. std::vector<DominatorTree::UpdateType> Updates; - for (auto *BB : DeadBlockSet) { + for (auto *BB : BlocksToRemove) { + SmallSetVector<BasicBlock *, 8> UniqueSuccessors; for (BasicBlock *Successor : successors(BB)) { - if (!DeadBlockSet.count(Successor)) + // Only remove references to BB in reachable successors of BB. + if (Reachable.count(Successor)) Successor->removePredecessor(BB); if (DTU) - Updates.push_back({DominatorTree::Delete, BB, Successor}); + UniqueSuccessors.insert(Successor); } BB->dropAllReferences(); if (DTU) { @@ -2305,27 +2425,22 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, new UnreachableInst(BB->getContext(), BB); assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); + Updates.reserve(Updates.size() + UniqueSuccessors.size()); + for (auto *UniqueSuccessor : UniqueSuccessors) + Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor}); } } if (DTU) { - DTU->applyUpdatesPermissive(Updates); - bool Deleted = false; - for (auto *BB : DeadBlockSet) { - if (DTU->isBBPendingDeletion(BB)) - --NumRemoved; - else - Deleted = true; + DTU->applyUpdates(Updates); + for (auto *BB : BlocksToRemove) DTU->deleteBB(BB); - } - if (!Deleted) - return false; } else { - for (auto *BB : DeadBlockSet) + for (auto *BB : BlocksToRemove) BB->eraseFromParent(); } - return true; + return Changed; } void llvm::combineMetadata(Instruction *K, const Instruction *J, @@ -2570,10 +2685,13 @@ bool llvm::callsGCLeafFunction(const CallBase *Call, if (F->hasFnAttribute("gc-leaf-function")) return true; - if (auto IID = F->getIntrinsicID()) + if (auto IID = F->getIntrinsicID()) { // Most LLVM intrinsics do not take safepoints. return IID != Intrinsic::experimental_gc_statepoint && - IID != Intrinsic::experimental_deoptimize; + IID != Intrinsic::experimental_deoptimize && + IID != Intrinsic::memcpy_element_unordered_atomic && + IID != Intrinsic::memmove_element_unordered_atomic; + } } // Lib calls can be materialized by some passes, and won't be @@ -2701,7 +2819,7 @@ struct BitPart { /// Analyze the specified subexpression and see if it is capable of providing /// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in /// the output of the expression came from a corresponding bit in some other /// value. This function is recursive, and the end result is a mapping of /// bitnumber to bitnumber. It is the caller's responsibility to validate that @@ -2713,6 +2831,10 @@ struct BitPart { /// BitPart is returned with Provider set to %X and Provenance[24-31] set to /// [0-7]. /// +/// For vector types, all analysis is performed at the per-element level. No +/// cross-element analysis is supported (shuffle/insertion/reduction), and all +/// constant masks must be splatted across all elements. +/// /// To avoid revisiting values, the BitPart results are memoized into the /// provided map. To avoid unnecessary copying of BitParts, BitParts are /// constructed in-place in the \c BPS map. Because of this \c BPS needs to @@ -2730,7 +2852,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return I->second; auto &Result = BPS[V] = None; - auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + auto BitWidth = V->getType()->getScalarSizeInBits(); // Prevent stack overflow by limiting the recursion depth if (Depth == BitPartRecursionMaxDepth) { @@ -2738,13 +2860,16 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; } - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (auto *I = dyn_cast<Instruction>(V)) { + Value *X, *Y; + const APInt *C; + // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) { - auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + if (match(V, m_Or(m_Value(X), m_Value(Y)))) { + const auto &A = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &B = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!A || !B) return Result; @@ -2753,31 +2878,31 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; Result = BitPart(A->Provider, BitWidth); - for (unsigned i = 0; i < A->Provenance.size(); ++i) { - if (A->Provenance[i] != BitPart::Unset && - B->Provenance[i] != BitPart::Unset && - A->Provenance[i] != B->Provenance[i]) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) { + if (A->Provenance[BitIdx] != BitPart::Unset && + B->Provenance[BitIdx] != BitPart::Unset && + A->Provenance[BitIdx] != B->Provenance[BitIdx]) return Result = None; - if (A->Provenance[i] == BitPart::Unset) - Result->Provenance[i] = B->Provenance[i]; + if (A->Provenance[BitIdx] == BitPart::Unset) + Result->Provenance[BitIdx] = B->Provenance[BitIdx]; else - Result->Provenance[i] = A->Provenance[i]; + Result->Provenance[BitIdx] = A->Provenance[BitIdx]; } return Result; } // If this is a logical shift by a constant, recurse then shift the result. - if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { - unsigned BitShift = - cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); + if (match(V, m_LogicalShift(m_Value(X), m_APInt(C)))) { + const APInt &BitShift = *C; + // Ensure the shift amount is defined. - if (BitShift > BitWidth) + if (BitShift.uge(BitWidth)) return Result; - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2785,11 +2910,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Perform the "shift" on BitProvenance. auto &P = Result->Provenance; if (I->getOpcode() == Instruction::Shl) { - P.erase(std::prev(P.end(), BitShift), P.end()); - P.insert(P.begin(), BitShift, BitPart::Unset); + P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); + P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); } else { - P.erase(P.begin(), std::next(P.begin(), BitShift)); - P.insert(P.end(), BitShift, BitPart::Unset); + P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); + P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); } return Result; @@ -2797,44 +2922,102 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a logical 'and' with a mask that clears bits, recurse then // unset the appropriate bits. - if (I->getOpcode() == Instruction::And && - isa<ConstantInt>(I->getOperand(1))) { - APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); - const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + if (match(V, m_And(m_Value(X), m_APInt(C)))) { + const APInt &AndMask = *C; // Check that the mask allows a multiple of 8 bits for a bswap, for an // early exit. unsigned NumMaskedBits = AndMask.countPopulation(); - if (!MatchBitReversals && NumMaskedBits % 8 != 0) + if (!MatchBitReversals && (NumMaskedBits % 8) != 0) return Result; - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; - for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) // If the AndMask is zero for this bit, clear the bit. - if ((AndMask & Bit) == 0) - Result->Provenance[i] = BitPart::Unset; + if (AndMask[BitIdx] == 0) + Result->Provenance[BitIdx] = BitPart::Unset; return Result; } // If this is a zext instruction zero extend the result. - if (I->getOpcode() == Instruction::ZExt) { - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); + if (match(V, m_ZExt(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = BitPart(Res->Provider, BitWidth); - auto NarrowBitWidth = - cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth(); - for (unsigned i = 0; i < NarrowBitWidth; ++i) - Result->Provenance[i] = Res->Provenance[i]; - for (unsigned i = NarrowBitWidth; i < BitWidth; ++i) - Result->Provenance[i] = BitPart::Unset; + auto NarrowBitWidth = X->getType()->getScalarSizeInBits(); + for (unsigned BitIdx = 0; BitIdx < NarrowBitWidth; ++BitIdx) + Result->Provenance[BitIdx] = Res->Provenance[BitIdx]; + for (unsigned BitIdx = NarrowBitWidth; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitPart::Unset; + return Result; + } + + // BITREVERSE - most likely due to us previous matching a partial + // bitreverse. + if (match(V, m_BitReverse(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[(BitWidth - 1) - BitIdx] = Res->Provenance[BitIdx]; + return Result; + } + + // BSWAP - most likely due to us previous matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift + // amount (modulo). + // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + if (match(V, m_FShl(m_Value(X), m_Value(Y), m_APInt(C))) || + match(V, m_FShr(m_Value(X), m_Value(Y), m_APInt(C)))) { + // We can treat fshr as a fshl by flipping the modulo amount. + unsigned ModAmt = C->urem(BitWidth); + if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr) + ModAmt = BitWidth - ModAmt; + + const auto &LHS = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + const auto &RHS = + collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + + // Check we have both sources and they are from the same provider. + if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider) + return Result; + + unsigned StartBitRHS = BitWidth - ModAmt; + Result = BitPart(LHS->Provider, BitWidth); + for (unsigned BitIdx = 0; BitIdx < StartBitRHS; ++BitIdx) + Result->Provenance[BitIdx + ModAmt] = LHS->Provenance[BitIdx]; + for (unsigned BitIdx = 0; BitIdx < ModAmt; ++BitIdx) + Result->Provenance[BitIdx] = RHS->Provenance[BitIdx + StartBitRHS]; return Result; } } @@ -2842,8 +3025,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be // the input value to the bswap/bitreverse. Result = BitPart(V, BitWidth); - for (unsigned i = 0; i < BitWidth; ++i) - Result->Provenance[i] = i; + for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) + Result->Provenance[BitIdx] = BitIdx; return Result; } @@ -2870,65 +3053,92 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( return false; if (!MatchBSwaps && !MatchBitReversals) return false; - IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); - if (!ITy || ITy->getBitWidth() > 128) - return false; // Can't do vectors or integers > 128 bits. - unsigned BW = ITy->getBitWidth(); - - unsigned DemandedBW = BW; - IntegerType *DemandedTy = ITy; - if (I->hasOneUse()) { - if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) { - DemandedTy = cast<IntegerType>(Trunc->getType()); - DemandedBW = DemandedTy->getBitWidth(); - } - } + Type *ITy = I->getType(); + if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128) + return false; // Can't do integer/elements > 128 bits. + + Type *DemandedTy = ITy; + if (I->hasOneUse()) + if (auto *Trunc = dyn_cast<TruncInst>(I->user_back())) + DemandedTy = Trunc->getType(); // Try to find all the pieces corresponding to the bswap. std::map<Value *, Optional<BitPart>> BPS; auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); if (!Res) return false; - auto &BitProvenance = Res->Provenance; + ArrayRef<int8_t> BitProvenance = Res->Provenance; + assert(all_of(BitProvenance, + [](int8_t I) { return I == BitPart::Unset || 0 <= I; }) && + "Illegal bit provenance index"); + + // If the upper bits are zero, then attempt to perform as a truncated op. + if (BitProvenance.back() == BitPart::Unset) { + while (!BitProvenance.empty() && BitProvenance.back() == BitPart::Unset) + BitProvenance = BitProvenance.drop_back(); + if (BitProvenance.empty()) + return false; // TODO - handle null value? + DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size()); + if (auto *IVecTy = dyn_cast<VectorType>(ITy)) + DemandedTy = VectorType::get(DemandedTy, IVecTy); + } + + // Check BitProvenance hasn't found a source larger than the result type. + unsigned DemandedBW = DemandedTy->getScalarSizeInBits(); + if (DemandedBW > ITy->getScalarSizeInBits()) + return false; // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; - for (unsigned i = 0; i < DemandedBW; ++i) { - OKForBSwap &= - bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW); - OKForBitReverse &= - bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); + bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; + bool OKForBitReverse = MatchBitReversals; + for (unsigned BitIdx = 0; + (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, + DemandedBW); + OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], + BitIdx, DemandedBW); } Intrinsic::ID Intrin; - if (OKForBSwap && MatchBSwaps) + if (OKForBSwap) Intrin = Intrinsic::bswap; - else if (OKForBitReverse && MatchBitReversals) + else if (OKForBitReverse) Intrin = Intrinsic::bitreverse; else return false; - if (ITy != DemandedTy) { - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); - Value *Provider = Res->Provider; - IntegerType *ProviderTy = cast<IntegerType>(Provider->getType()); - // We may need to truncate the provider. - if (DemandedTy != ProviderTy) { - auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, - "trunc", I); - InsertedInsts.push_back(Trunc); - Provider = Trunc; - } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); + Value *Provider = Res->Provider; + + // We may need to truncate the provider. + if (DemandedTy != Provider->getType()) { + auto *Trunc = + CastInst::CreateIntegerCast(Provider, DemandedTy, false, "trunc", I); + InsertedInsts.push_back(Trunc); + Provider = Trunc; + } + + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } + + // We may need to zeroextend back to the result type. + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::CreateIntegerCast(Result, ITy, false, "zext", I); InsertedInsts.push_back(ExtInst); - return true; } - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); - InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); return true; } @@ -3020,44 +3230,6 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { } } -using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>; -AllocaInst *llvm::findAllocaForValue(Value *V, - AllocaForValueMapTy &AllocaForValue) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return AI; - // See if we've already calculated (or started to calculate) alloca for a - // given value. - AllocaForValueMapTy::iterator I = AllocaForValue.find(V); - if (I != AllocaForValue.end()) - return I->second; - // Store 0 while we're calculating alloca for value V to avoid - // infinite recursion if the value references itself. - AllocaForValue[V] = nullptr; - AllocaInst *Res = nullptr; - if (CastInst *CI = dyn_cast<CastInst>(V)) - Res = findAllocaForValue(CI->getOperand(0), AllocaForValue); - else if (PHINode *PN = dyn_cast<PHINode>(V)) { - for (Value *IncValue : PN->incoming_values()) { - // Allow self-referencing phi-nodes. - if (IncValue == PN) - continue; - AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue); - // AI for incoming values should exist and should all be equal. - if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res)) - return nullptr; - Res = IncValueAI; - } - } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) { - Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue); - } else { - LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: " - << *V << "\n"); - } - if (Res) - AllocaForValue[V] = Res; - return Res; -} - Value *llvm::invertCondition(Value *Condition) { // First: Check if it's a constant if (Constant *C = dyn_cast<Constant>(Condition)) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp index c653aacbee6c..cb5fee7d28e6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -1,4 +1,4 @@ -//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// +//===- LoopPeel.cpp -------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements some loop unrolling utilities for peeling loops -// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for -// unrolling loops with compile-time constant trip counts. -// +// Loop Peeling Utilities. //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -49,10 +47,24 @@ using namespace llvm; using namespace llvm::PatternMatch; -#define DEBUG_TYPE "loop-unroll" +#define DEBUG_TYPE "loop-peel" STATISTIC(NumPeeled, "Number of loops peeled"); +static cl::opt<unsigned> UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + +static cl::opt<bool> + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, + cl::desc("Allows loops to be peeled when the dynamic " + "trip count is known to be low.")); + +static cl::opt<bool> + UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", + cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt<unsigned> UnrollPeelMaxCount( "unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling.")); @@ -103,7 +115,12 @@ bool llvm::canPeel(Loop *L) { // This can be an indication of two different things: // 1) The loop is not rotated. // 2) The loop contains irreducible control flow that involves the latch. - if (L->getLoopLatch() != L->getExitingBlock()) + const BasicBlock *Latch = L->getLoopLatch(); + if (Latch != L->getExitingBlock()) + return false; + + // Peeling is only supported if the latch is a branch. + if (!isa<BranchInst>(Latch->getTerminator())) return false; return true; @@ -215,11 +232,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // consider AddRecs of the loop we are trying to peel. if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) continue; - bool Increasing; if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && - !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) + !SE.getMonotonicPredicateType(LeftAR, Pred)) continue; - (void)Increasing; // Check if extending the current DesiredPeelCount lets us evaluate Pred // or !Pred in the loop body statically. @@ -278,9 +293,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE) { + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -292,7 +307,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // Only try to peel innermost loops by default. // The constraint can be relaxed by the target in TTI.getUnrollingPreferences // or by the flag -unroll-allow-loop-nests-peeling. - if (!PP.AllowLoopNestsPeeling && !L->empty()) + if (!PP.AllowLoopNestsPeeling && !L->isInnermost()) return; // If the user provided a peel count, use that. @@ -322,7 +337,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // maximum number of iterations among these values, thus turning all those // Phis into invariants. // First, check that we can peel at least one iteration. - if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) { // Store the pre-calculated values here. SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; // Now go through all Phis to calculate their the number of iterations they @@ -342,7 +357,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // Pay respect to limitations implied by loop size and the max peel count. unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); DesiredPeelCount = std::max(DesiredPeelCount, countToEliminateCompares(*L, MaxPeelCount, SE)); @@ -385,7 +400,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (*PeelCount) { if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { + (LoopSize * (*PeelCount + 1) <= Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); PP.PeelCount = *PeelCount; @@ -396,7 +411,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); - LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); } } } @@ -491,7 +506,7 @@ static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, - SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges, + SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI) { @@ -599,6 +614,40 @@ static void cloneLoopBlocks( LVMap[KV.first] = KV.second; } +TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + Optional<bool> UserAllowPeeling, + Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) { + TargetTransformInfo::PeelingPreferences PP; + + // Set the default values. + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + + // Get the target specifc values. + TTI.getPeelingPreferences(L, SE, PP); + + // User specified values using cl::opt. + if (UnrollingSpecficValues) { + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + } + + // User specifed values provided by argument. + if (UserAllowPeeling.hasValue()) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + /// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. @@ -609,8 +658,8 @@ static void cloneLoopBlocks( /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA) { + ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 8804bba975b6..b678efdc8d88 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -12,7 +12,6 @@ #include "llvm/Transforms/Utils/LoopRotationUtils.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" @@ -36,6 +35,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -44,6 +44,8 @@ using namespace llvm; #define DEBUG_TYPE "loop-rotate" +STATISTIC(NumNotRotatedDueToHeaderSize, + "Number of loops not rotated due to the header size"); STATISTIC(NumRotated, "Number of loops rotated"); static cl::opt<bool> @@ -64,15 +66,17 @@ class LoopRotate { const SimplifyQuery &SQ; bool RotationOnly; bool IsUtilMode; + bool PrepareForLTO; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode) + const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, + bool PrepareForLTO) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), - IsUtilMode(IsUtilMode) {} + IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} bool processLoop(Loop *L); private: @@ -300,7 +304,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { CodeMetrics::collectEphemeralValues(L, AC, EphValues); CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO); if (Metrics.notDuplicatable) { LLVM_DEBUG( dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" @@ -320,8 +324,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions, which is more than the threshold (" << MaxHeaderSize << " instructions): "; L->dump()); + ++NumNotRotatedDueToHeaderSize; return Rotated; } + + // When preparing for LTO, avoid rotating loops with calls that could be + // inlined during the LTO stage. + if (PrepareForLTO && Metrics.NumInlineCandidates > 0) + return Rotated; } // Now, this loop is suitable for rotation. @@ -391,6 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { break; } + // Remember the local noalias scope declarations in the header. After the + // rotation, they must be duplicated and the scope must be cloned. This + // avoids unwanted interaction across iterations. + SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; + for (Instruction &I : *OrigHeader) + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + NoAliasDeclInstructions.push_back(Decl); + while (I != E) { Instruction *Inst = &*I++; @@ -451,6 +469,69 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { } } + if (!NoAliasDeclInstructions.empty()) { + // There are noalias scope declarations: + // (general): + // Original: OrigPre { OrigHeader NewHeader ... Latch } + // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } + // + // with D: llvm.experimental.noalias.scope.decl, + // U: !noalias or !alias.scope depending on D + // ... { D U1 U2 } can transform into: + // (0) : ... { D U1 U2 } // no relevant rotation for this part + // (1) : ... D' { U1 U2 D } // D is part of OrigHeader + // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader + // + // We now want to transform: + // (1) -> : ... D' { D U1 U2 D'' } + // (2) -> : ... D' U1' { D U2 D'' U1'' } + // D: original llvm.experimental.noalias.scope.decl + // D', U1': duplicate with replaced scopes + // D'', U1'': different duplicate with replaced scopes + // This ensures a safe fallback to 'may_alias' introduced by the rotate, + // as U1'' and U1' scopes will not be compatible wrt to the local restrict + + // Clone the llvm.experimental.noalias.decl again for the NewHeader. + Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" + << *NAD << "\n"); + Instruction *NewNAD = NAD->clone(); + NewNAD->insertBefore(NewHeaderInsertionPoint); + } + + // Scopes must now be duplicated, once for OrigHeader and once for + // OrigPreHeader'. + { + auto &Context = NewHeader->getContext(); + + SmallVector<MDNode *, 8> NoAliasDeclScopes; + for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back(NAD->getScopeList()); + + LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, + "h.rot"); + LLVM_DEBUG(OrigHeader->dump()); + + // Keep the compile time impact low by only adapting the inserted block + // of instructions in the OrigPreHeader. This might result in slightly + // more aliasing between these instructions and those that were already + // present, but it will be much faster when the original PreHeader is + // large. + LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); + auto *FirstDecl = + cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); + auto *LastInst = &OrigPreheader->back(); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, + Context, "pre.rot"); + LLVM_DEBUG(OrigPreheader->dump()); + + LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); + LLVM_DEBUG(NewHeader->dump()); + } + } + // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. @@ -496,12 +577,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); - DT->applyUpdates(Updates); if (MSSAU) { - MSSAU->applyUpdates(Updates, *DT); + MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); + } else { + DT->applyUpdates(Updates); } } @@ -575,7 +657,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); + BasicBlock *PredBB = OrigHeader->getUniquePredecessor(); + bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); + if (DidMerge) + RemoveRedundantDbgInstrs(PredBB); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -739,13 +824,8 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly = true, unsigned Threshold = unsigned(-1), - bool IsUtilMode = true) { - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); + bool IsUtilMode = true, bool PrepareForLTO) { LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, - IsUtilMode); - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - + IsUtilMode, PrepareForLTO); return LR.processLoop(L); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp index a8445e94e55a..2e104334ad96 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -163,7 +163,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, /// if it's not already in there. Stop predecessor traversal when we reach /// StopBlock. static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, - std::set<BasicBlock*> &Blocks) { + SmallPtrSetImpl<BasicBlock *> &Blocks) { SmallVector<BasicBlock *, 8> Worklist; Worklist.push_back(InputBB); do { @@ -171,10 +171,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, if (Blocks.insert(BB).second && BB != StopBlock) // If BB is not already processed and it is not a stop block then // insert its predecessor in the work list - for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - BasicBlock *WBB = *I; - Worklist.push_back(WBB); - } + append_range(Worklist, predecessors(BB)); } while (!Worklist.empty()); } @@ -308,9 +305,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. - std::set<BasicBlock*> BlocksInL; - for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { - BasicBlock *P = *PI; + SmallPtrSet<BasicBlock *, 4> BlocksInL; + for (BasicBlock *P : predecessors(Header)) { if (DT->dominates(Header, P)) addBlockAndPredsToSet(P, Header, BlocksInL); } @@ -683,7 +679,7 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, MSSAU)) + if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) continue; // Success. The block is now dead, so remove it from the loop, @@ -691,7 +687,7 @@ ReprocessLoop: LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); - assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + assert(pred_empty(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -836,8 +832,8 @@ bool LoopSimplify::runOnFunction(Function &F) { bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); + for (auto *L : *LI) + Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); #ifndef NDEBUG if (PreserveLCSSA) { @@ -866,9 +862,9 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA // after simplifying the loops. MemorySSA is preserved if it exists. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + for (auto *L : *LI) Changed |= - simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); + simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 3875c631f839..d4cd57405239 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,6 +59,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -108,14 +109,15 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, /// insert a phi-node, otherwise LCSSA will be broken. /// The function is just a helper function for llvm::UnrollLoop that returns /// true if this situation occurs, indicating that LCSSA needs to be fixed. -static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, +static bool needToInsertPhisForLCSSA(Loop *L, + const std::vector<BasicBlock *> &Blocks, LoopInfo *LI) { for (BasicBlock *BB : Blocks) { if (LI->getLoopFor(BB) == L) continue; for (Instruction &I : *BB) { for (Use &U : I.operands()) { - if (auto Def = dyn_cast<Instruction>(U)) { + if (const auto *Def = dyn_cast<Instruction>(U)) { Loop *DefLoop = LI->getLoopFor(Def->getParent()); if (!DefLoop) continue; @@ -286,14 +288,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) { + if (!L->getLoopPreheader()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return LoopUnrollResult::Unmodified; } - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) { + if (!L->getLoopLatch()) { LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return LoopUnrollResult::Unmodified; } @@ -304,37 +304,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, return LoopUnrollResult::Unmodified; } - // The current loop unroll pass can unroll loops that have - // (1) single latch; and - // (2a) latch is unconditional; or - // (2b) latch is conditional and is an exiting block - // FIXME: The implementation can be extended to work with more complicated - // cases, e.g. loops with multiple latches. - BasicBlock *Header = L->getHeader(); - BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - - // A conditional branch which exits the loop, which can be optimized to an - // unconditional branch in the unrolled loop in some cases. - BranchInst *ExitingBI = nullptr; - bool LatchIsExiting = L->isLoopExiting(LatchBlock); - if (LatchIsExiting) - ExitingBI = LatchBI; - else if (BasicBlock *ExitingBlock = L->getExitingBlock()) - ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - LLVM_DEBUG( - dbgs() << "Can't unroll; a conditional latch must exit the loop"); - return LoopUnrollResult::Unmodified; - } - LLVM_DEBUG({ - if (ExitingBI) - dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() - << "\n"; - else - dbgs() << " No single exiting block\n"; - }); - - if (Header->hasAddressTaken()) { + if (L->getHeader()->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. LLVM_DEBUG( dbgs() << " Won't unroll loop: address of header block is taken.\n"); @@ -363,20 +333,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Are we eliminating the loop control altogether? bool CompletelyUnroll = ULO.Count == ULO.TripCount; - SmallVector<BasicBlock *, 4> ExitBlocks; - L->getExitBlocks(ExitBlocks); - std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); - - // Go through all exits of L and see if there are any phi-nodes there. We just - // conservatively assume that they're inserted to preserve LCSSA form, which - // means that complete unrolling might break this form. We need to either fix - // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For - // now we just recompute LCSSA for the outer loop, but it should be possible - // to fix it in-place. - bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && - any_of(ExitBlocks, [](const BasicBlock *BB) { - return isa<PHINode>(BB->begin()); - }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -401,12 +357,63 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, BasicBlock *ExitingBlock = L->getLoopLatch(); assert(ExitingBlock && "Loop without exiting block?"); assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - Preheader = L->getLoopPreheader(); ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); } } + // All these values should be taken only after peeling because they might have + // changed. + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks(); + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = + PreserveLCSSA && CompletelyUnroll && + any_of(ExitBlocks, + [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); + + // The current loop unroll pass can unroll loops that have + // (1) single latch; and + // (2a) latch is unconditional; or + // (2b) latch is conditional and is an exiting block + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. + BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + + // A conditional branch which exits the loop, which can be optimized to an + // unconditional branch in the unrolled loop in some cases. + BranchInst *ExitingBI = nullptr; + bool LatchIsExiting = L->isLoopExiting(LatchBlock); + if (LatchIsExiting) + ExitingBI = LatchBI; + else if (BasicBlock *ExitingBlock = L->getExitingBlock()) + ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { + // If the peeling guard is changed this assert may be relaxed or even + // deleted. + assert(!Peeled && "Peeling guard changed!"); + LLVM_DEBUG( + dbgs() << "Can't unroll; a conditional latch must exit the loop"); + return LoopUnrollResult::Unmodified; + } + LLVM_DEBUG({ + if (ExitingBI) + dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName() + << "\n"; + else + dbgs() << " No single exiting block\n"; + }); + // Loops containing convergent instructions must have a count that divides // their TripMultiple. LLVM_DEBUG( @@ -583,6 +590,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, << DIL->getFilename() << " Line: " << DIL->getLine()); } + // Identify what noalias metadata is inside the loop: if it is inside the + // loop, the associated metadata must be cloned for each iteration. + SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; + identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector<BasicBlock *, 8> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; @@ -676,6 +688,15 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, AC->registerAssumption(II); } } + + { + // Identify what other metadata depends on the cloned version. After + // cloning, replace the metadata with the corrected version for both + // memory instructions and noalias intrinsics. + std::string ext = (Twine("It") + Twine(It)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } } // Loop over the PHI nodes in the original block, setting incoming values. @@ -863,9 +884,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); - UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), - UnrolledLoopBlocks.end(), Dest), - UnrolledLoopBlocks.end()); + llvm::erase_value(UnrolledLoopBlocks, Dest); } } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index dd628f3e7e0c..6e32a2b865aa 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -148,8 +148,7 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, } while (!Worklist.empty()) { - Instruction *I = Worklist.back(); - Worklist.pop_back(); + Instruction *I = Worklist.pop_back_val(); if (!Visit(I)) return false; @@ -459,14 +458,6 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, // finish up connecting the blocks and phi nodes. At this point LastValueMap // is the last unrolled iterations values. - // Update Phis in BB from OldBB to point to NewBB - auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB, - BasicBlock *NewBB) { - for (PHINode &Phi : BB->phis()) { - int I = Phi.getBasicBlockIndex(OldBB); - Phi.setIncomingBlock(I, NewBB); - } - }; // Update Phis in BB from OldBB to point to NewBB and use the latest value // from LastValueMap auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, @@ -525,10 +516,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); - updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0], - ForeBlocksLast.back()); - updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); + SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0], + ForeBlocksLast.back()); + SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); for (unsigned It = 1; It != Count; It++) { // Replace the conditional branch of the previous iteration subloop with an @@ -538,10 +529,10 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); SubTerm->eraseFromParent(); - updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It], - ForeBlocksLast.back()); - updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); + SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It], + ForeBlocksLast.back()); + SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); } @@ -555,8 +546,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit && "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit"); } - updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); + AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); for (unsigned It = 1; It != Count; It++) { // Replace the conditional branch of the previous iteration subloop with an @@ -566,8 +557,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst::Create(AftBlocksFirst[It], AftTerm); AftTerm->eraseFromParent(); - updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); + AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 2515b1676cb9..0abf62be156f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -22,11 +22,11 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -505,6 +505,32 @@ static bool canProfitablyUnrollMultiExitLoop( // know of kinds of multiexit loops that would benefit from unrolling. } +// Assign the maximum possible trip count as the back edge weight for the +// remainder loop if the original loop comes with a branch weight. +static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, + Loop *RemainderLoop, + uint64_t UnrollFactor) { + uint64_t TrueWeight, FalseWeight; + BranchInst *LatchBR = + cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); + if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() + ? FalseWeight + : TrueWeight; + assert(UnrollFactor > 1); + uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; + BasicBlock *Header = RemainderLoop->getHeader(); + BasicBlock *Latch = RemainderLoop->getLoopLatch(); + auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(RemainderLatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } +} + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -788,6 +814,11 @@ bool llvm::UnrollRuntimeLoopRemainder( InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + // Assign the maximum possible trip count as the back edge weight for the + // remainder loop if the original loop comes with a branch weight. + if (remainderLoop && !UnrollRemainder) + updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp index 43363736684e..f0f423e9812a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -63,6 +63,7 @@ static cl::opt<bool> ForceReductionIntrinsic( static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; +static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -297,10 +298,24 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop, llvm_unreachable("unexpected number of options"); } -static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { +bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); } +Optional<ElementCount> +llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) { + Optional<int> Width = + getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width"); + + if (Width.hasValue()) { + Optional<int> IsScalable = getOptionalIntLoopAttribute( + TheLoop, "llvm.loop.vectorize.scalable.enable"); + return ElementCount::get(*Width, IsScalable.getValueOr(false)); + } + + return None; +} + llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name) { const MDOperand *AttrMD = @@ -334,7 +349,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID( bool Changed = false; if (InheritAllAttrs || InheritSomeAttrs) { - for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) { + for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) { MDNode *Op = cast<MDNode>(Existing.get()); auto InheritThisAttribute = [InheritSomeAttrs, @@ -371,7 +386,7 @@ Optional<MDNode *> llvm::makeFollowupLoopID( continue; HasAnyFollowup = true; - for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) { + for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { MDs.push_back(Option.get()); Changed = true; } @@ -404,6 +419,10 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); } +bool llvm::hasMustProgress(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopMustProgress); +} + TransformationMode llvm::hasUnrollTransformation(Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; @@ -450,14 +469,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { if (Enable == false) return TM_SuppressedByUser; - Optional<int> VectorizeWidth = - getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width"); + Optional<ElementCount> VectorizeWidth = + getOptionalElementCountLoopAttribute(L); Optional<int> InterleaveCount = getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); // 'Forcing' vector width and interleave count to one effectively disables // this tranformation. - if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1) + if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() && + InterleaveCount == 1) return TM_SuppressedByUser; if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) @@ -466,10 +486,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) { if (Enable == true) return TM_ForcedByUser; - if (VectorizeWidth == 1 && InterleaveCount == 1) + if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1) return TM_Disable; - if (VectorizeWidth > 1 || InterleaveCount > 1) + if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1) return TM_Enable; if (hasDisableAllTransformsHint(L)) @@ -542,10 +562,6 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, if (SE) SE->forgetLoop(L); - auto *ExitBlock = L->getUniqueExitBlock(); - assert(ExitBlock && "Should have a unique exit block!"); - assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); - auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); assert(OldBr && "Preheader must end with a branch"); assert(OldBr->isUnconditional() && "Preheader must have a single successor"); @@ -575,48 +591,63 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // deleting the backedge of the outer loop). If the outer loop is indeed a // non-loop, it will be deleted in a future iteration of loop deletion pass. IRBuilder<> Builder(OldBr); - Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); - // Remove the old branch. The conditional branch becomes a new terminator. - OldBr->eraseFromParent(); - - // Rewrite phis in the exit block to get their inputs from the Preheader - // instead of the exiting block. - for (PHINode &P : ExitBlock->phis()) { - // Set the zero'th element of Phi to be from the preheader and remove all - // other incoming values. Given the loop has dedicated exits, all other - // incoming values must be from the exiting blocks. - int PredIndex = 0; - P.setIncomingBlock(PredIndex, Preheader); - // Removes all incoming values from all other exiting blocks (including - // duplicate values from an exiting block). - // Nuke all entries except the zero'th entry which is the preheader entry. - // NOTE! We need to remove Incoming Values in the reverse order as done - // below, to keep the indices valid for deletion (removeIncomingValues - // updates getNumIncomingValues and shifts all values down into the operand - // being deleted). - for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) - P.removeIncomingValue(e - i, false); - - assert((P.getNumIncomingValues() == 1 && - P.getIncomingBlock(PredIndex) == Preheader) && - "Should have exactly one value and that's from the preheader!"); - } + auto *ExitBlock = L->getUniqueExitBlock(); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - if (DT) { - DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); - if (MSSA) { - MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, *DT); - if (VerifyMemorySSA) - MSSA->verifyMemorySSA(); + if (ExitBlock) { + assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + + Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); + // Remove the old branch. The conditional branch becomes a new terminator. + OldBr->eraseFromParent(); + + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. + for (PHINode &P : ExitBlock->phis()) { + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. + int PredIndex = 0; + P.setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the + // operand being deleted). + for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) + P.removeIncomingValue(e - i, false); + + assert((P.getNumIncomingValues() == 1 && + P.getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); + } + + if (DT) { + DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}); + if (MSSA) { + MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}}, + *DT); + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } } - } - // Disconnect the loop body by branching directly to its exit. - Builder.SetInsertPoint(Preheader->getTerminator()); - Builder.CreateBr(ExitBlock); - // Remove the old branch. - Preheader->getTerminator()->eraseFromParent(); + // Disconnect the loop body by branching directly to its exit. + Builder.SetInsertPoint(Preheader->getTerminator()); + Builder.CreateBr(ExitBlock); + // Remove the old branch. + Preheader->getTerminator()->eraseFromParent(); + } else { + assert(L->hasNoExitBlocks() && + "Loop should have either zero or one exit blocks."); + + Builder.SetInsertPoint(OldBr); + Builder.CreateUnreachable(); + Preheader->getTerminator()->eraseFromParent(); + } if (DT) { DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}}); @@ -635,54 +666,58 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet; llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; - // Given LCSSA form is satisfied, we should not have users of instructions - // within the dead loop outside of the loop. However, LCSSA doesn't take - // unreachable uses into account. We handle them here. - // We could do it after drop all references (in this case all users in the - // loop will be already eliminated and we have less work to do but according - // to API doc of User::dropAllReferences only valid operation after dropping - // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. - for (auto *Block : L->blocks()) - for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); - for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { - Use &U = *UI; - ++UI; - if (auto *Usr = dyn_cast<Instruction>(U.getUser())) - if (L->contains(Usr->getParent())) - continue; - // If we have a DT then we can check that uses outside a loop only in - // unreachable block. - if (DT) - assert(!DT->isReachableFromEntry(U) && - "Unexpected user in reachable block"); - U.set(Undef); + if (ExitBlock) { + // Given LCSSA form is satisfied, we should not have users of instructions + // within the dead loop outside of the loop. However, LCSSA doesn't take + // unreachable uses into account. We handle them here. + // We could do it after drop all references (in this case all users in the + // loop will be already eliminated and we have less work to do but according + // to API doc of User::dropAllReferences only valid operation after dropping + // references, is deletion. So let's substitute all usages of + // instruction from the loop with undef value of corresponding type first. + for (auto *Block : L->blocks()) + for (Instruction &I : *Block) { + auto *Undef = UndefValue::get(I.getType()); + for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); + UI != E;) { + Use &U = *UI; + ++UI; + if (auto *Usr = dyn_cast<Instruction>(U.getUser())) + if (L->contains(Usr->getParent())) + continue; + // If we have a DT then we can check that uses outside a loop only in + // unreachable block. + if (DT) + assert(!DT->isReachableFromEntry(U) && + "Unexpected user in reachable block"); + U.set(Undef); + } + auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); + if (!DVI) + continue; + auto Key = + DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); + if (Key != DeadDebugSet.end()) + continue; + DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); + DeadDebugInst.push_back(DVI); } - auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); - if (!DVI) - continue; - auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); - if (Key != DeadDebugSet.end()) - continue; - DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); - DeadDebugInst.push_back(DVI); - } - // After the loop has been deleted all the values defined and modified - // inside the loop are going to be unavailable. - // Since debug values in the loop have been deleted, inserting an undef - // dbg.value truncates the range of any dbg.value before the loop where the - // loop used to be. This is particularly important for constant values. - DIBuilder DIB(*ExitBlock->getModule()); - Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); - assert(InsertDbgValueBefore && - "There should be a non-PHI instruction in exit block, else these " - "instructions will have no parent."); - for (auto *DVI : DeadDebugInst) - DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), - DVI->getVariable(), DVI->getExpression(), - DVI->getDebugLoc(), InsertDbgValueBefore); + // After the loop has been deleted all the values defined and modified + // inside the loop are going to be unavailable. + // Since debug values in the loop have been deleted, inserting an undef + // dbg.value truncates the range of any dbg.value before the loop where the + // loop used to be. This is particularly important for constant values. + DIBuilder DIB(*ExitBlock->getModule()); + Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI(); + assert(InsertDbgValueBefore && + "There should be a non-PHI instruction in exit block, else these " + "instructions will have no parent."); + for (auto *DVI : DeadDebugInst) + DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()), + DVI->getVariable(), DVI->getExpression(), + DVI->getDebugLoc(), InsertDbgValueBefore); + } // Remove the block from the reference counting scheme, so that we can // delete it freely later. @@ -726,6 +761,51 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } } +static Loop *getOutermostLoop(Loop *L) { + while (Loop *Parent = L->getParentLoop()) + L = Parent; + return L; +} + +void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA) { + auto *Latch = L->getLoopLatch(); + assert(Latch && "multiple latches not yet supported"); + auto *Header = L->getHeader(); + Loop *OutermostLoop = getOutermostLoop(L); + + SE.forgetLoop(L); + + // Note: By splitting the backedge, and then explicitly making it unreachable + // we gracefully handle corner cases such as non-bottom tested loops and the + // like. We also have the benefit of being able to reuse existing well tested + // code. It might be worth special casing the common bottom tested case at + // some point to avoid code churn. + + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + + auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get()); + + DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager); + (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false, + /*PreserveLCSSA*/true, &DTU, MSSAU.get()); + + // Erase (and destroy) this loop instance. Handles relinking sub-loops + // and blocks within the loop as needed. + LI.erase(L); + + // If the loop we broke had a parent, then changeToUnreachable might have + // caused a block to be removed from the parent loop (see loop_nest_lcssa + // test case in zero-btc.ll for an example), thus changing the parent's + // exit blocks. If that happened, we need to rebuild LCSSA on the outermost + // loop which might have a had a block removed. + if (OutermostLoop != L) + formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); +} + + /// Checks if \p L has single exit through latch block except possibly /// "deoptimizing" exits. Returns branch instruction terminating the loop /// latch if above check is successful, nullptr otherwise. @@ -838,30 +918,29 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, return true; } -Value *llvm::createMinMaxOp(IRBuilderBase &Builder, - RecurrenceDescriptor::MinMaxRecurrenceKind RK, - Value *Left, Value *Right) { - CmpInst::Predicate P = CmpInst::ICMP_NE; +Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, + Value *Right) { + CmpInst::Predicate Pred; switch (RK) { default: llvm_unreachable("Unknown min/max recurrence kind"); - case RecurrenceDescriptor::MRK_UIntMin: - P = CmpInst::ICMP_ULT; + case RecurKind::UMin: + Pred = CmpInst::ICMP_ULT; break; - case RecurrenceDescriptor::MRK_UIntMax: - P = CmpInst::ICMP_UGT; + case RecurKind::UMax: + Pred = CmpInst::ICMP_UGT; break; - case RecurrenceDescriptor::MRK_SIntMin: - P = CmpInst::ICMP_SLT; + case RecurKind::SMin: + Pred = CmpInst::ICMP_SLT; break; - case RecurrenceDescriptor::MRK_SIntMax: - P = CmpInst::ICMP_SGT; + case RecurKind::SMax: + Pred = CmpInst::ICMP_SGT; break; - case RecurrenceDescriptor::MRK_FloatMin: - P = CmpInst::FCMP_OLT; + case RecurKind::FMin: + Pred = CmpInst::FCMP_OLT; break; - case RecurrenceDescriptor::MRK_FloatMax: - P = CmpInst::FCMP_OGT; + case RecurKind::FMax: + Pred = CmpInst::FCMP_OGT; break; } @@ -871,17 +950,15 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, FastMathFlags FMF; FMF.setFast(); Builder.setFastMathFlags(FMF); - Value *Cmp = Builder.CreateCmp(P, Left, Right, "rdx.minmax.cmp"); + Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp"); Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; } // Helper to generate an ordered reduction. -Value * -llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, - unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - ArrayRef<Value *> RedOps) { +Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, + unsigned Op, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); // Extract and apply reduction ops in ascending order: @@ -895,9 +972,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, "bin.rdx"); } else { - assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && "Invalid min/max"); - Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext); + Result = createMinMaxOp(Builder, RdxKind, Result, Ext); } if (!RedOps.empty()) @@ -908,10 +985,9 @@ llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, } // Helper to generate a log2 shuffle reduction. -Value * -llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - ArrayRef<Value *> RedOps) { +Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, + unsigned Op, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each @@ -928,17 +1004,16 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, // Fill the rest of the mask with undef. std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf"); + Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // The builder propagates its fast-math-flags setting. TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"); } else { - assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) && "Invalid min/max"); - TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf); + TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf); } if (!RedOps.empty()) propagateIRFlags(TmpVec, RedOps); @@ -952,124 +1027,62 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } -/// Create a simple vector reduction specified by an opcode and some -/// flags (if generating min/max reductions). -Value *llvm::createSimpleTargetReduction( - IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode, - Value *Src, TargetTransformInfo::ReductionFlags Flags, - ArrayRef<Value *> RedOps) { - auto *SrcVTy = cast<VectorType>(Src->getType()); - - std::function<Value *()> BuildFunc; - using RD = RecurrenceDescriptor; - RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; - - switch (Opcode) { - case Instruction::Add: - BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; - break; - case Instruction::Mul: - BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; - break; - case Instruction::And: - BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; - break; - case Instruction::Or: - BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; - break; - case Instruction::Xor: - BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; - break; - case Instruction::FAdd: - BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce( - Constant::getNullValue(SrcVTy->getElementType()), Src); - return Rdx; - }; - break; - case Instruction::FMul: - BuildFunc = [&]() { - Type *Ty = SrcVTy->getElementType(); - auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); - return Rdx; - }; - break; - case Instruction::ICmp: - if (Flags.IsMaxOp) { - MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; - BuildFunc = [&]() { - return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); - }; - } else { - MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; - BuildFunc = [&]() { - return Builder.CreateIntMinReduce(Src, Flags.IsSigned); - }; - } - break; - case Instruction::FCmp: - if (Flags.IsMaxOp) { - MinMaxKind = RD::MRK_FloatMax; - BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; - } else { - MinMaxKind = RD::MRK_FloatMin; - BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; - } - break; +Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, RecurKind RdxKind, + ArrayRef<Value *> RedOps) { + unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind); + TargetTransformInfo::ReductionFlags RdxFlags; + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || + RdxKind == RecurKind::FMax; + RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; + if (!ForceReductionIntrinsic && + !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) + return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + + auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType(); + switch (RdxKind) { + case RecurKind::Add: + return Builder.CreateAddReduce(Src); + case RecurKind::Mul: + return Builder.CreateMulReduce(Src); + case RecurKind::And: + return Builder.CreateAndReduce(Src); + case RecurKind::Or: + return Builder.CreateOrReduce(Src); + case RecurKind::Xor: + return Builder.CreateXorReduce(Src); + case RecurKind::FAdd: + return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), + Src); + case RecurKind::FMul: + return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); + case RecurKind::SMax: + return Builder.CreateIntMaxReduce(Src, true); + case RecurKind::SMin: + return Builder.CreateIntMinReduce(Src, true); + case RecurKind::UMax: + return Builder.CreateIntMaxReduce(Src, false); + case RecurKind::UMin: + return Builder.CreateIntMinReduce(Src, false); + case RecurKind::FMax: + return Builder.CreateFPMaxReduce(Src); + case RecurKind::FMin: + return Builder.CreateFPMinReduce(Src); default: llvm_unreachable("Unhandled opcode"); - break; } - if (ForceReductionIntrinsic || - TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) - return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } -/// Create a vector reduction using a given recurrence descriptor. Value *llvm::createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src, - bool NoNaN) { + RecurrenceDescriptor &Desc, Value *Src) { // TODO: Support in-order reductions based on the recurrence descriptor. - using RD = RecurrenceDescriptor; - RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); - TargetTransformInfo::ReductionFlags Flags; - Flags.NoNaN = NoNaN; - // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. IRBuilderBase::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(Desc.getFastMathFlags()); - - switch (RecKind) { - case RD::RK_FloatAdd: - return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); - case RD::RK_FloatMult: - return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); - case RD::RK_IntegerAdd: - return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); - case RD::RK_IntegerMult: - return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); - case RD::RK_IntegerAnd: - return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); - case RD::RK_IntegerOr: - return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); - case RD::RK_IntegerXor: - return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); - case RD::RK_IntegerMinMax: { - RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); - Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); - Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); - return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); - } - case RD::RK_FloatMinMax: { - Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; - return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); - } - default: - llvm_unreachable("Unhandled RecKind"); - } + return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind()); } void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { @@ -1145,7 +1158,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { // producing an expression involving multiple pointers. Until then, we must // bail out here. // - // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject // because it understands lcssa phis while SCEV does not. Value *FromPtr = FromVal; Value *ToPtr = ToVal; @@ -1162,7 +1175,7 @@ static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) { // SCEV may have rewritten an expression that produces the GEP's pointer // operand. That's ok as long as the pointer operand has the same base - // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the // base of a recurrence. This handles the case in which SCEV expansion // converts a pointer type recurrence into a nonrecurrent pointer base // indexed by an integer recurrence. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 16bd08c704ee..599bd1feb2bc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -16,8 +16,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -32,29 +36,22 @@ static cl::opt<bool> cl::desc("Add no-alias annotation for instructions that " "are disambiguated by memchecks")); -LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, ScalarEvolution *SE, - bool UseLAIChecks) - : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, + ArrayRef<RuntimePointerCheck> Checks, Loop *L, + LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE) + : VersionedLoop(L), NonVersionedLoop(nullptr), + AliasChecks(Checks.begin(), Checks.end()), + Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT), SE(SE) { - assert(L->getExitBlock() && "No single exit block"); - assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form"); - if (UseLAIChecks) { - setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); - setSCEVChecks(LAI.getPSE().getUnionPredicate()); - } -} - -void LoopVersioning::setAliasChecks(ArrayRef<RuntimePointerCheck> Checks) { - AliasChecks = {Checks.begin(), Checks.end()}; -} - -void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { - Preds = std::move(Check); + assert(L->getUniqueExitBlock() && "No single exit block"); } void LoopVersioning::versionLoop( const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + assert(VersionedLoop->isLoopSimplifyForm() && + "Loop is not in loop-simplify form"); + Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; Value *SCEVRuntimeCheck; @@ -67,11 +64,10 @@ void LoopVersioning::versionLoop( addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, RtPtrChecking.getSE()); - const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate(); SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); SCEVRuntimeCheck = - Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); + Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator()); auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); // Discard the SCEV runtime check if it is always true. @@ -122,6 +118,11 @@ void LoopVersioning::versionLoop( // Adds the necessary PHI nodes for the versioned loops based on the // loop-defined values used outside of the loop. addPHINodes(DefsUsedOutside); + formDedicatedExitBlocks(NonVersionedLoop, DT, LI, nullptr, true); + formDedicatedExitBlocks(VersionedLoop, DT, LI, nullptr, true); + assert(NonVersionedLoop->isLoopSimplifyForm() && + VersionedLoop->isLoopSimplifyForm() && + "The versioned loops should be in simplify form."); } void LoopVersioning::addPHINodes( @@ -253,47 +254,59 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, } namespace { +bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA, + DominatorTree *DT, ScalarEvolution *SE) { + // Build up a worklist of inner-loops to version. This is necessary as the + // act of versioning a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->isInnermost()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + if (!L->isLoopSimplifyForm() || !L->isRotatedForm() || + !L->getExitingBlock()) + continue; + const LoopAccessInfo &LAI = GetLAA(*L); + if (!LAI.hasConvergentOp() && + (LAI.getNumRuntimePointerChecks() || + !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { + LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, + LI, DT, SE); + LVer.versionLoop(); + LVer.annotateLoopWithNoAlias(); + Changed = true; + } + } + + return Changed; +} + /// Also expose this is a pass. Currently this is only used for /// unit-testing. It adds all memchecks necessary to remove all may-aliasing /// array accesses from the loop. -class LoopVersioningPass : public FunctionPass { +class LoopVersioningLegacyPass : public FunctionPass { public: - LoopVersioningPass() : FunctionPass(ID) { - initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry()); + LoopVersioningLegacyPass() : FunctionPass(ID) { + initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); + auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { + return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L); + }; + auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - // Build up a worklist of inner-loops to version. This is necessary as the - // act of versioning a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->empty()) - Worklist.push_back(L); - - // Now walk the identified inner loops. - bool Changed = false; - for (Loop *L : Worklist) { - const LoopAccessInfo &LAI = LAA->getInfo(L); - if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() && - (LAI.getNumRuntimePointerChecks() || - !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { - LoopVersioning LVer(LAI, L, LI, DT, SE); - LVer.versionLoop(); - LVer.annotateLoopWithNoAlias(); - Changed = true; - } - } - - return Changed; + return runImpl(LI, GetLAA, DT, SE); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -312,18 +325,45 @@ public: #define LVER_OPTION "loop-versioning" #define DEBUG_TYPE LVER_OPTION -char LoopVersioningPass::ID; +char LoopVersioningLegacyPass::ID; static const char LVer_name[] = "Loop Versioning"; -INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) +INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) +INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false, + false) namespace llvm { -FunctionPass *createLoopVersioningPass() { - return new LoopVersioningPass(); +FunctionPass *createLoopVersioningLegacyPass() { + return new LoopVersioningLegacyPass(); } + +PreservedAnalyses LoopVersioningPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + MemorySSA *MSSA = EnableMSSALoopDependency + ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() + : nullptr; + + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & { + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, MSSA}; + return LAM.getResult<LoopAccessAnalysis>(L, AR); + }; + + if (runImpl(&LI, GetLAA, &DT, &SE)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 0b225e8abc4e..fe0ff5899d8f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -48,7 +48,7 @@ static bool runImpl(Function &F) { bool Changed = false; for (BasicBlock &BB : F) if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { - SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end()); + SmallVector<Value *, 16> CallArgs(II->args()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); // Insert a normal call instruction... diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 34e836d9660f..ec8d7a7074cd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -26,6 +27,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -55,9 +57,9 @@ namespace { } // end anonymous namespace +namespace { // Return true iff R is covered by Ranges. -static bool IsInRanges(const IntRange &R, - const std::vector<IntRange> &Ranges) { +bool IsInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) { // Note: Ranges must be sorted, non-overlapping and non-adjacent. // Find the first range whose High field is >= R.High, @@ -68,120 +70,34 @@ static bool IsInRanges(const IntRange &R, return I != Ranges.end() && I->Low <= R.Low; } -namespace { - - /// Replace all SwitchInst instructions with chained branch instructions. - class LowerSwitch : public FunctionPass { - public: - // Pass identification, replacement for typeid - static char ID; - - LowerSwitch() : FunctionPass(ID) { - initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LazyValueInfoWrapperPass>(); - } - - struct CaseRange { - ConstantInt* Low; - ConstantInt* High; - BasicBlock* BB; - - CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) - : Low(low), High(high), BB(bb) {} - }; - - using CaseVector = std::vector<CaseRange>; - using CaseItr = std::vector<CaseRange>::iterator; - - private: - void processSwitchInst(SwitchInst *SI, - SmallPtrSetImpl<BasicBlock *> &DeleteList, - AssumptionCache *AC, LazyValueInfo *LVI); - - BasicBlock *switchConvert(CaseItr Begin, CaseItr End, - ConstantInt *LowerBound, ConstantInt *UpperBound, - Value *Val, BasicBlock *Predecessor, - BasicBlock *OrigBlock, BasicBlock *Default, - const std::vector<IntRange> &UnreachableRanges); - BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, - ConstantInt *LowerBound, ConstantInt *UpperBound, - BasicBlock *OrigBlock, BasicBlock *Default); - unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); - }; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - -} // end anonymous namespace - -char LowerSwitch::ID = 0; - -// Publicly exposed interface to pass... -char &llvm::LowerSwitchID = LowerSwitch::ID; - -INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) -INITIALIZE_PASS_END(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) - -// createLowerSwitchPass - Interface to this file... -FunctionPass *llvm::createLowerSwitchPass() { - return new LowerSwitch(); -} - -bool LowerSwitch::runOnFunction(Function &F) { - LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); - AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; - - bool Changed = false; - SmallPtrSet<BasicBlock*, 8> DeleteList; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks - - // If the block is a dead Default block that will be deleted later, don't - // waste time processing it. - if (DeleteList.count(Cur)) - continue; - - if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { - Changed = true; - processSwitchInst(SI, DeleteList, AC, LVI); - } - } - - for (BasicBlock* BB: DeleteList) { - LVI->eraseBlock(BB); - DeleteDeadBlock(BB); +struct CaseRange { + ConstantInt *Low; + ConstantInt *High; + BasicBlock *BB; + + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) + : Low(low), High(high), BB(bb) {} +}; + +using CaseVector = std::vector<CaseRange>; +using CaseItr = std::vector<CaseRange>::iterator; + +/// The comparison function for sorting the switch case values in the vector. +/// WARNING: Case ranges should be disjoint! +struct CaseCmp { + bool operator()(const CaseRange &C1, const CaseRange &C2) { + const ConstantInt *CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt *CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); } - - return Changed; -} +}; /// Used for debugging purposes. LLVM_ATTRIBUTE_USED -static raw_ostream &operator<<(raw_ostream &O, - const LowerSwitch::CaseVector &C) { +raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) { O << "["; - for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end(); - B != E;) { + for (CaseVector::const_iterator B = C.begin(), E = C.end(); B != E;) { O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]"; if (++B != E) O << ", "; @@ -200,9 +116,9 @@ static raw_ostream &operator<<(raw_ostream &O, /// 2) Removed if subsequent incoming values now share the same case, i.e., /// multiple outcome edges are condensed into one. This is necessary to keep the /// number of phi values equal to the number of branches to SuccBB. -static void -fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, - const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) { +void FixPhis( + BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) { for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI()->getIterator(); I != IE; ++I) { @@ -233,17 +149,80 @@ fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } } +/// Create a new leaf block for the binary lookup tree. It checks if the +/// switch's value == the case's value. If not, then it jumps to the default +/// branch. At this point in the tree, the value can't be another valid case +/// value, so the jump to the "default" branch is warranted. +BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound, + ConstantInt *UpperBound, BasicBlock *OrigBlock, + BasicBlock *Default) { + Function *F = OrigBlock->getParent(); + BasicBlock *NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); + + // Emit comparison + ICmpInst *Comp = nullptr; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = + new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (Leaf.Low == LowerBound) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (Leaf.High == UpperBound) { + // Val <= Max && Val >= Lo --> Val >= Lo + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, + "SwitchLeaf"); + } else if (Leaf.Low->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant *NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction *Add = BinaryOperator::CreateAdd( + Val, NegLo, Val->getName() + ".off", NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... + BasicBlock *Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = Leaf.High->getSExtValue() - Leaf.Low->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + /// Convert the switch statement into a binary lookup of the case values. /// The function recursively builds this tree. LowerBound and UpperBound are /// used to keep track of the bounds for Val that have already been checked by /// a block emitted by one of the previous calls to switchConvert in the call /// stack. -BasicBlock * -LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, BasicBlock *OrigBlock, - BasicBlock *Default, - const std::vector<IntRange> &UnreachableRanges) { +BasicBlock *SwitchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { assert(LowerBound && UpperBound && "Bounds must be initialized"); unsigned Size = End - Begin; @@ -255,10 +234,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, if (Begin->Low == LowerBound && Begin->High == UpperBound) { unsigned NumMergedCases = 0; NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue(); - fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); + FixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); return Begin->BB; } - return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, + return NewLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, Default); } @@ -305,12 +284,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); - BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, - NewUpperBound, Val, NewNode, OrigBlock, - Default, UnreachableRanges); - BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, - UpperBound, Val, NewNode, OrigBlock, - Default, UnreachableRanges); + BasicBlock *LBranch = + SwitchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); + BasicBlock *RBranch = + SwitchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, + NewNode, OrigBlock, Default, UnreachableRanges); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode); NewNode->getInstList().push_back(Comp); @@ -319,78 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, return NewNode; } -/// Create a new leaf block for the binary lookup tree. It checks if the -/// switch's value == the case's value. If not, then it jumps to the default -/// branch. At this point in the tree, the value can't be another valid case -/// value, so the jump to the "default" branch is warranted. -BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val, - ConstantInt *LowerBound, - ConstantInt *UpperBound, - BasicBlock *OrigBlock, - BasicBlock *Default) { - Function* F = OrigBlock->getParent(); - BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); - F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); - - // Emit comparison - ICmpInst* Comp = nullptr; - if (Leaf.Low == Leaf.High) { - // Make the seteq instruction... - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, - Leaf.Low, "SwitchLeaf"); - } else { - // Make range comparison - if (Leaf.Low == LowerBound) { - // Val >= Min && Val <= Hi --> Val <= Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, - "SwitchLeaf"); - } else if (Leaf.High == UpperBound) { - // Val <= Max && Val >= Lo --> Val >= Lo - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, - "SwitchLeaf"); - } else if (Leaf.Low->isZero()) { - // Val >= 0 && Val <= Hi --> Val <=u Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, - "SwitchLeaf"); - } else { - // Emit V-Lo <=u Hi-Lo - Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); - Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, - Val->getName()+".off", - NewLeaf); - Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, - "SwitchLeaf"); - } - } - - // Make the conditional branch... - BasicBlock* Succ = Leaf.BB; - BranchInst::Create(Succ, Default, Comp, NewLeaf); - - // If there were any PHI nodes in this successor, rewrite one entry - // from OrigBlock to come from NewLeaf. - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode* PN = cast<PHINode>(I); - // Remove all but one incoming entries from the cluster - uint64_t Range = Leaf.High->getSExtValue() - - Leaf.Low->getSExtValue(); - for (uint64_t j = 0; j < Range; ++j) { - PN->removeIncomingValue(OrigBlock); - } - - int BlockIdx = PN->getBasicBlockIndex(OrigBlock); - assert(BlockIdx != -1 && "Switch didn't go to this successor??"); - PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); - } - - return NewLeaf; -} - /// Transform simple list of \p SI's cases into list of CaseRange's \p Cases. /// \post \p Cases wouldn't contain references to \p SI's default BB. /// \returns Number of \p SI's cases that do not reference \p SI's default BB. -unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { +unsigned Clusterify(CaseVector &Cases, SwitchInst *SI) { unsigned NumSimpleCases = 0; // Start with "simple" cases @@ -431,9 +342,9 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { /// Replace the specified switch instruction with a sequence of chained if-then /// insts in a balanced binary search. -void LowerSwitch::processSwitchInst(SwitchInst *SI, - SmallPtrSetImpl<BasicBlock *> &DeleteList, - AssumptionCache *AC, LazyValueInfo *LVI) { +void ProcessSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock *> &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI) { BasicBlock *OrigBlock = SI->getParent(); Function *F = OrigBlock->getParent(); Value *Val = SI->getCondition(); // The value we are switching on... @@ -458,7 +369,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, if (Cases.empty()) { BranchInst::Create(Default, OrigBlock); // Remove all the references from Default's PHIs to OrigBlock, but one. - fixPhis(Default, OrigBlock, OrigBlock); + FixPhis(Default, OrigBlock, OrigBlock); SI->eraseFromParent(); return; } @@ -489,7 +400,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, // TODO Shouldn't this create a signed range? ConstantRange KnownBitsRange = ConstantRange::fromKnownBits(Known, /*IsSigned=*/false); - const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI); + const ConstantRange LVIRange = LVI->getConstantRange(Val, SI); ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange); // We delegate removal of unreachable non-default cases to other passes. In // the unlikely event that some of them survived, we just conservatively @@ -563,10 +474,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, // cases. assert(MaxPop > 0 && PopSucc); Default = PopSucc; - Cases.erase( - llvm::remove_if( - Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), - Cases.end()); + llvm::erase_if(Cases, + [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }); // If there are no cases left, just branch. if (Cases.empty()) { @@ -592,12 +501,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, BranchInst::Create(Default, NewDefault); BasicBlock *SwitchBlock = - switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, OrigBlock, OrigBlock, NewDefault, UnreachableRanges); // If there are entries in any PHI nodes for the default edge, make sure // to update them as well. - fixPhis(Default, OrigBlock, NewDefault); + FixPhis(Default, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); @@ -607,6 +516,84 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, OrigBlock->getInstList().erase(SI); // If the Default block has no more predecessors just add it to DeleteList. - if (pred_begin(OldDefault) == pred_end(OldDefault)) + if (pred_empty(OldDefault)) DeleteList.insert(OldDefault); } + +bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { + bool Changed = false; + SmallPtrSet<BasicBlock *, 8> DeleteList; + + for (Function::iterator I = F.begin(), E = F.end(); I != E;) { + BasicBlock *Cur = + &*I++; // Advance over block so we don't traverse new blocks + + // If the block is a dead Default block that will be deleted later, don't + // waste time processing it. + if (DeleteList.count(Cur)) + continue; + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + ProcessSwitchInst(SI, DeleteList, AC, LVI); + } + } + + for (BasicBlock *BB : DeleteList) { + LVI->eraseBlock(BB); + DeleteDeadBlock(BB); + } + + return Changed; +} + +/// Replace all SwitchInst instructions with chained branch instructions. +class LowerSwitchLegacyPass : public FunctionPass { +public: + // Pass identification, replacement for typeid + static char ID; + + LowerSwitchLegacyPass() : FunctionPass(ID) { + initializeLowerSwitchLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LazyValueInfoWrapperPass>(); + } +}; + +} // end anonymous namespace + +char LowerSwitchLegacyPass::ID = 0; + +// Publicly exposed interface to pass... +char &llvm::LowerSwitchID = LowerSwitchLegacyPass::ID; + +INITIALIZE_PASS_BEGIN(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(LowerSwitchLegacyPass, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitchLegacyPass(); +} + +bool LowerSwitchLegacyPass::runOnFunction(Function &F) { + LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); + auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); + AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; + return LowerSwitch(F, LVI, AC); +} + +PreservedAnalyses LowerSwitchPass::run(Function &F, + FunctionAnalysisManager &AM) { + LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); + AssumptionCache *AC = AM.getCachedResult<AssumptionAnalysis>(F); + return LowerSwitch(F, LVI, AC) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp new file mode 100644 index 000000000000..6a137630deeb --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MatrixUtils.cpp @@ -0,0 +1,104 @@ +//===- MatrixUtils.cpp - Utilities to lower matrix intrinsics ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for generating tiled loops for matrix operations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MatrixUtils.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, + Value *Bound, Value *Step, StringRef Name, + IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L, + LoopInfo &LI) { + LLVMContext &Ctx = Preheader->getContext(); + BasicBlock *Header = BasicBlock::Create( + Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit); + BasicBlock *Body = BasicBlock::Create(Header->getContext(), Name + ".body", + Header->getParent(), Exit); + BasicBlock *Latch = BasicBlock::Create(Header->getContext(), Name + ".latch", + Header->getParent(), Exit); + + Type *I32Ty = Type::getInt64Ty(Ctx); + BranchInst::Create(Body, Header); + BranchInst::Create(Latch, Body); + PHINode *IV = + PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()); + IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader); + + B.SetInsertPoint(Latch); + Value *Inc = B.CreateAdd(IV, Step, Name + ".step"); + Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond"); + BranchInst::Create(Header, Exit, Cond, Latch); + IV->addIncoming(Inc, Latch); + + BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator()); + BasicBlock *Tmp = PreheaderBr->getSuccessor(0); + PreheaderBr->setSuccessor(0, Header); + DTU.applyUpdatesPermissive({ + {DominatorTree::Delete, Preheader, Tmp}, + {DominatorTree::Insert, Header, Body}, + {DominatorTree::Insert, Body, Latch}, + {DominatorTree::Insert, Latch, Header}, + {DominatorTree::Insert, Latch, Exit}, + {DominatorTree::Insert, Preheader, Header}, + }); + + L->addBasicBlockToLoop(Header, LI); + L->addBasicBlockToLoop(Body, LI); + L->addBasicBlockToLoop(Latch, LI); + return Body; +} + +// Creates the following loop nest skeleton: +// for C = 0; C < NumColumns; C += TileSize +// for R = 0; R < NumRows; R += TileSize +// for K = 0; K < Inner ; K += TileSize +BasicBlock *TileInfo::CreateTiledLoops(BasicBlock *Start, BasicBlock *End, + IRBuilderBase &B, DomTreeUpdater &DTU, + LoopInfo &LI) { + Loop *ColLoop = LI.AllocateLoop(); + Loop *RowLoop = LI.AllocateLoop(); + Loop *InnerLoop = LI.AllocateLoop(); + RowLoop->addChildLoop(InnerLoop); + ColLoop->addChildLoop(RowLoop); + if (Loop *ParentL = LI.getLoopFor(Start)) + ParentL->addChildLoop(ColLoop); + else + LI.addTopLevelLoop(ColLoop); + + BasicBlock *ColBody = + CreateLoop(Start, End, B.getInt64(NumColumns), B.getInt64(TileSize), + "cols", B, DTU, ColLoop, LI); + BasicBlock *ColLatch = ColBody->getSingleSuccessor(); + BasicBlock *RowBody = + CreateLoop(ColBody, ColLatch, B.getInt64(NumRows), B.getInt64(TileSize), + "rows", B, DTU, RowLoop, LI); + RowLoopLatch = RowBody->getSingleSuccessor(); + + BasicBlock *InnerBody = + CreateLoop(RowBody, RowLoopLatch, B.getInt64(NumInner), + B.getInt64(TileSize), "inner", B, DTU, InnerLoop, LI); + InnerLoopLatch = InnerBody->getSingleSuccessor(); + ColumnLoopHeader = ColBody->getSinglePredecessor(); + RowLoopHeader = RowBody->getSinglePredecessor(); + InnerLoopHeader = InnerBody->getSinglePredecessor(); + CurrentRow = &*RowLoopHeader->begin(); + CurrentCol = &*ColumnLoopHeader->begin(); + CurrentK = &*InnerLoopHeader->begin(); + + return InnerBody; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 7f961dbaf4b4..e350320e7569 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -25,6 +26,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/InitializePasses.h" @@ -40,123 +42,125 @@ static const char *const metaNames[] = { }; namespace { +// This PRNG is from the ISO C spec. It is intentionally simple and +// unsuitable for cryptographic use. We're just looking for enough +// variety to surprise and delight users. +struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { next = seed; } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } +}; - // This PRNG is from the ISO C spec. It is intentionally simple and - // unsuitable for cryptographic use. We're just looking for enough - // variety to surprise and delight users. - struct PRNG { - unsigned long next; +struct Renamer { + Renamer(unsigned int seed) { prng.srand(seed); } - void srand(unsigned int seed) { - next = seed; - } + const char *newName() { + return metaNames[prng.rand() % array_lengthof(metaNames)]; + } - int rand() { - next = next * 1103515245 + 12345; - return (unsigned int)(next / 65536) % 32768; - } - }; + PRNG prng; +}; - struct Renamer { - Renamer(unsigned int seed) { - prng.srand(seed); - } +void MetaRename(Function &F) { + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); - const char *newName() { - return metaNames[prng.rand() % array_lengthof(metaNames)]; - } + for (auto &BB : F) { + BB.setName("bb"); - PRNG prng; - }; + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); + } +} - struct MetaRenamer : public ModulePass { - // Pass identification, replacement for typeid - static char ID; - - MetaRenamer() : ModulePass(ID) { - initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.setPreservesAll(); - } - - bool runOnModule(Module &M) override { - // Seed our PRNG with simple additive sum of ModuleID. We're looking to - // simply avoid always having the same function names, and we need to - // remain deterministic. - unsigned int randSeed = 0; - for (auto C : M.getModuleIdentifier()) - randSeed += C; - - Renamer renamer(randSeed); - - // Rename all aliases - for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { - StringRef Name = AI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - AI->setName("alias"); - } - - // Rename all global variables - for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - StringRef Name = GI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - GI->setName("global"); - } - - // Rename all struct types - TypeFinder StructTypes; - StructTypes.run(M, true); - for (StructType *STy : StructTypes) { - if (STy->isLiteral() || STy->getName().empty()) continue; - - SmallString<128> NameStorage; - STy->setName((Twine("struct.") + - renamer.newName()).toStringRef(NameStorage)); - } - - // Rename all functions - for (auto &F : M) { - StringRef Name = F.getName(); - LibFunc Tmp; - // Leave library functions alone because their presence or absence could - // affect the behavior of other passes. - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F).getLibFunc( - F, Tmp)) - continue; - - // Leave @main alone. The output of -metarenamer might be passed to - // lli for execution and the latter needs a main entry point. - if (Name != "main") - F.setName(renamer.newName()); - - runOnFunction(F); - } - return true; - } - - bool runOnFunction(Function &F) { - for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->getType()->isVoidTy()) - AI->setName("arg"); - - for (auto &BB : F) { - BB.setName("bb"); - - for (auto &I : BB) - if (!I.getType()->isVoidTy()) - I.setName("tmp"); - } - return true; - } - }; +void MetaRename(Module &M, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (auto C : M.getModuleIdentifier()) + randSeed += C; + + Renamer renamer(randSeed); + + // Rename all aliases + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + AI->setName("alias"); + } + + // Rename all global variables + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + + GI->setName("global"); + } + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *STy : StructTypes) { + if (STy->isLiteral() || STy->getName().empty()) + continue; + + SmallString<128> NameStorage; + STy->setName( + (Twine("struct.") + renamer.newName()).toStringRef(NameStorage)); + } + + // Rename all functions + for (auto &F : M) { + StringRef Name = F.getName(); + LibFunc Tmp; + // Leave library functions alone because their presence or absence could + // affect the behavior of other passes. + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + GetTLI(F).getLibFunc(F, Tmp)) + continue; + + // Leave @main alone. The output of -metarenamer might be passed to + // lli for execution and the latter needs a main entry point. + if (Name != "main") + F.setName(renamer.newName()); + + MetaRename(F); + } +} + +struct MetaRenamer : public ModulePass { + // Pass identification, replacement for typeid + static char ID; + + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + MetaRename(M, GetTLI); + return true; + } +}; } // end anonymous namespace @@ -175,3 +179,14 @@ INITIALIZE_PASS_END(MetaRenamer, "metarenamer", ModulePass *llvm::createMetaRenamerPass() { return new MetaRenamer(); } + +PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + MetaRename(M, GetTLI); + + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp deleted file mode 100644 index a16ca1fb8efa..000000000000 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp +++ /dev/null @@ -1,178 +0,0 @@ -//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This contains code to emit warnings for potentially incorrect usage of the -// llvm.expect intrinsic. This utility extracts the threshold values from -// metadata associated with the instrumented Branch or Switch instruction. The -// threshold values are then used to determine if a warning should be emmited. -// -// MisExpect metadata is generated when llvm.expect intrinsics are lowered see -// LowerExpectIntrinsic.cpp -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/MisExpect.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" -#include <cstdint> -#include <functional> -#include <numeric> - -#define DEBUG_TYPE "misexpect" - -using namespace llvm; -using namespace misexpect; - -namespace llvm { - -// Command line option to enable/disable the warning when profile data suggests -// a mismatch with the use of the llvm.expect intrinsic -static cl::opt<bool> PGOWarnMisExpect( - "pgo-warn-misexpect", cl::init(false), cl::Hidden, - cl::desc("Use this option to turn on/off " - "warnings about incorrect usage of llvm.expect intrinsics.")); - -} // namespace llvm - -namespace { - -Instruction *getOprndOrInst(Instruction *I) { - assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); - Instruction *Ret = nullptr; - if (auto *B = dyn_cast<BranchInst>(I)) { - Ret = dyn_cast<Instruction>(B->getCondition()); - } - // TODO: Find a way to resolve condition location for switches - // Using the condition of the switch seems to often resolve to an earlier - // point in the program, i.e. the calculation of the switch condition, rather - // than the switches location in the source code. Thus, we should use the - // instruction to get source code locations rather than the condition to - // improve diagnostic output, such as the caret. If the same problem exists - // for branch instructions, then we should remove this function and directly - // use the instruction - // - // else if (auto S = dyn_cast<SwitchInst>(I)) { - // Ret = I; - //} - return Ret ? Ret : I; -} - -void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, - uint64_t ProfCount, uint64_t TotalCount) { - double PercentageCorrect = (double)ProfCount / TotalCount; - auto PerString = - formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); - auto RemStr = formatv( - "Potential performance regression from use of the llvm.expect intrinsic: " - "Annotation was correct on {0} of profiled executions.", - PerString); - Twine Msg(PerString); - Instruction *Cond = getOprndOrInst(I); - if (PGOWarnMisExpect) - Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); - OptimizationRemarkEmitter ORE(I->getParent()->getParent()); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); -} - -} // namespace - -namespace llvm { -namespace misexpect { - -void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights, - LLVMContext &Ctx) { - if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) { - auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0)); - if (MisExpectDataName && - MisExpectDataName->getString().equals("misexpect")) { - LLVM_DEBUG(llvm::dbgs() << "------------------\n"); - LLVM_DEBUG(llvm::dbgs() - << "Function: " << I->getFunction()->getName() << "\n"); - LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n"); - LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) { - llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n"; - }); - - // extract values from misexpect metadata - const auto *IndexCint = - mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1)); - const auto *LikelyCInt = - mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2)); - const auto *UnlikelyCInt = - mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3)); - - if (!IndexCint || !LikelyCInt || !UnlikelyCInt) - return; - - const uint64_t Index = IndexCint->getZExtValue(); - const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue(); - const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue(); - const uint64_t ProfileCount = Weights[Index]; - const uint64_t CaseTotal = std::accumulate( - Weights.begin(), Weights.end(), (uint64_t)0, std::plus<uint64_t>()); - const uint64_t NumUnlikelyTargets = Weights.size() - 1; - - const uint64_t TotalBranchWeight = - LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); - - const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight, - TotalBranchWeight); - uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal); - - LLVM_DEBUG(llvm::dbgs() - << "Unlikely Targets: " << NumUnlikelyTargets << ":\n"); - LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n"); - LLVM_DEBUG(llvm::dbgs() - << "Scaled Threshold: " << ScaledThreshold << ":\n"); - LLVM_DEBUG(llvm::dbgs() << "------------------\n"); - if (ProfileCount < ScaledThreshold) - emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal); - } - } -} - -void checkFrontendInstrumentation(Instruction &I) { - if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) { - unsigned NOps = MD->getNumOperands(); - - // Only emit misexpect diagnostics if at least 2 branch weights are present. - // Less than 2 branch weights means that the profiling metadata is: - // 1) incorrect/corrupted - // 2) not branch weight metadata - // 3) completely deterministic - // In these cases we should not emit any diagnostic related to misexpect. - if (NOps < 3) - return; - - // Operand 0 is a string tag "branch_weights" - if (MDString *Tag = cast<MDString>(MD->getOperand(0))) { - if (Tag->getString().equals("branch_weights")) { - SmallVector<uint32_t, 4> RealWeights(NOps - 1); - for (unsigned i = 1; i < NOps; i++) { - ConstantInt *Value = - mdconst::dyn_extract<ConstantInt>(MD->getOperand(i)); - RealWeights[i - 1] = Value->getZExtValue(); - } - verifyMisExpect(&I, RealWeights, I.getContext()); - } - } - } -} - -} // namespace misexpect -} // namespace llvm -#undef DEBUG_TYPE diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 99b64a7462f6..3312a6f9459b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -53,6 +53,10 @@ static cl::opt<bool> VerifyPredicateInfo( DEBUG_COUNTER(RenameCounter, "predicateinfo-rename", "Controls which variables are renamed with predicateinfo"); +// Maximum number of conditions considered for renaming for each branch/assume. +// This limits renaming of deep and/or chains. +static const unsigned MaxCondsPerBranch = 8; + namespace { // Given a predicate info that is a type of branching terminator, get the // branching block. @@ -367,6 +371,13 @@ void PredicateInfoBuilder::convertUsesToDFSOrdered( } } +bool shouldRename(Value *V) { + // Only want real values, not constants. Additionally, operands with one use + // are only being used in the comparison, which means they will not be useful + // for us to consider for predicateinfo. + return (isa<Instruction>(V) || isa<Argument>(V)) && !V->hasOneUse(); +} + // Collect relevant operations from Comparison that we may want to insert copies // for. void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { @@ -374,15 +385,9 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { auto *Op1 = Comparison->getOperand(1); if (Op0 == Op1) return; - CmpOperands.push_back(Comparison); - // Only want real values, not constants. Additionally, operands with one use - // are only being used in the comparison, which means they will not be useful - // for us to consider for predicateinfo. - // - if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse()) - CmpOperands.push_back(Op0); - if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse()) - CmpOperands.push_back(Op1); + + CmpOperands.push_back(Op0); + CmpOperands.push_back(Op1); } // Add Op, PB to the list of value infos for Op, and mark Op to be renamed. @@ -400,38 +405,32 @@ void PredicateInfoBuilder::addInfoFor(SmallVectorImpl<Value *> &OpsToRename, void PredicateInfoBuilder::processAssume( IntrinsicInst *II, BasicBlock *AssumeBB, SmallVectorImpl<Value *> &OpsToRename) { - // See if we have a comparison we support - SmallVector<Value *, 8> CmpOperands; - SmallVector<Value *, 2> ConditionsToProcess; - CmpInst::Predicate Pred; - Value *Operand = II->getOperand(0); - if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value())) - .match(II->getOperand(0))) { - ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0)); - ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1)); - ConditionsToProcess.push_back(Operand); - } else if (isa<CmpInst>(Operand)) { - - ConditionsToProcess.push_back(Operand); - } - for (auto Cond : ConditionsToProcess) { - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { - collectCmpOps(Cmp, CmpOperands); - // Now add our copy infos for our operands - for (auto *Op : CmpOperands) { - auto *PA = new PredicateAssume(Op, II, Cmp); - addInfoFor(OpsToRename, Op, PA); + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(II->getOperand(0)); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) + continue; + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } + + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + auto *PA = new PredicateAssume(V, II, Cond); + addInfoFor(OpsToRename, V, PA); } - CmpOperands.clear(); - } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { - // Otherwise, it should be an AND. - assert(BinOp->getOpcode() == Instruction::And && - "Should have been an AND"); - auto *PA = new PredicateAssume(BinOp, II, BinOp); - addInfoFor(OpsToRename, BinOp, PA); - } else { - llvm_unreachable("Unknown type of condition"); } } } @@ -443,68 +442,46 @@ void PredicateInfoBuilder::processBranch( SmallVectorImpl<Value *> &OpsToRename) { BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); - SmallVector<BasicBlock *, 2> SuccsToProcess; - SuccsToProcess.push_back(FirstBB); - SuccsToProcess.push_back(SecondBB); - SmallVector<Value *, 2> ConditionsToProcess; - - auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) { - for (auto *Succ : SuccsToProcess) { - // Don't try to insert on a self-edge. This is mainly because we will - // eliminate during renaming anyway. - if (Succ == BranchBB) - continue; - bool TakenEdge = (Succ == FirstBB); - // For and, only insert on the true edge - // For or, only insert on the false edge - if ((isAnd && !TakenEdge) || (isOr && TakenEdge)) + + for (BasicBlock *Succ : {FirstBB, SecondBB}) { + bool TakenEdge = Succ == FirstBB; + // Don't try to insert on a self-edge. This is mainly because we will + // eliminate during renaming anyway. + if (Succ == BranchBB) + continue; + + SmallVector<Value *, 4> Worklist; + SmallPtrSet<Value *, 4> Visited; + Worklist.push_back(BI->getCondition()); + while (!Worklist.empty()) { + Value *Cond = Worklist.pop_back_val(); + if (!Visited.insert(Cond).second) continue; - PredicateBase *PB = - new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge); - addInfoFor(OpsToRename, Op, PB); - if (!Succ->getSinglePredecessor()) - EdgeUsesOnly.insert({BranchBB, Succ}); - } - }; + if (Visited.size() > MaxCondsPerBranch) + break; + + Value *Op0, *Op1; + if (TakenEdge ? match(Cond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) + : match(Cond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { + Worklist.push_back(Op1); + Worklist.push_back(Op0); + } - // Match combinations of conditions. - CmpInst::Predicate Pred; - bool isAnd = false; - bool isOr = false; - SmallVector<Value *, 8> CmpOperands; - if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value()))) || - match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value())))) { - auto *BinOp = cast<BinaryOperator>(BI->getCondition()); - if (BinOp->getOpcode() == Instruction::And) - isAnd = true; - else if (BinOp->getOpcode() == Instruction::Or) - isOr = true; - ConditionsToProcess.push_back(BinOp->getOperand(0)); - ConditionsToProcess.push_back(BinOp->getOperand(1)); - ConditionsToProcess.push_back(BI->getCondition()); - } else if (isa<CmpInst>(BI->getCondition())) { - ConditionsToProcess.push_back(BI->getCondition()); - } - for (auto Cond : ConditionsToProcess) { - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { - collectCmpOps(Cmp, CmpOperands); - // Now add our copy infos for our operands - for (auto *Op : CmpOperands) - InsertHelper(Op, isAnd, isOr, Cmp); - } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { - // This must be an AND or an OR. - assert((BinOp->getOpcode() == Instruction::And || - BinOp->getOpcode() == Instruction::Or) && - "Should have been an AND or an OR"); - // The actual value of the binop is not subject to the same restrictions - // as the comparison. It's either true or false on the true/false branch. - InsertHelper(BinOp, false, false, BinOp); - } else { - llvm_unreachable("Unknown type of condition"); + SmallVector<Value *, 4> Values; + Values.push_back(Cond); + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) + collectCmpOps(Cmp, Values); + + for (Value *V : Values) { + if (shouldRename(V)) { + PredicateBase *PB = + new PredicateBranch(V, BranchBB, Succ, Cond, TakenEdge); + addInfoFor(OpsToRename, V, PB); + if (!Succ->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, Succ}); + } + } } - CmpOperands.clear(); } } // Process a block terminating switch, and place relevant operations to be @@ -822,6 +799,56 @@ PredicateInfo::~PredicateInfo() { } } +Optional<PredicateConstraint> PredicateBase::getConstraint() const { + switch (Type) { + case PT_Assume: + case PT_Branch: { + bool TrueEdge = true; + if (auto *PBranch = dyn_cast<PredicateBranch>(this)) + TrueEdge = PBranch->TrueEdge; + + if (Condition == RenamedOp) { + return {{CmpInst::ICMP_EQ, + TrueEdge ? ConstantInt::getTrue(Condition->getType()) + : ConstantInt::getFalse(Condition->getType())}}; + } + + CmpInst *Cmp = dyn_cast<CmpInst>(Condition); + if (!Cmp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + CmpInst::Predicate Pred; + Value *OtherOp; + if (Cmp->getOperand(0) == RenamedOp) { + Pred = Cmp->getPredicate(); + OtherOp = Cmp->getOperand(1); + } else if (Cmp->getOperand(1) == RenamedOp) { + Pred = Cmp->getSwappedPredicate(); + OtherOp = Cmp->getOperand(0); + } else { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + // Invert predicate along false edge. + if (!TrueEdge) + Pred = CmpInst::getInversePredicate(Pred); + + return {{Pred, OtherOp}}; + } + case PT_Switch: + if (Condition != RenamedOp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}}; + } + llvm_unreachable("Unknown predicate type"); +} + void PredicateInfo::verifyPredicateInfo() const {} char PredicateInfoPrinterLegacyPass::ID = 0; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index c7e9c919ec47..86bbb6a889e6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -62,10 +62,6 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { - // FIXME: If the memory unit is of pointer or integer type, we can permit - // assignments to subsections of the memory unit. - unsigned AS = AI->getType()->getAddressSpace(); - // Only allow direct and non-volatile loads and stores... for (const User *U : AI->users()) { if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { @@ -81,19 +77,18 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { if (SI->isVolatile()) return false; } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (!II->isLifetimeStartOrEnd()) + if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) return false; } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) return false; } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; if (!GEPI->hasAllZeroIndices()) return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) + if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + return false; + } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) { + if (!onlyUsedByLifetimeMarkers(ASCI)) return false; } else { return false; @@ -106,6 +101,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { namespace { struct AllocaInfo { + using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>; + SmallVector<BasicBlock *, 32> DefiningBlocks; SmallVector<BasicBlock *, 32> UsingBlocks; @@ -113,7 +110,7 @@ struct AllocaInfo { BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; - TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares; + DbgUserVec DbgUsers; void clear() { DefiningBlocks.clear(); @@ -121,7 +118,7 @@ struct AllocaInfo { OnlyStore = nullptr; OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - DbgDeclares.clear(); + DbgUsers.clear(); } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -132,8 +129,8 @@ struct AllocaInfo { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - Instruction *User = cast<Instruction>(*UI++); + for (User *U : AI->users()) { + Instruction *User = cast<Instruction>(U); if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Remember the basic blocks which define new values for the alloca @@ -154,7 +151,7 @@ struct AllocaInfo { } } - DbgDeclares = FindDbgAddrUses(AI); + findDbgUsers(DbgUsers, AI); } }; @@ -252,7 +249,7 @@ struct PromoteMem2Reg { /// For each alloca, we keep track of the dbg.declare intrinsic that /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. - SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares; + SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers; /// The set of basic blocks the renamer has already visited. SmallPtrSet<BasicBlock *, 16> Visited; @@ -312,23 +309,37 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { +static void removeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast<Instruction>(*UI); + for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { + Instruction *I = cast<Instruction>(UI->getUser()); + Use &U = *UI; ++UI; if (isa<LoadInst>(I) || isa<StoreInst>(I)) continue; + // Drop the use of AI in droppable instructions. + if (I->isDroppable()) { + I->dropDroppableUse(U); + continue; + } + if (!I->getType()->isVoidTy()) { // The only users of this bitcast/GEP instruction are lifetime intrinsics. // Follow the use/def chain to erase them now instead of leaving it for // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast<Instruction>(*UUI); + for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) { + Instruction *Inst = cast<Instruction>(UUI->getUser()); + Use &UU = *UUI; ++UUI; + + // Drop the use of I in droppable instructions. + if (Inst->isDroppable()) { + Inst->dropDroppableUse(UU); + continue; + } Inst->eraseFromParent(); } } @@ -355,8 +366,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - Instruction *UserInst = cast<Instruction>(*UI++); + for (User *U : make_early_inc_range(AI->users())) { + Instruction *UserInst = cast<Instruction>(U); if (UserInst == OnlyStore) continue; LoadInst *LI = cast<LoadInst>(UserInst); @@ -412,10 +423,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); - DII->eraseFromParent(); + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + } else if (DII->getExpression()->startsWithDeref()) { + DII->eraseFromParent(); + } } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -465,8 +480,8 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - LoadInst *LI = dyn_cast<LoadInst>(*UI++); + for (User *U : make_early_inc_range(AI->users())) { + LoadInst *LI = dyn_cast<LoadInst>(U); if (!LI) continue; @@ -510,9 +525,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, SI, DIB); + for (DbgVariableIntrinsic *DII : Info.DbgUsers) { + if (DII->isAddressOfVariable()) { + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); + ConvertDebugDeclareToDebugValue(DII, SI, DIB); + } } SI->eraseFromParent(); LBI.deleteValue(SI); @@ -521,8 +538,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, AI->eraseFromParent(); // The alloca's debuginfo can be removed as well. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) - DII->eraseFromParent(); + for (DbgVariableIntrinsic *DII : Info.DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); ++NumLocalPromoted; return true; @@ -531,7 +549,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); - AllocaDbgDeclares.resize(Allocas.size()); + AllocaDbgUsers.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -544,7 +562,7 @@ void PromoteMem2Reg::run() { assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. @@ -589,8 +607,8 @@ void PromoteMem2Reg::run() { } // Remember the dbg.declare intrinsic describing this alloca, if any. - if (!Info.DbgDeclares.empty()) - AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares; + if (!Info.DbgUsers.empty()) + AllocaDbgUsers[AllocaNum] = Info.DbgUsers; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -663,9 +681,11 @@ void PromoteMem2Reg::run() { } // Remove alloca's dbg.declare instrinsics from the function. - for (auto &Declares : AllocaDbgDeclares) - for (auto *DII : Declares) - DII->eraseFromParent(); + for (auto &DbgUsers : AllocaDbgUsers) { + for (auto *DII : DbgUsers) + if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref()) + DII->eraseFromParent(); + } // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can @@ -720,7 +740,7 @@ void PromoteMem2Reg::run() { continue; // Get the preds for BB. - SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + SmallVector<BasicBlock *, 16> Preds(predecessors(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient @@ -887,7 +907,7 @@ NextIteration: // operands so far. Remember this count. unsigned NewPHINumOperands = APN->getNumOperands(); - unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); + unsigned NumEdges = llvm::count(successors(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); // Add entries for all the phis. @@ -905,8 +925,9 @@ NextIteration: // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo]) - ConvertDebugDeclareToDebugValue(DII, APN, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); // Get the next phi node. ++PNI; @@ -965,8 +986,9 @@ NextIteration: // Record debuginfo for the store before removing it. IncomingLocs[AllocaNo] = SI->getDebugLoc(); - for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DII, SI, DIB); + for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) + if (DII->isAddressOfVariable()) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); BB->getInstList().erase(SI); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 57df2334c750..c210d1c46077 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -64,8 +64,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { } Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { - AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB); - return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr; + return getAvailableVals(AV).lookup(BB); } void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { @@ -254,12 +253,10 @@ public: // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - Preds->append(SomePhi->block_begin(), SomePhi->block_end()); - } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Preds->push_back(*PI); - } + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) + append_range(*Preds, SomePhi->blocks()); + else + append_range(*Preds, predecessors(BB)); } /// GetUndefVal - Get an undefined value of the same type as the value @@ -283,12 +280,6 @@ public: PHI->addIncoming(Val, Pred); } - /// InstrIsPHI - Check if an instruction is a PHI. - /// - static PHINode *InstrIsPHI(Instruction *I) { - return dyn_cast<PHINode>(I); - } - /// ValueIsPHI - Check if a value is a PHI. static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { return dyn_cast<PHINode>(Val); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 71b48482f26a..6dbfb0b61fea 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -38,8 +39,7 @@ cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, -/// reusing an existing cast if a suitable one exists, moving an existing -/// cast if a suitable one exists but isn't in the right place, or +/// reusing an existing cast if a suitable one (= dominating IP) exists, or /// creating a new one. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, @@ -58,40 +58,38 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction *Ret = nullptr; // Check to see if there is already a cast! - for (User *U : V->users()) - if (U->getType() == Ty) - if (CastInst *CI = dyn_cast<CastInst>(U)) - if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, create a new cast at IP. - // Likewise, do not reuse a cast at BIP because it must dominate - // instructions that might be inserted before BIP. - if (BasicBlock::iterator(CI) != IP || BIP == IP) { - // Create a new cast, and leave the old cast in place in case - // it is being used as an insert point. - Ret = CastInst::Create(Op, V, Ty, "", &*IP); - Ret->takeName(CI); - CI->replaceAllUsesWith(Ret); - break; - } - Ret = CI; - break; - } + for (User *U : V->users()) { + if (U->getType() != Ty) + continue; + CastInst *CI = dyn_cast<CastInst>(U); + if (!CI || CI->getOpcode() != Op) + continue; + + // Found a suitable cast that is at IP or comes before IP. Use it. Note that + // the cast must also properly dominate the Builder's insertion point. + if (IP->getParent() == CI->getParent() && &*BIP != CI && + (&*IP == CI || CI->comesBefore(&*IP))) { + Ret = CI; + break; + } + } // Create a new cast. - if (!Ret) + if (!Ret) { Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); + rememberInstruction(Ret); + } // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast // (an invoke for example) and not dominate BIP (but the cast does). assert(SE.DT.dominates(Ret, &*BIP)); - rememberInstruction(Ret); return Ret; } -static BasicBlock::iterator findInsertPointAfter(Instruction *I, - BasicBlock *MustDominate) { +BasicBlock::iterator +SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { BasicBlock::iterator IP = ++I->getIterator(); if (auto *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); @@ -102,11 +100,17 @@ static BasicBlock::iterator findInsertPointAfter(Instruction *I, if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { ++IP; } else if (isa<CatchSwitchInst>(IP)) { - IP = MustDominate->getFirstInsertionPt(); + IP = MustDominate->getParent()->getFirstInsertionPt(); } else { assert(!IP->isEHPad() && "unexpected eh pad!"); } + // Adjust insert point to be after instructions inserted by the expander, so + // we can re-use already inserted instructions. Avoid skipping past the + // original \p MustDominate, in case it is an inserted instruction. + while (isInsertedInstruction(&*IP) && &*IP != MustDominate) + ++IP; + return IP; } @@ -122,6 +126,22 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && "InsertNoopCastOfTo cannot change sizes!"); + // inttoptr only works for integral pointers. For non-integral pointers, we + // can create a GEP on i8* null with the integral value as index. Note that + // it is safe to use GEP of null instead of inttoptr here, because only + // expressions already based on a GEP of null should be converted to pointers + // during expansion. + if (Op == Instruction::IntToPtr) { + auto *PtrTy = cast<PointerType>(Ty); + if (DL.isNonIntegralPointerType(PtrTy)) { + auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace()); + assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 && + "alloc size of i8 must by 1 byte for the GEP to be correct"); + auto *GEP = Builder.CreateGEP( + Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep"); + return Builder.CreateBitCast(GEP, Ty); + } + } // Short-circuit unnecessary bitcasts. if (Op == Instruction::BitCast) { if (V->getType() == Ty) @@ -166,7 +186,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { // Cast the instruction immediately after the instruction. Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); + BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint()); return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -238,7 +258,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, BO->setHasNoUnsignedWrap(); if (Flags & SCEV::FlagNSW) BO->setHasNoSignedWrap(); - rememberInstruction(BO); return BO; } @@ -290,7 +309,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) if (!C->getAPInt().srem(FC->getAPInt())) { - SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + SmallVector<const SCEV *, 4> NewMulOps(M->operands()); NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt())); S = SE.getMulExpr(NewMulOps); return true; @@ -462,9 +481,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // we didn't find any operands that could be factored, tentatively // assume that element zero was selected (since the zero offset // would obviously be folded away). - Value *Scaled = ScaledOps.empty() ? - Constant::getNullValue(Ty) : - expandCodeFor(SE.getAddExpr(ScaledOps), Ty); + Value *Scaled = + ScaledOps.empty() + ? Constant::getNullValue(Ty) + : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); GepIndices.push_back(Scaled); // Collect struct field index operands. @@ -523,7 +543,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); // Expand the operands for a plain byte offset. - Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); + Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) @@ -564,10 +584,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. - Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); - rememberInstruction(GEP); - - return GEP; + return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); } { @@ -598,7 +615,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Casted = InsertNoopCastOfTo(Casted, PTy); Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); - rememberInstruction(GEP); } return expand(SE.getAddExpr(Ops)); @@ -748,14 +764,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. - Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); + Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false); Sum = InsertNoopCastOfTo(Sum, Ty); Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple add. - Value *W = expandCodeFor(Op, Ty); + Value *W = expandCodeForImpl(Op, Ty, false); Sum = InsertNoopCastOfTo(Sum, Ty); // Canonicalize a constant to the RHS. if (isa<Constant>(Sum)) std::swap(Sum, W); @@ -807,7 +823,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them // that are needed into the result. - Value *P = expandCodeFor(I->second, Ty); + Value *P = expandCodeForImpl(I->second, Ty, false); Value *Result = nullptr; if (Exponent & 1) Result = P; @@ -866,7 +882,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeFor(S->getLHS(), Ty); + Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) @@ -875,7 +891,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); } - Value *RHS = expandCodeFor(S->getRHS(), Ty); + Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false); return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); } @@ -895,7 +911,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); + SmallVector<const SCEV *, 8> NewAddOps(A->operands()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -1073,15 +1089,12 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), GEPPtrTy->getAddressSpace()); IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { + if (IncV->getType() != PN->getType()) IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } } else { IncV = useSubtract ? Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); } return IncV; } @@ -1193,6 +1206,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (!SE.isSCEVable(PN.getType())) continue; + // We should not look for a incomplete PHI. Getting SCEV for a incomplete + // PHI has no meaning at all. + if (!PN.isComplete()) { + DEBUG_WITH_TYPE( + DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); + continue; + } + const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); if (!PhiSCEV) continue; @@ -1253,6 +1274,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, InsertedValues.insert(AddRecPhiMatch); // Remember the increment. rememberInstruction(IncV); + // Those values were not actually inserted but re-used. + ReusedValues.insert(AddRecPhiMatch); + ReusedValues.insert(IncV); return AddRecPhiMatch; } } @@ -1273,8 +1297,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Expand code for the start value into the loop preheader. assert(L->getLoopPreheader() && "Can't expand add recurrences without a loop preheader!"); - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getLoopPreheader()->getTerminator()); + Value *StartV = + expandCodeForImpl(Normalized->getStart(), ExpandTy, + L->getLoopPreheader()->getTerminator(), false); // StartV must have been be inserted into L's preheader to dominate the new // phi. @@ -1292,7 +1317,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + Value *StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. It does not apply if we emit a @@ -1306,7 +1332,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), Twine(IVName) + ".iv"); - rememberInstruction(PN); // Create the step instructions and populate the PHI. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { @@ -1415,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); + // We might be introducing a new use of the post-inc IV that is not poison + // safe, in which case we should drop poison generating flags. Only keep + // those flags for which SCEV has proven that they always hold. + if (isa<OverflowingBinaryOperator>(Result)) { + auto *I = cast<Instruction>(Result); + if (!S->hasNoUnsignedWrap()) + I->setHasNoUnsignedWrap(false); + if (!S->hasNoSignedWrap()) + I->setHasNoSignedWrap(false); + } + // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. @@ -1438,7 +1474,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. SCEVInsertPointGuard Guard(Builder, this); - StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + StepV = expandCodeForImpl( + Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1452,16 +1489,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (ResTy != SE.getEffectiveSCEVType(ResTy)) Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); // Truncate the result. - if (TruncTy != Result->getType()) { + if (TruncTy != Result->getType()) Result = Builder.CreateTrunc(Result, TruncTy); - rememberInstruction(Result); - } + // Invert the result. - if (InvertStep) { - Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy), - Result); - rememberInstruction(Result); - } + if (InvertStep) + Result = Builder.CreateSub( + expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result); } // Re-apply any non-loop-dominating scale. @@ -1469,24 +1503,22 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, - expandCodeFor(PostLoopScale, IntTy)); - rememberInstruction(Result); + expandCodeForImpl(PostLoopScale, IntTy, false)); } // Re-apply any non-loop-dominating offset. if (PostLoopOffset) { if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { if (Result->getType()->isIntegerTy()) { - Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); + Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false); Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base); } else { Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result); } } else { Result = InsertNoopCastOfTo(Result, IntTy); - Result = Builder.CreateAdd(Result, - expandCodeFor(PostLoopOffset, IntTy)); - rememberInstruction(Result); + Result = Builder.CreateAdd( + Result, expandCodeForImpl(PostLoopOffset, IntTy, false)); } } @@ -1527,15 +1559,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock()); - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - &*NewInsertPt); + findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); + V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, + &*NewInsertPt, false); return V; } // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); + SmallVector<const SCEV *, 4> NewOps(S->operands()); NewOps[0] = SE.getConstant(Ty, 0); const SCEV *Rest = SE.getAddRecExpr(NewOps, L, S->getNoWrapFlags(SCEV::FlagNW)); @@ -1642,31 +1674,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { return expand(T); } +Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { + Value *V = + expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false); + return Builder.CreatePtrToInt(V, S->getType()); +} + Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateTrunc(V, Ty); - rememberInstruction(I); - return I; + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateTrunc(V, Ty); } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateZExt(V, Ty); - rememberInstruction(I); - return I; + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateZExt(V, Ty); } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expandCodeFor(S->getOperand(), - SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateSExt(V, Ty); - rememberInstruction(I); - return I; + Value *V = expandCodeForImpl( + S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), + false); + return Builder.CreateSExt(V, Ty); } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { @@ -1680,11 +1715,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); - rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1705,11 +1738,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); - rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1730,11 +1761,9 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); - rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); - rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1755,11 +1784,9 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); Value *ICmp = Builder.CreateICmpULT(LHS, RHS); - rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); - rememberInstruction(Sel); LHS = Sel; } // In the case of mixed integer and pointer types, cast the @@ -1769,15 +1796,45 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { return LHS; } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *IP) { +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, + Instruction *IP, bool Root) { setInsertPoint(IP); - return expandCodeFor(SH, Ty); + Value *V = expandCodeForImpl(SH, Ty, Root); + return V; } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { +Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { // Expand the code for this SCEV. Value *V = expand(SH); + + if (PreserveLCSSA) { + if (auto *Inst = dyn_cast<Instruction>(V)) { + // Create a temporary instruction to at the current insertion point, so we + // can hand it off to the helper to create LCSSA PHIs if required for the + // new use. + // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor) + // would accept a insertion point and return an LCSSA phi for that + // insertion point, so there is no need to insert & remove the temporary + // instruction. + Instruction *Tmp; + if (Inst->getType()->isIntegerTy()) + Tmp = + cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user")); + else { + assert(Inst->getType()->isPointerTy()); + Tmp = cast<Instruction>( + Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user")); + } + V = fixupLCSSAFormFor(Tmp, 0); + + // Clean up temporary instruction. + InsertedValues.erase(Tmp); + InsertedPostIncValues.erase(Tmp); + Tmp->eraseFromParent(); + } + } + + InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V; if (Ty) { assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && "non-trivial casts should be done with the SCEVs directly!"); @@ -1861,20 +1918,17 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = &*L->getHeader()->getFirstInsertionPt(); + while (InsertPt->getIterator() != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || - isa<DbgInfoIntrinsic>(InsertPt))) + isa<DbgInfoIntrinsic>(InsertPt))) { InsertPt = &*std::next(InsertPt->getIterator()); + } break; } } } - // IndVarSimplify sometimes sets the insertion point at the block start, even - // when there are PHIs at that point. We must correct for this. - if (isa<PHINode>(*InsertPt)) - InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); - // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) @@ -1922,32 +1976,25 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoops.empty()) - InsertedPostIncValues.insert(I); - else - InsertedValues.insert(I); -} - -/// getOrInsertCanonicalInductionVariable - This method returns the -/// canonical induction variable of the specified type for the specified -/// loop (inserting one if there is none). A canonical induction variable -/// starts at zero and steps by one on each iteration. -PHINode * -SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, - Type *Ty) { - assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); - - // Build a SCEV for {0,+,1}<L>. - // Conservatively use FlagAnyWrap for now. - const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), - SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); + auto DoInsert = [this](Value *V) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(V); + else + InsertedValues.insert(V); + }; + DoInsert(I); - // Emit code for it. - SCEVInsertPointGuard Guard(Builder, this); - PHINode *V = - cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front())); + if (!PreserveLCSSA) + return; - return V; + if (auto *Inst = dyn_cast<Instruction>(I)) { + // A new instruction has been added, which might introduce new uses outside + // a defining loop. Fix LCSSA from for each operand of the new instruction, + // if required. + for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd; + OpIdx++) + fixupLCSSAFormFor(Inst, OpIdx); + } } /// replaceCongruentIVs - Check for congruent phis in this loop header and @@ -1970,8 +2017,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Put pointers at the back and make sure pointer < pointer = false. if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); - return RHS->getType()->getPrimitiveSizeInBits() < - LHS->getType()->getPrimitiveSizeInBits(); + return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() < + LHS->getType()->getPrimitiveSizeInBits().getFixedSize(); }); unsigned NumElim = 0; @@ -2079,6 +2126,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " + << *OrigPhiRef << '\n'); ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { @@ -2092,15 +2141,6 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } -Value *SCEVExpander::getExactExistingExpansion(const SCEV *S, - const Instruction *At, Loop *L) { - Optional<ScalarEvolution::ValueOffsetPair> VO = - getRelatedExistingExpansion(S, At, L); - if (VO && VO.getValue().second == nullptr) - return VO.getValue().first; - return nullptr; -} - Optional<ScalarEvolution::ValueOffsetPair> SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L) { @@ -2139,15 +2179,156 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, return None; } +template<typename T> static int costAndCollectOperands( + const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl<SCEVOperand> &Worklist) { + + const T *S = cast<T>(WorkItem.S); + int Cost = 0; + // Object to help map SCEV operands to expanded IR instructions. + struct OperationIndices { + OperationIndices(unsigned Opc, size_t min, size_t max) : + Opcode(Opc), MinIdx(min), MaxIdx(max) { } + unsigned Opcode; + size_t MinIdx; + size_t MaxIdx; + }; + + // Collect the operations of all the instructions that will be needed to + // expand the SCEVExpr. This is so that when we come to cost the operands, + // we know what the generated user(s) will be. + SmallVector<OperationIndices, 2> Operations; + + auto CastCost = [&](unsigned Opcode) { + Operations.emplace_back(Opcode, 0, 0); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx = 0, unsigned MaxIdx = 1) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, + unsigned MinIdx, unsigned MaxIdx) { + Operations.emplace_back(Opcode, MinIdx, MaxIdx); + Type *OpType = S->getOperand(0)->getType(); + return NumRequired * TTI.getCmpSelInstrCost( + Opcode, OpType, CmpInst::makeCmpResultType(OpType), + CmpInst::BAD_ICMP_PREDICATE, CostKind); + }; + + switch (S->getSCEVType()) { + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scUnknown: + case scConstant: + return 0; + case scPtrToInt: + Cost = CastCost(Instruction::PtrToInt); + break; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). + Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2); + break; + } + case scAddRecExpr: { + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coeffients are there? + int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Ignoring constant term (operand 0), how many of the coeffients are u> 1? + int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast<SCEVConstant>(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + int AddCost = ArithCost(Instruction::Add, NumTerms - 1, + /*MinIdx*/1, /*MaxIdx*/1); + // Here, *each* one of those will require a multiplication. + int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. + Cost += MulCost * (PolyDegree - 1); + break; + } + } + + for (auto &CostOp : Operations) { + for (auto SCEVOp : enumerate(S->operands())) { + // Clamp the index to account for multiple IR operations being chained. + size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx); + size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx); + Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value()); + } + } + return Cost; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining, - const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed, - SmallVectorImpl<const SCEV *> &Worklist) { + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl<const SCEV *> &Processed, + SmallVectorImpl<SCEVOperand> &Worklist) { if (BudgetRemaining < 0) return true; // Already run out of budget, give up. + const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? - if (!Processed.insert(S).second) + if (!isa<SCEVConstant>(S) && !Processed.insert(S).second) return false; // We have already accounted for this expression. // If we can find an existing value for this scev available at the point "At" @@ -2155,52 +2336,37 @@ bool SCEVExpander::isHighCostExpansionHelper( if (getRelatedExistingExpansion(S, &At, L)) return false; // Consider the expression to be free. - switch (S->getSCEVType()) { - case scUnknown: - case scConstant: - return false; // Assume to be zero-cost. - } - TargetTransformInfo::TargetCostKind CostKind = - TargetTransformInfo::TCK_RecipThroughput; + L->getHeader()->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_RecipThroughput; - if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) { - unsigned Opcode; - switch (S->getSCEVType()) { - case scTruncate: - Opcode = Instruction::Trunc; - break; - case scZeroExtend: - Opcode = Instruction::ZExt; - break; - case scSignExtend: - Opcode = Instruction::SExt; - break; - default: - llvm_unreachable("There are no other cast types."); - } - const SCEV *Op = CastExpr->getOperand(); - BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(), - /*Src=*/Op->getType(), CostKind); - Worklist.emplace_back(Op); + switch (S->getSCEVType()) { + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + case scUnknown: + // Assume to be zero-cost. + return false; + case scConstant: { + // Only evalulate the costs of constants when optimizing for size. + if (CostKind != TargetTransformInfo::TCK_CodeSize) + return 0; + const APInt &Imm = cast<SCEVConstant>(S)->getAPInt(); + Type *Ty = S->getType(); + BudgetRemaining -= TTI.getIntImmCostInst( + WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind); + return BudgetRemaining < 0; + } + case scTruncate: + case scPtrToInt: + case scZeroExtend: + case scSignExtend: { + int Cost = + costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } - - if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { - // If the divisor is a power of two count this as a logical right-shift. - if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) { - if (SC->getAPInt().isPowerOf2()) { - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(), - CostKind); - // Note that we don't count the cost of RHS, because it is a constant, - // and we consider those to be free. But if that changes, we would need - // to log2() it first before calling isHighCostExpansionHelper(). - Worklist.emplace_back(UDivExpr->getLHS()); - return false; // Will answer upon next entry into this function. - } - } - + case scUDivExpr: { // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some @@ -2213,117 +2379,36 @@ bool SCEVExpander::isHighCostExpansionHelper( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. + int Cost = + costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist); // Need to count the cost of this UDiv. - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(), - CostKind); - Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. } - - if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) { - Type *OpType = NAry->getType(); - - assert(NAry->getNumOperands() >= 2 && - "Polynomial should be at least linear"); - - int AddCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - int MulCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - - // In this polynominal, we may have some zero operands, and we shouldn't - // really charge for those. So how many non-zero coeffients are there? - int NumTerms = llvm::count_if(NAry->operands(), - [](const SCEV *S) { return !S->isZero(); }); - assert(NumTerms >= 1 && "Polynominal should have at least one term."); - assert(!(*std::prev(NAry->operands().end()))->isZero() && - "Last operand should not be zero"); - - // Much like with normal add expr, the polynominal will require - // one less addition than the number of it's terms. - BudgetRemaining -= AddCost * (NumTerms - 1); - if (BudgetRemaining < 0) - return true; - - // Ignoring constant term (operand 0), how many of the coeffients are u> 1? - int NumNonZeroDegreeNonOneTerms = - llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()), - [](const SCEV *S) { - auto *SConst = dyn_cast<SCEVConstant>(S); - return !SConst || SConst->getAPInt().ugt(1); - }); - // Here, *each* one of those will require a multiplication. - BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms; - if (BudgetRemaining < 0) - return true; - - // What is the degree of this polynominal? - int PolyDegree = NAry->getNumOperands() - 1; - assert(PolyDegree >= 1 && "Should be at least affine."); - - // The final term will be: - // Op_{PolyDegree} * x ^ {PolyDegree} - // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. - // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for - // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. - // FIXME: this is conservatively correct, but might be overly pessimistic. - BudgetRemaining -= MulCost * (PolyDegree - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? - } - - if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { - Type *OpType = NAry->getType(); - - int PairCost; - switch (S->getSCEVType()) { - case scAddExpr: - PairCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - break; - case scMulExpr: - // TODO: this is a very pessimistic cost modelling for Mul, - // because of Bin Pow algorithm actually used by the expander, - // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - PairCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - break; - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: - PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind) + - TTI.getCmpSelInstrCost(Instruction::Select, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind); - break; - default: - llvm_unreachable("There are no other variants here."); - } - - assert(NAry->getNumOperands() > 1 && + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { + assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of it's terms. - BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? + int Cost = + costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; + return BudgetRemaining < 0; } - - llvm_unreachable("No other scev expressions possible."); + case scAddRecExpr: { + assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>( + WorkItem, TTI, CostKind, Worklist); + return BudgetRemaining < 0; + } + } + llvm_unreachable("Unknown SCEV kind!"); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, @@ -2344,8 +2429,10 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *IP) { - Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP); - Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP); + Value *Expr0 = + expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false); + Value *Expr1 = + expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false); Builder.SetInsertPoint(IP); auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); @@ -2361,7 +2448,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, const SCEV *ExitCount = SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); - assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count"); + assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count"); const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); @@ -2377,15 +2464,16 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); Builder.SetInsertPoint(Loc); - Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc); + Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false); IntegerType *Ty = IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty; - Value *StepValue = expandCodeFor(Step, Ty, Loc); - Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc); - Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc); + Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false); + Value *NegStepValue = + expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false); + Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); @@ -2445,8 +2533,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); } - EndCheck = Builder.CreateOr(EndCheck, OfMul); - return EndCheck; + return Builder.CreateOr(EndCheck, OfMul); } Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, @@ -2489,6 +2576,34 @@ Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, return Check; } +Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { + assert(PreserveLCSSA); + SmallVector<Instruction *, 1> ToUpdate; + + auto *OpV = User->getOperand(OpIdx); + auto *OpI = dyn_cast<Instruction>(OpV); + if (!OpI) + return OpV; + + Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent()); + Loop *UseLoop = SE.LI.getLoopFor(User->getParent()); + if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) + return OpV; + + ToUpdate.push_back(OpI); + SmallVector<PHINode *, 16> PHIsToRemove; + formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove); + for (PHINode *PN : PHIsToRemove) { + if (!PN->use_empty()) + continue; + InsertedValues.erase(PN); + InsertedPostIncValues.erase(PN); + PN->eraseFromParent(); + } + + return User->getOperand(OpIdx); +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely @@ -2566,4 +2681,40 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, } return false; } + +SCEVExpanderCleaner::~SCEVExpanderCleaner() { + // Result is used, nothing to remove. + if (ResultUsed) + return; + + auto InsertedInstructions = Expander.getAllInsertedInstructions(); +#ifndef NDEBUG + SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(), + InsertedInstructions.end()); + (void)InsertedSet; +#endif + // Remove sets with value handles. + Expander.clear(); + + // Sort so that earlier instructions do not dominate later instructions. + stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) { + return DT.dominates(B, A); + }); + // Remove all inserted instructions. + for (Instruction *I : InsertedInstructions) { + +#ifndef NDEBUG + assert(all_of(I->users(), + [&InsertedSet](Value *U) { + return InsertedSet.contains(cast<Instruction>(U)); + }) && + "removed instruction should only be used by instructions inserted " + "during expansion"); +#endif + assert(!I->getType()->isVoidTy() && + "inserted instruction should have non-void types"); + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } +} } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b450d71c996c..7cfe17618cde 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,8 +13,11 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -58,6 +61,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -67,6 +71,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -85,6 +90,12 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" +cl::opt<bool> llvm::RequireAndPreserveDomTree( + "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " + "into preserving DomTree,")); + // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the @@ -105,6 +116,10 @@ static cl::opt<bool> DupRet( cl::desc("Duplicate return instructions into unconditional branches")); static cl::opt<bool> + HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), + cl::desc("Hoist common instructions up to the parent block")); + +static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -138,6 +153,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); +// Two is chosen to allow one negation and a logical combine. +static cl::opt<unsigned> + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -147,9 +169,22 @@ STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumSinkCommons, +STATISTIC(NumFoldValueComparisonIntoPredecessors, + "Number of value comparisons folded into predecessor basic blocks"); +STATISTIC(NumFoldBranchToCommonDest, + "Number of branches folded into predecessor basic block"); +STATISTIC( + NumHoistCommonCode, + "Number of common instruction 'blocks' hoisted up to the begin block"); +STATISTIC(NumHoistCommonInstrs, + "Number of common instructions hoisted up to the begin block"); +STATISTIC(NumSinkCommonCode, + "Number of common instruction 'blocks' sunk down to the end block"); +STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumInvokes, + "Number of invokes with empty resume blocks simplified into calls"); namespace { @@ -182,8 +217,9 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; + DomTreeUpdater *DTU; const DataLayout &DL; - SmallPtrSetImpl<BasicBlock *> *LoopHeaders; + ArrayRef<WeakVH> LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; @@ -193,6 +229,9 @@ class SimplifyCFGOpt { bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder); + bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, + Instruction *PTI, + IRBuilder<> &Builder); bool FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder); @@ -225,13 +264,18 @@ class SimplifyCFGOpt { bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders, + SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, + const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders, const SimplifyCFGOptions &Opts) - : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { + assert((!DTU || !DTU->hasPostDomTree()) && + "SimplifyCFG is not yet capable of maintaining validity of a " + "PostDomTree, so don't ask for it."); + } - bool run(BasicBlock *BB); bool simplifyOnce(BasicBlock *BB); + bool simplifyOnceImpl(BasicBlock *BB); + bool run(BasicBlock *BB); // Helper to set Resimplify and return change indication. bool requestResimplify() { @@ -273,46 +317,6 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, return !Fail; } -/// Return true if it is safe and profitable to merge these two terminator -/// instructions together, where SI1 is an unconditional branch. PhiNodes will -/// store all PHI nodes in common successors. -static bool -isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, - Instruction *Cond, - SmallVectorImpl<PHINode *> &PhiNodes) { - if (SI1 == SI2) - return false; // Can't merge with self! - assert(SI1->isUnconditional() && SI2->isConditional()); - - // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: - // 1> We have a constant incoming value for the conditional branch; - // 2> We have "Cond" as the incoming value for the unconditional branch; - // 3> SI2->getCondition() and Cond have same operands. - CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); - if (!Ci2) - return false; - if (!(Cond->getOperand(0) == Ci2->getOperand(0) && - Cond->getOperand(1) == Ci2->getOperand(1)) && - !(Cond->getOperand(0) == Ci2->getOperand(1) && - Cond->getOperand(1) == Ci2->getOperand(0))) - return false; - - BasicBlock *SI1BB = SI1->getParent(); - BasicBlock *SI2BB = SI2->getParent(); - SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != Cond || - !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) - return false; - PhiNodes.push_back(PN); - } - return true; -} - /// Update PHI nodes in Succ to indicate that there will now be entries in it /// from the 'NewPred' block. The values that will be flowing into the PHI nodes /// will be the same as those coming in from ExistPred, an existing predecessor @@ -651,7 +655,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { - bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or); + bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector<Value *, 8> DFT; @@ -666,11 +670,14 @@ private: if (Instruction *I = dyn_cast<Instruction>(V)) { // If it is a || (or && depending on isEQ), process the operands. - if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) { - if (Visited.insert(I->getOperand(1)).second) - DFT.push_back(I->getOperand(1)); - if (Visited.insert(I->getOperand(0)).second) - DFT.push_back(I->getOperand(0)); + Value *Op0, *Op1; + if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) + : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { + if (Visited.insert(Op1).second) + DFT.push_back(Op1); + if (Visited.insert(Op0).second) + DFT.push_back(Op0); + continue; } @@ -765,7 +772,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); + llvm::erase_value(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. @@ -875,13 +882,18 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( (void)NI; // Remove PHI node entries for the dead edge. - ThisCases[0].Dest->removePredecessor(TI->getParent()); + ThisCases[0].Dest->removePredecessor(PredDef); LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorAndDCECond(TI); + + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); + return true; } @@ -894,13 +906,25 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; + auto *Successor = i->getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; if (DeadCases.count(i->getCaseValue())) { - i->getCaseSuccessor()->removePredecessor(TI->getParent()); + Successor->removePredecessor(PredDef); SI.removeCase(i); + --NumPerSuccessorCases[Successor]; } } + + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, PredDef, I.first}); + if (DTU) + DTU->applyUpdates(Updates); + LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } @@ -930,12 +954,16 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (!TheRealDest) TheRealDest = ThisDef; + SmallSetVector<BasicBlock *, 2> RemovedSuccs; + // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; for (BasicBlock *Succ : successors(TIBB)) - if (Succ != CheckEdge) + if (Succ != CheckEdge) { + if (Succ != TheRealDest) + RemovedSuccs.insert(Succ); Succ->removePredecessor(TIBB); - else + } else CheckEdge = nullptr; // Insert the new branch. @@ -947,6 +975,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( << "\n"); EraseTerminatorAndDCECond(TI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -1014,219 +1049,300 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) { } } -/// The specified terminator is a value equality comparison instruction -/// (either a switch or a branch on "X == c"). -/// See if any of the predecessors of the terminator block are value comparisons -/// on the same value. If so, and if safe to do so, fold them together. -bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, - IRBuilder<> &Builder) { - BasicBlock *BB = TI->getParent(); - Value *CV = isValueEqualityComparison(TI); // CondVal - assert(CV && "Not a comparison?"); - bool Changed = false; +static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( + BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { + Instruction *PTI = PredBlock->getTerminator(); - SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.pop_back_val(); + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + for (Instruction &BonusInst : *BB) { + if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + continue; - // See if the predecessor is a comparison with the same value. - Instruction *PTI = Pred->getTerminator(); - Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + Instruction *NewBonusInst = BonusInst.clone(); - if (PCV == CV && TI != PTI) { - SmallSetVector<BasicBlock*, 4> FailBlocks; - if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { - for (auto *Succ : FailBlocks) { - if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split")) - return false; - } - } + if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + // Unless the instruction has the same !dbg location as the original + // branch, drop it. When we fold the bonus instructions we want to make + // sure we reset their debug locations in order to avoid stepping on + // dead code caused by folding dead branches. + NewBonusInst->setDebugLoc(DebugLoc()); + } - // Figure out which 'cases' to copy from SI to PSI. - std::vector<ValueEqualityComparisonCase> BBCases; - BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - - std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); - - // Based on whether the default edge from PTI goes to BB or not, fill in - // PredCases and PredDefault with the new switch cases we would like to - // build. - SmallVector<BasicBlock *, 8> NewSuccessors; - - // Update the branch weight metadata along the way - SmallVector<uint64_t, 8> Weights; - bool PredHasWeights = HasBranchWeights(PTI); - bool SuccHasWeights = HasBranchWeights(TI); - - if (PredHasWeights) { - GetBranchWeights(PTI, Weights); - // branch-weight metadata is inconsistent here. - if (Weights.size() != 1 + PredCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (SuccHasWeights) - // If there are no predecessor weights but there are successor weights, - // populate Weights with 1, which will later be scaled to the sum of - // successor's weights - Weights.assign(1 + PredCases.size(), 1); - - SmallVector<uint64_t, 8> SuccWeights; - if (SuccHasWeights) { - GetBranchWeights(TI, SuccWeights); - // branch-weight metadata is inconsistent here. - if (SuccWeights.size() != 1 + BBCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (PredHasWeights) - SuccWeights.assign(1 + BBCases.size(), 1); - - if (PredDefault == BB) { - // If this is the default destination from PTI, only the edges in TI - // that don't occur in PTI, or that branch to BB will be activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest != BB) - PTIHandled.insert(PredCases[i].Value); - else { - // The default destination is BB, we don't need explicit targets. - std::swap(PredCases[i], PredCases.back()); - - if (PredHasWeights || SuccHasWeights) { - // Increase weight for the default case. - Weights[0] += Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&BonusInst] = NewBonusInst; + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. We *can* preserve !annotation, because + // it is tied to the instruction itself, not the value or position. + NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation); + + PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&BonusInst); + BonusInst.setName(NewBonusInst->getName() + ".old"); + + // Update (liveout) uses of bonus instructions, + // now that the bonus instruction has been cloned into predecessor. + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(BonusInst.getType(), + (NewBonusInst->getName() + ".merge").str()); + SSAUpdate.AddAvailableValue(BB, &BonusInst); + SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + for (Use &U : make_early_inc_range(BonusInst.uses())) + SSAUpdate.RewriteUseAfterInsertions(U); + } +} - PredCases.pop_back(); - --i; - --e; - } +bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( + Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + BasicBlock *Pred = PTI->getParent(); + + std::vector<DominatorTree::UpdateType> Updates; + + // Figure out which 'cases' to copy from SI to PSI. + std::vector<ValueEqualityComparisonCase> BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - // Reconstruct the new switch statement we will be building. - if (PredDefault != BBDefault) { - PredDefault->removePredecessor(Pred); - PredDefault = BBDefault; - NewSuccessors.push_back(BBDefault); + std::vector<ValueEqualityComparisonCase> PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + SmallMapVector<BasicBlock *, int, 8> NewSuccessors; + + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } - unsigned CasesFromPred = Weights.size(); - uint64_t ValidTotalSuccWeight = 0; - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].Value) && - BBCases[i].Dest != BBDefault) { - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - if (SuccHasWeights || PredHasWeights) { - // The default weight is at index 0, so weight for the ith case - // should be at index i+1. Scale the cases from successor by - // PredDefaultWeight (Weights[0]). - Weights.push_back(Weights[0] * SuccWeights[i + 1]); - ValidTotalSuccWeight += SuccWeights[i + 1]; - } - } + PredCases.pop_back(); + --i; + --e; + } + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + if (PredDefault != BB) + Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); + PredDefault = BBDefault; + ++NewSuccessors[BBDefault]; + } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; if (SuccHasWeights || PredHasWeights) { - ValidTotalSuccWeight += SuccWeights[0]; - // Scale the cases from predecessor by ValidTotalSuccWeight. - for (unsigned i = 1; i < CasesFromPred; ++i) - Weights[i] *= ValidTotalSuccWeight; - // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). - Weights[0] *= SuccWeights[0]; + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i + 1]); + ValidTotalSuccWeight += SuccWeights[i + 1]; } - } else { - // If this is not the default destination from PSI, only the edges - // in SI that occur in PSI with a destination of BB will be - // activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - std::map<ConstantInt *, uint64_t> WeightsForHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest == BB) { - PTIHandled.insert(PredCases[i].Value); - - if (PredHasWeights || SuccHasWeights) { - WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } - - std::swap(PredCases[i], PredCases.back()); - PredCases.pop_back(); - --i; - --e; - } + } - // Okay, now we know which constants were sent to BB from the - // predecessor. Figure out where they will all go now. - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].Value)) { - // If this is one we are capable of getting... - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[BBCases[i].Value]); - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - PTIHandled.erase( - BBCases[i].Value); // This constant is taken care of - } + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt *, uint64_t> WeightsForHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); - // If there are any constants vectored to BB that TI doesn't handle, - // they must go to the default destination of TI. - for (ConstantInt *I : PTIHandled) { - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[I]); - PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); - NewSuccessors.push_back(BBDefault); + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; + std::swap(Weights[i + 1], Weights.back()); + Weights.pop_back(); } + + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; + --e; } - // Okay, at this point, we know which new successor Pred will get. Make - // sure we update the number of entries in the PHI nodes for these - // successors. - for (BasicBlock *NewSuccessor : NewSuccessors) - AddPredecessorToBlock(NewSuccessor, Pred, BB); - - Builder.SetInsertPoint(PTI); - // Convert pointer to int before we switch. - if (CV->getType()->isPointerTy()) { - CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), - "magicptr"); + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].Value)) { + // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); + PredCases.push_back(BBCases[i]); + ++NewSuccessors[BBCases[i].Dest]; + PTIHandled.erase(BBCases[i].Value); // This constant is taken care of } - // Now that the successors are updated, create the new Switch instruction. - SwitchInst *NewSI = - Builder.CreateSwitch(CV, PredDefault, PredCases.size()); - NewSI->setDebugLoc(PTI->getDebugLoc()); - for (ValueEqualityComparisonCase &V : PredCases) - NewSI->addCase(V.Value, V.Dest); + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (ConstantInt *I : PTIHandled) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[I]); + PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); + ++NewSuccessors[BBDefault]; + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : + NewSuccessors) { + for (auto I : seq(0, NewSuccessor.second)) { + (void)I; + AddPredecessorToBlock(NewSuccessor.first, Pred, BB); + } + if (!is_contained(successors(Pred), NewSuccessor.first)) + Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); + } + + Builder.SetInsertPoint(PTI); + // Convert pointer to int before we switch. + if (CV->getType()->isPointerTy()) { + CV = + Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); + } + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - if (PredHasWeights || SuccHasWeights) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(Weights); + setBranchWeights(NewSI, MDWeights); + } - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + EraseTerminatorAndDCECond(PTI); - setBranchWeights(NewSI, MDWeights); + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = nullptr; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (!InfLoopBlock) { + // Insert it at the end of the function, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = + BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); + BranchInst::Create(InfLoopBlock, InfLoopBlock); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); } + NewSI->setSuccessor(i, InfLoopBlock); + } - EraseTerminatorAndDCECond(PTI); - - // Okay, last check. If BB is still a successor of PSI, then we must - // have an infinite loop case. If so, add an infinitely looping block - // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = nullptr; - for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) - if (NewSI->getSuccessor(i) == BB) { - if (!InfLoopBlock) { - // Insert it at the end of the function, because it's either code, - // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", - BB->getParent()); - BranchInst::Create(InfLoopBlock, InfLoopBlock); - } - NewSI->setSuccessor(i, InfLoopBlock); - } + if (InfLoopBlock) + Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); - Changed = true; + Updates.push_back({DominatorTree::Delete, Pred, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + + ++NumFoldValueComparisonIntoPredecessors; + return true; +} + +/// The specified terminator is a value equality comparison instruction +/// (either a switch or a branch on "X == c"). +/// See if any of the predecessors of the terminator block are value comparisons +/// on the same value. If so, and if safe to do so, fold them together. +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, + IRBuilder<> &Builder) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + + bool Changed = false; + + SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.pop_back_val(); + Instruction *PTI = Pred->getTerminator(); + + // Don't try to fold into itself. + if (Pred == BB) + continue; + + // See if the predecessor is a comparison with the same value. + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + if (PCV != CV) + continue; + + SmallSetVector<BasicBlock *, 4> FailBlocks; + if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { + for (auto *Succ : FailBlocks) { + if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) + return false; + } } + + PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); + Changed = true; } return Changed; } @@ -1248,7 +1364,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, return true; } -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function @@ -1285,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, BasicBlock *BIParent = BI->getParent(); bool Changed = false; + + auto _ = make_scope_exit([&]() { + if (Changed) + ++NumHoistCommonCode; + }); + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1353,6 +1475,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, I2->eraseFromParent(); Changed = true; } + ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1407,6 +1530,8 @@ HoistTerminator: I2->replaceAllUsesWith(NT); NT->takeName(I1); } + Changed = true; + ++NumHoistCommonInstrs; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. @@ -1448,12 +1573,20 @@ HoistTerminator: } } + SmallVector<DominatorTree::UpdateType, 4> Updates; + // Update any PHI nodes in our new successors. - for (BasicBlock *Succ : successors(BB1)) + for (BasicBlock *Succ : successors(BB1)) { AddPredecessorToBlock(Succ, BIParent, BB1); + Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + } + for (BasicBlock *Succ : successors(BI)) + Updates.push_back({DominatorTree::Delete, BIParent, Succ}); EraseTerminatorAndDCECond(BI); - return true; + if (DTU) + DTU->applyUpdates(Updates); + return Changed; } // Check lifetime markers. @@ -1744,7 +1877,8 @@ namespace { /// true, sink any common code from the predecessors to BB. /// We also allow one predecessor to end with conditional branch (but no more /// than one). -static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, + DomTreeUpdater *DTU) { // We support two situations: // (1) all incoming arcs are unconditional // (2) one incoming arc is conditional @@ -1800,7 +1934,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { if (UnconditionalPreds.size() < 2) return false; - bool Changed = false; // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we @@ -1820,6 +1953,12 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { --LRI; } + // If no instructions can be sunk, early-return. + if (ScanIdx == 0) + return false; + + bool Changed = false; + auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) @@ -1834,7 +1973,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { return NumPHIInsts <= 1; }; - if (ScanIdx > 0 && Cond) { + if (Cond) { // Check if we would actually sink anything first! This mutates the CFG and // adds an extra block. The goal in doing this is to allow instructions that // couldn't be sunk before to be sunk - obviously, speculatable instructions @@ -1857,7 +1996,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. - if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split")) + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) // Edges couldn't be split. return false; Changed = true; @@ -1875,7 +2014,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. - for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { + unsigned SinkIdx = 0; + for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); @@ -1890,11 +2030,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { break; } - if (!sinkLastInstruction(UnconditionalPreds)) - return Changed; - NumSinkCommons++; + if (!sinkLastInstruction(UnconditionalPreds)) { + LLVM_DEBUG( + dbgs() + << "SINK: stopping here, failed to actually sink instruction!\n"); + break; + } + + NumSinkCommonInstrs++; Changed = true; } + if (SinkIdx != 0) + ++NumSinkCommonCode; return Changed; } @@ -1938,7 +2085,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Look for a store to the same pointer in BrBB. unsigned MaxNumInstToLookAt = 9; - for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) { + // Skip pseudo probe intrinsic calls which are not really killing any memory + // accesses. + for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { if (!MaxNumInstToLookAt) break; --MaxNumInstToLookAt; @@ -1959,6 +2108,65 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, return nullptr; } +/// Estimate the cost of the insertion(s) and check that the PHI nodes can be +/// converted to selects. +static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, + BasicBlock *EndBB, + unsigned &SpeculatedInstructions, + int &BudgetRemaining, + const TargetTransformInfo &TTI) { + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() + ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; + + bool HaveRewritablePHIs = false; + for (PHINode &PN : EndBB->phis()) { + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); + + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. + // Skip PHIs which are trivial. + if (ThenV == OrigV) + continue; + + BudgetRemaining -= + TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + + // Don't convert to selects if we could remove undefined behavior instead. + if (passingValueIsAlwaysUndefined(OrigV, &PN) || + passingValueIsAlwaysUndefined(ThenV, &PN)) + return false; + + HaveRewritablePHIs = true; + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) + continue; // Known safe and cheap. + + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + return false; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; + unsigned MaxCost = + 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) + return false; + + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) + return false; + } + + return HaveRewritablePHIs; +} + /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating @@ -2005,6 +2213,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BasicBlock *BB = BI->getParent(); BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; // If ThenBB is actually on the false edge of the conditional branch, remember // to swap the select operands later. @@ -2037,6 +2247,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, continue; } + // Skip pseudo probes. The consequence is we lose track of the branch + // probability for ThenBB, which is fine since the optimization here takes + // place regardless of the branch probability. + if (isa<PseudoProbeInst>(I)) { + SpeculatedDbgIntrinsics.push_back(I); + continue; + } + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2082,50 +2300,13 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; } - // Check that the PHI nodes can be converted to selects. - bool HaveRewritablePHIs = false; - for (PHINode &PN : EndBB->phis()) { - Value *OrigV = PN.getIncomingValueForBlock(BB); - Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. - if (ThenV == OrigV) - continue; - - // Don't convert to selects if we could remove undefined behavior instead. - if (passingValueIsAlwaysUndefined(OrigV, &PN) || - passingValueIsAlwaysUndefined(ThenV, &PN)) - return false; - - HaveRewritablePHIs = true; - ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); - ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); - if (!OrigCE && !ThenCE) - continue; // Known safe and cheap. - - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) - return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = - 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - if (OrigCost + ThenCost > MaxCost) - return false; - - // Account for the cost of an unfolded ConstantExpr which could end up - // getting expanded into Instructions. - // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculatedInstructions; - if (SpeculatedInstructions > 1) - return false; - } - - // If there are no PHIs to process, bail early. This helps ensure idempotence - // as well. - if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) + // Check that we can insert the selects and that it's not too expensive to do + // so. + bool Convert = SpeculatedStore != nullptr; + Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, + SpeculatedInstructions, + BudgetRemaining, TTI); + if (!Convert || BudgetRemaining < 0) return false; // If we get here, we can hoist the instruction and if-convert. @@ -2199,6 +2380,12 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { for (Instruction &I : BB->instructionsWithoutDebug()) { if (Size > MaxSmallBlockSize) return false; // Don't clone large BB's. + + // Can't fold blocks that contain noduplicate or convergent calls. + if (CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->cannotDuplicate() || CI->isConvergent()) + return false; + // We will delete Phis while threading, so Phis should not be accounted in // block's size if (!isa<PHINode>(I)) @@ -2221,8 +2408,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// If we have a conditional branch on a PHI node value that is defined in the /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, - AssumptionCache *AC) { +static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -2240,13 +2427,6 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - // Can't fold blocks that contain noduplicate or convergent calls. - if (any_of(*BB, [](const Instruction &I) { - const CallInst *CI = dyn_cast<CallInst>(&I); - return CI && (CI->cannotDuplicate() || CI->isConvergent()); - })) - return false; - // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -2265,6 +2445,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + SmallVector<DominatorTree::UpdateType, 3> Updates; + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -2273,6 +2455,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); + Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. @@ -2331,8 +2514,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, PredBBTI->setSuccessor(i, EdgeBB); } + Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + + if (DTU) + DTU->applyUpdates(Updates); + // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DL, AC) || true; + return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; } return false; @@ -2341,7 +2530,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - const DataLayout &DL) { + DomTreeUpdater *DTU, const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -2374,11 +2563,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, int BudgetRemaining = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); + Changed = true; continue; } @@ -2386,7 +2577,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, BudgetRemaining, TTI)) - return false; + return Changed; } // If we folded the first phi, PN dangles at this point. Refresh it. If @@ -2413,7 +2604,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, isa<BinaryOperator>(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) - return false; + return Changed; // If all PHI nodes are promotable, check to make sure that all instructions // in the predecessor blocks can be promoted as well. If not, we won't be able @@ -2427,11 +2618,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } @@ -2440,11 +2632,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) && + !isa<PseudoProbeInst>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. - return false; + return Changed; } } assert(DomBlock && "Failed to find root DomBlock"); @@ -2487,7 +2680,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, Instruction *OldTI = DomBlock->getTerminator(); Builder.SetInsertPoint(OldTI); Builder.CreateBr(BB); + + SmallVector<DominatorTree::UpdateType, 3> Updates; + if (DTU) { + Updates.push_back({DominatorTree::Insert, DomBlock, BB}); + for (auto *Successor : successors(DomBlock)) + Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); + } + OldTI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -2496,9 +2700,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// introducing a select if the return values disagree. bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { + auto *BB = BI->getParent(); assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // NOTE: destinations may match, this could be degenerate uncond branch. ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); @@ -2515,10 +2721,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // there is no return value for this function, just change the // branch into a return. if (FalseRet->getNumOperands() == 0) { - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); Builder.CreateRetVoid(); EraseTerminatorAndDCECond(BI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } @@ -2530,10 +2743,10 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) - TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + TrueValue = TVPN->getIncomingValueForBlock(BB); if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) - FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + FalseValue = FVPN->getIncomingValueForBlock(BB); // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is @@ -2549,8 +2762,8 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); + TrueSucc->removePredecessor(BB); + FalseSucc->removePredecessor(BB); // Insert select instructions where needed. Value *BrCond = BI->getCondition(); @@ -2575,27 +2788,17 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); EraseTerminatorAndDCECond(BI); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); + } return true; } -/// Return true if the given instruction is available -/// in its predecessor block. If yes, the instruction will be removed. -static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) { - if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) - return false; - for (Instruction &I : *PB) { - Instruction *PBI = &I; - // Check whether Inst and PBI generate the same value. - if (Inst->isIdenticalTo(PBI)) { - Inst->replaceAllUsesWith(PBI); - Inst->eraseFromParent(); - return true; - } - } - return false; -} - /// Return true if either PBI or BI has branch weight available, and store /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does /// not have branch weight, use 1:1 as its weight. @@ -2619,63 +2822,174 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, } } +// Determine if the two branches share a common destination, +// and deduce a glue that we need to use to join branch's conditions +// to arrive at the common destination. +static Optional<std::pair<Instruction::BinaryOps, bool>> +CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { + assert(BI && PBI && BI->isConditional() && PBI->isConditional() && + "Both blocks must end with a conditional branches."); + assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && + "PredBB must be a predecessor of BB."); + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) + return {{Instruction::Or, false}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) + return {{Instruction::And, false}}; + else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) + return {{Instruction::And, true}}; + else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) + return {{Instruction::Or, true}}; + return None; +} + +static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, + DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + BasicBlock *BB = BI->getParent(); + BasicBlock *PredBlock = PBI->getParent(); + + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc; + bool InvertPredCond; + std::tie(Opc, InvertPredCond) = + *CheckIfCondBranchesShareCommonDestination(BI, PBI); + + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + + IRBuilder<> Builder(PBI); + // The builder is used to create instructions to eliminate the branch in BB. + // If BB's terminator has !annotation metadata, add it to the new + // instructions. + Builder.CollectMetadataToCopy(BB->getTerminator(), + {LLVMContext::MD_annotation}); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); + } + + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } + + BasicBlock *UniqueSucc = + PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); + + // Before cloning instructions, notify the successor basic block that it + // is about to have a new predecessor. This will update PHI nodes, + // which will allow us to update live-out uses of bonus instructions. + AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); + + // Try to update branch weights. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight)) { + SmallVector<uint64_t, 8> NewWeights; + + if (PBI->getSuccessor(0) == BB) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, UniqueSucc, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } else { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, UniqueSucc + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); + + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + + // Now, update the CFG. + PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); + + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, + {DominatorTree::Delete, PredBlock, BB}}); + + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); + + ValueToValueMapTy VMap; // maps original values to cloned values + CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + + // Now that the Cond was cloned into the predecessor basic block, + // or/and the two conditions together. + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp( + Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond")); + PBI->setCondition(NewCond); + + // Copy any debug value intrinsics into the end of PredBlock. + for (Instruction &I : *BB) { + if (isa<DbgInfoIntrinsic>(I)) { + Instruction *NewI = I.clone(); + RemapInstruction(NewI, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + NewI->insertBefore(PBI); + } + } + + ++NumFoldBranchToCommonDest; + return true; +} + /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { + // If this block ends with an unconditional branch, + // let SpeculativelyExecuteBB() deal with it. + if (!BI->isConditional()) + return false; + BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); bool Changed = false; - Instruction *Cond = nullptr; - if (BI->isConditional()) - Cond = dyn_cast<Instruction>(BI->getCondition()); - else { - // For unconditional branch, check for a simple CFG pattern, where - // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exists, check for CSE between BB and its - // predecessor. - if (BasicBlock *PB = BB->getSinglePredecessor()) - if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) - if (PBI->isConditional() && - (BI->getSuccessor(0) == PBI->getSuccessor(0) || - BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (auto I = BB->instructionsWithoutDebug().begin(), - E = BB->instructionsWithoutDebug().end(); - I != E;) { - Instruction *Curr = &*I++; - if (isa<CmpInst>(Curr)) { - Cond = Curr; - break; - } - // Quit if we can't remove this instruction. - if (!tryCSEWithPredecessor(Curr, PB)) - return Changed; - Changed = true; - } - } + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; - if (!Cond) - return Changed; - } + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return Changed; - // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = ++Cond->getIterator(); - - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(CondIt)) - ++CondIt; - - if (&*CondIt != BI) - return Changed; - // Only allow this transformation if computing the condition doesn't involve // too many instructions and these involved instructions can be executed // unconditionally. We denote all involved instructions except the condition @@ -2683,19 +2997,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != &*I; ++I) { - // Ignore dbg intrinsics. - if (isa<DbgInfoIntrinsic>(I)) + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) - return Changed; - // I has only one use and can be executed unconditionally. - Instruction *User = dyn_cast<Instruction>(I->user_back()); - if (User == nullptr || User->getParent() != BB) + // Ignore dbg intrinsics, and the terminator. + if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I)) + continue; + // I must be safe to execute unconditionally. + if (!isSafeToSpeculativelyExecute(&I)) return Changed; - // I is used in the same BB. Since BI uses Cond and doesn't have more slots - // to use any other instruction, User must be an instruction between next(I) - // and Cond. // Account for the cost of duplicating this instruction into each // predecessor. @@ -2715,9 +3026,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, return Changed; // Finally, don't infinitely unroll conditional loops. - BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; - if (TrueDest == BB || FalseDest == BB) + if (is_contained(successors(BB), BB)) return Changed; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -2727,222 +3036,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - SmallVector<PHINode *, 4> PHIs; - if (!PBI || PBI->isUnconditional() || - (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || - (!BI->isConditional() && - !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) + if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; - bool InvertPredCond = false; - - if (BI->isConditional()) { - if (PBI->getSuccessor(0) == TrueDest) { - Opc = Instruction::Or; - } else if (PBI->getSuccessor(1) == FalseDest) { - Opc = Instruction::And; - } else if (PBI->getSuccessor(0) == FalseDest) { - Opc = Instruction::And; - InvertPredCond = true; - } else if (PBI->getSuccessor(1) == TrueDest) { - Opc = Instruction::Or; - InvertPredCond = true; - } else { - continue; - } - } else { - if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) - continue; - } - - LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - Changed = true; - - IRBuilder<> Builder(PBI); - - // If we need to invert the condition in the pred block to match, do so now. - if (InvertPredCond) { - Value *NewCond = PBI->getCondition(); - - if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { - CmpInst *CI = cast<CmpInst>(NewCond); - CI->setPredicate(CI->getInversePredicate()); - } else { - NewCond = - Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); - } + Instruction::BinaryOps Opc; + bool InvertPredCond; + if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) + std::tie(Opc, InvertPredCond) = *Recepie; + else + continue; - PBI->setCondition(NewCond); - PBI->swapSuccessors(); - } + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI) { + Type *Ty = BI->getCondition()->getType(); + unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa<CmpInst>(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); - // If we have bonus instructions, clone them into the predecessor block. - // Note that there may be multiple predecessor blocks, so we cannot move - // bonus instructions to a predecessor block. - ValueToValueMapTy VMap; // maps original values to cloned values - // We already make sure Cond is the last instruction before BI. Therefore, - // all instructions before Cond other than DbgInfoIntrinsic are bonus - // instructions. - for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { - if (isa<DbgInfoIntrinsic>(BonusInst)) + if (Cost > BranchFoldThreshold) continue; - Instruction *NewBonusInst = BonusInst->clone(); - - // When we fold the bonus instructions we want to make sure we - // reset their debug locations in order to avoid stepping on dead - // code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); - - RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&*BonusInst] = NewBonusInst; - - // If we moved a load, we cannot any longer claim any knowledge about - // its potential value. The previous information might have been valid - // only given the branch precondition. - // For an analogous reason, we must also drop all the metadata whose - // semantics we don't understand. - NewBonusInst->dropUnknownNonDebugMetadata(); - - PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&*BonusInst); - BonusInst->setName(BonusInst->getName() + ".old"); } - // Clone Cond into the predecessor basic block, and or/and the - // two conditions together. - Instruction *CondInPred = Cond->clone(); - - // Reset the condition debug location to avoid jumping on dead code - // as the result of folding dead branches. - CondInPred->setDebugLoc(DebugLoc()); - - RemapInstruction(CondInPred, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); - CondInPred->takeName(Cond); - Cond->setName(CondInPred->getName() + ".old"); - - if (BI->isConditional()) { - Instruction *NewCond = cast<Instruction>( - Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); - PBI->setCondition(NewCond); - - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool HasWeights = - extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight); - SmallVector<uint64_t, 8> NewWeights; - - if (PBI->getSuccessor(0) == BB) { - if (HasWeights) { - // PBI: br i1 %x, BB, FalseDest - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * TotalWeight for BI + - // TrueWeight for PBI * FalseWeight for BI. - // We assume that total weights of a BranchInst can fit into 32 bits. - // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); - } - AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - if (HasWeights) { - // PBI: br i1 %x, TrueDest, BB - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TotalWeight for BI + - // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); - } - AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(1, FalseDest); - } - if (NewWeights.size() == 2) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), - NewWeights.end()); - setBranchWeights(PBI, MDWeights[0], MDWeights[1]); - } else - PBI->setMetadata(LLVMContext::MD_prof, nullptr); - } else { - // Update PHI nodes in the common successors. - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - ConstantInt *PBI_C = cast<ConstantInt>( - PHIs[i]->getIncomingValueForBlock(PBI->getParent())); - assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = nullptr; - if (PBI->getSuccessor(0) == TrueDest) { - // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) - // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) - // is false: !PBI_Cond and BI_Value - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>( - Builder.CreateBinOp(Instruction::And, NotCond, CondInPred, - "and.cond")); - if (PBI_C->isOne()) - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); - } else { - // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) - // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) - // is false: PBI_Cond and BI_Value - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::And, PBI->getCondition(), CondInPred, "and.cond")); - if (PBI_C->isOne()) { - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, NotCond, MergedCond, "or.cond")); - } - } - // Update PHI Node. - PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); - } - - // PBI is changed to branch to TrueDest below. Remove itself from - // potential phis from all other successors. - if (MSSAU) - MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); - - // Change PBI from Conditional to Unconditional. - BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); - EraseTerminatorAndDCECond(PBI, MSSAU); - PBI = New_PBI; - } - - // If BI was a loop latch, it may have had associated loop metadata. - // We need to copy it to the new latch, that is, PBI. - if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) - PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - - // TODO: If BB is reachable from all paths through PredBlock, then we - // could replace PBI's branch probabilities with BI's. - - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) { - if (isa<DbgInfoIntrinsic>(I)) { - Instruction *NewI = I.clone(); - RemapInstruction(NewI, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - NewI->insertBefore(PBI); - } - } - - return Changed; + return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); } return Changed; } @@ -3015,12 +3133,10 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, return PHI; } -static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, - BasicBlock *QTB, BasicBlock *QFB, - BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond, - const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool mergeConditionalStoreToAddress( + BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -3095,7 +3211,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return true; }; - const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore}; + const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) @@ -3109,8 +3225,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // If QTB does not exist, then QFB's only predecessor has a conditional // branch to QFB and PostBB. BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); - BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, - "condstore.split"); + BasicBlock *NewBB = + SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); if (!NewBB) return false; PostBB = NewBB; @@ -3139,8 +3255,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); - auto *T = - SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); + auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DTU); QB.SetInsertPoint(T); StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; @@ -3160,7 +3277,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these @@ -3262,16 +3379,17 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) - Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); + Changed |= + mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, + InvertPCond, InvertQCond, DTU, DL, TTI); return Changed; } - /// If the previous block ended with a widenable branch, determine if reusing /// the target block is profitable and legal. This will have the effect of /// "widening" PBI, but doesn't require us to reason about hosting safety. -static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { +static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU) { // TODO: This can be generalized in two important ways: // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input // values from the PBI edge. @@ -3294,15 +3412,25 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BI->getSuccessor(1) != IfFalseBB && // no inf looping BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - BI->getSuccessor(1)->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(1); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(1, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } if (BI->getSuccessor(0) != IfFalseBB && // no inf looping BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { - BI->getSuccessor(0)->removePredecessor(BI->getParent()); + auto *OldSuccessor = BI->getSuccessor(0); + OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(0, IfFalseBB); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, + {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } return false; @@ -3313,6 +3441,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); @@ -3366,7 +3495,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If the previous block ended with a widenable branch, determine if reusing // the target block is profitable and legal. This will have the effect of // "widening" PBI, but doesn't require us to reason about hosting safety. - if (tryWidenCondBranchToCondBranch(PBI, BI)) + if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) @@ -3376,7 +3505,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3419,6 +3548,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // case, it would be unsafe to hoist the operation into a select instruction. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) { @@ -3444,6 +3574,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); + SmallVector<DominatorTree::UpdateType, 5> Updates; + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -3457,6 +3589,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); + Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); OtherDest = InfLoopBlock; } @@ -3483,6 +3616,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); + Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); + Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); + + if (DTU) + DTU->applyUpdates(Updates); + // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredCommon, PredOther, SuccCommon, SuccOther; @@ -3562,6 +3701,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight) { + auto *BB = OldTerm->getParent(); // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -3569,6 +3709,8 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; + // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { // Make sure only to keep exactly one copy of each edge. @@ -3576,9 +3718,13 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; - else - Succ->removePredecessor(OldTerm->getParent(), + else { + Succ->removePredecessor(BB, /*KeepOneInputPHIs=*/true); + + if (Succ != TrueBB && Succ != FalseBB) + RemovedSuccessors.insert(Succ); + } } IRBuilder<> Builder(OldTerm); @@ -3586,11 +3732,11 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // Insert an appropriate new terminator. if (!KeepEdge1 && !KeepEdge2) { - if (TrueBB == FalseBB) + if (TrueBB == FalseBB) { // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else { + } else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); @@ -3605,15 +3751,25 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (!KeepEdge1) + if (!KeepEdge1) { // Only TrueBB was found. Builder.CreateBr(TrueBB); - else + } else { // Only FalseBB was found. Builder.CreateBr(FalseBB); + } } EraseTerminatorAndDCECond(OldTerm); + + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.reserve(RemovedSuccessors.size()); + for (auto *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + return true; } @@ -3768,6 +3924,8 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(DefaultCst); ICI->eraseFromParent(); + SmallVector<DominatorTree::UpdateType, 2> Updates; + // Okay, the switch goes to this block on a default value. Add an edge from // the switch to the merge point on the compared value. BasicBlock *NewBB = @@ -3781,13 +3939,17 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( SIW.setSuccessorWeight(0, *NewW); } SIW.addCase(Cst, NewBB, NewW); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); Builder.CreateBr(SuccBlock); + Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); PHIUse->addIncoming(NewCst, NewBB); + if (DTU) + DTU->applyUpdates(Updates); return true; } @@ -3821,7 +3983,7 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, if (UsedICmps <= 1) return false; - bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or); + bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. @@ -3853,12 +4015,15 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, << " cases into SWITCH. BB is:\n" << *BB); + SmallVector<DominatorTree::UpdateType, 2> Updates; + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = - BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); + BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, + /*MSSAU=*/nullptr, "switch.early.test"); + // Remove the uncond branch added to the old block. Instruction *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -3870,6 +4035,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, OldTI->eraseFromParent(); + Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); @@ -3905,6 +4072,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, // Erase the old branch instruction. EraseTerminatorAndDCECond(BI); + if (DTU) + DTU->applyUpdates(Updates); LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; @@ -3921,17 +4090,36 @@ bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; } +// Check if cleanup block is empty +static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { + for (Instruction &I : R) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (!II) + return false; + + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + switch (IntrinsicID) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::lifetime_end: + break; + default: + return false; + } + } + return true; +} + // Simplify resume that is shared by several landing pads (phi of landing pad). bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - // Check that there are no other instructions except for debug intrinsics - // between the phi of landing pads (RI->getValue()) and resume instruction. - BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), - E = RI->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) - return false; + // Check that there are no other instructions except for debug and lifetime + // intrinsics between the phi's and resume instruction. + if (!isCleanupBlockEmpty( + make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) + return false; SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; auto *PhiLPInst = cast<PHINode>(RI->getValue()); @@ -3952,17 +4140,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { if (IncomingValue != LandingPad) continue; - bool isTrivial = true; - - I = IncomingBB->getFirstNonPHI()->getIterator(); - E = IncomingBB->getTerminator()->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) { - isTrivial = false; - break; - } - - if (isTrivial) + if (isCleanupBlockEmpty( + make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) TrivialUnwindBlocks.insert(IncomingBB); } @@ -3981,7 +4160,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred); + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // In each SimplifyCFG run, only the current processed block can be erased. @@ -3991,37 +4171,21 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { // predecessors. TrivialBB->getTerminator()->eraseFromParent(); new UnreachableInst(RI->getContext(), TrivialBB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); } // Delete the resume block if all its predecessors have been removed. - if (pred_empty(BB)) - BB->eraseFromParent(); + if (pred_empty(BB)) { + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); + } return !TrivialUnwindBlocks.empty(); } -// Check if cleanup block is empty -static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) { - BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator(); - while (++I != E) { - auto *II = dyn_cast<IntrinsicInst>(I); - if (!II) - return false; - - Intrinsic::ID IntrinsicID = II->getIntrinsicID(); - switch (IntrinsicID) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::lifetime_end: - break; - default: - return false; - } - } - return true; -} - // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); @@ -4030,23 +4194,26 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. - if (!isCleanupBlockEmpty(LPInst, RI)) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(LPInst->getNextNode(), RI))) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { BasicBlock *Pred = *PI++; - removeUnwindEdge(Pred); + removeUnwindEdge(Pred, DTU); + ++NumInvokes; } // The landingpad is now unreachable. Zap it. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } -static bool removeEmptyCleanup(CleanupReturnInst *RI) { +static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // If this is a trivial cleanup pad that executes no instructions, it can be // eliminated. If the cleanup pad continues to the caller, any predecessor // that is an EH pad will be updated to continue to the caller and any @@ -4067,7 +4234,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) { return false; // Check that there are no other instructions except for benign intrinsics. - if (!isCleanupBlockEmpty(CPInst, RI)) + if (!isCleanupBlockEmpty( + make_range<Instruction *>(CPInst->getNextNode(), RI))) return false; // If the cleanup return we are simplifying unwinds to the caller, this will @@ -4152,19 +4320,32 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) { } } + std::vector<DominatorTree::UpdateType> Updates; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { // The iterator must be updated here because we are removing this pred. BasicBlock *PredBB = *PI++; if (UnwindDest == nullptr) { - removeUnwindEdge(PredBB); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(PredBB, DTU); + ++NumInvokes; } else { Instruction *TI = PredBB->getTerminator(); TI->replaceUsesOfWith(BB, UnwindDest); + Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); } } - // The cleanup pad is now unreachable. Zap it. - BB->eraseFromParent(); + if (DTU) { + DTU->applyUpdates(Updates); + DTU->deleteBB(BB); + } else + // The cleanup pad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; } @@ -4211,7 +4392,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { if (mergeCleanupPad(RI)) return true; - if (removeEmptyCleanup(RI)) + if (removeEmptyCleanup(RI, DTU)) return true; return false; @@ -4242,15 +4423,16 @@ bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); - (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU); } // If we eliminated all predecessors of the block, delete the block now. if (pred_empty(BB)) { // We know there are no successors, so just nuke the block. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); } return true; @@ -4330,18 +4512,26 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (&BB->front() != UI) return Changed; - SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); + std::vector<DominatorTree::UpdateType> Updates; + + SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Instruction *TI = Preds[i]->getTerminator(); + auto *Predecessor = Preds[i]; + Instruction *TI = Predecessor->getTerminator(); IRBuilder<> Builder(TI); if (auto *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isUnconditional()) { - assert(BI->getSuccessor(0) == BB && "Incorrect CFG"); + // We could either have a proper unconditional branch, + // or a degenerate conditional branch with matching destinations. + if (all_of(BI->successors(), + [BB](auto *Successor) { return Successor == BB; })) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } else { + assert(BI->isConditional() && "Can't get here with an uncond branch."); Value* Cond = BI->getCondition(); + assert(BI->getSuccessor(0) != BI->getSuccessor(1) && + "The destinations are guaranteed to be different here."); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); @@ -4353,6 +4543,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { EraseTerminatorAndDCECond(BI); Changed = true; } + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { SwitchInstProfUpdateWrapper SU(*SI); for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { @@ -4365,14 +4556,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { e = SU->case_end(); Changed = true; } + // Note that the default destination can't be removed! + if (SI->getDefaultDest() != BB) + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { - removeUnwindEdge(TI->getParent()); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; } } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { if (CSI->getUnwindDest() == BB) { - removeUnwindEdge(TI->getParent()); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + removeUnwindEdge(TI->getParent(), DTU); Changed = true; continue; } @@ -4387,35 +4587,53 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { Changed = true; } } + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); if (CSI->getNumHandlers() == 0) { - BasicBlock *CatchSwitchBB = CSI->getParent(); if (CSI->hasUnwindDest()) { - // Redirect preds to the unwind dest - CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest()); + // Redirect all predecessors of the block containing CatchSwitchInst + // to instead branch to the CatchSwitchInst's unwind destination. + for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { + Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, + CSI->getUnwindDest()}); + Updates.push_back( + {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); + } + Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); } else { // Rewrite all preds to unwind to caller (or from invoke to call). - SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB)); + if (DTU) + DTU->applyUpdates(Updates); + Updates.clear(); + SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor)); for (BasicBlock *EHPred : EHPreds) - removeUnwindEdge(EHPred); + removeUnwindEdge(EHPred, DTU); } // The catchswitch is no longer reachable. new UnreachableInst(CSI->getContext(), CSI); CSI->eraseFromParent(); Changed = true; } - } else if (isa<CleanupReturnInst>(TI)) { + } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + (void)CRI; + assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && + "Expected to always have an unwind to BB."); + Updates.push_back({DominatorTree::Delete, Predecessor, BB}); new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } } + if (DTU) + DTU->applyUpdates(Updates); + // If this block is now dead, remove it. if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { // We know there are no successors, so just nuke the block. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); + if (DTU) + DTU->deleteBB(BB); + else + BB->eraseFromParent(); return true; } @@ -4433,15 +4651,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } -static void createUnreachableSwitchDefault(SwitchInst *Switch) { +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefaultBlock = - SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); + auto *BB = Switch->getParent(); + BasicBlock *NewDefaultBlock = SplitBlockPredecessors( + Switch->getDefaultDest(), Switch->getParent(), "", DTU); + auto *OrigDefaultBlock = Switch->getDefaultDest(); Switch->setDefaultDest(&*NewDefaultBlock); - SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock}, + {DominatorTree::Delete, BB, OrigDefaultBlock}}); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU); + SmallVector<DominatorTree::UpdateType, 2> Updates; + for (auto *Successor : successors(NewDefaultBlock)) + Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor}); auto *NewTerminator = NewDefaultBlock->getTerminator(); new UnreachableInst(Switch->getContext(), NewTerminator); EraseTerminatorAndDCECond(NewTerminator); + if (DTU) + DTU->applyUpdates(Updates); } /// Turn a switch with two reachable destinations into an integer range @@ -4453,6 +4682,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + auto *BB = SI->getParent(); + // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; @@ -4556,17 +4787,23 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, // Clean up the default block - it may have phis or other instructions before // the unreachable terminator. if (!HasDefault) - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, DTU); + + auto *UnreachableDefault = SI->getDefaultDest(); // Drop the switch. SI->eraseFromParent(); + if (!HasDefault && DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); + return true; } /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, + AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4580,11 +4817,15 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; + SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases; for (auto &Case : SI->cases()) { + auto *Successor = Case.getCaseSuccessor(); + ++NumPerSuccessorCases[Successor]; const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); + --NumPerSuccessorCases[Successor]; LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } @@ -4602,7 +4843,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, DTU); return true; } @@ -4619,6 +4860,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, SIW.removeCase(CaseI); } + std::vector<DominatorTree::UpdateType> Updates; + for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) + if (I.second == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + if (DTU) + DTU->applyUpdates(Updates); + return true; } @@ -4974,30 +5222,41 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, // a select, fixing up PHI nodes and basic blocks. static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, Value *SelectValue, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + DomTreeUpdater *DTU) { + std::vector<DominatorTree::UpdateType> Updates; + BasicBlock *SelectBB = SI->getParent(); + BasicBlock *DestBB = PHI->getParent(); + + if (!is_contained(predecessors(DestBB), SelectBB)) + Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); + Builder.CreateBr(DestBB); + + // Remove the switch. + while (PHI->getBasicBlockIndex(SelectBB) >= 0) PHI->removeIncomingValue(SelectBB); PHI->addIncoming(SelectValue, SelectBB); - Builder.CreateBr(PHI->getParent()); - - // Remove the switch. for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == PHI->getParent()) + if (Succ == DestBB) continue; Succ->removePredecessor(SelectBB); + Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); } SI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); } /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -5017,7 +5276,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, Value *SelectValue = ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder); + RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); return true; } // The switch couldn't be converted into a select. @@ -5402,11 +5661,12 @@ static void reuseTableCompare( /// successor block with different constant values, replace the switch with /// lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout &DL, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - Function *Fn = SI->getParent()->getParent(); + BasicBlock *BB = SI->getParent(); + Function *Fn = BB->getParent(); // Only build lookup table when we have a target that supports it or the // attribute is not set. if (!TTI.shouldBuildLookupTables() || @@ -5500,6 +5760,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; + std::vector<DominatorTree::UpdateType> Updates; + // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create( @@ -5532,6 +5794,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. } else { @@ -5539,6 +5802,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } // Populate the BB that does the lookups. @@ -5576,16 +5840,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); - + Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); + Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); Builder.SetInsertPoint(LookupBB); - AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. - SI->getDefaultDest()->removePredecessor(SI->getParent(), + SI->getDefaultDest()->removePredecessor(BB, /*KeepOneInputPHIs=*/true); + Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); } bool ReturnedEarly = false; @@ -5622,19 +5888,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, PHI->addIncoming(Result, LookupBB); } - if (!ReturnedEarly) + if (!ReturnedEarly) { Builder.CreateBr(CommonDest); + Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + } // Remove the switch. + SmallSetVector<BasicBlock *, 8> RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); if (Succ == SI->getDefaultDest()) continue; - Succ->removePredecessor(SI->getParent()); + Succ->removePredecessor(BB); + RemovedSuccessors.insert(Succ); } SI->eraseFromParent(); + if (DTU) { + for (BasicBlock *RemovedSuccessor : RemovedSuccessors) + Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + DTU->applyUpdates(Updates); + } + ++NumLookupTables; if (NeedMask) ++NumLookupTablesHoles; @@ -5770,10 +6046,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { return requestResimplify(); // Remove unreachable cases. - if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) return requestResimplify(); - if (switchToSelect(SI, Builder, DL, TTI)) + if (switchToSelect(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) @@ -5785,7 +6061,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // CVP. Therefore, only apply this transformation during late stages of the // optimisation pipeline. if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DL, TTI)) + SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (ReduceSwitchRange(SI, Builder, DL, TTI)) @@ -5800,9 +6076,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; + SmallSetVector<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { + if (!Dest->hasAddressTaken()) + RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); IBI->removeDestination(i); --i; @@ -5811,6 +6090,14 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { } } + if (DTU) { + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve(RemovedSuccs.size()); + for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); + DTU->applyUpdates(Updates); + } + if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. new UnreachableInst(IBI->getContext(), IBI); @@ -5854,7 +6141,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { /// block when the inputs in the phi are the same for the two blocks being /// merged. In some cases, this could result in removal of the PHI entirely. static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, - BasicBlock *BB) { + BasicBlock *BB, DomTreeUpdater *DTU) { auto Succ = BB->getUniqueSuccessor(); assert(Succ); // If there's a phi in the successor block, we'd likely have to introduce @@ -5875,6 +6162,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, if (!BI2 || !BI2->isIdenticalTo(BI)) continue; + std::vector<DominatorTree::UpdateType> Updates; + // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. SmallPtrSet<BasicBlock *, 16> Preds; @@ -5884,6 +6173,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); II->setUnwindDest(OtherPred); + Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); + Updates.push_back({DominatorTree::Delete, Pred, BB}); } // The debug info in OtherPred doesn't cover the merged control flow that @@ -5899,11 +6190,14 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, Succs.insert(succ_begin(BB), succ_end(BB)); for (BasicBlock *Succ : Succs) { Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } IRBuilder<> Builder(BI); Builder.CreateUnreachable(); BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates(Updates); return true; } return false; @@ -5928,11 +6222,11 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // backedge, so we can eliminate BB. bool NeedCanonicalLoop = Options.NeedCanonicalLoop && - (LoopHeaders && BB->hasNPredecessorsOrMore(2) && - (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); + (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && + (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; // If the only instruction in the block is a seteq/setne comparison against a @@ -5951,7 +6245,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; - if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) return true; } @@ -5959,7 +6253,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -6022,7 +6317,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable @@ -6031,8 +6327,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); + if (HoistCommon && Options.HoistCommonInsts) + if (HoistThenElseCodeToIf(BI, TTI)) + return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -6056,14 +6353,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) return requestResimplify(); // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) return requestResimplify(); // Look for diamond patterns. @@ -6071,14 +6368,14 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL, TTI)) + if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return requestResimplify(); return false; } /// Check if passing a value to an instruction will cause undefined behavior. -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { Constant *C = dyn_cast<Constant>(V); if (!C) return false; @@ -6101,12 +6398,15 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Look through GEPs. A load from a GEP derived from NULL is still undefined if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) - if (GEP->getPointerOperand() == I) - return passingValueIsAlwaysUndefined(V, GEP); + if (GEP->getPointerOperand() == I) { + if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) + PtrValueMayBeModified = true; + return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); + } // Look through bitcasts. if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) - return passingValueIsAlwaysUndefined(V, BC); + return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) @@ -6121,24 +6421,51 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; - // A call to null is undefined. - if (auto *CB = dyn_cast<CallBase>(Use)) - return !NullPointerIsDefined(CB->getFunction()) && - CB->getCalledOperand() == I; + if (auto *CB = dyn_cast<CallBase>(Use)) { + if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) + return false; + // A call to null is undefined. + if (CB->getCalledOperand() == I) + return true; + + if (C->isNullValue()) { + for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) && + CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing null to a nonnnull+noundef argument is undefined. + return !PtrValueMayBeModified; + } + } + } else if (isa<UndefValue>(C)) { + // Passing undef to a noundef argument is undefined. + for (const llvm::Use &Arg : CB->args()) + if (Arg == I) { + unsigned ArgIdx = CB->getArgOperandNo(&Arg); + if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) { + // Passing undef to a noundef argument is undefined. + return true; + } + } + } + } } return false; } /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. -static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { +static bool removeUndefIntroducingPredecessor(BasicBlock *BB, + DomTreeUpdater *DTU) { for (PHINode &PHI : BB->phis()) for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { - Instruction *T = PHI.getIncomingBlock(i)->getTerminator(); + BasicBlock *Predecessor = PHI.getIncomingBlock(i); + Instruction *T = Predecessor->getTerminator(); IRBuilder<> Builder(T); if (BranchInst *BI = dyn_cast<BranchInst>(T)) { - BB->removePredecessor(PHI.getIncomingBlock(i)); + BB->removePredecessor(Predecessor); // Turn uncoditional branches into unreachables and remove the dead // destination from conditional branches. if (BI->isUnconditional()) @@ -6147,6 +6474,8 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : BI->getSuccessor(0)); BI->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}}); return true; } // TODO: SwitchInst. @@ -6155,7 +6484,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { return false; } -bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { +bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) { bool Changed = false; assert(BB && BB->getParent() && "Block not embedded in function!"); @@ -6166,28 +6495,29 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DTU); return true; } // Check to see if we can constant propagate this terminator instruction // away... - Changed |= ConstantFoldTerminator(BB, true); + Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true, + /*TLI=*/nullptr, DTU); // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); // Check for and remove branches that will always cause undefined behavior. - Changed |= removeUndefIntroducingPredecessor(BB); + Changed |= removeUndefIntroducingPredecessor(BB, DTU); // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. - if (MergeBlockIntoPredecessor(BB)) + if (MergeBlockIntoPredecessor(BB, DTU)) return true; if (SinkCommon && Options.SinkCommonInsts) - Changed |= SinkCommonCodeFromPredecessors(BB); + Changed |= SinkCommonCodeFromPredecessors(BB, DTU); IRBuilder<> Builder(BB); @@ -6196,7 +6526,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { // eliminate it, do so now. if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL); } Instruction *Terminator = BB->getTerminator(); @@ -6228,7 +6558,23 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { return Changed; } +bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { + bool Changed = simplifyOnceImpl(BB); + + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Failed to maintain validity of domtree!"); + + return Changed; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { + assert((!RequireAndPreserveDomTree || + (DTU && + DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) && + "Original domtree is invalid?"); + bool Changed = false; // Repeated simplify BB as long as resimplification is requested. @@ -6244,9 +6590,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const SimplifyCFGOptions &Options, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { - return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders, - Options) + DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, + ArrayRef<WeakVH> LoopHeaders) { + return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr, + BB->getModule()->getDataLayout(), LoopHeaders, Options) .run(BB); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index d3d0c3341908..290c04a7ad10 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -191,15 +191,15 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); - ICmpInst::Predicate InvariantPredicate; - const SCEV *InvariantLHS, *InvariantRHS; - auto *PN = dyn_cast<PHINode>(IVOperand); if (!PN) return false; - if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, - InvariantLHS, InvariantRHS)) + auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L); + if (!LIP) return false; + ICmpInst::Predicate InvariantPredicate = LIP->Pred; + const SCEV *InvariantLHS = LIP->LHS; + const SCEV *InvariantRHS = LIP->RHS; // Rewrite the comparison to a loop invariant comparison if it can be done // cheaply, where cheaply means "we don't need to emit any new @@ -477,6 +477,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { if (WO->use_empty()) WO->eraseFromParent(); + Changed = true; return true; } @@ -967,3 +968,1122 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, } } // namespace llvm + +//===----------------------------------------------------------------------===// +// Widen Induction Variables - Extend the width of an IV to cover its +// widest uses. +//===----------------------------------------------------------------------===// + +class WidenIV { + // Parameters + PHINode *OrigPhi; + Type *WideType; + + // Context + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + + // Does the module have any calls to the llvm.experimental.guard intrinsic + // at all? If not we can avoid scanning instructions looking for guards. + bool HasGuards; + + bool UsePostIncrementRanges; + + // Statistics + unsigned NumElimExt = 0; + unsigned NumWidened = 0; + + // Result + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; + + SmallPtrSet<Instruction *,16> Widened; + + enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + + // A map tracking the kind of extension used to widen each narrow IV + // and narrow IV user. + // Key: pointer to a narrow IV or IV user. + // Value: the kind of extension used to widen this Instruction. + DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; + + using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; + + // A map with control-dependent ranges for post increment IV uses. The key is + // a pair of IV def and a use of this def denoting the context. The value is + // a ConstantRange representing possible values of the def at the given + // context. + DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; + + Optional<ConstantRange> getPostIncRangeInfo(Value *Def, + Instruction *UseI) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + return It == PostIncRangeInfos.end() + ? Optional<ConstantRange>(None) + : Optional<ConstantRange>(It->second); + } + + void calculatePostIncRanges(PHINode *OrigPhi); + void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + if (It == PostIncRangeInfos.end()) + PostIncRangeInfos.insert({Key, R}); + else + It->second = R.intersectWith(It->second); + } + +public: + /// Record a link in the Narrow IV def-use chain along with the WideIV that + /// computes the same value as the Narrow IV def. This avoids caching Use* + /// pointers. + struct NarrowIVDefUse { + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; + + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. + bool NeverNegative = false; + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} + }; + + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards, bool UsePostIncrementRanges = true); + + PHINode *createWideIV(SCEVExpander &Rewriter); + + unsigned getNumElimExt() { return NumElimExt; }; + unsigned getNumWidened() { return NumWidened; }; + +protected: + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); + + ExtendKind getExtendKind(Instruction *I); + + using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; + + WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); + + WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); + + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const; + + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + + bool widenLoopCompare(NarrowIVDefUse DU); + bool widenWithVariantUse(NarrowIVDefUse DU); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); + +private: + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; +}; + + +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. A nullptr can be returned if no +/// viable location is found: it may happen if User is a PHI and Def only comes +/// to this PHI from unreachable blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT, LoopInfo *LI) { + PHINode *PHI = dyn_cast<PHINode>(User); + if (!PHI) + return User; + + Instruction *InsertPt = nullptr; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + + if (!DT->isReachableFromEntry(InsertBB)) + continue; + + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + + // If we have skipped all inputs, it means that Def only comes to Phi from + // unreachable blocks. + if (!InsertPt) + return nullptr; + + auto *DefI = dyn_cast<Instruction>(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); +} + +WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards, bool UsePostIncrementRanges) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), + DeadInsts(DI) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; +} + +Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, + bool IsSigned, Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); +} + +/// Instantiate a wide operation to replace a narrow operation. This only needs +/// to handle operations that can evaluation to SCEVAddRec. It can safely return +/// 0 for any operation we decide not to clone. +Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + unsigned Opcode = DU.NarrowUse->getOpcode(); + switch (Opcode) { + default: + return nullptr; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return cloneBitwiseIVUser(DU); + } +} + +Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. + + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = + getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); + + return WideUse == WideAR; + }; + + bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + SignExtend, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + SignExtend, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { + auto It = ExtendKindMap.find(I); + assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); + return It->second; +} + +const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const { + switch (OpCode) { + case Instruction::Add: + return SE->getAddExpr(LHS, RHS); + case Instruction::Sub: + return SE->getMinusSCEV(LHS, RHS); + case Instruction::Mul: + return SE->getMulExpr(LHS, RHS); + case Instruction::UDiv: + return SE->getUDivExpr(LHS, RHS); + default: + llvm_unreachable("Unsupported opcode."); + }; +} + +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. If the expression after extending the +/// operands is an AddRec for this loop, return the AddRec and the kind of +/// extension used. +WidenIV::WidenedRecTy +WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { + // Handle the common case of add<nsw/nuw> + const unsigned OpCode = DU.NarrowUse->getOpcode(); + // Only Add/Sub/Mul instructions supported yet. + if (OpCode != Instruction::Add && OpCode != Instruction::Sub && + OpCode != Instruction::Mul) + return {nullptr, Unknown}; + + // One operand (NarrowDef) has already been extended to WideDef. Now determine + // if extending the other will lead to a recurrence. + const unsigned ExtendOperIdx = + DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; + assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); + + const SCEV *ExtendOperExpr = nullptr; + const OverflowingBinaryOperator *OBO = + cast<OverflowingBinaryOperator>(DU.NarrowUse); + ExtendKind ExtKind = getExtendKind(DU.NarrowDef); + if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) + ExtendOperExpr = SE->getSignExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) + ExtendOperExpr = SE->getZeroExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else + return {nullptr, Unknown}; + + // When creating this SCEV expr, don't apply the current operations NSW or NUW + // flags. This instruction may be guarded by control flow that the no-wrap + // behavior depends on. Non-control-equivalent instructions can be mapped to + // the same SCEV expression, and it would be incorrect to transfer NSW/NUW + // semantics to those operations. + const SCEV *lhs = SE->getSCEV(DU.WideDef); + const SCEV *rhs = ExtendOperExpr; + + // Let's swap operands to the initial order for the case of non-commutative + // operations, like SUB. See PR21014. + if (ExtendOperIdx == 0) + std::swap(lhs, rhs); + const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); + + if (!AddRec || AddRec->getLoop() != L) + return {nullptr, Unknown}; + + return {AddRec, ExtKind}; +} + +/// Is this instruction potentially interesting for further simplification after +/// widening it's type? In other words, can the extend be safely hoisted out of +/// the loop with SCEV reducing the value to a recurrence on the same loop. If +/// so, return the extended recurrence and the kind of extension used. Otherwise +/// return {nullptr, Unknown}. +WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { + if (!SE->isSCEVable(DU.NarrowUse->getType())) + return {nullptr, Unknown}; + + const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); + if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= + SE->getTypeSizeInBits(WideType)) { + // NarrowUse implicitly widens its operand. e.g. a gep with a narrow + // index. So don't follow this use. + return {nullptr, Unknown}; + } + + const SCEV *WideExpr; + ExtendKind ExtKind; + if (DU.NeverNegative) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + if (isa<SCEVAddRecExpr>(WideExpr)) + ExtKind = SignExtended; + else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ZeroExtended; + } + } else if (getExtendKind(DU.NarrowDef) == SignExtended) { + WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); + ExtKind = SignExtended; + } else { + WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); + ExtKind = ZeroExtended; + } + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return {nullptr, Unknown}; + return {AddRec, ExtKind}; +} + +/// This IV user cannot be widened. Replace this use of the original narrow IV +/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. +static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, + LoopInfo *LI) { + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return; + LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " + << *DU.NarrowUse << "\n"); + IRBuilder<> Builder(InsertPt); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); +} + +/// If the narrow use is a compare instruction, then widen the compare +// (and possibly the other operand). The extend operation is hoisted into the +// loop preheader as far as possible. +bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { + ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); + if (!Cmp) + return false; + + // We can legally widen the comparison in the following two cases: + // + // - The signedness of the IV extension and comparison match + // + // - The narrow IV is always positive (and thus its sign extension is equal + // to its zero extension). For instance, let's say we're zero extending + // %narrow for the following use + // + // icmp slt i32 %narrow, %val ... (A) + // + // and %narrow is always positive. Then + // + // (A) == icmp slt i32 sext(%narrow), sext(%val) + // == icmp slt i32 zext(%narrow), sext(%val) + bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; + if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) + return false; + + Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); + unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); + + // Widen the compare instruction. + auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); + if (!InsertPt) + return false; + IRBuilder<> Builder(InsertPt); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + + // Widen the other operand of the compare, if necessary. + if (CastWidth < IVWidth) { + Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); + DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); + } + return true; +} + +// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this +// will not work when: +// 1) SCEV traces back to an instruction inside the loop that SCEV can not +// expand, eg. add %indvar, (load %addr) +// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant +// While SCEV fails to avoid trunc, we can still try to use instruction +// combining approach to prove trunc is not required. This can be further +// extended with other instruction combining checks, but for now we handle the +// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") +// +// Src: +// %c = sub nsw %b, %indvar +// %d = sext %c to i64 +// Dst: +// %indvar.ext1 = sext %indvar to i64 +// %m = sext %b to i64 +// %d = sub nsw i64 %m, %indvar.ext1 +// Therefore, as long as the result of add/sub/mul is extended to wide type, no +// trunc is required regardless of how %b is generated. This pattern is common +// when calculating address in 64 bit architecture +bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + // Handle the common case of add<nsw/nuw> + const unsigned OpCode = NarrowUse->getOpcode(); + // Only Add/Sub/Mul instructions are supported. + if (OpCode != Instruction::Add && OpCode != Instruction::Sub && + OpCode != Instruction::Mul) + return false; + + // The operand that is not defined by NarrowDef of DU. Let's call it the + // other operand. + assert((NarrowUse->getOperand(0) == NarrowDef || + NarrowUse->getOperand(1) == NarrowDef) && + "bad DU"); + + const OverflowingBinaryOperator *OBO = + cast<OverflowingBinaryOperator>(NarrowUse); + ExtendKind ExtKind = getExtendKind(NarrowDef); + bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); + bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); + auto AnotherOpExtKind = ExtKind; + + // Check that all uses are either: + // - narrow def (in case of we are widening the IV increment); + // - single-input LCSSA Phis; + // - comparison of the chosen type; + // - extend of the chosen type (raison d'etre). + SmallVector<Instruction *, 4> ExtUsers; + SmallVector<PHINode *, 4> LCSSAPhiUsers; + SmallVector<ICmpInst *, 4> ICmpUsers; + for (Use &U : NarrowUse->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + if (User == NarrowDef) + continue; + if (!L->contains(User)) { + auto *LCSSAPhi = cast<PHINode>(User); + // Make sure there is only 1 input, so that we don't have to split + // critical edges. + if (LCSSAPhi->getNumOperands() != 1) + return false; + LCSSAPhiUsers.push_back(LCSSAPhi); + continue; + } + if (auto *ICmp = dyn_cast<ICmpInst>(User)) { + auto Pred = ICmp->getPredicate(); + // We have 3 types of predicates: signed, unsigned and equality + // predicates. For equality, it's legal to widen icmp for either sign and + // zero extend. For sign extend, we can also do so for signed predicates, + // likeweise for zero extend we can widen icmp for unsigned predicates. + if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) + return false; + if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) + return false; + ICmpUsers.push_back(ICmp); + continue; + } + if (ExtKind == SignExtended) + User = dyn_cast<SExtInst>(User); + else + User = dyn_cast<ZExtInst>(User); + if (!User || User->getType() != WideType) + return false; + ExtUsers.push_back(User); + } + if (ExtUsers.empty()) { + DeadInsts.emplace_back(NarrowUse); + return true; + } + + // We'll prove some facts that should be true in the context of ext users. If + // there is no users, we are done now. If there are some, pick their common + // dominator as context. + Instruction *Context = nullptr; + for (auto *Ext : ExtUsers) { + if (!Context || DT->dominates(Ext, Context)) + Context = Ext; + else if (!DT->dominates(Context, Ext)) + // For users that don't have dominance relation, use common dominator. + Context = + DT->findNearestCommonDominator(Context->getParent(), Ext->getParent()) + ->getTerminator(); + } + assert(Context && "Context not found?"); + + if (!CanSignExtend && !CanZeroExtend) { + // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we + // will most likely not see it. Let's try to prove it. + if (OpCode != Instruction::Add) + return false; + if (ExtKind != ZeroExtended) + return false; + const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); + // TODO: Support case for NarrowDef = NarrowUse->getOperand(1). + if (NarrowUse->getOperand(0) != NarrowDef) + return false; + if (!SE->isKnownNegative(RHS)) + return false; + bool ProvedSubNUW = SE->isKnownPredicateAt( + ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context); + if (!ProvedSubNUW) + return false; + // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as + // neg(zext(neg(op))), which is basically sext(op). + AnotherOpExtKind = SignExtended; + } + + // Verifying that Defining operand is an AddRec + const SCEV *Op1 = SE->getSCEV(WideDef); + const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); + if (!AddRecOp1 || AddRecOp1->getLoop() != L) + return false; + + LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + // Generating a widening use instruction. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + AnotherOpExtKind, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind, NarrowUse); + + auto *NarrowBO = cast<BinaryOperator>(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + ExtendKindMap[NarrowUse] = ExtKind; + + for (Instruction *User : ExtUsers) { + assert(User->getType() == WideType && "Checked before!"); + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " + << *WideBO << "\n"); + ++NumElimExt; + User->replaceAllUsesWith(WideBO); + DeadInsts.emplace_back(User); + } + + for (PHINode *User : LCSSAPhiUsers) { + assert(User->getNumOperands() == 1 && "Checked before!"); + Builder.SetInsertPoint(User); + auto *WidePN = + Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); + BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); + assert(LoopExitingBlock && L->contains(LoopExitingBlock) && + "Not a LCSSA Phi?"); + WidePN->addIncoming(WideBO, LoopExitingBlock); + Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); + auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); + User->replaceAllUsesWith(TruncPN); + DeadInsts.emplace_back(User); + } + + for (ICmpInst *User : ICmpUsers) { + Builder.SetInsertPoint(User); + auto ExtendedOp = [&](Value * V)->Value * { + if (V == NarrowUse) + return WideBO; + if (ExtKind == ZeroExtended) + return Builder.CreateZExt(V, WideBO->getType()); + else + return Builder.CreateSExt(V, WideBO->getType()); + }; + auto Pred = User->getPredicate(); + auto *LHS = ExtendedOp(User->getOperand(0)); + auto *RHS = ExtendedOp(User->getOperand(1)); + auto *WideCmp = + Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); + User->replaceAllUsesWith(WideCmp); + DeadInsts.emplace_back(User); + } + + return true; +} + +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { + assert(ExtendKindMap.count(DU.NarrowDef) && + "Should already know the kind of extension used to widen NarrowDef"); + + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { + if (LI->getLoopFor(UsePhi->getParent()) != L) { + // For LCSSA phis, sink the truncate outside the loop. + // After SimplifyCFG most loop exit targets have a single predecessor. + // Otherwise fall back to a truncate within the loop. + if (UsePhi->getNumOperands() != 1) + truncateIVUse(DU, DT, LI); + else { + // Widening the PHI requires us to insert a trunc. The logical place + // for this trunc is in the same BB as the PHI. This is not possible if + // the BB is terminated by a catchswitch. + if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) + return nullptr; + + PHINode *WidePhi = + PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", + UsePhi); + WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); + IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); + Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); + UsePhi->replaceAllUsesWith(Trunc); + DeadInsts.emplace_back(UsePhi); + LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " + << *WidePhi << "\n"); + } + return nullptr; + } + } + + // This narrow use can be widened by a sext if it's non-negative or its narrow + // def was widended by a sext. Same for zext. + auto canWidenBySExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; + }; + auto canWidenByZExt = [&]() { + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; + }; + + // Our raison d'etre! Eliminate sign and zero extension. + if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || + (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *DU.NarrowUse + << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; + } + } + if (NewDef != DU.NarrowUse) { + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); + ++NumElimExt; + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.emplace_back(DU.NarrowUse); + } + // Now that the extend is gone, we want to expose it's uses for potential + // further simplification. We don't need to directly inform SimplifyIVUsers + // of the new users, because their parent IV will be processed later as a + // new loop phi. If we preserved IVUsers analysis, we would also want to + // push the uses of WideDef here. + + // No further widening is needed. The deceased [sz]ext had done it for us. + return nullptr; + } + + // Does this user itself evaluate to a recurrence after widening? + WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); + if (!WideAddRec.first) + WideAddRec = getWideRecurrence(DU); + + assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); + if (!WideAddRec.first) { + // If use is a loop condition, try to promote the condition instead of + // truncating the IV first. + if (widenLoopCompare(DU)) + return nullptr; + + // We are here about to generate a truncate instruction that may hurt + // performance because the scalar evolution expression computed earlier + // in WideAddRec.first does not indicate a polynomial induction expression. + // In that case, look at the operands of the use instruction to determine + // if we can still widen the use instead of truncating its operand. + if (widenWithVariantUse(DU)) + return nullptr; + + // This user does not evaluate to a recurrence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + truncateIVUse(DU, DT, LI); + return nullptr; + } + // Assume block terminators cannot evaluate to a recurrence. We can't to + // insert a Trunc after a terminator if there happens to be a critical edge. + assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && + "SCEV is not expected to evaluate a block terminator"); + + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = nullptr; + if (WideAddRec.first == WideIncExpr && + Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) + WideUse = WideInc; + else { + WideUse = cloneIVUser(DU, WideAddRec.first); + if (!WideUse) + return nullptr; + } + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simply throw away the newly created wide use. + if (WideAddRec.first != SE->getSCEV(WideUse)) { + LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " + << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first + << "\n"); + DeadInsts.emplace_back(WideUse); + return nullptr; + } + + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; + // Returning WideUse pushes it on the worklist. + return WideUse; +} + +/// Add eligible users of NarrowDef to NarrowIVUsers. +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); + bool NonNegativeDef = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, + SE->getZero(NarrowSCEV->getType())); + for (User *U : NarrowDef->users()) { + Instruction *NarrowUser = cast<Instruction>(U); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(NarrowUser).second) + continue; + + bool NonNegativeUse = false; + if (!NonNegativeDef) { + // We might have a control-dependent range information for this context. + if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) + NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); + } + + NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, + NonNegativeDef || NonNegativeUse); + } +} + +/// Process a single induction variable. First use the SCEVExpander to create a +/// wide induction variable that evaluates to the same recurrence as the +/// original narrow IV. Then use a worklist to forward traverse the narrow IV's +/// def-use chain. After widenIVUse has processed all interesting IV users, the +/// narrow IV will be isolated for removal by DeleteDeadPHIs. +/// +/// It would be simpler to delete uses as they are processed, but we must avoid +/// invalidating SCEV expressions. +PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { + // Is this phi an induction variable? + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); + if (!AddRec) + return nullptr; + + // Widen the induction variable expression. + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended + ? SE->getSignExtendExpr(AddRec, WideType) + : SE->getZeroExtendExpr(AddRec, WideType); + + assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && + "Expect the new IV expression to preserve its type"); + + // Can the IV be extended outside the loop without overflow? + AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); + if (!AddRec || AddRec->getLoop() != L) + return nullptr; + + // An AddRec must have loop-invariant operands. Since this AddRec is + // materialized by a loop header phi, the expression cannot have any post-loop + // operands, so they must dominate the loop header. + assert( + SE->properlyDominates(AddRec->getStart(), L->getHeader()) && + SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && + "Loop header phi recurrence inputs do not dominate the loop"); + + // Iterate over IV uses (including transitive ones) looking for IV increments + // of the form 'add nsw %iv, <const>'. For each increment and each use of + // the increment calculate control-dependent range information basing on + // dominating conditions inside of the loop (e.g. a range check inside of the + // loop). Calculated ranges are stored in PostIncRangeInfos map. + // + // Control-dependent range information is later used to prove that a narrow + // definition is not negative (see pushNarrowIVUsers). It's difficult to do + // this on demand because when pushNarrowIVUsers needs this information some + // of the dominating conditions might be already widened. + if (UsePostIncrementRanges) + calculatePostIncRanges(OrigPhi); + + // The rewriter provides a value for the desired IV expression. This may + // either find an existing phi or materialize a new one. Either way, we + // expect a well-formed cyclic phi-with-increments. i.e. any operand not part + // of the phi-SCC dominates the loop entry. + Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); + Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); + // If the wide phi is not a phi node, for example a cast node, like bitcast, + // inttoptr, ptrtoint, just skip for now. + if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { + // if the cast node is an inserted instruction without any user, we should + // remove it to make sure the pass don't touch the function as we can not + // wide the phi. + if (ExpandInst->hasNUses(0) && + Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) + DeadInsts.emplace_back(ExpandInst); + return nullptr; + } + + // Remembering the WideIV increment generated by SCEVExpander allows + // widenIVUse to reuse it when widening the narrow IV's increment. We don't + // employ a general reuse mechanism because the call above is the only call to + // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + WideInc = + cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); + WideIncExpr = SE->getSCEV(WideInc); + // Propagate the debug location associated with the original loop increment + // to the new (widened) increment. + auto *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + WideInc->setDebugLoc(OrigInc->getDebugLoc()); + } + + LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); + ++NumWidened; + + // Traverse the def-use chain using a worklist starting at the original IV. + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); + + while (!NarrowIVUsers.empty()) { + WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *WideUse = widenIVUse(DU, Rewriter); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) + pushNarrowIVUsers(DU.NarrowUse, WideUse); + + // widenIVUse may have removed the def-use edge. + if (DU.NarrowDef->use_empty()) + DeadInsts.emplace_back(DU.NarrowDef); + } + + // Attach any debug information to the new PHI. + replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + + return WidePhi; +} + +/// Calculates control-dependent range for the given def at the given context +/// by looking at dominating conditions inside of the loop +void WidenIV::calculatePostIncRange(Instruction *NarrowDef, + Instruction *NarrowUser) { + using namespace llvm::PatternMatch; + + Value *NarrowDefLHS; + const APInt *NarrowDefRHS; + if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), + m_APInt(NarrowDefRHS))) || + !NarrowDefRHS->isNonNegative()) + return; + + auto UpdateRangeFromCondition = [&] (Value *Condition, + bool TrueDest) { + CmpInst::Predicate Pred; + Value *CmpRHS; + if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), + m_Value(CmpRHS)))) + return; + + CmpInst::Predicate P = + TrueDest ? Pred : CmpInst::getInversePredicate(Pred); + + auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); + auto CmpConstrainedLHSRange = + ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); + + updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); + }; + + auto UpdateRangeFromGuards = [&](Instruction *Ctx) { + if (!HasGuards) + return; + + for (Instruction &I : make_range(Ctx->getIterator().getReverse(), + Ctx->getParent()->rend())) { + Value *C = nullptr; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) + UpdateRangeFromCondition(C, /*TrueDest=*/true); + } + }; + + UpdateRangeFromGuards(NarrowUser); + + BasicBlock *NarrowUserBB = NarrowUser->getParent(); + // If NarrowUserBB is statically unreachable asking dominator queries may + // yield surprising results. (e.g. the block may not have a dom tree node) + if (!DT->isReachableFromEntry(NarrowUserBB)) + return; + + for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); + L->contains(DTB->getBlock()); + DTB = DTB->getIDom()) { + auto *BB = DTB->getBlock(); + auto *TI = BB->getTerminator(); + UpdateRangeFromGuards(TI); + + auto *BI = dyn_cast<BranchInst>(TI); + if (!BI || !BI->isConditional()) + continue; + + auto *TrueSuccessor = BI->getSuccessor(0); + auto *FalseSuccessor = BI->getSuccessor(1); + + auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { + return BBE.isSingleEdge() && + DT->dominates(BBE, NarrowUser->getParent()); + }; + + if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); + + if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); + } +} + +/// Calculates PostIncRangeInfos map for the given IV +void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { + SmallPtrSet<Instruction *, 16> Visited; + SmallVector<Instruction *, 6> Worklist; + Worklist.push_back(OrigPhi); + Visited.insert(OrigPhi); + + while (!Worklist.empty()) { + Instruction *NarrowDef = Worklist.pop_back_val(); + + for (Use &U : NarrowDef->uses()) { + auto *NarrowUser = cast<Instruction>(U.getUser()); + + // Don't go looking outside the current loop. + auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; + if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) + continue; + + if (!Visited.insert(NarrowUser).second) + continue; + + Worklist.push_back(NarrowUser); + + calculatePostIncRange(NarrowDef, NarrowUser); + } + } +} + +PHINode *llvm::createWideIV(const WideIVInfo &WI, + LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, + unsigned &NumElimExt, unsigned &NumWidened, + bool HasGuards, bool UsePostIncrementRanges) { + WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); + PHINode *WidePHI = Widener.createWideIV(Rewriter); + NumElimExt = Widener.getNumElimExt(); + NumWidened = Widener.getNumWidened(); + return WidePHI; +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index cfcc3454a210..f9a9dd237b6c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/CaptureTracking.h" @@ -542,6 +541,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) { B.CreateMemCpy(Dst, Align(1), Src, Align(1), ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return Dst; } @@ -569,6 +570,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) { // copy for us. Make a memcpy to copy the nul byte with align = 1. CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return DstEnd; } @@ -609,15 +612,27 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { return Dst; } - // Let strncpy handle the zero padding - if (Len > SrcLen + 1) - return nullptr; + // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4) + if (Len > SrcLen + 1) { + if (Len <= 128) { + StringRef Str; + if (!getConstantStringInfo(Src, Str)) + return nullptr; + std::string SrcStr = Str.str(); + SrcStr.resize(Len, '\0'); + Src = B.CreateGlobalString(SrcStr, "str"); + } else { + return nullptr; + } + } Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), ConstantInt::get(DL.getIntPtrType(PT), Len)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return Dst; } @@ -684,8 +699,6 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, Offset); } } - - return nullptr; } // strlen(x?"foo":"bars") --> x ? 3 : 4 @@ -1095,6 +1108,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) { CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } @@ -1143,7 +1158,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) { // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N); + // Propagate attributes, but memcpy has no return value, so make sure that + // any return attributes are compliant. + // TODO: Attach return value attributes to the 1st operand to preserve them? NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); } @@ -1157,6 +1177,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) { CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), Size); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } @@ -1217,6 +1239,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } @@ -1629,6 +1653,14 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc())) return nullptr; + // If we have a pow() library call (accesses memory) and we can't guarantee + // that the base is not an infinity, give up: + // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting + // errno), but sqrt(-Inf) is required by various standards to set errno. + if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() && + !isKnownNeverInfinity(Base, TLI)) + return nullptr; + Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); if (!Sqrt) return nullptr; @@ -1715,7 +1747,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF))) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. @@ -1741,6 +1774,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), Pow->doesNotAccessMemory(), M, B, TLI); + if (!Sqrt) + return nullptr; } // We will memoize intermediate products of the Addition Chain. @@ -2164,7 +2199,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) { classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); // It's only worthwhile if both sinpi and cospi are actually used. - if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + if (SinCalls.empty() || CosCalls.empty()) return nullptr; Value *Sin, *Cos, *SinCos; @@ -2190,7 +2225,7 @@ void LibCallSimplifier::classifyArgUse( SmallVectorImpl<CallInst *> &SinCosCalls) { CallInst *CI = dyn_cast<CallInst>(Val); - if (!CI) + if (!CI || CI->use_empty()) return; // Don't consider calls in other functions. @@ -2487,6 +2522,30 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; + if (CI->use_empty()) + // sprintf(dest, "%s", str) -> strcpy(dest, str) + return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI); + + uint64_t SrcLen = GetStringLength(CI->getArgOperand(2)); + if (SrcLen) { + B.CreateMemCpy( + CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen)); + // Returns total number of characters written without null-character. + return ConstantInt::get(CI->getType(), SrcLen - 1); + } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2), + B, TLI)) { + // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest + Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0)); + return B.CreateIntCast(PtrDiff, CI->getType(), false); + } + + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + if (OptForSize) + return nullptr; + Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) return nullptr; @@ -3219,6 +3278,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; @@ -3231,6 +3292,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1), CI->getArgOperand(2)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; @@ -3245,11 +3308,29 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), Align(1)); NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return CI->getArgOperand(0); } return nullptr; } +Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI, + IRBuilderBase &B) { + const DataLayout &DL = CI->getModule()->getDataLayout(); + if (isFortifiedCallFoldable(CI, 3, 2)) + if (Value *Call = emitMemPCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, DL, TLI)) { + CallInst *NewCI = cast<CallInst>(Call); + NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes( + AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); + return NewCI; + } + return nullptr; +} + Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func) { @@ -3330,7 +3411,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { - SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end()); + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5)); return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4), VariadicArgs, B, TLI); } @@ -3341,7 +3422,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, IRBuilderBase &B) { if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { - SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end()); + SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4)); return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, B, TLI); } @@ -3439,6 +3520,8 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI, switch (Func) { case LibFunc_memcpy_chk: return optimizeMemCpyChk(CI, Builder); + case LibFunc_mempcpy_chk: + return optimizeMemPCpyChk(CI, Builder); case LibFunc_memmove_chk: return optimizeMemMoveChk(CI, Builder); case LibFunc_memset_chk: diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp index e257c5a015f5..beeb60698f04 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -43,11 +43,6 @@ cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO( cl::desc("Apply the profile guided size optimizations only " "to cold code under partial-profile sample PGO.")); -cl::opt<bool> PGSOIRPassOrTestOnly( - "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false), - cl::desc("Apply the profile guided size optimizations only" - "to the IR passes or tests.")); - cl::opt<bool> ForcePGSO( "force-pgso", cl::Hidden, cl::init(false), cl::desc("Force the (profiled-guided) size optimizations. ")); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index b559811d120b..1fa574f04c37 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -13,6 +13,7 @@ // present. //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -24,22 +25,7 @@ using namespace llvm; -namespace { -struct StripGCRelocates : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - StripGCRelocates() : FunctionPass(ID) { - initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override {} - - bool runOnFunction(Function &F) override; - -}; -char StripGCRelocates::ID = 0; -} - -bool StripGCRelocates::runOnFunction(Function &F) { +static bool stripGCRelocates(Function &F) { // Nothing to do for declarations. if (F.isDeclaration()) return false; @@ -71,6 +57,32 @@ bool StripGCRelocates::runOnFunction(Function &F) { return !GCRelocates.empty(); } -INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates", +PreservedAnalyses StripGCRelocates::run(Function &F, + FunctionAnalysisManager &AM) { + if (!stripGCRelocates(F)) + return PreservedAnalyses::all(); + + // Removing gc.relocate preserves the CFG, but most other analysis probably + // need to re-run. + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +namespace { +struct StripGCRelocatesLegacy : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + StripGCRelocatesLegacy() : FunctionPass(ID) { + initializeStripGCRelocatesLegacyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const override {} + + bool runOnFunction(Function &F) override { return ::stripGCRelocates(F); } +}; +char StripGCRelocatesLegacy::ID = 0; +} // namespace + +INITIALIZE_PASS(StripGCRelocatesLegacy, "strip-gc-relocates", "Strip gc.relocates inserted through RewriteStatepointsForGC", true, false) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 21cbbfb140b6..10fda4df51ba 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -17,10 +18,11 @@ namespace { /// This pass strips all debug info that is not related line tables. /// The result will be the same as if the program where compiled with /// -gline-tables-only. -struct StripNonLineTableDebugInfo : public ModulePass { +struct StripNonLineTableDebugLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid - StripNonLineTableDebugInfo() : ModulePass(ID) { - initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry()); + StripNonLineTableDebugLegacyPass() : ModulePass(ID) { + initializeStripNonLineTableDebugLegacyPassPass( + *PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -33,10 +35,17 @@ struct StripNonLineTableDebugInfo : public ModulePass { }; } -char StripNonLineTableDebugInfo::ID = 0; -INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo", +char StripNonLineTableDebugLegacyPass::ID = 0; +INITIALIZE_PASS(StripNonLineTableDebugLegacyPass, + "strip-nonlinetable-debuginfo", "Strip all debug info except linetables", false, false) -ModulePass *llvm::createStripNonLineTableDebugInfoPass() { - return new StripNonLineTableDebugInfo(); +ModulePass *llvm::createStripNonLineTableDebugLegacyPass() { + return new StripNonLineTableDebugLegacyPass(); +} + +PreservedAnalyses +StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) { + llvm::stripNonLineTableDebugInfo(M); + return PreservedAnalyses::all(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 9af39d9a0dd1..3631733713ab 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -6,10 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This pass is used to ensure that functions have at most one return -// instruction in them. Additionally, it keeps track of which node is the new -// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode -// method will return a null pointer. +// This pass is used to ensure that functions have at most one return and one +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -22,73 +20,66 @@ #include "llvm/Transforms/Utils.h" using namespace llvm; -char UnifyFunctionExitNodes::ID = 0; +char UnifyFunctionExitNodesLegacyPass::ID = 0; -UnifyFunctionExitNodes::UnifyFunctionExitNodes() : FunctionPass(ID) { - initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry()); +UnifyFunctionExitNodesLegacyPass::UnifyFunctionExitNodesLegacyPass() + : FunctionPass(ID) { + initializeUnifyFunctionExitNodesLegacyPassPass( + *PassRegistry::getPassRegistry()); } -INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", +INITIALIZE_PASS(UnifyFunctionExitNodesLegacyPass, "mergereturn", "Unify function exit nodes", false, false) Pass *llvm::createUnifyFunctionExitNodesPass() { - return new UnifyFunctionExitNodes(); + return new UnifyFunctionExitNodesLegacyPass(); } -void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ +void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); // This is a cluster of orthogonal Transforms AU.addPreservedID(LowerSwitchID); } -// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new -// BasicBlock, and converting all returns to unconditional branches to this -// new basic block. The singular exit node is returned. -// -// If there are no return stmts in the Function, a null pointer is returned. -// -bool UnifyFunctionExitNodes::runOnFunction(Function &F) { - // Loop over all of the blocks in a function, tracking all of the blocks that - // return. - // - std::vector<BasicBlock*> ReturningBlocks; - std::vector<BasicBlock*> UnreachableBlocks; +namespace { + +bool unifyUnreachableBlocks(Function &F) { + std::vector<BasicBlock *> UnreachableBlocks; + for (BasicBlock &I : F) - if (isa<ReturnInst>(I.getTerminator())) - ReturningBlocks.push_back(&I); - else if (isa<UnreachableInst>(I.getTerminator())) + if (isa<UnreachableInst>(I.getTerminator())) UnreachableBlocks.push_back(&I); - // Then unreachable blocks. - if (UnreachableBlocks.empty()) { - UnreachableBlock = nullptr; - } else if (UnreachableBlocks.size() == 1) { - UnreachableBlock = UnreachableBlocks.front(); - } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), - "UnifiedUnreachableBlock", &F); - new UnreachableInst(F.getContext(), UnreachableBlock); - - for (BasicBlock *BB : UnreachableBlocks) { - BB->getInstList().pop_back(); // Remove the unreachable inst. - BranchInst::Create(UnreachableBlock, BB); - } + if (UnreachableBlocks.size() <= 1) + return false; + + BasicBlock *UnreachableBlock = + BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); + + for (BasicBlock *BB : UnreachableBlocks) { + BB->getInstList().pop_back(); // Remove the unreachable inst. + BranchInst::Create(UnreachableBlock, BB); } - // Now handle return blocks. - if (ReturningBlocks.empty()) { - ReturnBlock = nullptr; - return false; // No blocks return - } else if (ReturningBlocks.size() == 1) { - ReturnBlock = ReturningBlocks.front(); // Already has a single return block + return true; +} + +bool unifyReturnBlocks(Function &F) { + std::vector<BasicBlock *> ReturningBlocks; + + for (BasicBlock &I : F) + if (isa<ReturnInst>(I.getTerminator())) + ReturningBlocks.push_back(&I); + + if (ReturningBlocks.size() <= 1) return false; - } - // Otherwise, we need to insert a new basic block into the function, add a PHI - // nodes (if the function returns values), and convert all of the return - // instructions into unconditional branches. - // + // Insert a new basic block into the function, add PHI nodes (if the function + // returns values), and convert all of the return instructions into + // unconditional branches. BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); @@ -105,7 +96,6 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Loop over all of the blocks, replacing the return instruction with an // unconditional branch. - // for (BasicBlock *BB : ReturningBlocks) { // Add an incoming element to the PHI node for every return instruction that // is merging into this new block... @@ -115,6 +105,25 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } - ReturnBlock = NewRetBlock; + return true; } +} // namespace + +// Unify all exit nodes of the CFG by creating a new BasicBlock, and converting +// all returns to unconditional branches to this new basic block. Also, unify +// all unreachable blocks. +bool UnifyFunctionExitNodesLegacyPass::runOnFunction(Function &F) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed; +} + +PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + Changed |= unifyUnreachableBlocks(F); + Changed |= unifyReturnBlocks(F); + return Changed ? PreservedAnalyses() : PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index b10deee3907c..0b718ed6136e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -16,6 +16,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/UnifyLoopExits.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/InitializePasses.h" @@ -27,10 +29,10 @@ using namespace llvm; namespace { -struct UnifyLoopExits : public FunctionPass { +struct UnifyLoopExitsLegacyPass : public FunctionPass { static char ID; - UnifyLoopExits() : FunctionPass(ID) { - initializeUnifyLoopExitsPass(*PassRegistry::getPassRegistry()); + UnifyLoopExitsLegacyPass() : FunctionPass(ID) { + initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -46,17 +48,19 @@ struct UnifyLoopExits : public FunctionPass { }; } // namespace -char UnifyLoopExits::ID = 0; +char UnifyLoopExitsLegacyPass::ID = 0; -FunctionPass *llvm::createUnifyLoopExitsPass() { return new UnifyLoopExits(); } +FunctionPass *llvm::createUnifyLoopExitsPass() { + return new UnifyLoopExitsLegacyPass(); +} -INITIALIZE_PASS_BEGIN(UnifyLoopExits, "unify-loop-exits", +INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitch) +INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(UnifyLoopExits, "unify-loop-exits", +INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) @@ -80,7 +84,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L, const SetVector<BasicBlock *> &Incoming, BasicBlock *LoopExitBlock) { using InstVector = SmallVector<Instruction *, 8>; - using IIMap = DenseMap<Instruction *, InstVector>; + using IIMap = MapVector<Instruction *, InstVector>; IIMap ExternalUsers; for (auto BB : L->blocks()) { for (auto &I : *BB) { @@ -203,11 +207,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { return true; } -bool UnifyLoopExits::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() - << "\n"); - auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +static bool runImpl(LoopInfo &LI, DominatorTree &DT) { bool Changed = false; auto Loops = LI.getLoopsInPreorder(); @@ -218,3 +218,28 @@ bool UnifyLoopExits::runOnFunction(Function &F) { } return Changed; } + +bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { + LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName() + << "\n"); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + return runImpl(LI, DT); +} + +namespace llvm { + +PreservedAnalyses UnifyLoopExitsPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + + if (!runImpl(LI, DT)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp index 5b58548e54dc..c57cec6be676 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp @@ -13,8 +13,11 @@ #include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h" #include "llvm/ADT/SmallString.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MD5.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -27,13 +30,31 @@ static bool uniqueifyInternalLinkageNames(Module &M) { Md5.final(R); SmallString<32> Str; llvm::MD5::stringifyResult(R, Str); - std::string ModuleNameHash = (Twine(".") + Twine(Str)).str(); + // Convert MD5hash to Decimal. Demangler suffixes can either contain numbers + // or characters but not both. + APInt IntHash = APInt(128, Str.str(), 16); + // Prepend "__uniq" before the hash for tools like profilers to understand that + // this symbol is of internal linkage type. + std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str(); bool Changed = false; + MDBuilder MDB(M.getContext()); // Append the module hash to all internal linkage functions. for (auto &F : M) { if (F.hasInternalLinkage()) { F.setName(F.getName() + ModuleNameHash); + F.addFnAttr("sample-profile-suffix-elision-policy", "selected"); + // Replace linkage names in the debug metadata. + if (DISubprogram *SP = F.getSubprogram()) { + if (SP->getRawLinkageName()) { + auto *Name = MDB.createString(F.getName()); + SP->replaceRawLinkageName(Name); + if (DISubprogram *SPDecl = SP->getDeclaration()) { + if (SPDecl->getRawLinkageName()) + SPDecl->replaceRawLinkageName(Name); + } + } + } Changed = true; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp index ce98a739bea8..73c0532f3fd5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp @@ -34,17 +34,17 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLibCallsShrinkWrapLegacyPassPass(Registry); initializeLoopSimplifyPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); - initializeLowerSwitchPass(Registry); + initializeLowerSwitchLegacyPassPass(Registry); initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); - initializeStripNonLineTableDebugInfoPass(Registry); - initializeUnifyFunctionExitNodesPass(Registry); + initializeStripNonLineTableDebugLegacyPassPass(Registry); + initializeUnifyFunctionExitNodesLegacyPassPass(Registry); initializeMetaRenamerPass(Registry); - initializeStripGCRelocatesPass(Registry); + initializeStripGCRelocatesLegacyPass(Registry); initializePredicateInfoPrinterLegacyPassPass(Registry); initializeInjectTLIMappingsLegacyPass(Registry); initializeFixIrreduciblePass(Registry); - initializeUnifyLoopExitsPass(Registry); + initializeUnifyLoopExitsLegacyPassPass(Registry); initializeUniqueInternalLinkageNamesLegacyPassPass(Registry); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp index 6ff08cd28712..61cd8595a73b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) { bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL) { Type *StoredTy = StoredVal->getType(); + if (StoredTy == LoadTy) return true; @@ -36,17 +37,29 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize()) return false; + bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); + bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { + if (StoredNI != LoadNI) { // As a special case, allow coercion of memset used to initialize // an array w/null. Despite non-integral pointers not generally having a // specific bit pattern, we do assume null is zero. if (auto *CI = dyn_cast<Constant>(StoredVal)) return CI->isNullValue(); return false; + } else if (StoredNI && LoadNI && + StoredTy->getPointerAddressSpace() != + LoadTy->getPointerAddressSpace()) { + return false; } - + + + // The implementation below uses inttoptr for vectors of unequal size; we + // can't allow this for non integral pointers. We could teach it to extract + // exact subvectors if desired. + if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize()) + return false; + return true; } @@ -223,14 +236,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, if (isFirstClassAggregateOrScalableType(StoredVal->getType())) return -1; - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - // Allow casts of zero values to null as a special case - auto *CI = dyn_cast<Constant>(StoredVal); - if (!CI || !CI->isNullValue()) - return -1; - } + if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL)) + return -1; Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize = @@ -333,9 +340,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) + if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL)) return -1; Value *DepPtr = DepLI->getPointerOperand(); @@ -393,7 +398,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (!Src) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); + GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src)); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return -1; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index f1b3fe8e2fa9..930e0b7ee01a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -167,12 +167,9 @@ public: void flush(); private: - void mapGlobalInitializer(GlobalVariable &GV, Constant &Init); void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, bool IsOldCtorDtor, ArrayRef<Constant *> NewMembers); - void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target); - void remapFunction(Function &F, ValueToValueMapTy &VM); ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; } @@ -822,11 +819,15 @@ void Mapper::flush() { break; case WorklistEntry::MapAppendingVar: { unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; + // mapAppendingVariable call can change AppendingInits if initalizer for + // the variable depends on another appending global, because of that inits + // need to be extracted and updated before the call. + SmallVector<Constant *, 8> NewInits( + drop_begin(AppendingInits, PrefixSize)); + AppendingInits.resize(PrefixSize); mapAppendingVariable(*E.Data.AppendingGV.GV, E.Data.AppendingGV.InitPrefix, - E.AppendingGVIsOldCtorDtor, - makeArrayRef(AppendingInits).slice(PrefixSize)); - AppendingInits.resize(PrefixSize); + E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits)); break; } case WorklistEntry::MapGlobalIndirectSymbol: @@ -900,14 +901,13 @@ void Mapper::remapInstruction(Instruction *I) { LLVMContext &C = CB->getContext(); AttributeList Attrs = CB->getAttributes(); for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { - if (Attrs.hasAttribute(i, Attribute::ByVal)) { - Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType(); - if (!Ty) - continue; - - Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal); - Attrs = Attrs.addAttribute( - C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty))); + for (Attribute::AttrKind TypedAttr : + {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) { + if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) { + Attrs = Attrs.replaceAttributeType(C, i, TypedAttr, + TypeMapper->remapType(Ty)); + break; + } } } CB->setAttributes(Attrs); |