diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils')
| -rw-r--r-- | llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 71 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/CloneModule.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/GuardUtils.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/Local.cpp | 57 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/LoopUtils.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SSAUpdater.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SampleProfileInference.cpp | 462 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 68 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 52 |
13 files changed, 626 insertions, 140 deletions
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 6469c899feea..d6d6b1a7fa09 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -235,22 +235,26 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // These dominator edges will be redirected from Pred. std::vector<DominatorTree::UpdateType> Updates; if (DTU) { - SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB)); + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenSuccs; SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB), succ_end(PredBB)); - Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1); + Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1); // Add insert edges first. Experimentally, for the particular case of two // blocks that can be merged, with a single successor and single predecessor // respectively, it is beneficial to have all insert updates first. Deleting // edges first may lead to unreachable blocks, followed by inserting edges // making the blocks reachable again. Such DT updates lead to high compile // times. We add inserts before deletes here to reduce compile time. - for (BasicBlock *SuccOfBB : SuccsOfBB) + for (BasicBlock *SuccOfBB : successors(BB)) // This successor of BB may already be a PredBB's successor. if (!SuccsOfPredBB.contains(SuccOfBB)) - Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); - for (BasicBlock *SuccOfBB : SuccsOfBB) - Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); + SeenSuccs.clear(); + for (BasicBlock *SuccOfBB : successors(BB)) + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); } @@ -804,14 +808,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, if (DTU) { SmallVector<DominatorTree::UpdateType, 8> Updates; // Old dominates New. New node dominates all other nodes dominated by Old. - SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New), - succ_end(New)); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld; Updates.push_back({DominatorTree::Insert, Old, New}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size()); - for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) { - Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld}); - Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld}); - } + Updates.reserve(Updates.size() + 2 * succ_size(New)); + for (BasicBlock *SuccessorOfOld : successors(New)) + if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) { + Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld}); + Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld}); + } DTU->applyUpdates(Updates); } else if (DT) @@ -870,14 +874,14 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, SmallVector<DominatorTree::UpdateType, 8> DTUpdates; // New dominates Old. The predecessor nodes of the Old node dominate // New node. - SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New), - pred_end(New)); + SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld; DTUpdates.push_back({DominatorTree::Insert, New, Old}); - DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size()); - for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) { - DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New}); - DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old}); - } + DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New)); + for (BasicBlock *PredecessorOfOld : predecessors(New)) + if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) { + DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New}); + DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old}); + } DTU->applyUpdates(DTUpdates); @@ -910,13 +914,14 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } else { // Split block expects NewBB to have a non-empty set of predecessors. SmallVector<DominatorTree::UpdateType, 8> Updates; - SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end()); + SmallPtrSet<BasicBlock *, 8> UniquePreds; Updates.push_back({DominatorTree::Insert, NewBB, OldBB}); - Updates.reserve(Updates.size() + 2 * UniquePreds.size()); - for (auto *UniquePred : UniquePreds) { - Updates.push_back({DominatorTree::Insert, UniquePred, NewBB}); - Updates.push_back({DominatorTree::Delete, UniquePred, OldBB}); - } + Updates.reserve(Updates.size() + 2 * Preds.size()); + for (auto *Pred : Preds) + if (UniquePreds.insert(Pred).second) { + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::Delete, Pred, OldBB}); + } DTU->applyUpdates(Updates); } } else if (DT) { @@ -1376,14 +1381,14 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore, BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); if (DTU) { - SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail), - succ_end(Tail)); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead; Updates.push_back({DominatorTree::Insert, Head, Tail}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size()); - for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) { - Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead}); - Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead}); - } + Updates.reserve(Updates.size() + 2 * succ_size(Tail)); + for (BasicBlock *SuccessorOfHead : successors(Tail)) + if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) { + Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead}); + Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead}); + } } Instruction *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 957935398972..580cfd80141e 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -452,18 +452,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_mempcpy: case LibFunc_memccpy: + Changed |= setWillReturn(F); + LLVM_FALLTHROUGH; + case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); - Changed |= setWillReturn(F); Changed |= setDoesNotAlias(F, 0); Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc_memcpy_chk: - Changed |= setDoesNotThrow(F); - return Changed; case LibFunc_memalign: Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetNoUndef(F); @@ -1018,9 +1017,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; - // TODO: add LibFunc entries for: - // case LibFunc_memset_pattern4: - // case LibFunc_memset_pattern8: + case LibFunc_memset_pattern4: + case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); @@ -1029,10 +1027,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memset: - Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); - Changed |= setDoesNotThrow(F); + LLVM_FALLTHROUGH; + case LibFunc_memset_chk: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotThrow(F); return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp index 200deca4b317..57c273a0e3c5 100644 --- a/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -135,10 +135,18 @@ std::unique_ptr<Module> llvm::CloneModule( // Similarly, copy over function bodies now... // for (const Function &I : M) { - if (I.isDeclaration()) + Function *F = cast<Function>(VMap[&I]); + + if (I.isDeclaration()) { + // Copy over metadata for declarations since we're not doing it below in + // CloneFunctionInto(). + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I.getAllMetadata(MDs); + for (auto MD : MDs) + F->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); continue; + } - Function *F = cast<Function>(VMap[&I]); if (!ShouldCloneDefinition(&I)) { // Skip after setting the correct linkage for an external reference. F->setLinkage(GlobalValue::ExternalLinkage); diff --git a/llvm/lib/Transforms/Utils/GuardUtils.cpp b/llvm/lib/Transforms/Utils/GuardUtils.cpp index 4dbcbf80d3da..7c310f16d46e 100644 --- a/llvm/lib/Transforms/Utils/GuardUtils.cpp +++ b/llvm/lib/Transforms/Utils/GuardUtils.cpp @@ -74,7 +74,7 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, {}, {}, nullptr, "widenable_cond"); CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC, "exiplicit_guard_cond")); - assert(isWidenableBranch(CheckBI) && "sanity check"); + assert(isWidenableBranch(CheckBI) && "Branch must be widenable."); } } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f4776589910f..997667810580 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1218,10 +1218,9 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { if (!RI || !isa<CallBase>(RI->getOperand(0))) continue; auto *RetVal = cast<CallBase>(RI->getOperand(0)); - // Sanity check that the cloned RetVal exists and is a call, otherwise we - // cannot add the attributes on the cloned RetVal. - // Simplification during inlining could have transformed the cloned - // instruction. + // Check that the cloned RetVal exists and is a call, otherwise we cannot + // add the attributes on the cloned RetVal. Simplification during inlining + // could have transformed the cloned instruction. auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal)); if (!NewRetVal) continue; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 74ab37fadf36..ec926b1f5a94 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -529,8 +529,8 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( std::function<void(Value *)> AboutToDeleteCallback) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { - auto *I = cast<Instruction>(DeadInsts[S]); - if (!isInstructionTriviallyDead(I)) { + auto *I = dyn_cast<Instruction>(DeadInsts[S]); + if (!I || !isInstructionTriviallyDead(I)) { DeadInsts[S] = nullptr; ++Alive; } @@ -760,15 +760,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { - SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB), - pred_end(PredBB)); - Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 2> SeenPreds; + Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) // This predecessor of PredBB may already have DestBB as a successor. if (PredOfPredBB != PredBB) - Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) - Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); + SeenPreds.clear(); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } @@ -1096,16 +1099,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenPreds; // All predecessors of BB will be moved to Succ. - SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB)); SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ)); - Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1); - for (auto *PredOfBB : PredsOfBB) + Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1); + for (auto *PredOfBB : predecessors(BB)) // This predecessor of BB may already have Succ as a successor. if (!PredsOfSucc.contains(PredOfBB)) - Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); - for (auto *PredOfBB : PredsOfBB) - Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + SeenPreds.clear(); + for (auto *PredOfBB : predecessors(BB)) + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); Updates.push_back({DominatorTree::Delete, BB, Succ}); } @@ -2190,26 +2197,6 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } -void llvm::createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - auto *BB = Switch->getParent(); - auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); - BasicBlock *NewDefaultBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), - OrigDefaultBlock); - new UnreachableInst(Switch->getContext(), NewDefaultBlock); - Switch->setDefaultDest(&*NewDefaultBlock); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) - Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); - DTU->applyUpdates(Updates); - } -} - BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, BasicBlock *UnwindEdge, DomTreeUpdater *DTU) { diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index a92cb6a313d3..bb719a499a4c 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -623,15 +623,13 @@ bool llvm::UnrollRuntimeLoopRemainder( if (!SE) return false; - // Only unroll loops with a computable trip count, and the trip count needs - // to be an int value (allowing a pointer type is a TODO item). + // Only unroll loops with a computable trip count. // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); - if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) { + if (isa<SCEVCouldNotCompute>(BECountSC)) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 68572d479742..c8e42acdffb3 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1049,6 +1049,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, return Builder.CreateOrReduce(Src); case RecurKind::Xor: return Builder.CreateXorReduce(Src); + case RecurKind::FMulAdd: case RecurKind::FAdd: return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), Src); @@ -1091,7 +1092,8 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, Value *llvm::createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start) { - assert(Desc.getRecurrenceKind() == RecurKind::FAdd && + assert((Desc.getRecurrenceKind() == RecurKind::FAdd || + Desc.getRecurrenceKind() == RecurKind::FMulAdd) && "Unexpected reduction kind"); assert(Src->getType()->isVectorTy() && "Expected a vector type"); assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 5893ce15b129..7d9992176658 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) { // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { + if (!shouldDelete(User)) + continue; + // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp new file mode 100644 index 000000000000..9495e442e0bf --- /dev/null +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -0,0 +1,462 @@ +//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a profile inference algorithm. Given an incomplete and +// possibly imprecise block counts, the algorithm reconstructs realistic block +// and edge counts that satisfy flow conservation rules, while minimally modify +// input block counts. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SampleProfileInference.h" +#include "llvm/Support/Debug.h" +#include <queue> +#include <set> + +using namespace llvm; +#define DEBUG_TYPE "sample-profile-inference" + +namespace { + +/// A value indicating an infinite flow/capacity/weight of a block/edge. +/// Not using numeric_limits<int64_t>::max(), as the values can be summed up +/// during the execution. +static constexpr int64_t INF = ((int64_t)1) << 50; + +/// The minimum-cost maximum flow algorithm. +/// +/// The algorithm finds the maximum flow of minimum cost on a given (directed) +/// network using a modified version of the classical Moore-Bellman-Ford +/// approach. The algorithm applies a number of augmentation iterations in which +/// flow is sent along paths of positive capacity from the source to the sink. +/// The worst-case time complexity of the implementation is O(v(f)*m*n), where +/// where m is the number of edges, n is the number of vertices, and v(f) is the +/// value of the maximum flow. However, the observed running time on typical +/// instances is sub-quadratic, that is, o(n^2). +/// +/// The input is a set of edges with specified costs and capacities, and a pair +/// of nodes (source and sink). The output is the flow along each edge of the +/// minimum total cost respecting the given edge capacities. +class MinCostMaxFlow { +public: + // Initialize algorithm's data structures for a network of a given size. + void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { + Source = SourceNode; + Target = SinkNode; + + Nodes = std::vector<Node>(NodeCount); + Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); + } + + // Run the algorithm. + int64_t run() { + // Find an augmenting path and update the flow along the path + size_t AugmentationIters = 0; + while (findAugmentingPath()) { + augmentFlowAlongPath(); + AugmentationIters++; + } + + // Compute the total flow and its cost + int64_t TotalCost = 0; + int64_t TotalFlow = 0; + for (uint64_t Src = 0; Src < Nodes.size(); Src++) { + for (auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) { + TotalCost += Edge.Cost * Edge.Flow; + if (Src == Source) + TotalFlow += Edge.Flow; + } + } + } + LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters + << " iterations with " << TotalFlow << " total flow" + << " of " << TotalCost << " cost\n"); + (void)TotalFlow; + return TotalCost; + } + + /// Adding an edge to the network with a specified capacity and a cost. + /// Multiple edges between a pair of nodes are allowed but self-edges + /// are not supported. + void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { + assert(Capacity > 0 && "adding an edge of zero capacity"); + assert(Src != Dst && "loop edge are not supported"); + + Edge SrcEdge; + SrcEdge.Dst = Dst; + SrcEdge.Cost = Cost; + SrcEdge.Capacity = Capacity; + SrcEdge.Flow = 0; + SrcEdge.RevEdgeIndex = Edges[Dst].size(); + + Edge DstEdge; + DstEdge.Dst = Src; + DstEdge.Cost = -Cost; + DstEdge.Capacity = 0; + DstEdge.Flow = 0; + DstEdge.RevEdgeIndex = Edges[Src].size(); + + Edges[Src].push_back(SrcEdge); + Edges[Dst].push_back(DstEdge); + } + + /// Adding an edge to the network of infinite capacity and a given cost. + void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { + addEdge(Src, Dst, INF, Cost); + } + + /// Get the total flow from a given source node. + /// Returns a list of pairs (target node, amount of flow to the target). + const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { + std::vector<std::pair<uint64_t, int64_t>> Flow; + for (auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) + Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); + } + return Flow; + } + + /// Get the total flow between a pair of nodes. + int64_t getFlow(uint64_t Src, uint64_t Dst) const { + int64_t Flow = 0; + for (auto &Edge : Edges[Src]) { + if (Edge.Dst == Dst) { + Flow += Edge.Flow; + } + } + return Flow; + } + + /// A cost of increasing a block's count by one. + static constexpr int64_t AuxCostInc = 10; + /// A cost of decreasing a block's count by one. + static constexpr int64_t AuxCostDec = 20; + /// A cost of increasing a count of zero-weight block by one. + static constexpr int64_t AuxCostIncZero = 11; + /// A cost of increasing the entry block's count by one. + static constexpr int64_t AuxCostIncEntry = 40; + /// A cost of decreasing the entry block's count by one. + static constexpr int64_t AuxCostDecEntry = 10; + /// A cost of taking an unlikely jump. + static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; + +private: + /// Check for existence of an augmenting path with a positive capacity. + bool findAugmentingPath() { + // Initialize data structures + for (auto &Node : Nodes) { + Node.Distance = INF; + Node.ParentNode = uint64_t(-1); + Node.ParentEdgeIndex = uint64_t(-1); + Node.Taken = false; + } + + std::queue<uint64_t> Queue; + Queue.push(Source); + Nodes[Source].Distance = 0; + Nodes[Source].Taken = true; + while (!Queue.empty()) { + uint64_t Src = Queue.front(); + Queue.pop(); + Nodes[Src].Taken = false; + // Although the residual network contains edges with negative costs + // (in particular, backward edges), it can be shown that there are no + // negative-weight cycles and the following two invariants are maintained: + // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, + // where Dist is the length of the shortest path between two nodes. This + // allows to prune the search-space of the path-finding algorithm using + // the following early-stop criteria: + // -- If we find a path with zero-distance from Source to Target, stop the + // search, as the path is the shortest since Dist[Source, Target] >= 0; + // -- If we have Dist[Source, V] > Dist[Source, Target], then do not + // process node V, as it is guaranteed _not_ to be on a shortest path + // from Source to Target; it follows from inequalities + // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] + // >= Dist[Source, V] + if (Nodes[Target].Distance == 0) + break; + if (Nodes[Src].Distance > Nodes[Target].Distance) + continue; + + // Process adjacent edges + for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { + auto &Edge = Edges[Src][EdgeIdx]; + if (Edge.Flow < Edge.Capacity) { + uint64_t Dst = Edge.Dst; + int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; + if (Nodes[Dst].Distance > NewDistance) { + // Update the distance and the parent node/edge + Nodes[Dst].Distance = NewDistance; + Nodes[Dst].ParentNode = Src; + Nodes[Dst].ParentEdgeIndex = EdgeIdx; + // Add the node to the queue, if it is not there yet + if (!Nodes[Dst].Taken) { + Queue.push(Dst); + Nodes[Dst].Taken = true; + } + } + } + } + } + + return Nodes[Target].Distance != INF; + } + + /// Update the current flow along the augmenting path. + void augmentFlowAlongPath() { + // Find path capacity + int64_t PathCapacity = INF; + uint64_t Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); + Now = Pred; + } + + assert(PathCapacity > 0 && "found incorrect augmenting path"); + + // Update the flow along the path + Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; + + Edge.Flow += PathCapacity; + RevEdge.Flow -= PathCapacity; + + Now = Pred; + } + } + + /// An node in a flow network. + struct Node { + /// The cost of the cheapest path from the source to the current node. + int64_t Distance; + /// The node preceding the current one in the path. + uint64_t ParentNode; + /// The index of the edge between ParentNode and the current node. + uint64_t ParentEdgeIndex; + /// An indicator of whether the current node is in a queue. + bool Taken; + }; + /// An edge in a flow network. + struct Edge { + /// The cost of the edge. + int64_t Cost; + /// The capacity of the edge. + int64_t Capacity; + /// The current flow on the edge. + int64_t Flow; + /// The destination node of the edge. + uint64_t Dst; + /// The index of the reverse edge between Dst and the current node. + uint64_t RevEdgeIndex; + }; + + /// The set of network nodes. + std::vector<Node> Nodes; + /// The set of network edges. + std::vector<std::vector<Edge>> Edges; + /// Source node of the flow. + uint64_t Source; + /// Target (sink) node of the flow. + uint64_t Target; +}; + +/// Initializing flow network for a given function. +/// +/// Every block is split into three nodes that are responsible for (i) an +/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or +/// reduction of the block weight. +void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { + uint64_t NumBlocks = Func.Blocks.size(); + assert(NumBlocks > 1 && "Too few blocks in a function"); + LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); + + // Pre-process data: make sure the entry weight is at least 1 + if (Func.Blocks[Func.Entry].Weight == 0) { + Func.Blocks[Func.Entry].Weight = 1; + } + // Introducing dummy source/sink pairs to allow flow circulation. + // The nodes corresponding to blocks of Func have indicies in the range + // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. + uint64_t S = 3 * NumBlocks; + uint64_t T = S + 1; + uint64_t S1 = S + 2; + uint64_t T1 = S + 3; + + Network.initialize(3 * NumBlocks + 4, S1, T1); + + // Create three nodes for every block of the function + for (uint64_t B = 0; B < NumBlocks; B++) { + auto &Block = Func.Blocks[B]; + assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && + "non-zero weight of a block w/o weight except for an entry"); + + // Split every block into two nodes + uint64_t Bin = 3 * B; + uint64_t Bout = 3 * B + 1; + uint64_t Baux = 3 * B + 2; + if (Block.Weight > 0) { + Network.addEdge(S1, Bout, Block.Weight, 0); + Network.addEdge(Bin, T1, Block.Weight, 0); + } + + // Edges from S and to T + assert((!Block.isEntry() || !Block.isExit()) && + "a block cannot be an entry and an exit"); + if (Block.isEntry()) { + Network.addEdge(S, Bin, 0); + } else if (Block.isExit()) { + Network.addEdge(Bout, T, 0); + } + + // An auxiliary node to allow increase/reduction of block counts: + // We assume that decreasing block counts is more expensive than increasing, + // and thus, setting separate costs here. In the future we may want to tune + // the relative costs so as to maximize the quality of generated profiles. + int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; + int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; + if (Block.UnknownWeight) { + // Do not penalize changing weights of blocks w/o known profile count + AuxCostInc = 0; + AuxCostDec = 0; + } else { + // Increasing the count for "cold" blocks with zero initial count is more + // expensive than for "hot" ones + if (Block.Weight == 0) { + AuxCostInc = MinCostMaxFlow::AuxCostIncZero; + } + // Modifying the count of the entry block is expensive + if (Block.isEntry()) { + AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; + AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; + } + } + // For blocks with self-edges, do not penalize a reduction of the count, + // as all of the increase can be attributed to the self-edge + if (Block.HasSelfEdge) { + AuxCostDec = 0; + } + + Network.addEdge(Bin, Baux, AuxCostInc); + Network.addEdge(Baux, Bout, AuxCostInc); + if (Block.Weight > 0) { + Network.addEdge(Bout, Baux, AuxCostDec); + Network.addEdge(Baux, Bin, AuxCostDec); + } + } + + // Creating edges for every jump + for (auto &Jump : Func.Jumps) { + uint64_t Src = Jump.Source; + uint64_t Dst = Jump.Target; + if (Src != Dst) { + uint64_t SrcOut = 3 * Src + 1; + uint64_t DstIn = 3 * Dst; + uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; + Network.addEdge(SrcOut, DstIn, Cost); + } + } + + // Make sure we have a valid flow circulation + Network.addEdge(T, S, 0); +} + +/// Extract resulting block and edge counts from the flow network. +void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { + uint64_t NumBlocks = Func.Blocks.size(); + + // Extract resulting block counts + for (uint64_t Src = 0; Src < NumBlocks; Src++) { + auto &Block = Func.Blocks[Src]; + uint64_t SrcOut = 3 * Src + 1; + int64_t Flow = 0; + for (auto &Adj : Network.getFlow(SrcOut)) { + uint64_t DstIn = Adj.first; + int64_t DstFlow = Adj.second; + bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); + if (!IsAuxNode || Block.HasSelfEdge) { + Flow += DstFlow; + } + } + Block.Flow = Flow; + assert(Flow >= 0 && "negative block flow"); + } + + // Extract resulting jump counts + for (auto &Jump : Func.Jumps) { + uint64_t Src = Jump.Source; + uint64_t Dst = Jump.Target; + int64_t Flow = 0; + if (Src != Dst) { + uint64_t SrcOut = 3 * Src + 1; + uint64_t DstIn = 3 * Dst; + Flow = Network.getFlow(SrcOut, DstIn); + } else { + uint64_t SrcOut = 3 * Src + 1; + uint64_t SrcAux = 3 * Src + 2; + int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); + if (AuxFlow > 0) + Flow = AuxFlow; + } + Jump.Flow = Flow; + assert(Flow >= 0 && "negative jump flow"); + } +} + +#ifndef NDEBUG +/// Verify that the computed flow values satisfy flow conservation rules +void verifyWeights(const FlowFunction &Func) { + const uint64_t NumBlocks = Func.Blocks.size(); + auto InFlow = std::vector<uint64_t>(NumBlocks, 0); + auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); + for (auto &Jump : Func.Jumps) { + InFlow[Jump.Target] += Jump.Flow; + OutFlow[Jump.Source] += Jump.Flow; + } + + uint64_t TotalInFlow = 0; + uint64_t TotalOutFlow = 0; + for (uint64_t I = 0; I < NumBlocks; I++) { + auto &Block = Func.Blocks[I]; + if (Block.isEntry()) { + TotalInFlow += Block.Flow; + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + } else if (Block.isExit()) { + TotalOutFlow += Block.Flow; + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } else { + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } + } + assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); +} +#endif + +} // end of anonymous namespace + +/// Apply the profile inference algorithm for a given flow function +void llvm::applyFlowInference(FlowFunction &Func) { + // Create and apply an inference network model + auto InferenceNetwork = MinCostMaxFlow(); + initializeNetwork(InferenceNetwork, Func); + InferenceNetwork.run(); + + // Extract flow values for every block and every edge + extractWeights(InferenceNetwork, Func); + +#ifndef NDEBUG + // Verify the result + verifyWeights(Func); +#endif +} diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp index 6d995cf4c048..ea0e8343eb88 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp @@ -34,6 +34,10 @@ cl::opt<bool> NoWarnSampleUnused( cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. ")); +cl::opt<bool> SampleProfileUseProfi( + "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Use profi to infer block and edge counts.")); + namespace sampleprofutil { /// Return true if the given callsite is hot wrt to hot cutoff threshold. diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index a042146d7ace..71c15d5c51fc 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" @@ -1833,22 +1834,6 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { return V; } -/// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursively -/// but this is better than nothing. -static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) { - if (isa<OverflowingBinaryOperator>(I)) { - if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { - if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) - return true; - if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) - return true; - } - } else if (isa<PossiblyExactOperator>(I) && I->isExact()) - return true; - return false; -} - ScalarEvolution::ValueOffsetPair SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { @@ -1872,8 +1857,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S, if (S->getType() == V->getType() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) && - !SCEVLostPoisonFlags(S, EntInst)) + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) return {V, Offset}; } } @@ -1952,26 +1936,36 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!V) V = visit(S); - else if (VO.second) { - if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { - Type *Ety = Vty->getPointerElementType(); - int64_t Offset = VO.second->getSExtValue(); - int64_t ESize = SE.getTypeSizeInBits(Ety); - if ((Offset * 8) % ESize == 0) { - ConstantInt *Idx = + else { + // If we're reusing an existing instruction, we are effectively CSEing two + // copies of the instruction (with potentially different flags). As such, + // we need to drop any poison generating flags unless we can prove that + // said flags must be valid for all new users. + if (auto *I = dyn_cast<Instruction>(V)) + if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) + I->dropPoisonGeneratingFlags(); + + if (VO.second) { + if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { + Type *Ety = Vty->getPointerElementType(); + int64_t Offset = VO.second->getSExtValue(); + int64_t ESize = SE.getTypeSizeInBits(Ety); + if ((Offset * 8) % ESize == 0) { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize); - V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); - } else { - ConstantInt *Idx = + V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); + } else { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); + unsigned AS = Vty->getAddressSpace(); + V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); + V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, + "uglygep"); + V = Builder.CreateBitCast(V, Vty); + } + } else { + V = Builder.CreateSub(V, VO.second); } - } else { - V = Builder.CreateSub(V, VO.second); } } // Remember the expanded value for this SCEV at this location. @@ -2180,7 +2174,9 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, } // Use expand's logic which is used for reusing a previous Value in - // ExprValueMap. + // ExprValueMap. Note that we don't currently model the cost of + // needing to drop poison generating flags on the instruction if we + // want to reuse it. We effectively assume that has zero cost. ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); if (VO.first) return VO; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f467de5f924e..afa3ecde77f9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3936,7 +3936,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - SmallPtrSet<BasicBlock *, 2> RemovedSuccessors; + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { @@ -4782,6 +4782,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + auto *BB = Switch->getParent(); + auto *OrigDefaultBlock = Switch->getDefaultDest(); + OrigDefaultBlock->removePredecessor(BB); + BasicBlock *NewDefaultBlock = BasicBlock::Create( + BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), + OrigDefaultBlock); + new UnreachableInst(Switch->getContext(), NewDefaultBlock); + Switch->setDefaultDest(&*NewDefaultBlock); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); + if (!is_contained(successors(BB), OrigDefaultBlock)) + Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); + DTU->applyUpdates(Updates); + } +} + /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, @@ -4927,10 +4947,14 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallVector<BasicBlock *, 8> UniqueSuccessors; for (auto &Case : SI->cases()) { auto *Successor = Case.getCaseSuccessor(); - if (DTU) + if (DTU) { + if (!NumPerSuccessorCases.count(Successor)) + UniqueSuccessors.push_back(Successor); ++NumPerSuccessorCases[Successor]; + } const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { @@ -4973,9 +4997,9 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, if (DTU) { std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + for (auto *Successor : UniqueSuccessors) + if (NumPerSuccessorCases[Successor] == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor}); DTU->applyUpdates(Updates); } @@ -6040,15 +6064,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (Succ == SI->getDefaultDest()) continue; Succ->removePredecessor(BB); - RemovedSuccessors.insert(Succ); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } SI->eraseFromParent(); - if (DTU) { - for (BasicBlock *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + if (DTU) DTU->applyUpdates(Updates); - } ++NumLookupTables; if (NeedMask) @@ -6215,7 +6237,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; - SmallPtrSet<BasicBlock *, 8> RemovedSuccs; + SmallSetVector<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { @@ -6305,8 +6327,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. - SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - for (BasicBlock *Pred : Preds) { + SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : UniquePreds) { InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); @@ -6323,8 +6345,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, if (isa<DbgInfoIntrinsic>(Inst)) Inst.eraseFromParent(); - SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB)); - for (BasicBlock *Succ : Succs) { + SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : UniqueSuccs) { Succ->removePredecessor(BB); if (DTU) Updates.push_back({DominatorTree::Delete, BB, Succ}); |
