diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
commit | 145449b1e420787bb99721a429341fa6be3adfb6 (patch) | |
tree | 1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/Utils/SimplifyCFG.cpp | |
parent | ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff) |
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 818 |
1 files changed, 615 insertions, 203 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 335ac03ccb52..567b866f7777 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -27,7 +27,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemorySSA.h" @@ -50,7 +50,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -58,7 +57,6 @@ #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -74,7 +72,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -94,8 +91,8 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" cl::opt<bool> llvm::RequireAndPreserveDomTree( - "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "simplifycfg-require-and-preserve-domtree", cl::Hidden, + cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " "into preserving DomTree,")); @@ -167,6 +164,14 @@ static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier( "to fold branch to common destination when vector operations are " "present")); +static cl::opt<bool> EnableMergeCompatibleInvokes( + "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), + cl::desc("Allow SimplifyCFG to merge invokes together when appropriate")); + +static cl::opt<unsigned> MaxSwitchCasesPerResult( + "max-switch-cases-per-result", cl::Hidden, cl::init(16), + cl::desc("Limit cases to analyze when converting a switch to select")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -192,6 +197,8 @@ STATISTIC(NumSinkCommonInstrs, STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumInvokes, "Number of invokes with empty resume blocks simplified into calls"); +STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); +STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); namespace { @@ -291,6 +298,34 @@ public: } // end anonymous namespace +/// Return true if all the PHI nodes in the basic block \p BB +/// receive compatible (identical) incoming values when coming from +/// all of the predecessor blocks that are specified in \p IncomingBlocks. +/// +/// Note that if the values aren't exactly identical, but \p EquivalenceSet +/// is provided, and *both* of the values are present in the set, +/// then they are considered equal. +static bool IncomingValuesAreCompatible( + BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks, + SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) { + assert(IncomingBlocks.size() == 2 && + "Only for a pair of incoming blocks at the time!"); + + // FIXME: it is okay if one of the incoming values is an `undef` value, + // iff the other incoming value is guaranteed to be a non-poison value. + // FIXME: it is okay if one of the incoming values is a `poison` value. + return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) { + Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]); + Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]); + if (IV0 == IV1) + return true; + if (EquivalenceSet && EquivalenceSet->contains(IV0) && + EquivalenceSet->contains(IV1)) + return true; + return false; + }); +} + /// Return true if it is safe to merge these two /// terminator instructions together. static bool @@ -307,17 +342,17 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); bool Fail = false; - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != - PN->getIncomingValueForBlock(SI2BB)) { - if (FailBlocks) - FailBlocks->insert(Succ); - Fail = true; - } - } + for (BasicBlock *Succ : successors(SI2BB)) { + if (!SI1Succs.count(Succ)) + continue; + if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB})) + continue; + Fail = true; + if (FailBlocks) + FailBlocks->insert(Succ); + else + break; + } return !Fail; } @@ -347,6 +382,13 @@ static InstructionCost computeSpeculationCost(const User *I, return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); } +/// Check whether this is a potentially trapping constant. +static bool canTrap(const Value *V) { + if (auto *C = dyn_cast<Constant>(V)) + return C->canTrap(); + return false; +} + /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -381,10 +423,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, if (!I) { // Non-instructions all dominate instructions, but not all constantexprs // can be executed unconditionally. - if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) - if (C->canTrap()) - return false; - return true; + return !canTrap(V); } BasicBlock *PBB = I->getParent(); @@ -1459,7 +1498,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, return false; if (!I1NonDbg->isTerminator()) return false; - // Now we know that we only need to hoist debug instrinsics and the + // Now we know that we only need to hoist debug intrinsics and the // terminator. Let the loop below handle those 2 cases. } @@ -2212,6 +2251,320 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, return Changed; } +namespace { + +struct CompatibleSets { + using SetTy = SmallVector<InvokeInst *, 2>; + + SmallVector<SetTy, 1> Sets; + + static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes); + + SetTy &getCompatibleSet(InvokeInst *II); + + void insert(InvokeInst *II); +}; + +CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { + // Perform a linear scan over all the existing sets, see if the new `invoke` + // is compatible with any particular set. Since we know that all the `invokes` + // within a set are compatible, only check the first `invoke` in each set. + // WARNING: at worst, this has quadratic complexity. + for (CompatibleSets::SetTy &Set : Sets) { + if (CompatibleSets::shouldBelongToSameSet({Set.front(), II})) + return Set; + } + + // Otherwise, we either had no sets yet, or this invoke forms a new set. + return Sets.emplace_back(); +} + +void CompatibleSets::insert(InvokeInst *II) { + getCompatibleSet(II).emplace_back(II); +} + +bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) { + assert(Invokes.size() == 2 && "Always called with exactly two candidates."); + + // Can we theoretically merge these `invoke`s? + auto IsIllegalToMerge = [](InvokeInst *II) { + return II->cannotMerge() || II->isInlineAsm(); + }; + if (any_of(Invokes, IsIllegalToMerge)) + return false; + + // Either both `invoke`s must be direct, + // or both `invoke`s must be indirect. + auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); }; + bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall); + bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall); + if (HaveIndirectCalls) { + if (!AllCallsAreIndirect) + return false; + } else { + // All callees must be identical. + Value *Callee = nullptr; + for (InvokeInst *II : Invokes) { + Value *CurrCallee = II->getCalledOperand(); + assert(CurrCallee && "There is always a called operand."); + if (!Callee) + Callee = CurrCallee; + else if (Callee != CurrCallee) + return false; + } + } + + // Either both `invoke`s must not have a normal destination, + // or both `invoke`s must have a normal destination, + auto HasNormalDest = [](InvokeInst *II) { + return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg()); + }; + if (any_of(Invokes, HasNormalDest)) { + // Do not merge `invoke` that does not have a normal destination with one + // that does have a normal destination, even though doing so would be legal. + if (!all_of(Invokes, HasNormalDest)) + return false; + + // All normal destinations must be identical. + BasicBlock *NormalBB = nullptr; + for (InvokeInst *II : Invokes) { + BasicBlock *CurrNormalBB = II->getNormalDest(); + assert(CurrNormalBB && "There is always a 'continue to' basic block."); + if (!NormalBB) + NormalBB = CurrNormalBB; + else if (NormalBB != CurrNormalBB) + return false; + } + + // In the normal destination, the incoming values for these two `invoke`s + // must be compatible. + SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end()); + if (!IncomingValuesAreCompatible( + NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()}, + &EquivalenceSet)) + return false; + } + +#ifndef NDEBUG + // All unwind destinations must be identical. + // We know that because we have started from said unwind destination. + BasicBlock *UnwindBB = nullptr; + for (InvokeInst *II : Invokes) { + BasicBlock *CurrUnwindBB = II->getUnwindDest(); + assert(CurrUnwindBB && "There is always an 'unwind to' basic block."); + if (!UnwindBB) + UnwindBB = CurrUnwindBB; + else + assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination."); + } +#endif + + // In the unwind destination, the incoming values for these two `invoke`s + // must be compatible. + if (!IncomingValuesAreCompatible( + Invokes.front()->getUnwindDest(), + {Invokes[0]->getParent(), Invokes[1]->getParent()})) + return false; + + // Ignoring arguments, these `invoke`s must be identical, + // including operand bundles. + const InvokeInst *II0 = Invokes.front(); + for (auto *II : Invokes.drop_front()) + if (!II->isSameOperationAs(II0)) + return false; + + // Can we theoretically form the data operands for the merged `invoke`? + auto IsIllegalToMergeArguments = [](auto Ops) { + Type *Ty = std::get<0>(Ops)->getType(); + assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?"); + return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops); + }; + assert(Invokes.size() == 2 && "Always called with exactly two candidates."); + if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()), + IsIllegalToMergeArguments)) + return false; + + return true; +} + +} // namespace + +// Merge all invokes in the provided set, all of which are compatible +// as per the `CompatibleSets::shouldBelongToSameSet()`. +static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes, + DomTreeUpdater *DTU) { + assert(Invokes.size() >= 2 && "Must have at least two invokes to merge."); + + SmallVector<DominatorTree::UpdateType, 8> Updates; + if (DTU) + Updates.reserve(2 + 3 * Invokes.size()); + + bool HasNormalDest = + !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg()); + + // Clone one of the invokes into a new basic block. + // Since they are all compatible, it doesn't matter which invoke is cloned. + InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() { + InvokeInst *II0 = Invokes.front(); + BasicBlock *II0BB = II0->getParent(); + BasicBlock *InsertBeforeBlock = + II0->getParent()->getIterator()->getNextNode(); + Function *Func = II0BB->getParent(); + LLVMContext &Ctx = II0->getContext(); + + BasicBlock *MergedInvokeBB = BasicBlock::Create( + Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock); + + auto *MergedInvoke = cast<InvokeInst>(II0->clone()); + // NOTE: all invokes have the same attributes, so no handling needed. + MergedInvokeBB->getInstList().push_back(MergedInvoke); + + if (!HasNormalDest) { + // This set does not have a normal destination, + // so just form a new block with unreachable terminator. + BasicBlock *MergedNormalDest = BasicBlock::Create( + Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock); + new UnreachableInst(Ctx, MergedNormalDest); + MergedInvoke->setNormalDest(MergedNormalDest); + } + + // The unwind destination, however, remainds identical for all invokes here. + + return MergedInvoke; + }(); + + if (DTU) { + // Predecessor blocks that contained these invokes will now branch to + // the new block that contains the merged invoke, ... + for (InvokeInst *II : Invokes) + Updates.push_back( + {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()}); + + // ... which has the new `unreachable` block as normal destination, + // or unwinds to the (same for all `invoke`s in this set) `landingpad`, + for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke)) + Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(), + SuccBBOfMergedInvoke}); + + // Since predecessor blocks now unconditionally branch to a new block, + // they no longer branch to their original successors. + for (InvokeInst *II : Invokes) + for (BasicBlock *SuccOfPredBB : successors(II->getParent())) + Updates.push_back( + {DominatorTree::Delete, II->getParent(), SuccOfPredBB}); + } + + bool IsIndirectCall = Invokes[0]->isIndirectCall(); + + // Form the merged operands for the merged invoke. + for (Use &U : MergedInvoke->operands()) { + // Only PHI together the indirect callees and data operands. + if (MergedInvoke->isCallee(&U)) { + if (!IsIndirectCall) + continue; + } else if (!MergedInvoke->isDataOperand(&U)) + continue; + + // Don't create trivial PHI's with all-identical incoming values. + bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) { + return II->getOperand(U.getOperandNo()) != U.get(); + }); + if (!NeedPHI) + continue; + + // Form a PHI out of all the data ops under this index. + PHINode *PN = PHINode::Create( + U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke); + for (InvokeInst *II : Invokes) + PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent()); + + U.set(PN); + } + + // We've ensured that each PHI node has compatible (identical) incoming values + // when coming from each of the `invoke`s in the current merge set, + // so update the PHI nodes accordingly. + for (BasicBlock *Succ : successors(MergedInvoke)) + AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(), + /*ExistPred=*/Invokes.front()->getParent()); + + // And finally, replace the original `invoke`s with an unconditional branch + // to the block with the merged `invoke`. Also, give that merged `invoke` + // the merged debugloc of all the original `invoke`s. + const DILocation *MergedDebugLoc = nullptr; + for (InvokeInst *II : Invokes) { + // Compute the debug location common to all the original `invoke`s. + if (!MergedDebugLoc) + MergedDebugLoc = II->getDebugLoc(); + else + MergedDebugLoc = + DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc()); + + // And replace the old `invoke` with an unconditionally branch + // to the block with the merged `invoke`. + for (BasicBlock *OrigSuccBB : successors(II->getParent())) + OrigSuccBB->removePredecessor(II->getParent()); + BranchInst::Create(MergedInvoke->getParent(), II->getParent()); + II->replaceAllUsesWith(MergedInvoke); + II->eraseFromParent(); + ++NumInvokesMerged; + } + MergedInvoke->setDebugLoc(MergedDebugLoc); + ++NumInvokeSetsFormed; + + if (DTU) + DTU->applyUpdates(Updates); +} + +/// If this block is a `landingpad` exception handling block, categorize all +/// the predecessor `invoke`s into sets, with all `invoke`s in each set +/// being "mergeable" together, and then merge invokes in each set together. +/// +/// This is a weird mix of hoisting and sinking. Visually, it goes from: +/// [...] [...] +/// | | +/// [invoke0] [invoke1] +/// / \ / \ +/// [cont0] [landingpad] [cont1] +/// to: +/// [...] [...] +/// \ / +/// [invoke] +/// / \ +/// [cont] [landingpad] +/// +/// But of course we can only do that if the invokes share the `landingpad`, +/// edges invoke0->cont0 and invoke1->cont1 are "compatible", +/// and the invoked functions are "compatible". +static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { + if (!EnableMergeCompatibleInvokes) + return false; + + bool Changed = false; + + // FIXME: generalize to all exception handling blocks? + if (!BB->isLandingPad()) + return Changed; + + CompatibleSets Grouper; + + // Record all the predecessors of this `landingpad`. As per verifier, + // the only allowed predecessor is the unwind edge of an `invoke`. + // We want to group "compatible" `invokes` into the same set to be merged. + for (BasicBlock *PredBB : predecessors(BB)) + Grouper.insert(cast<InvokeInst>(PredBB->getTerminator())); + + // And now, merge `invoke`s that were grouped togeter. + for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) { + if (Invokes.size() < 2) + continue; + Changed = true; + MergeCompatibleInvokesImpl(Invokes, DTU); + } + + return Changed; +} + /// Determine if we can hoist sink a sole store instruction out of a /// conditional block. /// @@ -2326,15 +2679,15 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, passingValueIsAlwaysUndefined(ThenV, &PN)) return false; + if (canTrap(OrigV) || canTrap(ThenV)) + return false; + HaveRewritablePHIs = true; ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); if (!OrigCE && !ThenCE) - continue; // Known safe and cheap. + continue; // Known cheap (FIXME: Maybe not true for aggregates). - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) - return false; InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0; InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0; InstructionCost MaxCost = @@ -2626,40 +2979,85 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { return true; } -/// If we have a conditional branch on a PHI node value that is defined in the -/// same block as the branch and if any PHI entries are constants, thread edges -/// corresponding to that entry to be branches to their ultimate destination. -static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI, - DomTreeUpdater *DTU, - const DataLayout &DL, - AssumptionCache *AC) { +static ConstantInt * +getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To, + SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, + ConstantInt *> &Visited) { + // Don't look past the block defining the value, we might get the value from + // a previous loop iteration. + auto *I = dyn_cast<Instruction>(V); + if (I && I->getParent() == To) + return nullptr; + + // We know the value if the From block branches on it. + auto *BI = dyn_cast<BranchInst>(From->getTerminator()); + if (BI && BI->isConditional() && BI->getCondition() == V && + BI->getSuccessor(0) != BI->getSuccessor(1)) + return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext()) + : ConstantInt::getFalse(BI->getContext()); + + // Limit the amount of blocks we inspect. + if (Visited.size() >= 8) + return nullptr; + + auto Pair = Visited.try_emplace({From, To}, nullptr); + if (!Pair.second) + return Pair.first->second; + + // Check whether the known value is the same for all predecessors. + ConstantInt *Common = nullptr; + for (BasicBlock *Pred : predecessors(From)) { + ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited); + if (!C || (Common && Common != C)) + return nullptr; + Common = C; + } + return Visited[{From, To}] = Common; +} + +/// If we have a conditional branch on something for which we know the constant +/// value in predecessors (e.g. a phi node in the current block), thread edges +/// from the predecessor to their ultimate destination. +static Optional<bool> +FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { + SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues; BasicBlock *BB = BI->getParent(); - PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); - // NOTE: we currently cannot transform this case if the PHI node is used - // outside of the block. - if (!PN || PN->getParent() != BB || !PN->hasOneUse()) - return false; + Value *Cond = BI->getCondition(); + PHINode *PN = dyn_cast<PHINode>(Cond); + if (PN && PN->getParent() == BB) { + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } - // Degenerate case of a single entry PHI. - if (PN->getNumIncomingValues() == 1) { - FoldSingleEntryPHINodes(PN->getParent()); - return true; + for (Use &U : PN->incoming_values()) + if (auto *CB = dyn_cast<ConstantInt>(U)) + KnownValues.insert({PN->getIncomingBlock(U), CB}); + } else { + SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited; + for (BasicBlock *Pred : predecessors(BB)) { + if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited)) + KnownValues.insert({Pred, CB}); + } } + if (KnownValues.empty()) + return false; + // Now we know that this block has multiple preds and two succs. + // Check that the block is small enough and values defined in the block are + // not used outside of it. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - // Okay, this is a simple enough basic block. See if any phi values are - // constants. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (!CB || !CB->getType()->isIntegerTy(1)) - continue; - + for (const auto &Pair : KnownValues) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. - BasicBlock *PredBB = PN->getIncomingBlock(i); + ConstantInt *CB = Pair.second; + BasicBlock *PredBB = Pair.first; BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); if (RealDest == BB) @@ -2690,6 +3088,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI, // cloned instructions outside of EdgeBB. BasicBlock::iterator InsertPt = EdgeBB->begin(); DenseMap<Value *, Value *> TranslateMap; // Track translated values. + TranslateMap[Cond] = Pair.second; for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast<PHINode>(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); @@ -2708,7 +3107,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI, } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { + if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { @@ -2746,6 +3145,12 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI, DTU->applyUpdates(Updates); } + // For simplicity, we created a separate basic block for the edge. Merge + // it back into the predecessor if possible. This not only avoids + // unnecessary SimplifyCFG iterations, but also makes sure that we don't + // bypass the check for trivial cycles above. + MergeBlockIntoPredecessor(EdgeBB, DTU); + // Signal repeat, simplifying any other constants. return None; } @@ -2753,13 +3158,15 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI, return false; } -static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, - const DataLayout &DL, AssumptionCache *AC) { +static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, + DomTreeUpdater *DTU, + const DataLayout &DL, + AssumptionCache *AC) { Optional<bool> Result; bool EverChanged = false; do { // Note that None means "we changed things, but recurse further." - Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC); + Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); EverChanged |= Result == None || *Result; } while (Result == None); return EverChanged; @@ -2847,7 +3254,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, {DL, PN})) { + if (Value *V = simplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); Changed = true; @@ -3186,18 +3593,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + if (!Cond || + (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) && + !isa<SelectInst>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0))) - if (CE->canTrap()) - return false; - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) - if (CE->canTrap()) - return false; + if (canTrap(Cond->getOperand(0))) + return false; + if (canTrap(Cond->getOperand(1))) + return false; // Finally, don't infinitely unroll conditional loops. if (is_contained(successors(BB), BB)) @@ -3384,7 +3791,9 @@ static bool mergeConditionalStoreToAddress( return false; // Now check the stores are compatible. - if (!QStore->isUnordered() || !PStore->isUnordered()) + if (!QStore->isUnordered() || !PStore->isUnordered() || + PStore->getValueOperand()->getType() != + QStore->getValueOperand()->getType()) return false; // Check that sinking the store won't cause program behavior changes. Sinking @@ -3687,7 +4096,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { // Okay, the outcome of this conditional branch is statically - // knowable. If this block had a single pred, handle specially. + // knowable. If this block had a single pred, handle specially, otherwise + // FoldCondBranchOnValueKnownInPredecessor() will handle it. if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; @@ -3695,35 +4105,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - - // Otherwise, if there are multiple predecessors, insert a PHI that merges - // in the constant and simplify the block result. Subsequent passes of - // simplifycfg will thread the block. - if (BlockIsSimpleEnoughToThreadThrough(BB)) { - pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create( - Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", &BB->front()); - // Okay, we're going to insert the PHI node. Since PBI is not the only - // predecessor, compute the PHI'd conditional value for all of the preds. - // Any predecessor where the condition is not computable we keep symbolic. - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && - PBI->isConditional() && PBI->getCondition() == BI->getCondition() && - PBI->getSuccessor(0) != PBI->getSuccessor(1)) { - bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming( - ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), - P); - } else { - NewPN->addIncoming(BI->getCondition(), P); - } - } - - BI->setCondition(NewPN); - return true; - } } // If the previous block ended with a widenable branch, determine if reusing @@ -3732,9 +4113,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; - if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) - if (CE->canTrap()) - return false; + if (canTrap(BI->getCondition())) + return false; // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional @@ -3791,15 +4171,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, PHINode *PN = cast<PHINode>(II); Value *BIV = PN->getIncomingValueForBlock(BB); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) - if (CE->canTrap()) - return false; + if (canTrap(BIV)) + return false; unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); Value *PBIV = PN->getIncomingValue(PBBIdx); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) - if (CE->canTrap()) - return false; + if (canTrap(PBIV)) + return false; } // Finally, if everything is ok, fold the branches to logical ops. @@ -4116,7 +4494,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { + if (Value *V = simplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } @@ -4812,8 +5190,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, } } -/// Turn a switch with two reachable destinations into an integer range -/// comparison and branch. +/// Turn a switch into an integer range comparison and branch. +/// Switches with more than 2 destinations are ignored. +/// Switches with 1 destination are also ignored. bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); @@ -4845,6 +5224,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, } return false; // More than two destinations. } + if (!DestB) + return false; // All destinations are the same and the default is unreachable assert(DestA && DestB && "Single-destination switch should have been folded."); @@ -5169,11 +5550,6 @@ ConstantFold(Instruction *I, const DataLayout &DL, return nullptr; } - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { - return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], - COps[1], DL); - } - return ConstantFoldInstOperands(I, COps, DL); } @@ -5182,7 +5558,7 @@ ConstantFold(Instruction *I, const DataLayout &DL, /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. static bool -GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, +getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, const DataLayout &DL, const TargetTransformInfo &TTI) { @@ -5253,9 +5629,9 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, // Helper function used to add CaseVal to the list of cases that generate // Result. Returns the updated number of cases that generate this result. -static uintptr_t MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { +static size_t mapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); @@ -5271,18 +5647,19 @@ static uintptr_t MapCaseToResult(ConstantInt *CaseVal, // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool -InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult, const DataLayout &DL, - const TargetTransformInfo &TTI, - uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { +static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL, + const TargetTransformInfo &TTI, + uintptr_t MaxUniqueResults) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); // Resulting value at phi nodes for this case value. SwitchCaseResultsTy Results; - if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, + if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; @@ -5291,11 +5668,11 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, return false; // Add the case->result mapping to UniqueResults. - const uintptr_t NumCasesForResult = - MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + const size_t NumCasesForResult = + mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); // Early out if there are too many cases for this result. - if (NumCasesForResult > MaxCasesPerResult) + if (NumCasesForResult > MaxSwitchCasesPerResult) return false; // Early out if there are too many unique results. @@ -5311,7 +5688,7 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, // Find the default result value. SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults; BasicBlock *DefaultDest = SI->getDefaultDest(); - GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, + getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, DL, TTI); // If the default value is not found abort unless the default destination // is unreachable. @@ -5326,48 +5703,76 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, // Helper function that checks if it is possible to transform a switch with only // two cases (or two cases + default) that produces a result into a select. -// Example: -// switch (a) { -// case 10: %0 = icmp eq i32 %a, 10 -// return 10; %1 = select i1 %0, i32 10, i32 4 -// case 20: ----> %2 = icmp eq i32 %a, 20 -// return 2; %3 = select i1 %2, i32 2, i32 %1 -// default: -// return 4; -// } -static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, - Constant *DefaultResult, Value *Condition, - IRBuilder<> &Builder) { +// TODO: Handle switches with more than 2 cases that map to the same result. +static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, + Constant *DefaultResult, Value *Condition, + IRBuilder<> &Builder) { // If we are selecting between only two cases transform into a simple // select or a two-way select if default is possible. + // Example: + // switch (a) { %0 = icmp eq i32 %a, 10 + // case 10: return 42; %1 = select i1 %0, i32 42, i32 4 + // case 20: return 2; ----> %2 = icmp eq i32 %a, 20 + // default: return 4; %3 = select i1 %2, i32 2, i32 %1 + // } if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 && ResultVector[1].second.size() == 1) { - ConstantInt *const FirstCase = ResultVector[0].second[0]; - ConstantInt *const SecondCase = ResultVector[1].second[0]; - - bool DefaultCanTrigger = DefaultResult; + ConstantInt *FirstCase = ResultVector[0].second[0]; + ConstantInt *SecondCase = ResultVector[1].second[0]; Value *SelectValue = ResultVector[1].first; - if (DefaultCanTrigger) { - Value *const ValueCompare = + if (DefaultResult) { + Value *ValueCompare = Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp"); SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first, DefaultResult, "switch.select"); } - Value *const ValueCompare = + Value *ValueCompare = Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp"); return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue, "switch.select"); } - // Handle the degenerate case where two cases have the same value. - if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 && - DefaultResult) { - Value *Cmp1 = Builder.CreateICmpEQ( - Condition, ResultVector[0].second[0], "switch.selectcmp.case1"); - Value *Cmp2 = Builder.CreateICmpEQ( - Condition, ResultVector[0].second[1], "switch.selectcmp.case2"); - Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); - return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + // Handle the degenerate case where two cases have the same result value. + if (ResultVector.size() == 1 && DefaultResult) { + ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second; + unsigned CaseCount = CaseValues.size(); + // n bits group cases map to the same result: + // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default + // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default + // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default + if (isPowerOf2_32(CaseCount)) { + ConstantInt *MinCaseVal = CaseValues[0]; + // Find mininal value. + for (auto Case : CaseValues) + if (Case->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = Case; + + // Mark the bits case number touched. + APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth()); + for (auto Case : CaseValues) + BitMask |= (Case->getValue() - MinCaseVal->getValue()); + + // Check if cases with the same result can cover all number + // in touched bits. + if (BitMask.countPopulation() == Log2_32(CaseCount)) { + if (!MinCaseVal->isNullValue()) + Condition = Builder.CreateSub(Condition, MinCaseVal); + Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and"); + Value *Cmp = Builder.CreateICmpEQ( + And, Constant::getNullValue(And->getType()), "switch.selectcmp"); + return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + } + } + + // Handle the degenerate case where two cases have the same value. + if (CaseValues.size() == 2) { + Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0], + "switch.selectcmp.case1"); + Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1], + "switch.selectcmp.case2"); + Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); + return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); + } } return nullptr; @@ -5375,10 +5780,10 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, // Helper function to cleanup a switch instruction that has been converted into // a select, fixing up PHI nodes and basic blocks. -static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, - Value *SelectValue, - IRBuilder<> &Builder, - DomTreeUpdater *DTU) { +static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, + Value *SelectValue, + IRBuilder<> &Builder, + DomTreeUpdater *DTU) { std::vector<DominatorTree::UpdateType> Updates; BasicBlock *SelectBB = SI->getParent(); @@ -5409,33 +5814,31 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, DTU->applyUpdates(Updates); } -/// If the switch is only used to initialize one or more -/// phi nodes in a common successor block with only two different -/// constant values, replace the switch with select. -static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - DomTreeUpdater *DTU, const DataLayout &DL, - const TargetTransformInfo &TTI) { +/// If a switch is only used to initialize one or more phi nodes in a common +/// successor block with only two different constant values, try to replace the +/// switch with a select. Returns true if the fold was made. +static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. - if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, - DL, TTI, /*MaxUniqueResults*/2, - /*MaxCasesPerResult*/2)) + if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL, TTI, /*MaxUniqueResults*/ 2)) return false; - assert(PHI != nullptr && "PHI for value select not found"); + assert(PHI != nullptr && "PHI for value select not found"); Builder.SetInsertPoint(SI); Value *SelectValue = - ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); - if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); - return true; - } - // The switch couldn't be converted into a select. - return false; + foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder); + if (!SelectValue) + return false; + + removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU); + return true; } namespace { @@ -5655,7 +6058,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { IntegerType *IT = cast<IntegerType>(Index->getType()); uint64_t TableSize = Array->getInitializer()->getType()->getArrayNumElements(); - if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u))) Index = Builder.CreateZExt( Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), "switch.tableidx.zext"); @@ -5707,6 +6110,27 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, DL.fitsInLegalInteger(IT->getBitWidth()); } +static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { + // 40% is the default density for building a jump table in optsize/minsize + // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this + // function was based on. + const uint64_t MinDensity = 40; + + if (CaseRange >= UINT64_MAX / 100) + return false; // Avoid multiplication overflows below. + + return NumCases * 100 >= CaseRange * MinDensity; +} + +static bool isSwitchDense(ArrayRef<int64_t> Values) { + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + if (Range < Diff) + return false; // Overflow. + + return isSwitchDense(Values.size(), Range); +} + /// Determine whether a lookup table should be built for this switch, based on /// the number of cases, size of the table, and the types of the results. // TODO: We could support larger than legal types by limiting based on the @@ -5716,8 +6140,8 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap<PHINode *, Type *> &ResultTypes) { - if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) - return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() > TableSize) + return false; // TableSize overflowed. bool AllTablesFitInRegister = true; bool HasIllegalType = false; @@ -5747,10 +6171,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, if (HasIllegalType) return false; - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - return SI->getNumCases() * 10 >= TableSize * 4; + return isSwitchDense(SI->getNumCases(), TableSize); } /// Try to reuse the switch table index compare. Following pattern: @@ -5888,7 +6309,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Resulting value at phi nodes for this case value. using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; ResultsTy Results; - if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, + if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; @@ -5916,7 +6337,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // or a bitmask that fits in a register. SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList; bool HasDefaultResults = - GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, + getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL, TTI); bool NeedMask = (TableHasHoles && !HasDefaultResults); @@ -6086,17 +6507,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, return true; } -static bool isSwitchDense(ArrayRef<int64_t> Values) { - // See also SelectionDAGBuilder::isDense(), which this function was based on. - uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); - uint64_t Range = Diff + 1; - uint64_t NumCases = Values.size(); - // 40% is the default density for building a jump table in optsize/minsize mode. - uint64_t MinDensity = 40; - - return NumCases * 100 >= Range * MinDensity; -} - /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -6211,14 +6621,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { } // Try to transform the switch into an icmp and a branch. - if (TurnSwitchRangeIntoICmp(SI, Builder)) + // The conversion from switch to comparison may lose information on + // impossible switch values, so disable it early in the pipeline. + if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder)) return requestResimplify(); // Remove unreachable cases. if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL)) return requestResimplify(); - if (switchToSelect(SI, Builder, DTU, DL, TTI)) + if (trySwitchToSelect(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) @@ -6521,12 +6933,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return requestResimplify(); } - // If this is a branch on a phi node in the current block, thread control - // through this block if any PHI node entries are constants. - if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) - if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) - return requestResimplify(); + // If this is a branch on something for which we know the constant value in + // predecessors (e.g. a phi node in the current block), thread control + // through this block. + if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC)) + return requestResimplify(); // Scan predecessor blocks for conditional branches. for (BasicBlock *Pred : predecessors(BB)) @@ -6725,7 +7136,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { return true; if (SinkCommon && Options.SinkCommonInsts) - if (SinkCommonCodeFromPredecessors(BB, DTU)) { + if (SinkCommonCodeFromPredecessors(BB, DTU) || + MergeCompatibleInvokes(BB, DTU)) { // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's, // so we may now how duplicate PHI's. // Let's rerun EliminateDuplicatePHINodes() first, |