aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/Utils/SimplifyCFG.cpp
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp818
1 files changed, 615 insertions, 203 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 335ac03ccb52..567b866f7777 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -27,7 +27,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -50,7 +50,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
@@ -58,7 +57,6 @@
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -74,7 +72,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -94,8 +91,8 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
cl::opt<bool> llvm::RequireAndPreserveDomTree(
- "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "simplifycfg-require-and-preserve-domtree", cl::Hidden,
+
cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
"into preserving DomTree,"));
@@ -167,6 +164,14 @@ static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
"to fold branch to common destination when vector operations are "
"present"));
+static cl::opt<bool> EnableMergeCompatibleInvokes(
+ "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
+ cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
+
+static cl::opt<unsigned> MaxSwitchCasesPerResult(
+ "max-switch-cases-per-result", cl::Hidden, cl::init(16),
+ cl::desc("Limit cases to analyze when converting a switch to select"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -192,6 +197,8 @@ STATISTIC(NumSinkCommonInstrs,
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumInvokes,
"Number of invokes with empty resume blocks simplified into calls");
+STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
+STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
namespace {
@@ -291,6 +298,34 @@ public:
} // end anonymous namespace
+/// Return true if all the PHI nodes in the basic block \p BB
+/// receive compatible (identical) incoming values when coming from
+/// all of the predecessor blocks that are specified in \p IncomingBlocks.
+///
+/// Note that if the values aren't exactly identical, but \p EquivalenceSet
+/// is provided, and *both* of the values are present in the set,
+/// then they are considered equal.
+static bool IncomingValuesAreCompatible(
+ BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
+ SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
+ assert(IncomingBlocks.size() == 2 &&
+ "Only for a pair of incoming blocks at the time!");
+
+ // FIXME: it is okay if one of the incoming values is an `undef` value,
+ // iff the other incoming value is guaranteed to be a non-poison value.
+ // FIXME: it is okay if one of the incoming values is a `poison` value.
+ return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
+ Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
+ Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
+ if (IV0 == IV1)
+ return true;
+ if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
+ EquivalenceSet->contains(IV1))
+ return true;
+ return false;
+ });
+}
+
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
@@ -307,17 +342,17 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
bool Fail = false;
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) !=
- PN->getIncomingValueForBlock(SI2BB)) {
- if (FailBlocks)
- FailBlocks->insert(Succ);
- Fail = true;
- }
- }
+ for (BasicBlock *Succ : successors(SI2BB)) {
+ if (!SI1Succs.count(Succ))
+ continue;
+ if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
+ continue;
+ Fail = true;
+ if (FailBlocks)
+ FailBlocks->insert(Succ);
+ else
+ break;
+ }
return !Fail;
}
@@ -347,6 +382,13 @@ static InstructionCost computeSpeculationCost(const User *I,
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}
+/// Check whether this is a potentially trapping constant.
+static bool canTrap(const Value *V) {
+ if (auto *C = dyn_cast<Constant>(V))
+ return C->canTrap();
+ return false;
+}
+
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -381,10 +423,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB,
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
// can be executed unconditionally.
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
- if (C->canTrap())
- return false;
- return true;
+ return !canTrap(V);
}
BasicBlock *PBB = I->getParent();
@@ -1459,7 +1498,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
return false;
if (!I1NonDbg->isTerminator())
return false;
- // Now we know that we only need to hoist debug instrinsics and the
+ // Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
}
@@ -2212,6 +2251,320 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
return Changed;
}
+namespace {
+
+struct CompatibleSets {
+ using SetTy = SmallVector<InvokeInst *, 2>;
+
+ SmallVector<SetTy, 1> Sets;
+
+ static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
+
+ SetTy &getCompatibleSet(InvokeInst *II);
+
+ void insert(InvokeInst *II);
+};
+
+CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
+ // Perform a linear scan over all the existing sets, see if the new `invoke`
+ // is compatible with any particular set. Since we know that all the `invokes`
+ // within a set are compatible, only check the first `invoke` in each set.
+ // WARNING: at worst, this has quadratic complexity.
+ for (CompatibleSets::SetTy &Set : Sets) {
+ if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
+ return Set;
+ }
+
+ // Otherwise, we either had no sets yet, or this invoke forms a new set.
+ return Sets.emplace_back();
+}
+
+void CompatibleSets::insert(InvokeInst *II) {
+ getCompatibleSet(II).emplace_back(II);
+}
+
+bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+
+ // Can we theoretically merge these `invoke`s?
+ auto IsIllegalToMerge = [](InvokeInst *II) {
+ return II->cannotMerge() || II->isInlineAsm();
+ };
+ if (any_of(Invokes, IsIllegalToMerge))
+ return false;
+
+ // Either both `invoke`s must be direct,
+ // or both `invoke`s must be indirect.
+ auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
+ bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
+ bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
+ if (HaveIndirectCalls) {
+ if (!AllCallsAreIndirect)
+ return false;
+ } else {
+ // All callees must be identical.
+ Value *Callee = nullptr;
+ for (InvokeInst *II : Invokes) {
+ Value *CurrCallee = II->getCalledOperand();
+ assert(CurrCallee && "There is always a called operand.");
+ if (!Callee)
+ Callee = CurrCallee;
+ else if (Callee != CurrCallee)
+ return false;
+ }
+ }
+
+ // Either both `invoke`s must not have a normal destination,
+ // or both `invoke`s must have a normal destination,
+ auto HasNormalDest = [](InvokeInst *II) {
+ return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
+ };
+ if (any_of(Invokes, HasNormalDest)) {
+ // Do not merge `invoke` that does not have a normal destination with one
+ // that does have a normal destination, even though doing so would be legal.
+ if (!all_of(Invokes, HasNormalDest))
+ return false;
+
+ // All normal destinations must be identical.
+ BasicBlock *NormalBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrNormalBB = II->getNormalDest();
+ assert(CurrNormalBB && "There is always a 'continue to' basic block.");
+ if (!NormalBB)
+ NormalBB = CurrNormalBB;
+ else if (NormalBB != CurrNormalBB)
+ return false;
+ }
+
+ // In the normal destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
+ if (!IncomingValuesAreCompatible(
+ NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
+ &EquivalenceSet))
+ return false;
+ }
+
+#ifndef NDEBUG
+ // All unwind destinations must be identical.
+ // We know that because we have started from said unwind destination.
+ BasicBlock *UnwindBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrUnwindBB = II->getUnwindDest();
+ assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
+ if (!UnwindBB)
+ UnwindBB = CurrUnwindBB;
+ else
+ assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
+ }
+#endif
+
+ // In the unwind destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ if (!IncomingValuesAreCompatible(
+ Invokes.front()->getUnwindDest(),
+ {Invokes[0]->getParent(), Invokes[1]->getParent()}))
+ return false;
+
+ // Ignoring arguments, these `invoke`s must be identical,
+ // including operand bundles.
+ const InvokeInst *II0 = Invokes.front();
+ for (auto *II : Invokes.drop_front())
+ if (!II->isSameOperationAs(II0))
+ return false;
+
+ // Can we theoretically form the data operands for the merged `invoke`?
+ auto IsIllegalToMergeArguments = [](auto Ops) {
+ Type *Ty = std::get<0>(Ops)->getType();
+ assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
+ return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
+ };
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+ if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
+ IsIllegalToMergeArguments))
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+// Merge all invokes in the provided set, all of which are compatible
+// as per the `CompatibleSets::shouldBelongToSameSet()`.
+static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
+ DomTreeUpdater *DTU) {
+ assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
+
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ if (DTU)
+ Updates.reserve(2 + 3 * Invokes.size());
+
+ bool HasNormalDest =
+ !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
+
+ // Clone one of the invokes into a new basic block.
+ // Since they are all compatible, it doesn't matter which invoke is cloned.
+ InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
+ InvokeInst *II0 = Invokes.front();
+ BasicBlock *II0BB = II0->getParent();
+ BasicBlock *InsertBeforeBlock =
+ II0->getParent()->getIterator()->getNextNode();
+ Function *Func = II0BB->getParent();
+ LLVMContext &Ctx = II0->getContext();
+
+ BasicBlock *MergedInvokeBB = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
+
+ auto *MergedInvoke = cast<InvokeInst>(II0->clone());
+ // NOTE: all invokes have the same attributes, so no handling needed.
+ MergedInvokeBB->getInstList().push_back(MergedInvoke);
+
+ if (!HasNormalDest) {
+ // This set does not have a normal destination,
+ // so just form a new block with unreachable terminator.
+ BasicBlock *MergedNormalDest = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
+ new UnreachableInst(Ctx, MergedNormalDest);
+ MergedInvoke->setNormalDest(MergedNormalDest);
+ }
+
+ // The unwind destination, however, remainds identical for all invokes here.
+
+ return MergedInvoke;
+ }();
+
+ if (DTU) {
+ // Predecessor blocks that contained these invokes will now branch to
+ // the new block that contains the merged invoke, ...
+ for (InvokeInst *II : Invokes)
+ Updates.push_back(
+ {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
+
+ // ... which has the new `unreachable` block as normal destination,
+ // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
+ for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
+ Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
+ SuccBBOfMergedInvoke});
+
+ // Since predecessor blocks now unconditionally branch to a new block,
+ // they no longer branch to their original successors.
+ for (InvokeInst *II : Invokes)
+ for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
+ Updates.push_back(
+ {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
+ }
+
+ bool IsIndirectCall = Invokes[0]->isIndirectCall();
+
+ // Form the merged operands for the merged invoke.
+ for (Use &U : MergedInvoke->operands()) {
+ // Only PHI together the indirect callees and data operands.
+ if (MergedInvoke->isCallee(&U)) {
+ if (!IsIndirectCall)
+ continue;
+ } else if (!MergedInvoke->isDataOperand(&U))
+ continue;
+
+ // Don't create trivial PHI's with all-identical incoming values.
+ bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
+ return II->getOperand(U.getOperandNo()) != U.get();
+ });
+ if (!NeedPHI)
+ continue;
+
+ // Form a PHI out of all the data ops under this index.
+ PHINode *PN = PHINode::Create(
+ U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
+ for (InvokeInst *II : Invokes)
+ PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
+
+ U.set(PN);
+ }
+
+ // We've ensured that each PHI node has compatible (identical) incoming values
+ // when coming from each of the `invoke`s in the current merge set,
+ // so update the PHI nodes accordingly.
+ for (BasicBlock *Succ : successors(MergedInvoke))
+ AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
+ /*ExistPred=*/Invokes.front()->getParent());
+
+ // And finally, replace the original `invoke`s with an unconditional branch
+ // to the block with the merged `invoke`. Also, give that merged `invoke`
+ // the merged debugloc of all the original `invoke`s.
+ const DILocation *MergedDebugLoc = nullptr;
+ for (InvokeInst *II : Invokes) {
+ // Compute the debug location common to all the original `invoke`s.
+ if (!MergedDebugLoc)
+ MergedDebugLoc = II->getDebugLoc();
+ else
+ MergedDebugLoc =
+ DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
+
+ // And replace the old `invoke` with an unconditionally branch
+ // to the block with the merged `invoke`.
+ for (BasicBlock *OrigSuccBB : successors(II->getParent()))
+ OrigSuccBB->removePredecessor(II->getParent());
+ BranchInst::Create(MergedInvoke->getParent(), II->getParent());
+ II->replaceAllUsesWith(MergedInvoke);
+ II->eraseFromParent();
+ ++NumInvokesMerged;
+ }
+ MergedInvoke->setDebugLoc(MergedDebugLoc);
+ ++NumInvokeSetsFormed;
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+}
+
+/// If this block is a `landingpad` exception handling block, categorize all
+/// the predecessor `invoke`s into sets, with all `invoke`s in each set
+/// being "mergeable" together, and then merge invokes in each set together.
+///
+/// This is a weird mix of hoisting and sinking. Visually, it goes from:
+/// [...] [...]
+/// | |
+/// [invoke0] [invoke1]
+/// / \ / \
+/// [cont0] [landingpad] [cont1]
+/// to:
+/// [...] [...]
+/// \ /
+/// [invoke]
+/// / \
+/// [cont] [landingpad]
+///
+/// But of course we can only do that if the invokes share the `landingpad`,
+/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
+/// and the invoked functions are "compatible".
+static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
+ if (!EnableMergeCompatibleInvokes)
+ return false;
+
+ bool Changed = false;
+
+ // FIXME: generalize to all exception handling blocks?
+ if (!BB->isLandingPad())
+ return Changed;
+
+ CompatibleSets Grouper;
+
+ // Record all the predecessors of this `landingpad`. As per verifier,
+ // the only allowed predecessor is the unwind edge of an `invoke`.
+ // We want to group "compatible" `invokes` into the same set to be merged.
+ for (BasicBlock *PredBB : predecessors(BB))
+ Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
+
+ // And now, merge `invoke`s that were grouped togeter.
+ for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
+ if (Invokes.size() < 2)
+ continue;
+ Changed = true;
+ MergeCompatibleInvokesImpl(Invokes, DTU);
+ }
+
+ return Changed;
+}
+
/// Determine if we can hoist sink a sole store instruction out of a
/// conditional block.
///
@@ -2326,15 +2679,15 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
passingValueIsAlwaysUndefined(ThenV, &PN))
return false;
+ if (canTrap(OrigV) || canTrap(ThenV))
+ return false;
+
HaveRewritablePHIs = true;
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
if (!OrigCE && !ThenCE)
- continue; // Known safe and cheap.
+ continue; // Known cheap (FIXME: Maybe not true for aggregates).
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
- return false;
InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
InstructionCost MaxCost =
@@ -2626,40 +2979,85 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
return true;
}
-/// If we have a conditional branch on a PHI node value that is defined in the
-/// same block as the branch and if any PHI entries are constants, thread edges
-/// corresponding to that entry to be branches to their ultimate destination.
-static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
- DomTreeUpdater *DTU,
- const DataLayout &DL,
- AssumptionCache *AC) {
+static ConstantInt *
+getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>,
+ ConstantInt *> &Visited) {
+ // Don't look past the block defining the value, we might get the value from
+ // a previous loop iteration.
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && I->getParent() == To)
+ return nullptr;
+
+ // We know the value if the From block branches on it.
+ auto *BI = dyn_cast<BranchInst>(From->getTerminator());
+ if (BI && BI->isConditional() && BI->getCondition() == V &&
+ BI->getSuccessor(0) != BI->getSuccessor(1))
+ return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
+ : ConstantInt::getFalse(BI->getContext());
+
+ // Limit the amount of blocks we inspect.
+ if (Visited.size() >= 8)
+ return nullptr;
+
+ auto Pair = Visited.try_emplace({From, To}, nullptr);
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // Check whether the known value is the same for all predecessors.
+ ConstantInt *Common = nullptr;
+ for (BasicBlock *Pred : predecessors(From)) {
+ ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited);
+ if (!C || (Common && Common != C))
+ return nullptr;
+ Common = C;
+ }
+ return Visited[{From, To}] = Common;
+}
+
+/// If we have a conditional branch on something for which we know the constant
+/// value in predecessors (e.g. a phi node in the current block), thread edges
+/// from the predecessor to their ultimate destination.
+static Optional<bool>
+FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
+ SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues;
BasicBlock *BB = BI->getParent();
- PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
- // NOTE: we currently cannot transform this case if the PHI node is used
- // outside of the block.
- if (!PN || PN->getParent() != BB || !PN->hasOneUse())
- return false;
+ Value *Cond = BI->getCondition();
+ PHINode *PN = dyn_cast<PHINode>(Cond);
+ if (PN && PN->getParent() == BB) {
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
- // Degenerate case of a single entry PHI.
- if (PN->getNumIncomingValues() == 1) {
- FoldSingleEntryPHINodes(PN->getParent());
- return true;
+ for (Use &U : PN->incoming_values())
+ if (auto *CB = dyn_cast<ConstantInt>(U))
+ KnownValues.insert({PN->getIncomingBlock(U), CB});
+ } else {
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited))
+ KnownValues.insert({Pred, CB});
+ }
}
+ if (KnownValues.empty())
+ return false;
+
// Now we know that this block has multiple preds and two succs.
+ // Check that the block is small enough and values defined in the block are
+ // not used outside of it.
if (!BlockIsSimpleEnoughToThreadThrough(BB))
return false;
- // Okay, this is a simple enough basic block. See if any phi values are
- // constants.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (!CB || !CB->getType()->isIntegerTy(1))
- continue;
-
+ for (const auto &Pair : KnownValues) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
+ ConstantInt *CB = Pair.second;
+ BasicBlock *PredBB = Pair.first;
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
if (RealDest == BB)
@@ -2690,6 +3088,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
// cloned instructions outside of EdgeBB.
BasicBlock::iterator InsertPt = EdgeBB->begin();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
+ TranslateMap[Cond] = Pair.second;
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -2708,7 +3107,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
}
// Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
+ if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
@@ -2746,6 +3145,12 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
DTU->applyUpdates(Updates);
}
+ // For simplicity, we created a separate basic block for the edge. Merge
+ // it back into the predecessor if possible. This not only avoids
+ // unnecessary SimplifyCFG iterations, but also makes sure that we don't
+ // bypass the check for trivial cycles above.
+ MergeBlockIntoPredecessor(EdgeBB, DTU);
+
// Signal repeat, simplifying any other constants.
return None;
}
@@ -2753,13 +3158,15 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
return false;
}
-static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
- const DataLayout &DL, AssumptionCache *AC) {
+static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
Optional<bool> Result;
bool EverChanged = false;
do {
// Note that None means "we changed things, but recurse further."
- Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC);
+ Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
EverChanged |= Result == None || *Result;
} while (Result == None);
return EverChanged;
@@ -2847,7 +3254,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
+ if (Value *V = simplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
Changed = true;
@@ -3186,18 +3593,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ if (!Cond ||
+ (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
+ !isa<SelectInst>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
- if (CE->canTrap())
- return false;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
- if (CE->canTrap())
- return false;
+ if (canTrap(Cond->getOperand(0)))
+ return false;
+ if (canTrap(Cond->getOperand(1)))
+ return false;
// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
@@ -3384,7 +3791,9 @@ static bool mergeConditionalStoreToAddress(
return false;
// Now check the stores are compatible.
- if (!QStore->isUnordered() || !PStore->isUnordered())
+ if (!QStore->isUnordered() || !PStore->isUnordered() ||
+ PStore->getValueOperand()->getType() !=
+ QStore->getValueOperand()->getType())
return false;
// Check that sinking the store won't cause program behavior changes. Sinking
@@ -3687,7 +4096,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
// Okay, the outcome of this conditional branch is statically
- // knowable. If this block had a single pred, handle specially.
+ // knowable. If this block had a single pred, handle specially, otherwise
+ // FoldCondBranchOnValueKnownInPredecessor() will handle it.
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
@@ -3695,35 +4105,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
return true; // Nuke the branch on constant.
}
-
- // Otherwise, if there are multiple predecessors, insert a PHI that merges
- // in the constant and simplify the block result. Subsequent passes of
- // simplifycfg will thread the block.
- if (BlockIsSimpleEnoughToThreadThrough(BB)) {
- pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- PHINode *NewPN = PHINode::Create(
- Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
- BI->getCondition()->getName() + ".pr", &BB->front());
- // Okay, we're going to insert the PHI node. Since PBI is not the only
- // predecessor, compute the PHI'd conditional value for all of the preds.
- // Any predecessor where the condition is not computable we keep symbolic.
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
- PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(
- ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
- P);
- } else {
- NewPN->addIncoming(BI->getCondition(), P);
- }
- }
-
- BI->setCondition(NewPN);
- return true;
- }
}
// If the previous block ended with a widenable branch, determine if reusing
@@ -3732,9 +4113,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
- if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
- if (CE->canTrap())
- return false;
+ if (canTrap(BI->getCondition()))
+ return false;
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
@@ -3791,15 +4171,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
PHINode *PN = cast<PHINode>(II);
Value *BIV = PN->getIncomingValueForBlock(BB);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
- if (CE->canTrap())
- return false;
+ if (canTrap(BIV))
+ return false;
unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
Value *PBIV = PN->getIncomingValue(PBBIdx);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
- if (CE->canTrap())
- return false;
+ if (canTrap(PBIV))
+ return false;
}
// Finally, if everything is ok, fold the branches to logical ops.
@@ -4116,7 +4494,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
- if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
+ if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
}
@@ -4812,8 +5190,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
}
}
-/// Turn a switch with two reachable destinations into an integer range
-/// comparison and branch.
+/// Turn a switch into an integer range comparison and branch.
+/// Switches with more than 2 destinations are ignored.
+/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
IRBuilder<> &Builder) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -4845,6 +5224,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
}
return false; // More than two destinations.
}
+ if (!DestB)
+ return false; // All destinations are the same and the default is unreachable
assert(DestA && DestB &&
"Single-destination switch should have been folded.");
@@ -5169,11 +5550,6 @@ ConstantFold(Instruction *I, const DataLayout &DL,
return nullptr;
}
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
- COps[1], DL);
- }
-
return ConstantFoldInstOperands(I, COps, DL);
}
@@ -5182,7 +5558,7 @@ ConstantFold(Instruction *I, const DataLayout &DL,
/// destionations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
-GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
+getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
const DataLayout &DL, const TargetTransformInfo &TTI) {
@@ -5253,9 +5629,9 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// Helper function used to add CaseVal to the list of cases that generate
// Result. Returns the updated number of cases that generate this result.
-static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *Result) {
+static size_t mapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
for (auto &I : UniqueResults) {
if (I.first == Result) {
I.second.push_back(CaseVal);
@@ -5271,18 +5647,19 @@ static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool
-InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult, const DataLayout &DL,
- const TargetTransformInfo &TTI,
- uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
+static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI,
+ uintptr_t MaxUniqueResults) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
// Resulting value at phi nodes for this case value.
SwitchCaseResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
+ if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
DL, TTI))
return false;
@@ -5291,11 +5668,11 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
return false;
// Add the case->result mapping to UniqueResults.
- const uintptr_t NumCasesForResult =
- MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+ const size_t NumCasesForResult =
+ mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
// Early out if there are too many cases for this result.
- if (NumCasesForResult > MaxCasesPerResult)
+ if (NumCasesForResult > MaxSwitchCasesPerResult)
return false;
// Early out if there are too many unique results.
@@ -5311,7 +5688,7 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
// Find the default result value.
SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
BasicBlock *DefaultDest = SI->getDefaultDest();
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
DL, TTI);
// If the default value is not found abort unless the default destination
// is unreachable.
@@ -5326,48 +5703,76 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
-// Example:
-// switch (a) {
-// case 10: %0 = icmp eq i32 %a, 10
-// return 10; %1 = select i1 %0, i32 10, i32 4
-// case 20: ----> %2 = icmp eq i32 %a, 20
-// return 2; %3 = select i1 %2, i32 2, i32 %1
-// default:
-// return 4;
-// }
-static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
- Constant *DefaultResult, Value *Condition,
- IRBuilder<> &Builder) {
+// TODO: Handle switches with more than 2 cases that map to the same result.
+static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
// If we are selecting between only two cases transform into a simple
// select or a two-way select if default is possible.
+ // Example:
+ // switch (a) { %0 = icmp eq i32 %a, 10
+ // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
+ // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
+ // default: return 4; %3 = select i1 %2, i32 2, i32 %1
+ // }
if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
ResultVector[1].second.size() == 1) {
- ConstantInt *const FirstCase = ResultVector[0].second[0];
- ConstantInt *const SecondCase = ResultVector[1].second[0];
-
- bool DefaultCanTrigger = DefaultResult;
+ ConstantInt *FirstCase = ResultVector[0].second[0];
+ ConstantInt *SecondCase = ResultVector[1].second[0];
Value *SelectValue = ResultVector[1].first;
- if (DefaultCanTrigger) {
- Value *const ValueCompare =
+ if (DefaultResult) {
+ Value *ValueCompare =
Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
DefaultResult, "switch.select");
}
- Value *const ValueCompare =
+ Value *ValueCompare =
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
SelectValue, "switch.select");
}
- // Handle the degenerate case where two cases have the same value.
- if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
- DefaultResult) {
- Value *Cmp1 = Builder.CreateICmpEQ(
- Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
- Value *Cmp2 = Builder.CreateICmpEQ(
- Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
- Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
- return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ // Handle the degenerate case where two cases have the same result value.
+ if (ResultVector.size() == 1 && DefaultResult) {
+ ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
+ unsigned CaseCount = CaseValues.size();
+ // n bits group cases map to the same result:
+ // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
+ // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
+ // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
+ if (isPowerOf2_32(CaseCount)) {
+ ConstantInt *MinCaseVal = CaseValues[0];
+ // Find mininal value.
+ for (auto Case : CaseValues)
+ if (Case->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = Case;
+
+ // Mark the bits case number touched.
+ APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
+ for (auto Case : CaseValues)
+ BitMask |= (Case->getValue() - MinCaseVal->getValue());
+
+ // Check if cases with the same result can cover all number
+ // in touched bits.
+ if (BitMask.countPopulation() == Log2_32(CaseCount)) {
+ if (!MinCaseVal->isNullValue())
+ Condition = Builder.CreateSub(Condition, MinCaseVal);
+ Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
+ Value *Cmp = Builder.CreateICmpEQ(
+ And, Constant::getNullValue(And->getType()), "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
+ }
+
+ // Handle the degenerate case where two cases have the same value.
+ if (CaseValues.size() == 2) {
+ Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
+ "switch.selectcmp.case1");
+ Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
+ "switch.selectcmp.case2");
+ Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
}
return nullptr;
@@ -5375,10 +5780,10 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
-static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
- Value *SelectValue,
- IRBuilder<> &Builder,
- DomTreeUpdater *DTU) {
+static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
+ Value *SelectValue,
+ IRBuilder<> &Builder,
+ DomTreeUpdater *DTU) {
std::vector<DominatorTree::UpdateType> Updates;
BasicBlock *SelectBB = SI->getParent();
@@ -5409,33 +5814,31 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
DTU->applyUpdates(Updates);
}
-/// If the switch is only used to initialize one or more
-/// phi nodes in a common successor block with only two different
-/// constant values, replace the switch with select.
-static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- DomTreeUpdater *DTU, const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+/// If a switch is only used to initialize one or more phi nodes in a common
+/// successor block with only two different constant values, try to replace the
+/// switch with a select. Returns true if the fold was made.
+static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
Constant *DefaultResult;
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL, TTI, /*MaxUniqueResults*/2,
- /*MaxCasesPerResult*/2))
+ if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL, TTI, /*MaxUniqueResults*/ 2))
return false;
- assert(PHI != nullptr && "PHI for value select not found");
+ assert(PHI != nullptr && "PHI for value select not found");
Builder.SetInsertPoint(SI);
Value *SelectValue =
- ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
- if (SelectValue) {
- RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
- return true;
- }
- // The switch couldn't be converted into a select.
- return false;
+ foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
+ if (!SelectValue)
+ return false;
+
+ removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
+ return true;
}
namespace {
@@ -5655,7 +6058,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
IntegerType *IT = cast<IntegerType>(Index->getType());
uint64_t TableSize =
Array->getInitializer()->getType()->getArrayNumElements();
- if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
+ if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
Index = Builder.CreateZExt(
Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
"switch.tableidx.zext");
@@ -5707,6 +6110,27 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
DL.fitsInLegalInteger(IT->getBitWidth());
}
+static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
+ // 40% is the default density for building a jump table in optsize/minsize
+ // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
+ // function was based on.
+ const uint64_t MinDensity = 40;
+
+ if (CaseRange >= UINT64_MAX / 100)
+ return false; // Avoid multiplication overflows below.
+
+ return NumCases * 100 >= CaseRange * MinDensity;
+}
+
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ if (Range < Diff)
+ return false; // Overflow.
+
+ return isSwitchDense(Values.size(), Range);
+}
+
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
@@ -5716,8 +6140,8 @@ static bool
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
const TargetTransformInfo &TTI, const DataLayout &DL,
const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
- if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
- return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() > TableSize)
+ return false; // TableSize overflowed.
bool AllTablesFitInRegister = true;
bool HasIllegalType = false;
@@ -5747,10 +6171,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
if (HasIllegalType)
return false;
- // The table density should be at least 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
- return SI->getNumCases() * 10 >= TableSize * 4;
+ return isSwitchDense(SI->getNumCases(), TableSize);
}
/// Try to reuse the switch table index compare. Following pattern:
@@ -5888,7 +6309,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Resulting value at phi nodes for this case value.
using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
ResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -5916,7 +6337,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
bool HasDefaultResults =
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
DefaultResultsList, DL, TTI);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
@@ -6086,17 +6507,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
- // See also SelectionDAGBuilder::isDense(), which this function was based on.
- uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
- uint64_t Range = Diff + 1;
- uint64_t NumCases = Values.size();
- // 40% is the default density for building a jump table in optsize/minsize mode.
- uint64_t MinDensity = 40;
-
- return NumCases * 100 >= Range * MinDensity;
-}
-
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
@@ -6211,14 +6621,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
}
// Try to transform the switch into an icmp and a branch.
- if (TurnSwitchRangeIntoICmp(SI, Builder))
+ // The conversion from switch to comparison may lose information on
+ // impossible switch values, so disable it early in the pipeline.
+ if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
return requestResimplify();
// Remove unreachable cases.
if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
return requestResimplify();
- if (switchToSelect(SI, Builder, DTU, DL, TTI))
+ if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
@@ -6521,12 +6933,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
}
- // If this is a branch on a phi node in the current block, thread control
- // through this block if any PHI node entries are constants.
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
- if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
- return requestResimplify();
+ // If this is a branch on something for which we know the constant value in
+ // predecessors (e.g. a phi node in the current block), thread control
+ // through this block.
+ if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
+ return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (BasicBlock *Pred : predecessors(BB))
@@ -6725,7 +7136,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
return true;
if (SinkCommon && Options.SinkCommonInsts)
- if (SinkCommonCodeFromPredecessors(BB, DTU)) {
+ if (SinkCommonCodeFromPredecessors(BB, DTU) ||
+ MergeCompatibleInvokes(BB, DTU)) {
// SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
// so we may now how duplicate PHI's.
// Let's rerun EliminateDuplicatePHINodes() first,