Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 212
1 file changed, 179 insertions(+), 33 deletions(-)
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dc9143bebc45..6b0377e0ecb3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -14,25 +14,50 @@
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,8 +66,15 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
#include <memory>
+#include <utility>
+
using namespace llvm;
using namespace jumpthreading;
@@ -70,6 +102,7 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::Hidden);
namespace {
+
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
@@ -85,12 +118,12 @@ namespace {
///
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
- ///
class JumpThreading : public FunctionPass {
JumpThreadingPass Impl;
public:
static char ID; // Pass identification
+
JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -108,9 +141,11 @@ namespace {
void releaseMemory() override { Impl.releaseMemory(); }
};
-}
+
+} // end anonymous namespace
char JumpThreading::ID = 0;
+
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
@@ -120,14 +155,125 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(int Threshold) { return new JumpThreading(Threshold); }
+FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
+ return new JumpThreading(Threshold);
+}
JumpThreadingPass::JumpThreadingPass(int T) {
BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
-/// runOnFunction - Top level algorithm.
-///
+// Update branch probability information according to conditional
+// branch probability. This is usually made possible for cloned branches
+// in inline instances by the context-specific profile in the caller.
+// For instance,
+//
+// [Block PredBB]
+// [Branch PredBr]
+// if (t) {
+// Block A;
+// } else {
+// Block B;
+// }
+//
+// [Block BB]
+// cond = PN([true, %A], [..., %B]); // PHI node
+// [Branch CondBr]
+// if (cond) {
+// ... // P(cond == true) = 1%
+// }
+//
+// Here we know that when block A is taken, cond must be true, which means
+// P(cond == true | A) = 1
+//
+// Given that P(cond == true) = P(cond == true | A) * P(A) +
+// P(cond == true | B) * P(B)
+// we get:
+//    P(cond == true) = P(A) + P(cond == true | B) * P(B)
+//
+// which gives us:
+//    P(A) <= P(cond == true), i.e.
+// P(t == true) <= P(cond == true)
+//
+// In other words, if we know P(cond == true) is unlikely, we know
+// that P(t == true) is also unlikely.
+//
+static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!CondBr)
+ return;
+
+ BranchProbability BP;
+ uint64_t TrueWeight, FalseWeight;
+ if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+
+ // Returns the outgoing edge of the dominating predecessor block
+ // that leads to the PhiNode's incoming block:
+ auto GetPredOutEdge =
+ [](BasicBlock *IncomingBB,
+ BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
+ auto *PredBB = IncomingBB;
+ auto *SuccBB = PhiBB;
+ while (true) {
+ BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (PredBr && PredBr->isConditional())
+ return {PredBB, SuccBB};
+ auto *SinglePredBB = PredBB->getSinglePredecessor();
+ if (!SinglePredBB)
+ return {nullptr, nullptr};
+ SuccBB = PredBB;
+ PredBB = SinglePredBB;
+ }
+ };
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *PhiOpnd = PN->getIncomingValue(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+
+ if (!CI || !CI->getType()->isIntegerTy(1))
+ continue;
+
+ BP = (CI->isOne() ? BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight)
+ : BranchProbability::getBranchProbability(
+ FalseWeight, TrueWeight + FalseWeight));
+
+ auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
+ if (!PredOutEdge.first)
+ return;
+
+ BasicBlock *PredBB = PredOutEdge.first;
+ BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
+
+ uint64_t PredTrueWeight, PredFalseWeight;
+ // FIXME: We currently only set the profile data when it is missing.
+ // With PGO, this can be used to refine even existing profile data with
+ // context information. This needs to be done after more performance
+ // testing.
+ if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
+ continue;
+
+  // We cannot infer anything useful when BP >= 50%, because BP is the
+  // upper bound probability value.
+ if (BP >= BranchProbability(50, 100))
+ continue;
+
+ SmallVector<uint32_t, 2> Weights;
+ if (PredBr->getSuccessor(0) == PredOutEdge.second) {
+ Weights.push_back(BP.getNumerator());
+ Weights.push_back(BP.getCompl().getNumerator());
+ } else {
+ Weights.push_back(BP.getCompl().getNumerator());
+ Weights.push_back(BP.getNumerator());
+ }
+ PredBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(PredBr->getParent()->getContext())
+ .createBranchWeights(Weights));
+ }
+}
+
+/// runOnFunction - Top-level algorithm.
bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -155,7 +301,6 @@ bool JumpThreading::runOnFunction(Function &F) {
PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
-
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
@@ -184,7 +329,6 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
-
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
@@ -384,7 +528,6 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
/// within the loop (forming a nested loop). This simple analysis is not rich
/// enough to track all of these properties and keep it up-to-date as the CFG
/// mutates, so we don't allow any of these transformations.
-///
void JumpThreadingPass::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
@@ -418,7 +561,6 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// BB in the result vector.
///
/// This returns true if there were any known values.
-///
bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *V, BasicBlock *BB, PredValueInfo &Result,
ConstantPreference Preference, Instruction *CxtI) {
@@ -507,8 +649,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
return true;
}
- PredValueInfoTy LHSVals, RHSVals;
-
// Handle some boolean conditions.
if (I->getType()->getPrimitiveSizeInBits() == 1) {
assert(Preference == WantInteger && "One-bit non-integer type?");
@@ -516,6 +656,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// X & false -> false
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
+ PredValueInfoTy LHSVals, RHSVals;
+
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
WantInteger, CxtI);
ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
@@ -655,6 +797,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// x as a live-in.
{
using namespace PatternMatch;
+
Value *AddLHS;
ConstantInt *AddConst;
if (isa<ConstantInt>(CmpConst) &&
@@ -751,14 +894,11 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
return !Result.empty();
}
-
-
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
///
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
-///
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
TerminatorInst *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
@@ -979,7 +1119,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
- //
Value *SimplifyValue = CondInst;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
@@ -991,10 +1130,14 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (SimplifyPartiallyRedundantLoad(LI))
return true;
+ // Before threading, try to propagate profile data backwards:
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ updatePredecessorProfileMetadata(PN, BB);
+
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
- //
if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
return true;
@@ -1036,9 +1179,9 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
return false;
- bool FalseDest = PBI->getSuccessor(1) == CurrentBB;
+ bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
Optional<bool> Implication =
- isImpliedCondition(PBI->getCondition(), Cond, DL, FalseDest);
+ isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
if (Implication) {
BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB);
BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI);
@@ -1124,7 +1267,9 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LI->getAAMetadata(AATags);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
- typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
+
+ using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
+
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = nullptr;
SmallVector<LoadInst*, 8> CSELoads;
@@ -1283,8 +1428,8 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
/// the list.
static BasicBlock *
FindMostPopularDest(BasicBlock *BB,
- const SmallVectorImpl<std::pair<BasicBlock*,
- BasicBlock*> > &PredToDestList) {
+ const SmallVectorImpl<std::pair<BasicBlock *,
+ BasicBlock *>> &PredToDestList) {
assert(!PredToDestList.empty());
// Determine popularity. If there are multiple possible destinations, we
@@ -1502,7 +1647,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
-///
bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
@@ -1532,7 +1676,6 @@ bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) {
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-///
bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
@@ -1637,7 +1780,6 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
-
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
@@ -1677,10 +1819,15 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
- if (LoopHeaders.count(BB)) {
- DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
- << "' to dest BB '" << SuccBB->getName()
- << "' - it might create an irreducible loop!\n");
+ if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
+ DEBUG({
+ bool BBIsHeader = LoopHeaders.count(BB);
+ bool SuccIsHeader = LoopHeaders.count(SuccBB);
+ dbgs() << " Not threading across "
+ << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
+ << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
+ << SuccBB->getName() << "' - it might create an irreducible loop!\n";
+ });
return false;
}
@@ -1795,7 +1942,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
DEBUG(dbgs() << "\n");
}
-
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
@@ -2194,7 +2340,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
/// %c = cmp %p, 0
/// %s = select %c, trueval, falseval
-//
+///
/// And expand the select into a branch structure. This later enables
/// jump-threading over bb in this pass.
///
@@ -2280,6 +2426,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
/// guard is then threaded to one of them.
bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
using namespace PatternMatch;
+
// We only want to deal with two predecessors.
BasicBlock *Pred1, *Pred2;
auto PI = pred_begin(BB), PE = pred_end(BB);
@@ -2331,8 +2478,7 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
TrueDestIsSafe = true;
else {
// False dest is safe if !BranchCond => GuardCond.
- Impl =
- isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
+ Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
if (Impl && *Impl)
FalseDestIsSafe = true;
}