aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/JumpThreading.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-02-11 12:38:04 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-02-11 12:38:11 +0000
commite3b557809604d036af6e00c60f012c2025b59a5e (patch)
tree8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Transforms/Scalar/JumpThreading.cpp
parent08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff)
Diffstat (limited to 'llvm/lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp135
1 files changed, 96 insertions, 39 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b31eab50c5ec..f41eaed2e3e7 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -54,6 +53,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
@@ -99,6 +99,11 @@ ImplicationSearchThreshold(
"condition to use to thread over a weaker condition"),
cl::init(3), cl::Hidden);
+static cl::opt<unsigned> PhiDuplicateThreshold(
+ "jump-threading-phi-threshold",
+ cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
+ cl::Hidden);
+
static cl::opt<bool> PrintLVIAfterJumpThreading(
"print-lvi-after-jump-threading",
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
@@ -216,7 +221,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
return;
uint64_t TrueWeight, FalseWeight;
- if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
+ if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
return;
if (TrueWeight + FalseWeight == 0)
@@ -279,7 +284,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
// With PGO, this can be used to refine even existing profile data with
// context information. This needs to be done after more performance
// testing.
- if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
+ if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
continue;
// We can not infer anything useful when BP >= 50%, because BP is the
@@ -346,7 +351,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(F)};
+ LoopInfo LI{DT};
BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
@@ -517,8 +522,23 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
Instruction *StopAt,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
+
+ // Do not duplicate the BB if it has a lot of PHI nodes.
+ // If a threadable chain is too long then the number of PHI nodes can add up,
+ // leading to a substantial increase in compile time when rewriting the SSA.
+ unsigned PhiCount = 0;
+ Instruction *FirstNonPHI = nullptr;
+ for (Instruction &I : *BB) {
+ if (!isa<PHINode>(&I)) {
+ FirstNonPHI = &I;
+ break;
+ }
+ if (++PhiCount > PhiDuplicateThreshold)
+ return ~0U;
+ }
+
/// Ignore PHI nodes, these will be flattened when duplication happens.
- BasicBlock::const_iterator I(BB->getFirstNonPHI());
+ BasicBlock::const_iterator I(FirstNonPHI);
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
@@ -560,8 +580,8 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
if (CI->cannotDuplicate() || CI->isConvergent())
return ~0U;
- if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
- == TargetTransformInfo::TCC_Free)
+ if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free)
continue;
// All other instructions count for at least one unit.
@@ -653,22 +673,25 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Instruction *I = dyn_cast<Instruction>(V);
if (!I || I->getParent() != BB) {
- // Okay, if this is a live-in value, see if it has a known value at the end
- // of any of our predecessors.
- //
- // FIXME: This should be an edge property, not a block end property.
- /// TODO: Per PR2563, we could infer value range information about a
- /// predecessor based on its terminator.
- //
- // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
- // "I" is a non-local compare-with-a-constant instruction. This would be
- // able to handle value inequalities better, for example if the compare is
- // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
- // Perhaps getConstantOnEdge should be smart enough to do this?
+ // Okay, if this is a live-in value, see if it has a known value at the any
+ // edge from our predecessors.
for (BasicBlock *P : predecessors(BB)) {
+ using namespace PatternMatch;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
+ // If I is a non-local compare-with-constant instruction, use more-rich
+ // 'getPredicateOnEdge' method. This would be able to handle value
+ // inequalities better, for example if the compare is "X < 4" and "X < 3"
+ // is known true but "X < 4" itself is not available.
+ CmpInst::Predicate Pred;
+ Value *Val;
+ Constant *Cst;
+ if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
+ auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
+ if (Res != LazyValueInfo::Unknown)
+ PredCst = ConstantInt::getBool(V->getContext(), Res);
+ }
if (Constant *KC = getKnownConstant(PredCst, Preference))
Result.emplace_back(KC, P);
}
@@ -1250,7 +1273,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
return false;
bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
- Optional<bool> Implication =
+ std::optional<bool> Implication =
isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
// If the branch condition of BB (which is Cond) and CurrentPred are
@@ -1908,7 +1931,7 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
// If all preds provide undef, just nuke the xor, because it is undef too.
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
BO->eraseFromParent();
- } else if (SplitVal->isZero()) {
+ } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
// If all preds provide 0, replace the xor with the other input.
BO->replaceAllUsesWith(BO->getOperand(isLHS));
BO->eraseFromParent();
@@ -2060,6 +2083,30 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
// block, evaluate them to account for entry from PredBB.
DenseMap<Instruction *, Value *> ValueMapping;
+ // Retargets llvm.dbg.value to any renamed variables.
+ auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
+ auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
+ if (!DbgInstruction)
+ return false;
+
+ SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
+ for (auto DbgOperand : DbgInstruction->location_ops()) {
+ auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
+ if (!DbgOperandInstruction)
+ continue;
+
+ auto I = ValueMapping.find(DbgOperandInstruction);
+ if (I != ValueMapping.end()) {
+ OperandsToRemap.insert(
+ std::pair<Value *, Value *>(DbgOperand, I->second));
+ }
+ }
+
+ for (auto &[OldOp, MappedOp] : OperandsToRemap)
+ DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
+ return true;
+ };
+
// Clone the phi nodes of the source basic block into NewBB. The resulting
// phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
// might need to rewrite the operand of the cloned phi.
@@ -2084,10 +2131,13 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
for (; BI != BE; ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
- NewBB->getInstList().push_back(New);
+ New->insertInto(NewBB, NewBB->end());
ValueMapping[&*BI] = New;
adaptNoAliasScopes(New, ClonedScopes, Context);
+ if (RetargetDbgValueIfPossible(New))
+ continue;
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -2437,7 +2487,7 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
// update the edge weight of the result of splitting predecessors.
DenseMap<BasicBlock *, BlockFrequency> FreqMap;
if (HasProfileData)
- for (auto Pred : Preds)
+ for (auto *Pred : Preds)
FreqMap.insert(std::make_pair(
Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
@@ -2452,10 +2502,10 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
std::vector<DominatorTree::UpdateType> Updates;
Updates.reserve((2 * Preds.size()) + NewBBs.size());
- for (auto NewBB : NewBBs) {
+ for (auto *NewBB : NewBBs) {
BlockFrequency NewBBFreq(0);
Updates.push_back({DominatorTree::Insert, NewBB, BB});
- for (auto Pred : predecessors(NewBB)) {
+ for (auto *Pred : predecessors(NewBB)) {
Updates.push_back({DominatorTree::Delete, Pred, BB});
Updates.push_back({DominatorTree::Insert, Pred, NewBB});
if (HasProfileData) // Update frequencies between Pred -> NewBB.
@@ -2472,18 +2522,7 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "not a split");
-
- MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
- if (!WeightsNode)
- return false;
-
- MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
- if (MDName->getString() != "branch_weights")
- return false;
-
- // Ensure there are weights for all of the successors. Note that the first
- // operand to the metadata node is a name, not a weight.
- return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
+ return hasValidBranchWeightMD(*TI);
}
/// Update the block frequency of BB and branch weight and the metadata on the
@@ -2677,7 +2716,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
if (New) {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
+ New->insertInto(PredBB, OldPredBranch->getIterator());
// Update Dominance from simplified New instruction operands.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
@@ -2731,12 +2770,30 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
BB->getParent(), BB);
// Move the unconditional branch to NewBB.
PredTerm->removeFromParent();
- NewBB->getInstList().insert(NewBB->end(), PredTerm);
+ PredTerm->insertInto(NewBB, NewBB->end());
// Create a conditional branch and update PHI nodes.
auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
+ BI->copyMetadata(*SI, {LLVMContext::MD_prof});
SIUse->setIncomingValue(Idx, SI->getFalseValue());
SIUse->addIncoming(SI->getTrueValue(), NewBB);
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ uint64_t TrueWeight, FalseWeight;
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
+ (TrueWeight + FalseWeight) != 0) {
+ SmallVector<BranchProbability, 2> BP;
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight));
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ FalseWeight, TrueWeight + FalseWeight));
+ BPI->setEdgeProbability(Pred, BP);
+ }
+
+ auto NewBBFreq =
+ BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
// The select is now dead.
SI->eraseFromParent();