diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
| commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
| tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Transforms/Scalar/JumpThreading.cpp | |
| parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/Scalar/JumpThreading.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/JumpThreading.cpp | 135 |
1 files changed, 96 insertions, 39 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index b31eab50c5ec..f41eaed2e3e7 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -54,6 +53,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -99,6 +99,11 @@ ImplicationSearchThreshold( "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden); +static cl::opt<unsigned> PhiDuplicateThreshold( + "jump-threading-phi-threshold", + cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), + cl::Hidden); + static cl::opt<bool> PrintLVIAfterJumpThreading( "print-lvi-after-jump-threading", cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), @@ -216,7 +221,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { return; uint64_t TrueWeight, FalseWeight; - if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight)) + if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight)) return; if (TrueWeight + FalseWeight == 0) @@ -279,7 +284,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { // With PGO, this can be used to refine even existing profile data with // context information. This needs to be done after more performance // testing. - if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight)) + if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight)) continue; // We can not infer anything useful when BP >= 50%, because BP is the @@ -346,7 +351,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; if (F.hasProfileData()) { - LoopInfo LI{DominatorTree(F)}; + LoopInfo LI{DT}; BPI.reset(new BranchProbabilityInfo(F, LI, &TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -517,8 +522,23 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, Instruction *StopAt, unsigned Threshold) { assert(StopAt->getParent() == BB && "Not an instruction from proper BB?"); + + // Do not duplicate the BB if it has a lot of PHI nodes. + // If a threadable chain is too long then the number of PHI nodes can add up, + // leading to a substantial increase in compile time when rewriting the SSA. + unsigned PhiCount = 0; + Instruction *FirstNonPHI = nullptr; + for (Instruction &I : *BB) { + if (!isa<PHINode>(&I)) { + FirstNonPHI = &I; + break; + } + if (++PhiCount > PhiDuplicateThreshold) + return ~0U; + } + /// Ignore PHI nodes, these will be flattened when duplication happens. - BasicBlock::const_iterator I(BB->getFirstNonPHI()); + BasicBlock::const_iterator I(FirstNonPHI); // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. @@ -560,8 +580,8 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, if (CI->cannotDuplicate() || CI->isConvergent()) return ~0U; - if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) - == TargetTransformInfo::TCC_Free) + if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) == + TargetTransformInfo::TCC_Free) continue; // All other instructions count for at least one unit. @@ -653,22 +673,25 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( Instruction *I = dyn_cast<Instruction>(V); if (!I || I->getParent() != BB) { - // Okay, if this is a live-in value, see if it has a known value at the end - // of any of our predecessors. - // - // FIXME: This should be an edge property, not a block end property. - /// TODO: Per PR2563, we could infer value range information about a - /// predecessor based on its terminator. - // - // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if - // "I" is a non-local compare-with-a-constant instruction. This would be - // able to handle value inequalities better, for example if the compare is - // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. - // Perhaps getConstantOnEdge should be smart enough to do this? + // Okay, if this is a live-in value, see if it has a known value at the any + // edge from our predecessors. for (BasicBlock *P : predecessors(BB)) { + using namespace PatternMatch; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI); + // If I is a non-local compare-with-constant instruction, use more-rich + // 'getPredicateOnEdge' method. This would be able to handle value + // inequalities better, for example if the compare is "X < 4" and "X < 3" + // is known true but "X < 4" itself is not available. + CmpInst::Predicate Pred; + Value *Val; + Constant *Cst; + if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) { + auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI); + if (Res != LazyValueInfo::Unknown) + PredCst = ConstantInt::getBool(V->getContext(), Res); + } if (Constant *KC = getKnownConstant(PredCst, Preference)) Result.emplace_back(KC, P); } @@ -1250,7 +1273,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { return false; bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB; - Optional<bool> Implication = + std::optional<bool> Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); // If the branch condition of BB (which is Cond) and CurrentPred are @@ -1908,7 +1931,7 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { // If all preds provide undef, just nuke the xor, because it is undef too. BO->replaceAllUsesWith(UndefValue::get(BO->getType())); BO->eraseFromParent(); - } else if (SplitVal->isZero()) { + } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) { // If all preds provide 0, replace the xor with the other input. BO->replaceAllUsesWith(BO->getOperand(isLHS)); BO->eraseFromParent(); @@ -2060,6 +2083,30 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, // block, evaluate them to account for entry from PredBB. DenseMap<Instruction *, Value *> ValueMapping; + // Retargets llvm.dbg.value to any renamed variables. + auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool { + auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst); + if (!DbgInstruction) + return false; + + SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap; + for (auto DbgOperand : DbgInstruction->location_ops()) { + auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand); + if (!DbgOperandInstruction) + continue; + + auto I = ValueMapping.find(DbgOperandInstruction); + if (I != ValueMapping.end()) { + OperandsToRemap.insert( + std::pair<Value *, Value *>(DbgOperand, I->second)); + } + } + + for (auto &[OldOp, MappedOp] : OperandsToRemap) + DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp); + return true; + }; + // Clone the phi nodes of the source basic block into NewBB. The resulting // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater // might need to rewrite the operand of the cloned phi. @@ -2084,10 +2131,13 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, for (; BI != BE; ++BI) { Instruction *New = BI->clone(); New->setName(BI->getName()); - NewBB->getInstList().push_back(New); + New->insertInto(NewBB, NewBB->end()); ValueMapping[&*BI] = New; adaptNoAliasScopes(New, ClonedScopes, Context); + if (RetargetDbgValueIfPossible(New)) + continue; + // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { @@ -2437,7 +2487,7 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, // update the edge weight of the result of splitting predecessors. DenseMap<BasicBlock *, BlockFrequency> FreqMap; if (HasProfileData) - for (auto Pred : Preds) + for (auto *Pred : Preds) FreqMap.insert(std::make_pair( Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB))); @@ -2452,10 +2502,10 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, std::vector<DominatorTree::UpdateType> Updates; Updates.reserve((2 * Preds.size()) + NewBBs.size()); - for (auto NewBB : NewBBs) { + for (auto *NewBB : NewBBs) { BlockFrequency NewBBFreq(0); Updates.push_back({DominatorTree::Insert, NewBB, BB}); - for (auto Pred : predecessors(NewBB)) { + for (auto *Pred : predecessors(NewBB)) { Updates.push_back({DominatorTree::Delete, Pred, BB}); Updates.push_back({DominatorTree::Insert, Pred, NewBB}); if (HasProfileData) // Update frequencies between Pred -> NewBB. @@ -2472,18 +2522,7 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { const Instruction *TI = BB->getTerminator(); assert(TI->getNumSuccessors() > 1 && "not a split"); - - MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); - if (!WeightsNode) - return false; - - MDString *MDName = cast<MDString>(WeightsNode->getOperand(0)); - if (MDName->getString() != "branch_weights") - return false; - - // Ensure there are weights for all of the successors. Note that the first - // operand to the metadata node is a name, not a weight. - return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1; + return hasValidBranchWeightMD(*TI); } /// Update the block frequency of BB and branch weight and the metadata on the @@ -2677,7 +2716,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( if (New) { // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); - PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + New->insertInto(PredBB, OldPredBranch->getIterator()); // Update Dominance from simplified New instruction operands. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i))) @@ -2731,12 +2770,30 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, BB->getParent(), BB); // Move the unconditional branch to NewBB. PredTerm->removeFromParent(); - NewBB->getInstList().insert(NewBB->end(), PredTerm); + PredTerm->insertInto(NewBB, NewBB->end()); // Create a conditional branch and update PHI nodes. auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc()); + BI->copyMetadata(*SI, {LLVMContext::MD_prof}); SIUse->setIncomingValue(Idx, SI->getFalseValue()); SIUse->addIncoming(SI->getTrueValue(), NewBB); + // Set the block frequency of NewBB. + if (HasProfileData) { + uint64_t TrueWeight, FalseWeight; + if (extractBranchWeights(*SI, TrueWeight, FalseWeight) && + (TrueWeight + FalseWeight) != 0) { + SmallVector<BranchProbability, 2> BP; + BP.emplace_back(BranchProbability::getBranchProbability( + TrueWeight, TrueWeight + FalseWeight)); + BP.emplace_back(BranchProbability::getBranchProbability( + FalseWeight, TrueWeight + FalseWeight)); + BPI->setEdgeProbability(Pred, BP); + } + + auto NewBBFreq = + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB); + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } // The select is now dead. SI->eraseFromParent(); |
