diff options
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 212 |
1 files changed, 179 insertions, 33 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dc9143bebc45..6b0377e0ecb3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -14,25 +14,50 @@ #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,8 +66,15 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> #include <memory> +#include <utility> + using namespace llvm; using namespace jumpthreading; @@ -70,6 +102,7 @@ static cl::opt<bool> PrintLVIAfterJumpThreading( cl::Hidden); namespace { + /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. If one or more of the /// predecessors of the block can be proven to always jump to one of the @@ -85,12 +118,12 @@ namespace { /// /// In this case, the unconditional branch at the end of the first if can be /// revectored to the false side of the second if. - /// class JumpThreading : public FunctionPass { JumpThreadingPass Impl; public: static char ID; // Pass identification + JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } @@ -108,9 +141,11 @@ namespace { void releaseMemory() override { Impl.releaseMemory(); } }; -} + +} // end anonymous namespace char JumpThreading::ID = 0; + INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) @@ -120,14 +155,125 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) // Public interface to the Jump Threading pass -FunctionPass *llvm::createJumpThreadingPass(int Threshold) { return new JumpThreading(Threshold); } +FunctionPass *llvm::createJumpThreadingPass(int Threshold) { + return new JumpThreading(Threshold); +} JumpThreadingPass::JumpThreadingPass(int T) { BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } -/// runOnFunction - Top level algorithm. -/// +// Update branch probability information according to conditional +// branch probablity. This is usually made possible for cloned branches +// in inline instances by the context specific profile in the caller. +// For instance, +// +// [Block PredBB] +// [Branch PredBr] +// if (t) { +// Block A; +// } else { +// Block B; +// } +// +// [Block BB] +// cond = PN([true, %A], [..., %B]); // PHI node +// [Branch CondBr] +// if (cond) { +// ... // P(cond == true) = 1% +// } +// +// Here we know that when block A is taken, cond must be true, which means +// P(cond == true | A) = 1 +// +// Given that P(cond == true) = P(cond == true | A) * P(A) + +// P(cond == true | B) * P(B) +// we get: +// P(cond == true ) = P(A) + P(cond == true | B) * P(B) +// +// which gives us: +// P(A) is less than P(cond == true), i.e. +// P(t == true) <= P(cond == true) +// +// In other words, if we know P(cond == true) is unlikely, we know +// that P(t == true) is also unlikely. +// +static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + if (!CondBr) + return; + + BranchProbability BP; + uint64_t TrueWeight, FalseWeight; + if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight)) + return; + + // Returns the outgoing edge of the dominating predecessor block + // that leads to the PhiNode's incoming block: + auto GetPredOutEdge = + [](BasicBlock *IncomingBB, + BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> { + auto *PredBB = IncomingBB; + auto *SuccBB = PhiBB; + while (true) { + BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()); + if (PredBr && PredBr->isConditional()) + return {PredBB, SuccBB}; + auto *SinglePredBB = PredBB->getSinglePredecessor(); + if (!SinglePredBB) + return {nullptr, nullptr}; + SuccBB = PredBB; + PredBB = SinglePredBB; + } + }; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PhiOpnd = PN->getIncomingValue(i); + ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd); + + if (!CI || !CI->getType()->isIntegerTy(1)) + continue; + + BP = (CI->isOne() ? BranchProbability::getBranchProbability( + TrueWeight, TrueWeight + FalseWeight) + : BranchProbability::getBranchProbability( + FalseWeight, TrueWeight + FalseWeight)); + + auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB); + if (!PredOutEdge.first) + return; + + BasicBlock *PredBB = PredOutEdge.first; + BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator()); + + uint64_t PredTrueWeight, PredFalseWeight; + // FIXME: We currently only set the profile data when it is missing. + // With PGO, this can be used to refine even existing profile data with + // context information. This needs to be done after more performance + // testing. + if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight)) + continue; + + // We can not infer anything useful when BP >= 50%, because BP is the + // upper bound probability value. + if (BP >= BranchProbability(50, 100)) + continue; + + SmallVector<uint32_t, 2> Weights; + if (PredBr->getSuccessor(0) == PredOutEdge.second) { + Weights.push_back(BP.getNumerator()); + Weights.push_back(BP.getCompl().getNumerator()); + } else { + Weights.push_back(BP.getCompl().getNumerator()); + Weights.push_back(BP.getNumerator()); + } + PredBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(PredBr->getParent()->getContext()) + .createBranchWeights(Weights)); + } +} + +/// runOnFunction - Toplevel algorithm. bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -155,7 +301,6 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &LVI = AM.getResult<LazyValueAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); @@ -184,7 +329,6 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_, std::unique_ptr<BranchProbabilityInfo> BPI_) { - DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; @@ -384,7 +528,6 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, /// within the loop (forming a nested loop). This simple analysis is not rich /// enough to track all of these properties and keep it up-to-date as the CFG /// mutates, so we don't allow any of these transformations. -/// void JumpThreadingPass::FindLoopHeaders(Function &F) { SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; FindFunctionBackedges(F, Edges); @@ -418,7 +561,6 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { /// BB in the result vector. /// /// This returns true if there were any known values. -/// bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *V, BasicBlock *BB, PredValueInfo &Result, ConstantPreference Preference, Instruction *CxtI) { @@ -507,8 +649,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( return true; } - PredValueInfoTy LHSVals, RHSVals; - // Handle some boolean conditions. if (I->getType()->getPrimitiveSizeInBits() == 1) { assert(Preference == WantInteger && "One-bit non-integer type?"); @@ -516,6 +656,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // X & false -> false if (I->getOpcode() == Instruction::Or || I->getOpcode() == Instruction::And) { + PredValueInfoTy LHSVals, RHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, @@ -655,6 +797,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // x as a live-in. { using namespace PatternMatch; + Value *AddLHS; ConstantInt *AddConst; if (isa<ConstantInt>(CmpConst) && @@ -751,14 +894,11 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( return !Result.empty(); } - - /// GetBestDestForBranchOnUndef - If we determine that the specified block ends /// in an undefined jump, decide which block is best to revector to. /// /// Since we can pick an arbitrary destination, we pick the successor with the /// fewest predecessors. This should reduce the in-degree of the others. -/// static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { TerminatorInst *BBTerm = BB->getTerminator(); unsigned MinSucc = 0; @@ -979,7 +1119,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // for loads that are used by a switch or by the condition for the branch. If // we see one, check to see if it's partially redundant. If so, insert a PHI // which can then be used to thread the values. - // Value *SimplifyValue = CondInst; if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) if (isa<Constant>(CondCmp->getOperand(1))) @@ -991,10 +1130,14 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { if (SimplifyPartiallyRedundantLoad(LI)) return true; + // Before threading, try to propagate profile data backwards: + if (PHINode *PN = dyn_cast<PHINode>(CondInst)) + if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + updatePredecessorProfileMetadata(PN, BB); + // Handle a variety of cases where we are branching on something derived from // a PHI node in the current block. If we can prove that any predecessors // compute a predictable value based on a PHI node, thread those predecessors. - // if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator)) return true; @@ -1036,9 +1179,9 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) { if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB) return false; - bool FalseDest = PBI->getSuccessor(1) == CurrentBB; + bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB; Optional<bool> Implication = - isImpliedCondition(PBI->getCondition(), Cond, DL, FalseDest); + isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); if (Implication) { BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB); BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI); @@ -1124,7 +1267,9 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LI->getAAMetadata(AATags); SmallPtrSet<BasicBlock*, 8> PredsScanned; - typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; + + using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>; + AvailablePredsTy AvailablePreds; BasicBlock *OneUnavailablePred = nullptr; SmallVector<LoadInst*, 8> CSELoads; @@ -1283,8 +1428,8 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { /// the list. static BasicBlock * FindMostPopularDest(BasicBlock *BB, - const SmallVectorImpl<std::pair<BasicBlock*, - BasicBlock*> > &PredToDestList) { + const SmallVectorImpl<std::pair<BasicBlock *, + BasicBlock *>> &PredToDestList) { assert(!PredToDestList.empty()); // Determine popularity. If there are multiple possible destinations, we @@ -1502,7 +1647,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on /// a PHI node in the current block. See if there are any simplifications we /// can do based on inputs to the phi node. -/// bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) { BasicBlock *BB = PN->getParent(); @@ -1532,7 +1676,6 @@ bool JumpThreadingPass::ProcessBranchOnPHI(PHINode *PN) { /// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on /// a xor instruction in the current block. See if there are any /// simplifications we can do based on inputs to the xor. -/// bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) { BasicBlock *BB = BO->getParent(); @@ -1637,7 +1780,6 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) { return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); } - /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// NewPred using the entries from OldPred (suitably mapped). @@ -1677,10 +1819,15 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. - if (LoopHeaders.count(BB)) { - DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName() - << "' to dest BB '" << SuccBB->getName() - << "' - it might create an irreducible loop!\n"); + if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) { + DEBUG({ + bool BBIsHeader = LoopHeaders.count(BB); + bool SuccIsHeader = LoopHeaders.count(SuccBB); + dbgs() << " Not threading across " + << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName() + << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '") + << SuccBB->getName() << "' - it might create an irreducible loop!\n"; + }); return false; } @@ -1795,7 +1942,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, DEBUG(dbgs() << "\n"); } - // Ok, NewBB is good to go. Update the terminator of PredBB to jump to // NewBB instead of BB. This eliminates predecessors from BB, which requires // us to simplify any PHI nodes in BB. @@ -2194,7 +2340,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... /// %c = cmp %p, 0 /// %s = select %c, trueval, falseval -// +/// /// And expand the select into a branch structure. This later enables /// jump-threading over bb in this pass. /// @@ -2280,6 +2426,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { /// guard is then threaded to one of them. bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) { using namespace PatternMatch; + // We only want to deal with two predecessors. BasicBlock *Pred1, *Pred2; auto PI = pred_begin(BB), PE = pred_end(BB); @@ -2331,8 +2478,7 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, TrueDestIsSafe = true; else { // False dest is safe if !BranchCond => GuardCond. - Impl = - isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true); + Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false); if (Impl && *Impl) FalseDestIsSafe = true; } |