diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-06-13 19:31:46 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-06-13 19:37:19 +0000 |
commit | e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch) | |
tree | 94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | |
parent | bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff) | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 84 |
1 files changed, 59 insertions, 25 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 783d22fafee9..048baa460e49 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty( cl::init(2), cl::Hidden); +// Heuristic for tail duplication if profile count is used in cost model. +static cl::opt<unsigned> TailDupProfilePercentThreshold( + "tail-dup-profile-percent-threshold", + cl::desc("If profile count information is used in tail duplication cost " + "model, the gained fall through number from tail duplication " + "should be at least this percent of hot count."), + cl::init(50), cl::Hidden); + // Heuristic for triangle chains. static cl::opt<unsigned> TriangleChainCount( "triangle-chain-count", @@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Partial tail duplication threshold. BlockFrequency DupThreshold; + /// True: use block profile count to compute tail duplication cost. + /// False: use block frequency to compute tail duplication cost. + bool UseProfileCount; + /// Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits; #endif + /// Get block profile count or frequency according to UseProfileCount. + /// The return value is used to model tail duplication cost. + BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) { + if (UseProfileCount) { + auto Count = MBFI->getBlockProfileCount(BB); + if (Count) + return *Count; + else + return 0; + } else + return MBFI->getBlockFreq(BB); + } + /// Scale the DupThreshold according to basic block size. BlockFrequency scaleThreshold(MachineBasicBlock *BB); void initDupThreshold(); @@ -424,10 +449,6 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *BB, const BlockChain &Chain, const BlockFilterSet *BlockFilter, SmallVector<MachineBasicBlock *, 4> &Successors); - bool shouldPredBlockBeOutlined( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - const BlockChain &Chain, const BlockFilterSet *BlockFilter, - BranchProbability SuccProb, BranchProbability HotProb); bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred, BlockFilterSet *BlockFilter); void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates, @@ -1652,11 +1673,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. - WorkList.erase(llvm::remove_if(WorkList, - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), - WorkList.end()); + llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }); if (WorkList.empty()) return nullptr; @@ -2287,6 +2306,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (Bottom == ExitingBB) return; + // The entry block should always be the first BB in a function. + if (Top->isEntryBlock()) + return; + bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet); // If the header has viable fallthrough, check whether the current loop @@ -2361,6 +2384,11 @@ void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto RotationPos = LoopChain.end(); + MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); + + // The entry block should always be the first BB in a function. + if (ChainHeaderBB->isEntryBlock()) + return; BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2379,7 +2407,6 @@ void MachineBlockPlacement::rotateLoopWithProfile( // chain head is not the loop header. As we only consider natural loops with // single header, this computation can be done only once. BlockFrequency HeaderFallThroughCost(0); - MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); for (auto *Pred : ChainHeaderBB->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && @@ -2516,10 +2543,14 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { MBPI->getEdgeProbability(LoopPred, L.getHeader()); for (MachineBasicBlock *LoopBB : L.getBlocks()) { + if (LoopBlockSet.count(LoopBB)) + continue; auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) continue; - LoopBlockSet.insert(LoopBB); + BlockChain *Chain = BlockToChain[LoopBB]; + for (MachineBasicBlock *ChainBB : *Chain) + LoopBlockSet.insert(ChainBB); } } else LoopBlockSet.insert(L.block_begin(), L.block_end()); @@ -3011,12 +3042,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList; if (RemBB->isEHPad()) RemoveList = EHPadWorkList; - RemoveList.erase( - llvm::remove_if(RemoveList, - [RemBB](MachineBasicBlock *BB) { - return BB == RemBB; - }), - RemoveList.end()); + llvm::erase_value(RemoveList, RemBB); } // Handle the filter set @@ -3120,7 +3146,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB, // Compute the number of reduced taken branches if Pred falls through to BB // instead of another successor. Then compare it with threshold. - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + BlockFrequency PredFreq = getBlockCountOrFrequency(Pred); BlockFrequency Gain = PredFreq * (BBProb - BestProb); return Gain > scaleThreshold(BB); } @@ -3134,8 +3160,8 @@ void MachineBlockPlacement::findDuplicateCandidates( MachineBasicBlock *Fallthrough = nullptr; BranchProbability DefaultBranchProb = BranchProbability::getZero(); BlockFrequency BBDupThreshold(scaleThreshold(BB)); - SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end()); - SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end()); + SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors()); + SmallVector<MachineBasicBlock *, 8> Succs(BB->successors()); // Sort for highest frequency. auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) { @@ -3194,7 +3220,7 @@ void MachineBlockPlacement::findDuplicateCandidates( // it. But it can beneficially fall through to BB, and duplicate BB into other // predecessors. for (MachineBasicBlock *Pred : Preds) { - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + BlockFrequency PredFreq = getBlockCountOrFrequency(Pred); if (!TailDup.canTailDuplicate(BB, Pred)) { // BB can't be duplicated into Pred, but it is possible to be layout @@ -3243,6 +3269,15 @@ void MachineBlockPlacement::initDupThreshold() { if (!F->getFunction().hasProfileData()) return; + // We prefer to use prifile count. + uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); + if (HotThreshold != UINT64_MAX) { + UseProfileCount = true; + DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100; + return; + } + + // Profile count is not available, we can use block frequency instead. BlockFrequency MaxFreq = 0; for (MachineBasicBlock &MBB : *F) { BlockFrequency Freq = MBFI->getBlockFreq(&MBB); @@ -3250,10 +3285,9 @@ void MachineBlockPlacement::initDupThreshold() { MaxFreq = Freq; } - // FIXME: we may use profile count instead of frequency, - // and need more fine tuning. BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); DupThreshold = MaxFreq * ThresholdProb; + UseProfileCount = false; } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { @@ -3326,8 +3360,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { unsigned TailMergeSize = TailDupSize + 1; - BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI, PSI, TailMergeSize); + BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false, + *MBFI, *MBPI, PSI, TailMergeSize); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI, /*AfterPlacement=*/true)) { |