diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:12:03 +0000 |
| commit | c9157d925c489f07ba9c0b2ce47e5149b75969a5 (patch) | |
| tree | 08bc4a3d9cad3f9ebffa558ddf140b9d9257b219 /contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | |
| parent | 2a66844f606a35d68ad8a8061f4bea204274b3bc (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 210 |
1 files changed, 117 insertions, 93 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 912e9ec993e3..a7a839688ddf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -444,9 +444,9 @@ class MachineBlockPlacement : public MachineFunctionPass { if (UseProfileCount) { auto Count = MBFI->getBlockProfileCount(BB); if (Count) - return *Count; + return BlockFrequency(*Count); else - return 0; + return BlockFrequency(0); } else return MBFI->getBlockFreq(BB); } @@ -795,10 +795,10 @@ bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) { /// penalty is less than 100% /// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere. static bool greaterWithBias(BlockFrequency A, BlockFrequency B, - uint64_t EntryFreq) { + BlockFrequency EntryFreq) { BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); BlockFrequency Gain = A - B; - return (Gain / ThresholdProb).getFrequency() >= EntryFreq; + return (Gain / ThresholdProb) >= EntryFreq; } /// Check the edge frequencies to see if tail duplication will increase @@ -843,7 +843,7 @@ bool MachineBlockPlacement::isProfitableToTailDup( auto SuccFreq = MBFI->getBlockFreq(Succ); BlockFrequency P = BBFreq * PProb; BlockFrequency Qout = BBFreq * QProb; - uint64_t EntryFreq = MBFI->getEntryFreq(); + BlockFrequency EntryFreq = MBFI->getEntryFreq(); // If there are no more successors, it is profitable to copy, as it strictly // increases fallthrough. if (SuccSuccs.size() == 0) @@ -1729,8 +1729,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); - LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> " + << printBlockFreq(MBFI->getMBFI(), CandidateFreq) + << " (freq)\n"); // For ehpad, we layout the least probable first as to avoid jumping back // from least probable landingpads to more probable ones. @@ -1927,7 +1928,7 @@ BlockFrequency MachineBlockPlacement::TopFallThroughFreq( const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) { - BlockFrequency MaxFreq = 0; + BlockFrequency MaxFreq = BlockFrequency(0); for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && @@ -1986,7 +1987,7 @@ MachineBlockPlacement::FallThroughGains( const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) { BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet); - BlockFrequency FallThrough2Exit = 0; + BlockFrequency FallThrough2Exit = BlockFrequency(0); if (ExitBB) FallThrough2Exit = MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB); @@ -1994,58 +1995,58 @@ MachineBlockPlacement::FallThroughGains( MBPI->getEdgeProbability(NewTop, OldTop); // Find the best Pred of NewTop. - MachineBasicBlock *BestPred = nullptr; - BlockFrequency FallThroughFromPred = 0; - for (MachineBasicBlock *Pred : NewTop->predecessors()) { - if (!LoopBlockSet.count(Pred)) - continue; - BlockChain *PredChain = BlockToChain[Pred]; - if (!PredChain || Pred == *std::prev(PredChain->end())) { - BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * - MBPI->getEdgeProbability(Pred, NewTop); - if (EdgeFreq > FallThroughFromPred) { - FallThroughFromPred = EdgeFreq; - BestPred = Pred; - } - } - } - - // If NewTop is not placed after Pred, another successor can be placed - // after Pred. - BlockFrequency NewFreq = 0; - if (BestPred) { - for (MachineBasicBlock *Succ : BestPred->successors()) { - if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ)) - continue; - if (ComputedEdges.contains(Succ)) - continue; - BlockChain *SuccChain = BlockToChain[Succ]; - if ((SuccChain && (Succ != *SuccChain->begin())) || - (SuccChain == BlockToChain[BestPred])) - continue; - BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) * - MBPI->getEdgeProbability(BestPred, Succ); - if (EdgeFreq > NewFreq) - NewFreq = EdgeFreq; - } - BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) * - MBPI->getEdgeProbability(BestPred, NewTop); - if (NewFreq > OrigEdgeFreq) { - // If NewTop is not the best successor of Pred, then Pred doesn't - // fallthrough to NewTop. So there is no FallThroughFromPred and - // NewFreq. - NewFreq = 0; - FallThroughFromPred = 0; - } - } - - BlockFrequency Result = 0; - BlockFrequency Gains = BackEdgeFreq + NewFreq; - BlockFrequency Lost = FallThrough2Top + FallThrough2Exit + - FallThroughFromPred; - if (Gains > Lost) - Result = Gains - Lost; - return Result; + MachineBasicBlock *BestPred = nullptr; + BlockFrequency FallThroughFromPred = BlockFrequency(0); + for (MachineBasicBlock *Pred : NewTop->predecessors()) { + if (!LoopBlockSet.count(Pred)) + continue; + BlockChain *PredChain = BlockToChain[Pred]; + if (!PredChain || Pred == *std::prev(PredChain->end())) { + BlockFrequency EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, NewTop); + if (EdgeFreq > FallThroughFromPred) { + FallThroughFromPred = EdgeFreq; + BestPred = Pred; + } + } + } + + // If NewTop is not placed after Pred, another successor can be placed + // after Pred. + BlockFrequency NewFreq = BlockFrequency(0); + if (BestPred) { + for (MachineBasicBlock *Succ : BestPred->successors()) { + if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ)) + continue; + if (ComputedEdges.contains(Succ)) + continue; + BlockChain *SuccChain = BlockToChain[Succ]; + if ((SuccChain && (Succ != *SuccChain->begin())) || + (SuccChain == BlockToChain[BestPred])) + continue; + BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, Succ); + if (EdgeFreq > NewFreq) + NewFreq = EdgeFreq; + } + BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, NewTop); + if (NewFreq > OrigEdgeFreq) { + // If NewTop is not the best successor of Pred, then Pred doesn't + // fallthrough to NewTop. So there is no FallThroughFromPred and + // NewFreq. + NewFreq = BlockFrequency(0); + FallThroughFromPred = BlockFrequency(0); + } + } + + BlockFrequency Result = BlockFrequency(0); + BlockFrequency Gains = BackEdgeFreq + NewFreq; + BlockFrequency Lost = + FallThrough2Top + FallThrough2Exit + FallThroughFromPred; + if (Gains > Lost) + Result = Gains - Lost; + return Result; } /// Helper function of findBestLoopTop. Find the best loop top block @@ -2087,7 +2088,7 @@ MachineBlockPlacement::findBestLoopTopHelper( LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop) << "\n"); - BlockFrequency BestGains = 0; + BlockFrequency BestGains = BlockFrequency(0); MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock *Pred : OldTop->predecessors()) { if (!LoopBlockSet.count(Pred)) @@ -2095,8 +2096,8 @@ MachineBlockPlacement::findBestLoopTopHelper( if (Pred == L.getHeader()) continue; LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has " - << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + << Pred->succ_size() << " successors, " + << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n"); if (Pred->succ_size() > 2) continue; @@ -2112,8 +2113,9 @@ MachineBlockPlacement::findBestLoopTopHelper( BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet); - if ((Gains > 0) && (Gains > BestGains || - ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { + if ((Gains > BlockFrequency(0)) && + (Gains > BestGains || + ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { BestPred = Pred; BestGains = Gains; } @@ -2239,10 +2241,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, } BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; - LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + LLVM_DEBUG( + dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (" + << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking @@ -2425,14 +2427,14 @@ void MachineBlockPlacement::rotateLoopWithProfile( if (ChainHeaderBB->isEntryBlock()) return; - BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); + BlockFrequency SmallestRotationCost = BlockFrequency::max(); // A utility lambda that scales up a block frequency by dividing it by a // branch probability which is the reciprocal of the scale. auto ScaleBlockFrequency = [](BlockFrequency Freq, unsigned Scale) -> BlockFrequency { if (Scale == 0) - return 0; + return BlockFrequency(0); // Use operator / between BlockFrequency and BranchProbability to implement // saturating multiplication. return Freq / BranchProbability(1, Scale); @@ -2492,7 +2494,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( auto TailBB = *TailIter; // Calculate the cost by putting this BB to the top. - BlockFrequency Cost = 0; + BlockFrequency Cost = BlockFrequency(0); // If the current BB is the loop header, we need to take into account the // cost of the missed fall through edge from outside of the loop to the @@ -2523,8 +2525,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( if (TailBB->isSuccessor(*Iter)) { auto TailBBFreq = MBFI->getBlockFreq(TailBB); if (TailBB->succ_size() == 1) - Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(), - MisfetchCost + JumpInstCost); + Cost += ScaleBlockFrequency(TailBBFreq, MisfetchCost + JumpInstCost); else if (TailBB->succ_size() == 2) { auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter); auto TailToHeadFreq = TailBBFreq * TailToHeadProb; @@ -2537,8 +2538,8 @@ void MachineBlockPlacement::rotateLoopWithProfile( } LLVM_DEBUG(dbgs() << "The cost of loop rotation by making " - << getBlockName(*Iter) - << " to the top: " << Cost.getFrequency() << "\n"); + << getBlockName(*Iter) << " to the top: " + << printBlockFreq(MBFI->getMBFI(), Cost) << "\n"); if (Cost < SmallestRotationCost) { SmallestRotationCost = Cost; @@ -2918,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() { if (!L) continue; - const Align Align = TLI->getPrefLoopAlignment(L); - if (Align == 1) + const Align TLIAlign = TLI->getPrefLoopAlignment(L); + unsigned MDAlign = 1; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I)); + if (MD == nullptr) + continue; + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (S == nullptr) + continue; + if (S->getString() == "llvm.loop.align") { + assert(MD->getNumOperands() == 2 && + "per-loop align metadata should have two operands."); + MDAlign = + mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue(); + assert(MDAlign >= 1 && "per-loop align value must be positive."); + } + } + } + + // Use max of the TLIAlign and MDAlign + const Align LoopAlign = std::max(TLIAlign, Align(MDAlign)); + if (LoopAlign == 1) continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space @@ -2958,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() { // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. if (!LayoutPred->isSuccessor(ChainBB)) { - ChainBB->setAlignment(Align); + ChainBB->setAlignment(LoopAlign); DetermineMaxAlignmentPadding(); continue; } @@ -2971,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() { MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; if (LayoutEdgeFreq <= (Freq * ColdProb)) { - ChainBB->setAlignment(Align); + ChainBB->setAlignment(LoopAlign); DetermineMaxAlignmentPadding(); } } @@ -3090,7 +3113,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList; if (RemBB->isEHPad()) RemoveList = EHPadWorkList; - llvm::erase_value(RemoveList, RemBB); + llvm::erase(RemoveList, RemBB); } // Handle the filter set @@ -3159,7 +3182,7 @@ static uint64_t countMBBInstruction(MachineBasicBlock *MBB) { // So we should scale the threshold accordingly. But the instruction size is not // available on all targets, so we use the number of instructions instead. BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) { - return DupThreshold.getFrequency() * countMBBInstruction(BB); + return BlockFrequency(DupThreshold.getFrequency() * countMBBInstruction(BB)); } // Returns true if BB is Pred's best successor. @@ -3313,7 +3336,7 @@ void MachineBlockPlacement::findDuplicateCandidates( } void MachineBlockPlacement::initDupThreshold() { - DupThreshold = 0; + DupThreshold = BlockFrequency(0); if (!F->getFunction().hasProfileData()) return; @@ -3321,12 +3344,13 @@ void MachineBlockPlacement::initDupThreshold() { uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); if (HotThreshold != UINT64_MAX) { UseProfileCount = true; - DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100; + DupThreshold = + BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100); return; } // Profile count is not available, we can use block frequency instead. - BlockFrequency MaxFreq = 0; + BlockFrequency MaxFreq = BlockFrequency(0); for (MachineBasicBlock &MBB : *F) { BlockFrequency Freq = MBFI->getBlockFreq(&MBB); if (Freq > MaxFreq) @@ -3334,7 +3358,7 @@ void MachineBlockPlacement::initDupThreshold() { } BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); - DupThreshold = MaxFreq * ThresholdProb; + DupThreshold = BlockFrequency(MaxFreq * ThresholdProb); UseProfileCount = false; } @@ -3376,7 +3400,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); // For aggressive optimization, we can adjust some thresholds to be less // conservative. - if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) { + if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { // At O3 we should be more willing to copy blocks for tail duplication. This // increases size pressure, so we only do it at O3 // Do this unless only the regular threshold is explicitly set. @@ -3388,7 +3412,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // If there's no threshold provided through options, query the target // information for a threshold instead. if (TailDupPlacementThreshold.getNumOccurrences() == 0 && - (PassConfig->getOptLevel() < CodeGenOpt::Aggressive || + (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); @@ -3501,7 +3525,7 @@ void MachineBlockPlacement::applyExtTsp() { auto BlockSizes = std::vector<uint64_t>(F->size()); auto BlockCounts = std::vector<uint64_t>(F->size()); - std::vector<EdgeCountT> JumpCounts; + std::vector<codelayout::EdgeCount> JumpCounts; for (MachineBasicBlock &MBB : *F) { // Getting the block frequency. BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); @@ -3520,8 +3544,8 @@ void MachineBlockPlacement::applyExtTsp() { for (MachineBasicBlock *Succ : MBB.successors()) { auto EP = MBPI->getEdgeProbability(&MBB, Succ); BlockFrequency JumpFreq = BlockFreq * EP; - auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]); - JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency())); + JumpCounts.push_back( + {BlockIndex[&MBB], BlockIndex[Succ], JumpFreq.getFrequency()}); } } @@ -3534,7 +3558,7 @@ void MachineBlockPlacement::applyExtTsp() { calcExtTspScore(BlockSizes, BlockCounts, JumpCounts))); // Run the layout algorithm. - auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); std::vector<const MachineBasicBlock *> NewBlockOrder; NewBlockOrder.reserve(F->size()); for (uint64_t Node : NewOrder) { |
