aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-06-13 19:31:46 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-06-13 19:37:19 +0000
commite8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch)
tree94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
parentbb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff)
parentb60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp84
1 files changed, 59 insertions, 25 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 783d22fafee9..048baa460e49 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty(
cl::init(2),
cl::Hidden);
+// Heuristic for tail duplication if profile count is used in cost model.
+static cl::opt<unsigned> TailDupProfilePercentThreshold(
+ "tail-dup-profile-percent-threshold",
+ cl::desc("If profile count information is used in tail duplication cost "
+ "model, the gained fall through number from tail duplication "
+ "should be at least this percent of hot count."),
+ cl::init(50), cl::Hidden);
+
// Heuristic for triangle chains.
static cl::opt<unsigned> TriangleChainCount(
"triangle-chain-count",
@@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Partial tail duplication threshold.
BlockFrequency DupThreshold;
+ /// True: use block profile count to compute tail duplication cost.
+ /// False: use block frequency to compute tail duplication cost.
+ bool UseProfileCount;
+
/// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif
+ /// Get block profile count or frequency according to UseProfileCount.
+ /// The return value is used to model tail duplication cost.
+ BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
+ if (UseProfileCount) {
+ auto Count = MBFI->getBlockProfileCount(BB);
+ if (Count)
+ return *Count;
+ else
+ return 0;
+ } else
+ return MBFI->getBlockFreq(BB);
+ }
+
/// Scale the DupThreshold according to basic block size.
BlockFrequency scaleThreshold(MachineBasicBlock *BB);
void initDupThreshold();
@@ -424,10 +449,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const BlockChain &Chain,
const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(
- const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
- const BlockChain &Chain, const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb, BranchProbability HotProb);
bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
BlockFilterSet *BlockFilter);
void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
@@ -1652,11 +1673,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(llvm::remove_if(WorkList,
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
- WorkList.end());
+ llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ });
if (WorkList.empty())
return nullptr;
@@ -2287,6 +2306,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (Bottom == ExitingBB)
return;
+ // The entry block should always be the first BB in a function.
+ if (Top->isEntryBlock())
+ return;
+
bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
// If the header has viable fallthrough, check whether the current loop
@@ -2361,6 +2384,11 @@ void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
auto RotationPos = LoopChain.end();
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+
+ // The entry block should always be the first BB in a function.
+ if (ChainHeaderBB->isEntryBlock())
+ return;
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2379,7 +2407,6 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
@@ -2516,10 +2543,14 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
MBPI->getEdgeProbability(LoopPred, L.getHeader());
for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ if (LoopBlockSet.count(LoopBB))
+ continue;
auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
continue;
- LoopBlockSet.insert(LoopBB);
+ BlockChain *Chain = BlockToChain[LoopBB];
+ for (MachineBasicBlock *ChainBB : *Chain)
+ LoopBlockSet.insert(ChainBB);
}
} else
LoopBlockSet.insert(L.block_begin(), L.block_end());
@@ -3011,12 +3042,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
- RemoveList.erase(
- llvm::remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {
- return BB == RemBB;
- }),
- RemoveList.end());
+ llvm::erase_value(RemoveList, RemBB);
}
// Handle the filter set
@@ -3120,7 +3146,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
// Compute the number of reduced taken branches if Pred falls through to BB
// instead of another successor. Then compare it with threshold.
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
BlockFrequency Gain = PredFreq * (BBProb - BestProb);
return Gain > scaleThreshold(BB);
}
@@ -3134,8 +3160,8 @@ void MachineBlockPlacement::findDuplicateCandidates(
MachineBasicBlock *Fallthrough = nullptr;
BranchProbability DefaultBranchProb = BranchProbability::getZero();
BlockFrequency BBDupThreshold(scaleThreshold(BB));
- SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end());
- SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
+ SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
// Sort for highest frequency.
auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
@@ -3194,7 +3220,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
// it. But it can beneficially fall through to BB, and duplicate BB into other
// predecessors.
for (MachineBasicBlock *Pred : Preds) {
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
if (!TailDup.canTailDuplicate(BB, Pred)) {
// BB can't be duplicated into Pred, but it is possible to be layout
@@ -3243,6 +3269,15 @@ void MachineBlockPlacement::initDupThreshold() {
if (!F->getFunction().hasProfileData())
return;
+ // We prefer to use prifile count.
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
+ if (HotThreshold != UINT64_MAX) {
+ UseProfileCount = true;
+ DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ return;
+ }
+
+ // Profile count is not available, we can use block frequency instead.
BlockFrequency MaxFreq = 0;
for (MachineBasicBlock &MBB : *F) {
BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
@@ -3250,10 +3285,9 @@ void MachineBlockPlacement::initDupThreshold() {
MaxFreq = Freq;
}
- // FIXME: we may use profile count instead of frequency,
- // and need more fine tuning.
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
DupThreshold = MaxFreq * ThresholdProb;
+ UseProfileCount = false;
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
@@ -3326,8 +3360,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
- BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, PSI, TailMergeSize);
+ BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
+ *MBFI, *MBPI, PSI, TailMergeSize);
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
/*AfterPlacement=*/true)) {