summaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp84
1 files changed, 76 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index ac19bc0bd8ea..30b98ec88c24 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -48,6 +50,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
@@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A handle to the post dominator tree.
MachinePostDominatorTree *MPDT;
+ ProfileSummaryInfo *PSI;
+
/// Duplicator used to duplicate tails during placement.
///
/// Placement decisions can open up new tail duplication opportunities, but
@@ -537,6 +542,7 @@ public:
if (TailDupPlacement)
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
@@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (!shouldTailDuplicate(Succ))
return false;
+ // The result of canTailDuplicate.
+ bool Duplicate = true;
+ // Number of possible duplication.
+ unsigned int NumDup = 0;
+
// For CFG checking.
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
BB->succ_end());
@@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// to trellises created by tail-duplication, so we just look for the
// CFG.
continue;
- return false;
+ Duplicate = false;
+ continue;
}
+ NumDup++;
}
+
+ // No possible duplication in current filter set.
+ if (NumDup == 0)
+ return false;
+
+ // This is mainly for function exit BB.
+ // The integrated tail duplication is really designed for increasing
+ // fallthrough from predecessors from Succ to its successors. We may need
+ // other machanism to handle different cases.
+ if (Succ->succ_size() == 0)
+ return true;
+
+ // Plus the already placed predecessor.
+ NumDup++;
+
+ // If the duplication candidate has more unplaced predecessors than
+ // successors, the extra duplication can't bring more fallthrough.
+ //
+ // Pred1 Pred2 Pred3
+ // \ | /
+ // \ | /
+ // \ | /
+ // Dup
+ // / \
+ // / \
+ // Succ1 Succ2
+ //
+ // In this example Dup has 2 successors and 3 predecessors, duplication of Dup
+ // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
+ // but the duplication into Pred3 can't increase fallthrough.
+ //
+ // A small number of extra duplication may not hurt too much. We need a better
+ // heuristic to handle it.
+ //
+ // FIXME: we should selectively tail duplicate a BB into part of its
+ // predecessors.
+ if ((NumDup > Succ->succ_size()) || !Duplicate)
+ return false;
+
return true;
}
@@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
- if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (Pred == Succ || PredChain == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain ||
+ PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
// This check is redundant except for look ahead. This function is
// called for lookahead by isProfitableToTailDup when BB hasn't been
// placed yet.
@@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock* BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
if (allowTailDupPlacement())
- ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
+ ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
+ Chain,
+ BlockFilter));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
- if (F->getFunction().hasOptSize())
+ bool OptForSize = F->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI,
+ &MBFI->getMBFI());
+ if (OptForSize)
return L.getHeader();
MachineBasicBlock *OldTop = nullptr;
@@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {
if (Freq < (LoopHeaderFreq * ColdProb))
continue;
+ // If the global profiles indicates so, don't align it.
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ !TLI->alignLoopsWithOptSize())
+ continue;
+
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
MachineBasicBlock *LayoutPred =
@@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
@@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction().hasOptSize())
+ bool OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
+ if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
+ TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ /* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, TailMergeSize);
+ *MBPI, PSI, TailMergeSize);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),