summaryrefslogtreecommitdiff
path: root/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-12-18 20:10:56 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-12-18 20:10:56 +0000
commit044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/CodeGen/MachineBlockPlacement.cpp
parenteb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Notes
Diffstat (limited to 'lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp132
1 files changed, 88 insertions, 44 deletions
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 447ad629885b..4ce689607730 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,4 +1,4 @@
-//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,7 +26,10 @@
//===----------------------------------------------------------------------===//
#include "BranchFolding.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -39,19 +42,33 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
-#include <functional>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "block-placement"
@@ -91,11 +108,17 @@ static cl::opt<unsigned> LoopToColdBlockRatio(
"(frequency of block) is greater than this ratio"),
cl::init(5), cl::Hidden);
+static cl::opt<bool> ForceLoopColdBlock(
+ "force-loop-cold-block",
+ cl::desc("Force outlining cold blocks from loops."),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool>
PreciseRotationCost("precise-rotation-cost",
cl::desc("Model the cost of loop rotation more "
"precisely by using profile data."),
cl::init(false), cl::Hidden);
+
static cl::opt<bool>
ForcePreciseRotationCost("force-precise-rotation-cost",
cl::desc("Force the use of precise cost "
@@ -138,7 +161,7 @@ static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
"tail-dup-placement-aggressive-threshold",
cl::desc("Instruction cutoff for aggressive tail duplication during "
"layout. Used at -O3. Tail merging during layout is forced to "
- "have a threshold that won't conflict."), cl::init(3),
+ "have a threshold that won't conflict."), cl::init(4),
cl::Hidden);
// Heuristic for tail duplication.
@@ -172,12 +195,12 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
extern cl::opt<std::string> ViewBlockFreqFuncName;
namespace {
+
class BlockChain;
+
/// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
-}
+using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
-namespace {
/// \brief A chain of blocks which will be laid out contiguously.
///
/// This is the datastructure representing a chain of consecutive blocks that
@@ -211,14 +234,14 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
/// \brief Iterator over blocks within the chain.
- typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
- typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
+ using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
+ using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
/// \brief Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
@@ -286,14 +309,12 @@ public:
///
/// Note: This field is reinitialized multiple times - once for each loop,
/// and then once for the function as a whole.
- unsigned UnscheduledPredecessors;
+ unsigned UnscheduledPredecessors = 0;
};
-}
-namespace {
class MachineBlockPlacement : public MachineFunctionPass {
- /// \brief A typedef for a block filter set.
- typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+ /// \brief A type for a block filter set.
+ using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
/// Pair struct containing basic block and taildup profitiability
struct BlockAndTailDupResult {
@@ -428,6 +449,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
void fillWorkLists(const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
+
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(
@@ -454,31 +476,37 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
BranchProbability AdjustedSumProb,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Check for a trellis layout.
bool isTrellis(const MachineBasicBlock *BB,
const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Get the best successor given a trellis layout.
BlockAndTailDupResult getBestTrellisSuccessor(
const MachineBasicBlock *BB,
const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
BranchProbability AdjustedSumProb, const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
+
/// Get the best pair of non-conflicting edges.
static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
const MachineBasicBlock *BB,
MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+
/// Returns true if a block can tail duplicate into all unplaced
/// predecessors. Filters based on loop.
bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB, MachineBasicBlock *Succ,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Find chains of triangles to tail-duplicate where a global analysis works,
/// but a local analysis would not find them.
void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
+
MachineBlockPlacement() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
}
@@ -495,10 +523,13 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char MachineBlockPlacement::ID = 0;
+
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
@@ -515,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
static std::string getBlockName(const MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber();
+ OS << printMBBReference(*BB);
OS << " ('" << BB->getName() << "')";
OS.flush();
return Result;
@@ -1094,6 +1125,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
void MachineBlockPlacement::precomputeTriangleChains() {
struct TriangleChain {
std::vector<MachineBasicBlock *> Edges;
+
TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
: Edges({src, dst}) {}
@@ -1203,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
const MachineBasicBlock *BB) {
- if (!BB->getParent()->getFunction()->getEntryCount())
+ if (!BB->getParent()->getFunction().getEntryCount())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
const MachineBasicBlock *Succ1 = *BB->succ_begin();
@@ -1534,10 +1566,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(remove_if(WorkList,
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ WorkList.erase(llvm::remove_if(WorkList,
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
if (WorkList.empty())
@@ -1659,7 +1691,7 @@ void MachineBlockPlacement::buildChain(
const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
MachineBasicBlock *BB = *std::prev(Chain.end());
- for (;;) {
+ while (true) {
assert(BB && "null block found at end of chain in loop.");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
@@ -1737,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
- if (F->getFunction()->optForSize())
+ if (F->getFunction().optForSize())
return L.getHeader();
// Check that the header hasn't been fused with a preheader block due to
@@ -1945,7 +1977,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
}
}
- BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
+ BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -1999,7 +2031,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
- auto HeaderIter = find(LoopChain, HeaderBB);
+ auto HeaderIter = llvm::find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2146,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F->getFunction()->getEntryCount()) {
+ if (F->getFunction().getEntryCount() || ForceLoopColdBlock) {
BlockFrequency LoopFreq(0);
for (auto LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -2188,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// for better layout.
bool RotateLoopWithProfile =
ForcePreciseRotationCost ||
- (PreciseRotationCost && F->getFunction()->getEntryCount());
+ (PreciseRotationCost && F->getFunction().getEntryCount());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
@@ -2201,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
+ //
+ // Loops are processed innermost to uttermost, make sure we clear
+ // PreferredLoopExit before processing a new loop.
+ PreferredLoopExit = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
@@ -2272,7 +2308,7 @@ void MachineBlockPlacement::buildCFGChains() {
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
- for (;;) {
+ while (true) {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
@@ -2313,7 +2349,7 @@ void MachineBlockPlacement::buildCFGChains() {
buildChain(&F->front(), FunctionChain);
#ifndef NDEBUG
- typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
#endif
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -2449,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction()->optForSize())
+ if (F->getFunction().optForSize())
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
@@ -2545,8 +2581,8 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
// duplicated from here on are already scheduled.
// Note that DuplicatedToLPred always implies Removed.
while (DuplicatedToLPred) {
- assert (Removed && "Block must have been removed to be duplicated into its "
- "layout predecessor.");
+ assert(Removed && "Block must have been removed to be duplicated into its "
+ "layout predecessor.");
MachineBasicBlock *DupBB, *DupPred;
// The removal callback causes Chain.end() to be updated when a block is
// removed. On the first pass through the loop, the chain end should be the
@@ -2629,8 +2665,10 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
RemoveList.erase(
- remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+ llvm::remove_if(RemoveList,
+ [RemBB](MachineBasicBlock *BB) {
+ return BB == RemBB;
+ }),
RemoveList.end());
}
@@ -2648,7 +2686,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
<< getBlockName(RemBB) << "\n");
};
auto RemovalCallbackRef =
- llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+ function_ref<void(MachineBasicBlock*)>(RemovalCallback);
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
bool IsSimple = TailDup.isSimpleBB(BB);
@@ -2677,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
// Check for single-block functions and skip them.
@@ -2722,9 +2760,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (TailDupPlacement) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction()->optForSize())
+ if (MF.getFunction().optForSize())
TailDupSize = 1;
- TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ bool PreRegAlloc = false;
+ TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -2778,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
- F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+ F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
MBFI->view("MBP." + MF.getName(), false);
}
@@ -2789,6 +2828,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
namespace {
+
/// \brief A pass to compute block placement statistics.
///
/// A separate pass to compute interesting statistics for evaluating block
@@ -2804,6 +2844,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
+
MachineBlockPlacementStats() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
}
@@ -2817,10 +2858,13 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char MachineBlockPlacementStats::ID = 0;
+
char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+
INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
"Basic Block Placement Stats", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)