diff options
Diffstat (limited to 'lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r-- | lib/CodeGen/MachineBlockPlacement.cpp | 132 |
1 files changed, 88 insertions, 44 deletions
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 447ad629885b..4ce689607730 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1,4 +1,4 @@ -//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===// // // The LLVM Compiler Infrastructure // @@ -26,7 +26,10 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -39,19 +42,33 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> -#include <functional> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "block-placement" @@ -91,11 +108,17 @@ static cl::opt<unsigned> LoopToColdBlockRatio( "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden); +static cl::opt<bool> ForceLoopColdBlock( + "force-loop-cold-block", + cl::desc("Force outlining cold blocks from loops."), + cl::init(false), cl::Hidden); + static cl::opt<bool> PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden); + static cl::opt<bool> ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " @@ -138,7 +161,7 @@ static cl::opt<unsigned> TailDupPlacementAggressiveThreshold( "tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " - "have a threshold that won't conflict."), cl::init(3), + "have a threshold that won't conflict."), cl::init(4), cl::Hidden); // Heuristic for tail duplication. @@ -172,12 +195,12 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI; extern cl::opt<std::string> ViewBlockFreqFuncName; namespace { + class BlockChain; + /// \brief Type for our function-wide basic block -> block chain mapping. -typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType; -} +using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>; -namespace { /// \brief A chain of blocks which will be laid out contiguously. /// /// This is the datastructure representing a chain of consecutive blocks that @@ -211,14 +234,14 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } /// \brief Iterator over blocks within the chain. - typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; - typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator; + using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator; + using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator; /// \brief Beginning of blocks within the chain. iterator begin() { return Blocks.begin(); } @@ -286,14 +309,12 @@ public: /// /// Note: This field is reinitialized multiple times - once for each loop, /// and then once for the function as a whole. - unsigned UnscheduledPredecessors; + unsigned UnscheduledPredecessors = 0; }; -} -namespace { class MachineBlockPlacement : public MachineFunctionPass { - /// \brief A typedef for a block filter set. - typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet; + /// \brief A type for a block filter set. + using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>; /// Pair struct containing basic block and taildup profitiability struct BlockAndTailDupResult { @@ -428,6 +449,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void fillWorkLists(const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds, const BlockFilterSet *BlockFilter); + void buildChain(const MachineBasicBlock *BB, BlockChain &Chain, BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop( @@ -454,31 +476,37 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *BB, const MachineBasicBlock *Succ, BranchProbability AdjustedSumProb, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Check for a trellis layout. bool isTrellis(const MachineBasicBlock *BB, const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Get the best successor given a trellis layout. BlockAndTailDupResult getBestTrellisSuccessor( const MachineBasicBlock *BB, const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs, BranchProbability AdjustedSumProb, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Get the best pair of non-conflicting edges. static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges( const MachineBasicBlock *BB, MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges); + /// Returns true if a block can tail duplicate into all unplaced /// predecessors. Filters based on loop. bool canTailDuplicateUnplacedPreds( const MachineBasicBlock *BB, MachineBasicBlock *Succ, const BlockChain &Chain, const BlockFilterSet *BlockFilter); + /// Find chains of triangles to tail-duplicate where a global analysis works, /// but a local analysis would not find them. void precomputeTriangleChains(); public: static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } @@ -495,10 +523,13 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char MachineBlockPlacement::ID = 0; + char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; + INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) @@ -515,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, static std::string getBlockName(const MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber(); + OS << printMBBReference(*BB); OS << " ('" << BB->getName() << "')"; OS.flush(); return Result; @@ -1094,6 +1125,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( void MachineBlockPlacement::precomputeTriangleChains() { struct TriangleChain { std::vector<MachineBasicBlock *> Edges; + TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst) : Edges({src, dst}) {} @@ -1203,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { // When profile is available, we need to handle the triangle-shape CFG. static BranchProbability getLayoutSuccessorProbThreshold( const MachineBasicBlock *BB) { - if (!BB->getParent()->getFunction()->getEntryCount()) + if (!BB->getParent()->getFunction().getEntryCount()) return BranchProbability(StaticLikelyProb, 100); if (BB->succ_size() == 2) { const MachineBasicBlock *Succ1 = *BB->succ_begin(); @@ -1534,10 +1566,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. - WorkList.erase(remove_if(WorkList, - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + WorkList.erase(llvm::remove_if(WorkList, + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); if (WorkList.empty()) @@ -1659,7 +1691,7 @@ void MachineBlockPlacement::buildChain( const MachineBasicBlock *LoopHeaderBB = HeadBB; markChainSuccessors(Chain, LoopHeaderBB, BlockFilter); MachineBasicBlock *BB = *std::prev(Chain.end()); - for (;;) { + while (true) { assert(BB && "null block found at end of chain in loop."); assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop."); assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain."); @@ -1737,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return L.getHeader(); // Check that the header hasn't been fused with a preheader block due to @@ -1945,7 +1977,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, } } - BlockChain::iterator ExitIt = find(LoopChain, ExitingBB); + BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -1999,7 +2031,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto HeaderBB = L.getHeader(); - auto HeaderIter = find(LoopChain, HeaderBB); + auto HeaderIter = llvm::find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2146,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { // will be merged into the first outer loop chain for which this block is not // cold anymore. This needs precise profile data and we only do this when // profile data is available. - if (F->getFunction()->getEntryCount()) { + if (F->getFunction().getEntryCount() || ForceLoopColdBlock) { BlockFrequency LoopFreq(0); for (auto LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) @@ -2188,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // for better layout. bool RotateLoopWithProfile = ForcePreciseRotationCost || - (PreciseRotationCost && F->getFunction()->getEntryCount()); + (PreciseRotationCost && F->getFunction().getEntryCount()); // First check to see if there is an obviously preferable top block for the // loop. This will default to the header, but may end up as one of the @@ -2201,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. + // + // Loops are processed innermost to uttermost, make sure we clear + // PreferredLoopExit before processing a new loop. + PreferredLoopExit = nullptr; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); @@ -2272,7 +2308,7 @@ void MachineBlockPlacement::buildCFGChains() { new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. - for (;;) { + while (true) { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) @@ -2313,7 +2349,7 @@ void MachineBlockPlacement::buildCFGChains() { buildChain(&F->front(), FunctionChain); #ifndef NDEBUG - typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>; #endif DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -2449,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) @@ -2545,8 +2581,8 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( // duplicated from here on are already scheduled. // Note that DuplicatedToLPred always implies Removed. while (DuplicatedToLPred) { - assert (Removed && "Block must have been removed to be duplicated into its " - "layout predecessor."); + assert(Removed && "Block must have been removed to be duplicated into its " + "layout predecessor."); MachineBasicBlock *DupBB, *DupPred; // The removal callback causes Chain.end() to be updated when a block is // removed. On the first pass through the loop, the chain end should be the @@ -2629,8 +2665,10 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( if (RemBB->isEHPad()) RemoveList = EHPadWorkList; RemoveList.erase( - remove_if(RemoveList, - [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}), + llvm::remove_if(RemoveList, + [RemBB](MachineBasicBlock *BB) { + return BB == RemBB; + }), RemoveList.end()); } @@ -2648,7 +2686,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( << getBlockName(RemBB) << "\n"); }; auto RemovalCallbackRef = - llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback); + function_ref<void(MachineBasicBlock*)>(RemovalCallback); SmallVector<MachineBasicBlock *, 8> DuplicatedPreds; bool IsSimple = TailDup.isSimpleBB(BB); @@ -2677,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Check for single-block functions and skip them. @@ -2722,9 +2760,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (TailDupPlacement) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - if (MF.getFunction()->optForSize()) + if (MF.getFunction().optForSize()) TailDupSize = 1; - TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); + bool PreRegAlloc = false; + TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -2778,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || - F->getFunction()->getName().equals(ViewBlockFreqFuncName))) { + F->getFunction().getName().equals(ViewBlockFreqFuncName))) { MBFI->view("MBP." + MF.getName(), false); } @@ -2789,6 +2828,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } namespace { + /// \brief A pass to compute block placement statistics. /// /// A separate pass to compute interesting statistics for evaluating block @@ -2804,6 +2844,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } @@ -2817,10 +2858,13 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char MachineBlockPlacementStats::ID = 0; + char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; + INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", "Basic Block Placement Stats", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) |