diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/MachineBlockPlacement.cpp | 84 | 
1 files changed, 76 insertions, 8 deletions
| diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index ac19bc0bd8ea..30b98ec88c24 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"  #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -41,6 +42,7 @@  #include "llvm/CodeGen/MachineLoopInfo.h"  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineSizeOpts.h"  #include "llvm/CodeGen/TailDuplicator.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -48,6 +50,7 @@  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h"  #include "llvm/Pass.h"  #include "llvm/Support/Allocator.h"  #include "llvm/Support/BlockFrequency.h" @@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {    /// A handle to the post dominator tree.    MachinePostDominatorTree *MPDT; +  ProfileSummaryInfo *PSI; +    /// Duplicator used to duplicate tails during placement.    ///    /// Placement decisions can open up new tail duplication opportunities, but @@ -537,6 +542,7 @@ public:      if (TailDupPlacement)        AU.addRequired<MachinePostDominatorTree>();      AU.addRequired<MachineLoopInfo>(); +    AU.addRequired<ProfileSummaryInfoWrapperPass>();      AU.addRequired<TargetPassConfig>();      MachineFunctionPass::getAnalysisUsage(AU);    } @@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)  INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)  INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)  INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,                      "Branch Probability Basic Block Placement", false, false) @@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(    if (!shouldTailDuplicate(Succ))      return false; +  // The result of canTailDuplicate. +  bool Duplicate = true; +  // Number of possible duplication. +  unsigned int NumDup = 0; +    // For CFG checking.    SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),                                                         BB->succ_end()); @@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(          // to trellises created by tail-duplication, so we just look for the          // CFG.          continue; -      return false; +      Duplicate = false; +      continue;      } +    NumDup++;    } + +  // No possible duplication in current filter set. +  if (NumDup == 0) +    return false; + +  // This is mainly for function exit BB. +  // The integrated tail duplication is really designed for increasing +  // fallthrough from predecessors from Succ to its successors. We may need +  // other machanism to handle different cases. +  if (Succ->succ_size() == 0) +    return true; + +  // Plus the already placed predecessor. +  NumDup++; + +  // If the duplication candidate has more unplaced predecessors than +  // successors, the extra duplication can't bring more fallthrough. +  // +  //     Pred1 Pred2 Pred3 +  //         \   |   / +  //          \  |  / +  //           \ | / +  //            Dup +  //            / \ +  //           /   \ +  //       Succ1  Succ2 +  // +  // In this example Dup has 2 successors and 3 predecessors, duplication of Dup +  // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2, +  // but the duplication into Pred3 can't increase fallthrough. +  // +  // A small number of extra duplication may not hurt too much. We need a better +  // heuristic to handle it. +  // +  // FIXME: we should selectively tail duplicate a BB into part of its +  // predecessors. +  if ((NumDup > Succ->succ_size()) || !Duplicate) +    return false; +    return true;  } @@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(    bool BadCFGConflict = false;    for (MachineBasicBlock *Pred : Succ->predecessors()) { -    if (Pred == Succ || BlockToChain[Pred] == &SuccChain || +    BlockChain *PredChain = BlockToChain[Pred]; +    if (Pred == Succ || PredChain == &SuccChain ||          (BlockFilter && !BlockFilter->count(Pred)) || -        BlockToChain[Pred] == &Chain || +        PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||          // This check is redundant except for look ahead. This function is          // called for lookahead by isProfitableToTailDup when BB hasn't been          // placed yet. @@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain(      MachineBasicBlock* BestSucc = Result.BB;      bool ShouldTailDup = Result.ShouldTailDup;      if (allowTailDupPlacement()) -      ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); +      ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, +                                                                  Chain, +                                                                  BlockFilter));      // If an immediate successor isn't available, look for the best viable      // block among those we've identified as not violating the loop's CFG at @@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,    // i.e. when the layout predecessor does not fallthrough to the loop header.    // In practice this never happens though: there always seems to be a preheader    // that can fallthrough and that is also placed before the header. -  if (F->getFunction().hasOptSize()) +  bool OptForSize = F->getFunction().hasOptSize() || +                    llvm::shouldOptimizeForSize(L.getHeader(), PSI, +                                                &MBFI->getMBFI()); +  if (OptForSize)      return L.getHeader();    MachineBasicBlock *OldTop = nullptr; @@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {      if (Freq < (LoopHeaderFreq * ColdProb))        continue; +    // If the global profiles indicates so, don't align it. +    if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && +        !TLI->alignLoopsWithOptSize()) +      continue; +      // Check for the existence of a non-layout predecessor which would benefit      // from aligning this block.      MachineBasicBlock *LayoutPred = @@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {    TII = MF.getSubtarget().getInstrInfo();    TLI = MF.getSubtarget().getTargetLowering();    MPDT = nullptr; +  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();    // Initialize PreferredLoopExit to nullptr here since it may never be set if    // there are no MachineLoops. @@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {    if (allowTailDupPlacement()) {      MPDT = &getAnalysis<MachinePostDominatorTree>(); -    if (MF.getFunction().hasOptSize()) +    bool OptForSize = MF.getFunction().hasOptSize() || +                      llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); +    if (OptForSize)        TailDupSize = 1;      bool PreRegAlloc = false; -    TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); +    TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, +                   /* LayoutMode */ true, TailDupSize);      precomputeTriangleChains();    } @@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {    if (MF.size() > 3 && EnableTailMerge) {      unsigned TailMergeSize = TailDupSize + 1;      BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, -                    *MBPI, TailMergeSize); +                    *MBPI, PSI, TailMergeSize);      auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();      if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), | 
