diff options
Diffstat (limited to 'lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp')
| -rw-r--r-- | lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 74 |
1 files changed, 48 insertions, 26 deletions
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 95eb3680403a..2c71e75dadcc 100644 --- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -44,7 +46,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/PGOInstrumentation.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <cassert> #include <cstdint> @@ -112,6 +114,7 @@ private: AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } }; } // end anonymous namespace @@ -133,8 +136,8 @@ namespace { class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) - : Func(Func), BFI(BFI), ORE(ORE), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -151,8 +154,9 @@ public: if (perform(MI)) { Changed = true; ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() - << "is Transformed.\n"); + LLVM_DEBUG(dbgs() << "MemOP call: " + << MI->getCalledFunction()->getName() + << "is Transformed.\n"); } } } @@ -169,6 +173,7 @@ private: Function &Func; BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; + DominatorTree *DT; bool Changed; std::vector<MemIntrinsic *> WorkList; // Start of the previse range. @@ -245,9 +250,9 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { } ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); - DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount - << "\n"); - DEBUG( + LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count " + << ActualCount << "\n"); + LLVM_DEBUG( for (auto &VD : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); @@ -260,8 +265,8 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { TotalCount = ActualCount; if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount - << " denominator = " << SavedTotalCount << "\n"); + LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount + << " denominator = " << SavedTotalCount << "\n"); // Keeping track of the count of the default case: uint64_t RemainCount = TotalCount; @@ -310,9 +315,9 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { uint64_t SumForOpt = TotalCount - RemainCount; - DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions (covering " << SumForOpt << " out of " - << TotalCount << ")\n"); + LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); // mem_op(..., size) // ==> @@ -331,19 +336,20 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { // merge_bb: BasicBlock *BB = MI->getParent(); - DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); - DEBUG(dbgs() << *BB << "\n"); + LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); MergeBB->setName("MemOP.Merge"); BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); DefaultBB->setName("MemOP.Default"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); @@ -358,7 +364,11 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), SavedRemainCount, IPVK_MemOPSize, NumVals); - DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + + std::vector<DominatorTree::UpdateType> Updates; + if (DT) + Updates.reserve(2 * SizeIds.size()); for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( @@ -374,13 +384,20 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); - DEBUG(dbgs() << *CaseBB << "\n"); + if (DT) { + Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); + Updates.push_back({DominatorTree::Insert, BB, CaseBB}); + } + LLVM_DEBUG(dbgs() << *CaseBB << "\n"); } + DTU.applyUpdates(Updates); + Updates.clear(); + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); - DEBUG(dbgs() << *BB << "\n"); - DEBUG(dbgs() << *DefaultBB << "\n"); - DEBUG(dbgs() << *MergeBB << "\n"); + LLVM_DEBUG(dbgs() << *BB << "\n"); + LLVM_DEBUG(dbgs() << *DefaultBB << "\n"); + LLVM_DEBUG(dbgs() << *MergeBB << "\n"); ORE.emit([&]() { using namespace ore; @@ -396,13 +413,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { } // namespace static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE) { + OptimizationRemarkEmitter &ORE, + DominatorTree *DT) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -411,7 +429,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { BlockFrequencyInfo &BFI = getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - return PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); } namespace llvm { @@ -421,11 +441,13 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, FunctionAnalysisManager &FAM) { auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } // namespace llvm |
