diff options
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp')
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 192 |
1 files changed, 143 insertions, 49 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index d0afe2959b39..2b7b859891dc 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -23,7 +23,6 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -38,7 +37,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -97,6 +95,11 @@ extern cl::opt<std::string> MemOPSizeRange; // This option sets the value that groups large memop sizes extern cl::opt<unsigned> MemOPSizeLarge; +cl::opt<bool> + MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), + cl::Hidden, + cl::desc("Size-specialize memcmp and bcmp calls")); + namespace { class PGOMemOPSizeOptLegacyPass : public FunctionPass { public: @@ -115,6 +118,7 @@ private: AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } }; } // end anonymous namespace @@ -124,6 +128,7 @@ INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) @@ -133,11 +138,90 @@ FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { } namespace { + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp). +struct MemOp { + Instruction *I; + MemOp(MemIntrinsic *MI) : I(MI) {} + MemOp(CallInst *CI) : I(CI) {} + MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); } + CallInst *asCI() { return cast<CallInst>(I); } + MemOp clone() { + if (auto MI = asMI()) + return MemOp(cast<MemIntrinsic>(MI->clone())); + return MemOp(cast<CallInst>(asCI()->clone())); + } + Value *getLength() { + if (auto MI = asMI()) + return MI->getLength(); + return asCI()->getArgOperand(2); + } + void setLength(Value *Length) { + if (auto MI = asMI()) + return MI->setLength(Length); + asCI()->setArgOperand(2, Length); + } + StringRef getFuncName() { + if (auto MI = asMI()) + return MI->getCalledFunction()->getName(); + return asCI()->getCalledFunction()->getName(); + } + bool isMemmove() { + if (auto MI = asMI()) + if (MI->getIntrinsicID() == Intrinsic::memmove) + return true; + return false; + } + bool isMemcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_memcmp) { + return true; + } + return false; + } + bool isBcmp(TargetLibraryInfo &TLI) { + LibFunc Func; + if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && + Func == LibFunc_bcmp) { + return true; + } + return false; + } + const char *getName(TargetLibraryInfo &TLI) { + if (auto MI = asMI()) + return getMIName(MI); + LibFunc Func; + if (TLI.getLibFunc(*asCI(), Func)) { + if (Func == LibFunc_memcmp) + return "memcmp"; + if (Func == LibFunc_bcmp) + return "bcmp"; + } + llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst"); + return nullptr; + } +}; + class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { public: MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, - OptimizationRemarkEmitter &ORE, DominatorTree *DT) - : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { + OptimizationRemarkEmitter &ORE, DominatorTree *DT, + TargetLibraryInfo &TLI) + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) { ValueDataArray = std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, @@ -149,13 +233,12 @@ public: WorkList.clear(); visit(Func); - for (auto &MI : WorkList) { + for (auto &MO : WorkList) { ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { + if (perform(MO)) { Changed = true; ++NumOfPGOMemOPOpt; - LLVM_DEBUG(dbgs() << "MemOP call: " - << MI->getCalledFunction()->getName() + LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName() << "is Transformed.\n"); } } @@ -166,7 +249,16 @@ public: // Not perform on constant length calls. if (dyn_cast<ConstantInt>(Length)) return; - WorkList.push_back(&MI); + WorkList.push_back(MemOp(&MI)); + } + + void visitCallInst(CallInst &CI) { + LibFunc Func; + if (TLI.getLibFunc(CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && + !dyn_cast<ConstantInt>(CI.getArgOperand(2))) { + WorkList.push_back(MemOp(&CI)); + } } private: @@ -174,15 +266,16 @@ private: BlockFrequencyInfo &BFI; OptimizationRemarkEmitter &ORE; DominatorTree *DT; + TargetLibraryInfo &TLI; bool Changed; - std::vector<MemIntrinsic *> WorkList; + std::vector<MemOp> WorkList; // Start of the previse range. int64_t PreciseRangeStart; // Last value of the previse range. int64_t PreciseRangeLast; // The space to read the profile annotation. std::unique_ptr<InstrProfValueData[]> ValueDataArray; - bool perform(MemIntrinsic *MI); + bool perform(MemOp MO); // This kind shows which group the value falls in. For PreciseValue, we have // the profile count for that value. LargeGroup groups the values that are in @@ -198,19 +291,6 @@ private: } }; -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - static bool isProfitable(uint64_t Count, uint64_t TotalCount) { assert(Count <= TotalCount); if (Count < MemOPCountThreshold) @@ -229,21 +309,23 @@ static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, return ScaleCount / Denom; } -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) +bool MemOPSizeOpt::perform(MemOp MO) { + assert(MO.I); + if (MO.isMemmove()) + return false; + if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI))) return false; uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions, ValueDataArray.get(), NumVals, TotalCount)) return false; uint64_t ActualCount = TotalCount; uint64_t SavedTotalCount = TotalCount; if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent()); if (!BBEdgeCount) return false; ActualCount = *BBEdgeCount; @@ -335,13 +417,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { // } // merge_bb: - BasicBlock *BB = MI->getParent(); + BasicBlock *BB = MO.I->getParent(); LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); LLVM_DEBUG(dbgs() << *BB << "\n"); auto OrigBBFreq = BFI.getBlockFreq(BB); - BasicBlock *DefaultBB = SplitBlock(BB, MI, DT); - BasicBlock::iterator It(*MI); + BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT); + BasicBlock::iterator It(*MO.I); ++It; assert(It != DefaultBB->end()); BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); @@ -353,15 +435,24 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); + Value *SizeVar = MO.getLength(); SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + Type *MemOpTy = MO.I->getType(); + PHINode *PHI = nullptr; + if (!MemOpTy->isVoidTy()) { + // Insert a phi for the return values at the merge block. + IRBuilder<> IRBM(MergeBB->getFirstNonPHI()); + PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge"); + MO.I->replaceAllUsesWith(PHI); + PHI->addIncoming(MO.I, DefaultBB); + } // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); + MO.I->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (SavedRemainCount > 0 || Version != NumVals) // Otherwise we need update with the un-promoted records back. - annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version), SavedRemainCount, IPVK_MemOPSize, NumVals); LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); @@ -373,17 +464,18 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { for (uint64_t SizeId : SizeIds) { BasicBlock *CaseBB = BasicBlock::Create( Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); + MemOp NewMO = MO.clone(); // Fix the argument. - auto *MemI = cast<MemIntrinsic>(NewInst); - auto *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType()); + auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType()); assert(SizeType && "Expected integer type size argument."); ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); - MemI->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); + NewMO.setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewMO.I); IRBuilder<> IRBCase(CaseBB); IRBCase.CreateBr(MergeBB); SI->addCase(CaseSizeId, CaseBB); + if (!MemOpTy->isVoidTy()) + PHI->addIncoming(NewMO.I, CaseBB); if (DT) { Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); Updates.push_back({DominatorTree::Insert, BB, CaseBB}); @@ -401,11 +493,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ORE.emit([&]() { using namespace ore; - return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) - << "optimized " << NV("Intrinsic", StringRef(getMIName(MI))) - << " with count " << NV("Count", SumForOpt) << " out of " - << NV("Total", TotalCount) << " for " << NV("Versions", Version) - << " versions"; + return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I) + << "optimized " << NV("Memop", MO.getName(TLI)) << " with count " + << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount) + << " for " << NV("Versions", Version) << " versions"; }); return true; @@ -414,13 +505,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, - DominatorTree *DT) { + DominatorTree *DT, TargetLibraryInfo &TLI) { if (DisableMemOPOPT) return false; if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT); + MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI); MemOPSizeOpt.perform(); return MemOPSizeOpt.isChanged(); } @@ -431,7 +522,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - return PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); } namespace llvm { @@ -442,7 +535,8 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT); + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); if (!Changed) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); |