diff options
Diffstat (limited to 'lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 152 |
1 files changed, 124 insertions, 28 deletions
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index a40079ca8e76..5d6fbc3325ff 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -20,10 +20,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" @@ -40,8 +42,8 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -1107,26 +1109,23 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { bool DTCalculated = false; Function *CalledFunc = CS.getCalledFunction(); - for (Function::arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); - I != E; ++I) { - unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; - if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { + for (Argument &Arg : CalledFunc->args()) { + unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; + if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { - DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent() - ->getParent())); + DT.recalculate(*CS.getCaller()); DTCalculated = true; } // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. - Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align) + Value *ArgVal = CS.getArgument(Arg.getArgNo()); + if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) continue; - CallInst *NewAssumption = IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, Arg, Align); - AC->registerAssumption(NewAssumption); + CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, ArgVal, Align); + AC->registerAssumption(NewAsmp); } } } @@ -1140,7 +1139,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, ValueToValueMapTy &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; - const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Caller = CS.getCaller(); const Function *Callee = CS.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; CallGraphNode *CallerNode = CG[Caller]; @@ -1225,7 +1224,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, PointerType *ArgTy = cast<PointerType>(Arg->getType()); Type *AggTy = ArgTy->getElementType(); - Function *Caller = TheCall->getParent()->getParent(); + Function *Caller = TheCall->getFunction(); + const DataLayout &DL = Caller->getParent()->getDataLayout(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -1239,31 +1239,30 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; - const DataLayout &DL = Caller->getParent()->getDataLayout(); // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= ByValAlignment) return Arg; - + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = - Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + unsigned Align = DL.getPrefTypeAlignment(AggTy); // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). Align = std::max(Align, ByValAlignment); - - Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), + + Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(), + nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); - + // Uses of the argument in the function should use our new alloca // instead. return NewAlloca; @@ -1393,6 +1392,89 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } } +/// Update the block frequencies of the caller after a callee has been inlined. +/// +/// Each block cloned into the caller has its block frequency scaled by the +/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of +/// callee's entry block gets the same frequency as the callsite block and the +/// relative frequencies of all cloned blocks remain the same after cloning. +static void updateCallerBFI(BasicBlock *CallSiteBlock, + const ValueToValueMapTy &VMap, + BlockFrequencyInfo *CallerBFI, + BlockFrequencyInfo *CalleeBFI, + const BasicBlock &CalleeEntryBlock) { + SmallPtrSet<BasicBlock *, 16> ClonedBBs; + for (auto const &Entry : VMap) { + if (!isa<BasicBlock>(Entry.first) || !Entry.second) + continue; + auto *OrigBB = cast<BasicBlock>(Entry.first); + auto *ClonedBB = cast<BasicBlock>(Entry.second); + uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); + if (!ClonedBBs.insert(ClonedBB).second) { + // Multiple blocks in the callee might get mapped to one cloned block in + // the caller since we prune the callee as we clone it. When that happens, + // we want to use the maximum among the original blocks' frequencies. + uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); + if (NewFreq > Freq) + Freq = NewFreq; + } + CallerBFI->setBlockFreq(ClonedBB, Freq); + } + BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); + CallerBFI->setBlockFreqAndScale( + EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), + ClonedBBs); +} + +/// Update the branch metadata for cloned call instructions. +static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, + const Optional<uint64_t> &CalleeEntryCount, + const Instruction *TheCall) { + if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) + return; + Optional<uint64_t> CallSiteCount = + ProfileSummaryInfo::getProfileCount(TheCall, nullptr); + uint64_t CallCount = + std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, + CalleeEntryCount.getValue()); + + for (auto const &Entry : VMap) + if (isa<CallInst>(Entry.first)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); + for (BasicBlock &BB : *Callee) + // No need to update the callsite if it is pruned during inlining. + if (VMap.count(&BB)) + for (Instruction &I : BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) + CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, + CalleeEntryCount.getValue()); +} + +/// Update the entry count of callee after inlining. +/// +/// The callsite's block count is subtracted from the callee's function entry +/// count. +static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, + Instruction *CallInst, Function *Callee) { + // If the callee has a original count of N, and the estimated count of + // callsite is M, the new callee count is set to N - M. M is estimated from + // the caller's entry count, its entry block frequency and the block frequency + // of the callsite. + Optional<uint64_t> CalleeCount = Callee->getEntryCount(); + if (!CalleeCount.hasValue()) + return; + Optional<uint64_t> CallCount = + ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI); + if (!CallCount.hasValue()) + return; + // Since CallSiteCount is an estimate, it could exceed the original callee + // count and has to be set to 0. + if (CallCount.getValue() > CalleeCount.getValue()) + Callee->setEntryCount(0); + else + Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); +} /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. @@ -1405,13 +1487,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); - assert(TheCall->getParent() && TheCall->getParent()->getParent() && - "Instruction not in function!"); + assert(TheCall->getParent() && TheCall->getFunction() + && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - - const Function *CalledFunc = CS.getCalledFunction(); + + Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; @@ -1548,7 +1630,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + for (Function::arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; @@ -1578,10 +1660,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, TheCall); - // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; + if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) + // Update the BFI of blocks cloned into the caller. + updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, + CalledFunc->front()); + + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall); + // Update the profile count of callee. + updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc); + // Inject byval arguments initialization. for (std::pair<Value*, Value*> &Init : ByValInit) HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), @@ -2087,6 +2177,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->getName() + ".exit"); } + if (IFI.CallerBFI) { + // Copy original BB's block frequency to AfterCallBB + IFI.CallerBFI->setBlockFreq( + AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); + } + // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // |