diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 | 
| commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
| tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Transforms/Utils/InlineFunction.cpp | |
| parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) | |
Notes
Diffstat (limited to 'lib/Transforms/Utils/InlineFunction.cpp')
| -rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 152 | 
1 files changed, 124 insertions, 28 deletions
| diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index a40079ca8e76..5d6fbc3325ff 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -20,10 +20,12 @@  #include "llvm/ADT/StringExtras.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/CallGraph.h"  #include "llvm/Analysis/CaptureTracking.h"  #include "llvm/Analysis/EHPersonalities.h"  #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/CallSite.h" @@ -40,8 +42,8 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Local.h"  #include <algorithm>  using namespace llvm; @@ -1107,26 +1109,23 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {    bool DTCalculated = false;    Function *CalledFunc = CS.getCalledFunction(); -  for (Function::arg_iterator I = CalledFunc->arg_begin(), -                              E = CalledFunc->arg_end(); -       I != E; ++I) { -    unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; -    if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { +  for (Argument &Arg : CalledFunc->args()) { +    unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; +    if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {        if (!DTCalculated) { -        DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent() -                                               ->getParent())); +        DT.recalculate(*CS.getCaller());          DTCalculated = true;        }        // If we can already prove the asserted alignment in the context of the        // caller, then don't bother inserting the assumption. -      Value *Arg = CS.getArgument(I->getArgNo()); -      if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align) +      Value *ArgVal = CS.getArgument(Arg.getArgNo()); +      if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)          continue; -      CallInst *NewAssumption = IRBuilder<>(CS.getInstruction()) -                                    .CreateAlignmentAssumption(DL, Arg, Align); -      AC->registerAssumption(NewAssumption); +      CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) +                              .CreateAlignmentAssumption(DL, ArgVal, Align); +      AC->registerAssumption(NewAsmp);      }    }  } @@ -1140,7 +1139,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,                                           ValueToValueMapTy &VMap,                                           InlineFunctionInfo &IFI) {    CallGraph &CG = *IFI.CG; -  const Function *Caller = CS.getInstruction()->getParent()->getParent(); +  const Function *Caller = CS.getCaller();    const Function *Callee = CS.getCalledFunction();    CallGraphNode *CalleeNode = CG[Callee];    CallGraphNode *CallerNode = CG[Caller]; @@ -1225,7 +1224,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,    PointerType *ArgTy = cast<PointerType>(Arg->getType());    Type *AggTy = ArgTy->getElementType(); -  Function *Caller = TheCall->getParent()->getParent(); +  Function *Caller = TheCall->getFunction(); +  const DataLayout &DL = Caller->getParent()->getDataLayout();    // If the called function is readonly, then it could not mutate the caller's    // copy of the byval'd memory.  In this case, it is safe to elide the copy and @@ -1239,31 +1239,30 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,      AssumptionCache *AC =          IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; -    const DataLayout &DL = Caller->getParent()->getDataLayout();      // If the pointer is already known to be sufficiently aligned, or if we can      // round it up to a larger alignment, then we don't need a temporary.      if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=          ByValAlignment)        return Arg; -     +      // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad      // for code quality, but rarely happens and is required for correctness.    }    // Create the alloca.  If we have DataLayout, use nice alignment. -  unsigned Align = -      Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); +  unsigned Align = DL.getPrefTypeAlignment(AggTy);    // If the byval had an alignment specified, we *must* use at least that    // alignment, as it is required by the byval argument (and uses of the    // pointer inside the callee).    Align = std::max(Align, ByValAlignment); -   -  Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),  + +  Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(), +                                    nullptr, Align, Arg->getName(),                                      &*Caller->begin()->begin());    IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); -   +    // Uses of the argument in the function should use our new alloca    // instead.    return NewAlloca; @@ -1393,6 +1392,89 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,      }    }  } +/// Update the block frequencies of the caller after a callee has been inlined. +/// +/// Each block cloned into the caller has its block frequency scaled by the +/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of +/// callee's entry block gets the same frequency as the callsite block and the +/// relative frequencies of all cloned blocks remain the same after cloning. +static void updateCallerBFI(BasicBlock *CallSiteBlock, +                            const ValueToValueMapTy &VMap, +                            BlockFrequencyInfo *CallerBFI, +                            BlockFrequencyInfo *CalleeBFI, +                            const BasicBlock &CalleeEntryBlock) { +  SmallPtrSet<BasicBlock *, 16> ClonedBBs; +  for (auto const &Entry : VMap) { +    if (!isa<BasicBlock>(Entry.first) || !Entry.second) +      continue; +    auto *OrigBB = cast<BasicBlock>(Entry.first); +    auto *ClonedBB = cast<BasicBlock>(Entry.second); +    uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); +    if (!ClonedBBs.insert(ClonedBB).second) { +      // Multiple blocks in the callee might get mapped to one cloned block in +      // the caller since we prune the callee as we clone it. When that happens, +      // we want to use the maximum among the original blocks' frequencies. +      uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); +      if (NewFreq > Freq) +        Freq = NewFreq; +    } +    CallerBFI->setBlockFreq(ClonedBB, Freq); +  } +  BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); +  CallerBFI->setBlockFreqAndScale( +      EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), +      ClonedBBs); +} + +/// Update the branch metadata for cloned call instructions. +static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, +                              const Optional<uint64_t> &CalleeEntryCount, +                              const Instruction *TheCall) { +  if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) +    return; +  Optional<uint64_t> CallSiteCount = +      ProfileSummaryInfo::getProfileCount(TheCall, nullptr); +  uint64_t CallCount = +      std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, +               CalleeEntryCount.getValue()); + +  for (auto const &Entry : VMap) +    if (isa<CallInst>(Entry.first)) +      if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) +        CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); +  for (BasicBlock &BB : *Callee) +    // No need to update the callsite if it is pruned during inlining. +    if (VMap.count(&BB)) +      for (Instruction &I : BB) +        if (CallInst *CI = dyn_cast<CallInst>(&I)) +          CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, +                               CalleeEntryCount.getValue()); +} + +/// Update the entry count of callee after inlining. +/// +/// The callsite's block count is subtracted from the callee's function entry +/// count. +static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, +                              Instruction *CallInst, Function *Callee) { +  // If the callee has a original count of N, and the estimated count of +  // callsite is M, the new callee count is set to N - M. M is estimated from +  // the caller's entry count, its entry block frequency and the block frequency +  // of the callsite. +  Optional<uint64_t> CalleeCount = Callee->getEntryCount(); +  if (!CalleeCount.hasValue()) +    return; +  Optional<uint64_t> CallCount = +      ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI); +  if (!CallCount.hasValue()) +    return; +  // Since CallSiteCount is an estimate, it could exceed the original callee +  // count and has to be set to 0. +  if (CallCount.getValue() > CalleeCount.getValue()) +    Callee->setEntryCount(0); +  else +    Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); +}  /// This function inlines the called function into the basic block of the  /// caller. This returns false if it is not possible to inline this call. @@ -1405,13 +1487,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,  bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,                            AAResults *CalleeAAR, bool InsertLifetime) {    Instruction *TheCall = CS.getInstruction(); -  assert(TheCall->getParent() && TheCall->getParent()->getParent() && -         "Instruction not in function!"); +  assert(TheCall->getParent() && TheCall->getFunction() +         && "Instruction not in function!");    // If IFI has any state in it, zap it before we fill it in.    IFI.reset(); -   -  const Function *CalledFunc = CS.getCalledFunction(); + +  Function *CalledFunc = CS.getCalledFunction();    if (!CalledFunc ||              // Can't inline external function or indirect        CalledFunc->isDeclaration() || // call, or call to a vararg function!        CalledFunc->getFunctionType()->isVarArg()) return false; @@ -1548,7 +1630,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      // matches up the formal to the actual argument values.      CallSite::arg_iterator AI = CS.arg_begin();      unsigned ArgNo = 0; -    for (Function::const_arg_iterator I = CalledFunc->arg_begin(), +    for (Function::arg_iterator I = CalledFunc->arg_begin(),           E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {        Value *ActualArg = *AI; @@ -1578,10 +1660,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,                                /*ModuleLevelChanges=*/false, Returns, ".i",                                &InlinedFunctionInfo, TheCall); -      // Remember the first block that is newly cloned over.      FirstNewBlock = LastBlock; ++FirstNewBlock; +    if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) +      // Update the BFI of blocks cloned into the caller. +      updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, +                      CalledFunc->front()); + +    updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall); +    // Update the profile count of callee. +    updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc); +      // Inject byval arguments initialization.      for (std::pair<Value*, Value*> &Init : ByValInit)        HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), @@ -2087,6 +2177,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,                                            CalledFunc->getName() + ".exit");    } +  if (IFI.CallerBFI) { +    // Copy original BB's block frequency to AfterCallBB +    IFI.CallerBFI->setBlockFreq( +        AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); +  } +    // Change the branch that used to go to AfterCallBB to branch to the first    // basic block of the inlined function.    // | 
