Diffstat (limited to 'lib/Analysis/InlineCost.cpp')
 lib/Analysis/InlineCost.cpp | 312 ++++++++++++++++++++++++++++++----------
 1 file changed, 220 insertions(+), 92 deletions(-)
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a86a703ed9d6d..dcb724abc02d0 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
@@ -39,6 +40,32 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+// Threshold to use when optsize is specified (and there is no
+// -inline-threshold).
+const int OptSizeThreshold = 75;
+
+// Threshold to use when -Oz is specified (and there is no -inline-threshold).
+const int OptMinSizeThreshold = 25;
+
+// Threshold to use when -O[34] is specified (and there is no
+// -inline-threshold).
+const int OptAggressiveThreshold = 275;
+
+static cl::opt<int> DefaultInlineThreshold(
+ "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int> HintThreshold(
+ "inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// We introduce this threshold to help performance of instrumentation based
+// PGO before we actually hook up inliner with analysis passes such as BPI and
+// BFI.
+static cl::opt<int> ColdThreshold(
+ "inlinecold-threshold", cl::Hidden, cl::init(225),
+ cl::desc("Threshold for inlining functions with cold attribute"));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
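Under their defaults, the new thresholds form a strict ordering: 25 (-Oz) < 75 (-Os/optsize) < 225 (default) < 275 (-O3 and above) < 325 (inlinehint). A minimal sketch of how an explicitly passed flag is told apart from its cl::init default, mirroring the getNumOccurrences() check that updateThreshold() below relies on (illustrative, not the exact LLVM code):

    // getNumOccurrences() > 0 means the user passed -inline-threshold on
    // the command line, so it overrides whatever threshold the caller
    // computed; cl::init(225) alone does not count as an occurrence.
    if (DefaultInlineThreshold.getNumOccurrences() > 0)
      Threshold = DefaultInlineThreshold;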
@@ -51,6 +78,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The cache of @llvm.assume intrinsics.
AssumptionCacheTracker *ACT;
+ /// Profile summary information.
+ ProfileSummaryInfo *PSI;
+
// The called function.
Function &F;
@@ -96,7 +126,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
DenseMap<Value *, int> SROAArgCosts;
// Keep track of values which map to a pointer base and constant offset.
- DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+ DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
@@ -117,19 +147,31 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// attributes since these can be more precise than the ones on the callee
/// itself.
bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
-
+
/// Return true if the given value is known non null within the callee if
/// inlined through this particular callsite.
bool isKnownNonNullInCallee(Value *V);
+ /// Update Threshold based on callsite properties such as callee
+ /// attributes and callee hotness for PGO builds. The Callee is explicitly
+ /// passed to support analyzing indirect calls whose target is inferred by
+ /// analysis.
+ void updateThreshold(CallSite CS, Function &Callee);
+
+ /// Return true if size growth is allowed when inlining the callee at CS.
+ bool allowSizeGrowth(CallSite CS);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
- void visit(Module *); void visit(Module &);
- void visit(Function *); void visit(Function &);
- void visit(BasicBlock *); void visit(BasicBlock &);
+ void visit(Module *);
+ void visit(Module &);
+ void visit(Function *);
+ void visit(Function &);
+ void visit(BasicBlock *);
+ void visit(BasicBlock &);
// Provide base case for our instruction visit.
bool visitInstruction(Instruction &I);
@@ -162,17 +204,19 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- Function &Callee, int Threshold, CallSite CSArg)
- : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
- Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
+ CallSite CSArg)
+ : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
+ Threshold(Threshold), Cost(0), IsCallerRecursive(false),
+ IsRecursiveCall(false), ExposesReturnsTwice(false),
+ HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
+ HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -272,7 +316,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
OpC = dyn_cast<ConstantInt>(SimpleOp);
if (!OpC)
return false;
- if (OpC->isZero()) continue;
+ if (OpC->isZero())
+ continue;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
@@ -290,13 +335,14 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Check whether inlining will turn a dynamic alloca into a static
- // alloca, and handle that case.
+ // alloca and handle that case.
if (I.isArrayAllocation()) {
- if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
- ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
- assert(AllocSize && "Allocation size not a constant int?");
+ Constant *Size = SimplifiedValues.lookup(I.getArraySize());
+ if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
- AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
+ AllocatedSize = SaturatingMultiplyAdd(
+ AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
return Base::visitAlloca(I);
}
}
@@ -305,7 +351,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (I.isStaticAlloca()) {
const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
- AllocatedSize += DL.getTypeAllocSize(Ty);
+ AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
}
// We will happily inline static alloca instructions.
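SaturatingAdd and SaturatingMultiplyAdd are the overflow-clamping helpers from llvm/Support/MathExtras.h: instead of wrapping, they pin the result at the type's maximum, so a pathological array allocation cannot wrap AllocatedSize back into a small number and defeat the stack-size check. A rough sketch of the semantics (illustrative, not the library implementation):

    #include <cstdint>
    // Clamp to UINT64_MAX on overflow instead of wrapping around.
    uint64_t saturatingAdd(uint64_t A, uint64_t B) {
      uint64_t R = A + B;
      return R < A ? UINT64_MAX : R; // wrapped past the top -> saturate
    }
    // SaturatingMultiplyAdd(A, B, C) is the analogous clamped A * B + C.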
@@ -336,8 +382,8 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
- SROAArg, CostIt);
+ bool SROACandidate =
+ lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
// Try to fold GEPs of constant-offset call site argument pointers. This
// requires target data and inbounds GEPs.
@@ -393,8 +439,8 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
}
// Track base/offsets through casts
- std::pair<Value *, APInt> BaseAndOffset
- = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ std::pair<Value *, APInt> BaseAndOffset =
+ ConstantOffsetPtrs.lookup(I.getOperand(0));
// Casts don't change the offset, just wrap it up.
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -425,8 +471,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
const DataLayout &DL = F.getParent()->getDataLayout();
if (IntegerSize >= DL.getPointerSizeInBits()) {
- std::pair<Value *, APInt> BaseAndOffset
- = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ std::pair<Value *, APInt> BaseAndOffset =
+ ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
}
@@ -501,8 +547,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
COp = SimplifiedValues.lookup(Operand);
if (COp) {
const DataLayout &DL = F.getParent()->getDataLayout();
- if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
- COp, DL)) {
+ if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
SimplifiedValues[&I] = C;
return true;
}
@@ -516,7 +561,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
unsigned ArgNo = A->getArgNo();
- return CandidateCS.paramHasAttr(ArgNo+1, Attr);
+ return CandidateCS.paramHasAttr(ArgNo + 1, Attr);
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -528,7 +573,7 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
if (Argument *A = dyn_cast<Argument>(V))
if (paramHasAttr(A, Attribute::NonNull))
return true;
-
+
// Is this an alloca in the caller? This is distinct from the attribute case
// above because attributes aren't updated within the inliner itself and we
// always want to catch the alloca derived case.
@@ -537,10 +582,86 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
// alloca-derived value and null. Note that this fires regardless of
// SROA firing.
return true;
-
+
return false;
}
+bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
+ // If the normal destination of the invoke or the parent block of the call
+ // site is unreachable-terminated, there is little point in inlining this
+ // unless there is literally zero cost.
+ // FIXME: Note that it is possible that an unreachable-terminated block has a
+ // hot entry. For example, in the scenario below, inlining hot_call_X() may
+ // be beneficial:
+ // main() {
+ // hot_call_1();
+ // ...
+ // hot_call_N()
+ // exit(0);
+ // }
+ // For now, we are not handling this corner case here as it is rare in real
+ // code. In the future, we should elaborate on this based on BPI and BFI in
+ // more general threshold-adjusting heuristics in updateThreshold().
+ Instruction *Instr = CS.getInstruction();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
+ return false;
+ } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
+ return false;
+
+ return true;
+}
+
+void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
+ // If no size growth is allowed for this inlining, set Threshold to 0.
+ if (!allowSizeGrowth(CS)) {
+ Threshold = 0;
+ return;
+ }
+
+ Function *Caller = CS.getCaller();
+ if (DefaultInlineThreshold.getNumOccurrences() > 0) {
+ // Explicitly specified -inline-threshold overrides the threshold passed to
+ // CallAnalyzer's constructor.
+ Threshold = DefaultInlineThreshold;
+ } else {
+ // If -inline-threshold is not given, listen to the optsize and minsize
+ // attributes when they would decrease the threshold.
+ if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
+ Threshold = OptMinSizeThreshold;
+ else if (Caller->optForSize() && OptSizeThreshold < Threshold)
+ Threshold = OptSizeThreshold;
+ }
+
+ bool HotCallsite = false;
+ uint64_t TotalWeight;
+ if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ PSI->isHotCount(TotalWeight))
+ HotCallsite = true;
+
+ // Listen to the inlinehint attribute or profile based hotness information
+ // when it would increase the threshold and the caller does not need to
+ // minimize its size.
+ bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
+ PSI->isHotFunction(&Callee) ||
+ HotCallsite;
+ if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
+ Threshold = HintThreshold;
+
+ bool ColdCallee = PSI->isColdFunction(&Callee);
+ // An explicit -inline-threshold on the command line overrides the default
+ // ColdThreshold: if we have -inline-threshold but no -inlinecold-threshold,
+ // do not use the default cold threshold even if it is smaller.
+ if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
+ ColdThreshold.getNumOccurrences() > 0) &&
+ ColdCallee && ColdThreshold < Threshold)
+ Threshold = ColdThreshold;
+
+ // Finally, take the target-specific inlining threshold multiplier into
+ // account.
+ Threshold *= TTI.getInliningThresholdMultiplier();
+}
+
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
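Taken together, updateThreshold() applies its rules in a fixed order; the standalone sketch below summarizes the precedence under the default flag values (hypothetical helper, not LLVM code; the explicit -inline-threshold override and the final TTI multiplier are noted in comments):

    #include <algorithm>
    // Assumed defaults: 225 base, 25 minsize, 75 optsize, 325 hint, 225 cold.
    int sketchThreshold(bool MinSize, bool OptSize, bool HintOrHot, bool Cold) {
      int T = 225;             // unless -inline-threshold overrides it
      if (MinSize)
        T = std::min(T, 25);   // -Oz caller: size also rules out the hint
      else if (OptSize)
        T = std::min(T, 75);   // -Os / optsize caller
      if (HintOrHot && !MinSize)
        T = std::max(T, 325);  // inlinehint, hot callee, or hot callsite
      if (Cold)
        T = std::min(T, 225);  // no-op until -inlinecold-threshold is lowered
      return T;                // real code then applies the TTI multiplier
    }

For example, a hot callsite in an ordinary caller raises the threshold from 225 to 325, while the same callsite in a minsize caller stays at 25.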
@@ -552,7 +673,8 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
RHS = SimpleRHS;
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
+ if (Constant *C =
+ ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
SimplifiedValues[&I] = C;
return true;
}
@@ -713,8 +835,8 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
if (!InsertedC)
InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
if (AggC && InsertedC) {
- SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
- I.getIndices());
+ SimplifiedValues[&I] =
+ ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices());
return true;
}
@@ -739,8 +861,8 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
// Try to re-map the arguments to constants.
SmallVector<Constant *, 4> ConstantArgs;
ConstantArgs.reserve(CS.arg_size());
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I) {
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
+ ++I) {
Constant *C = dyn_cast<Constant>(*I);
if (!C)
C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
@@ -764,8 +886,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
ExposesReturnsTwice = true;
return false;
}
- if (CS.isCall() &&
- cast<CallInst>(CS.getInstruction())->cannotDuplicate())
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
ContainsNoDuplicateCall = true;
if (Function *F = CS.getCalledFunction()) {
@@ -780,6 +901,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
default:
return Base::visitCallSite(CS);
+ case Intrinsic::load_relative:
+ // This is normally lowered to 4 LLVM instructions.
+ Cost += 3 * InlineConstants::InstrCost;
+ return false;
+
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
@@ -831,7 +957,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
+ CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
+ CS);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -938,7 +1065,6 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
return false;
}
-
/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
@@ -1044,7 +1170,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
+ if (GA->isInterposable())
break;
V = GA->getAliasee();
} else {
@@ -1079,6 +1205,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// nice to base the bonus values on something more scientific.
assert(NumInstructions == 0);
assert(NumVectorInstructions == 0);
+
+ // Update the threshold based on callsite properties.
+ updateThreshold(CS, F);
+
FiftyPercentVectorBonus = 3 * Threshold / 2;
TenPercentVectorBonus = 3 * Threshold / 4;
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -1124,22 +1254,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
- bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
- &F == CS.getCalledFunction();
+ bool OnlyOneCallAndLocalLinkage =
+ F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
Cost += InlineConstants::LastCallToStaticBonus;
- // If the instruction after the call, or if the normal destination of the
- // invoke is an unreachable instruction, the function is noreturn. As such,
- // there is little point in inlining this unless there is literally zero
- // cost.
- Instruction *Instr = CS.getInstruction();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
- if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Threshold = 0;
- } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
- Threshold = 0;
-
// If this function uses the coldcc calling convention, prefer not to inline
// it.
if (F.getCallingConv() == CallingConv::Cold)
@@ -1193,7 +1312,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
+ EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1203,7 +1323,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// accomplish this, prioritizing for small iterations because we exit after
// crossing our threshold, we use a small-size optimized SetVector.
typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
- SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ SmallPtrSet<BasicBlock *, 16>>
+ BBSetVector;
BBSetVector BBWorklist;
BBWorklist.insert(&F.getEntryBlock());
// Note that we *must not* cache the size, this loop grows the worklist.
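A SetVector de-duplicates inserts while keeping insertion order, which is what the index-based loop needs: it can append successors mid-iteration without ever visiting a block twice. A minimal usage sketch (visitBB and EntryBB are hypothetical stand-ins):

    BBSetVector Worklist;
    Worklist.insert(EntryBB);                  // duplicate inserts ignored
    for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) // size may grow
      visitBB(Worklist[Idx]);                  // may insert successors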
@@ -1228,20 +1349,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
- if (!analyzeBlock(BB, EphValues)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasFrameEscape)
- return false;
-
- // If the caller is a recursive function then we don't want to inline
- // functions which allocate a lot of stack space because it would increase
- // the caller stack usage dramatically.
- if (IsCallerRecursive &&
- AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
- return false;
-
- break;
- }
+ if (!analyzeBlock(BB, EphValues))
+ return false;
TerminatorInst *TI = BB->getTerminator();
@@ -1250,16 +1359,16 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional()) {
Value *Cond = BI->getCondition();
- if (ConstantInt *SimpleCond
- = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ if (ConstantInt *SimpleCond =
+ dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
continue;
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *Cond = SI->getCondition();
- if (ConstantInt *SimpleCond
- = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ if (ConstantInt *SimpleCond =
+ dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
continue;
}
@@ -1296,12 +1405,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
- return Cost <= std::max(0, Threshold);
+ return Cost < std::max(1, Threshold);
}
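The comparison change is deliberate: equality is now a rejection, but a literally zero-cost call still inlines even after updateThreshold() zeroes the threshold, which is what allowSizeGrowth() counts on. Sketched at the boundaries:

    // Old: Cost <= std::max(0, Threshold)  ->  Cost == Threshold inlined.
    // New: Cost <  std::max(1, Threshold)  ->  Cost == Threshold rejected,
    // yet with Threshold == 0 a zero-cost call still passes, since
    // 0 < std::max(1, 0) == 1.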
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
-void CallAnalyzer::dump() {
+LLVM_DUMP_METHOD void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
@@ -1321,7 +1430,7 @@ void CallAnalyzer::dump() {
/// \brief Test that two functions either have or have not the given attribute
/// at the same time.
-template<typename AttrKind>
+template <typename AttrKind>
static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}
@@ -1335,15 +1444,33 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
+InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT) {
- return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
+ AssumptionCacheTracker *ACT,
+ ProfileSummaryInfo *PSI) {
+ return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
+ ACT, PSI);
+}
+
+int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return OptAggressiveThreshold;
+ if (SizeOptLevel == 1) // -Os
+ return OptSizeThreshold;
+ if (SizeOptLevel == 2) // -Oz
+ return OptMinSizeThreshold;
+ return DefaultInlineThreshold;
}
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
+int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
+
+InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
+ int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT) {
+ AssumptionCacheTracker *ACT,
+ ProfileSummaryInfo *PSI) {
+
// Cannot inline indirect calls.
if (!Callee)
return llvm::InlineCost::getNever();
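A hypothetical pass-setup snippet showing how the new computeThresholdFromOptLevels() entry point is meant to be consumed (the function is real; the surrounding setup is assumed):

    // -O3 with no size optimization selects OptAggressiveThreshold (275);
    // an explicit -inline-threshold still wins later, in updateThreshold().
    int DefaultThreshold =
        llvm::computeThresholdFromOptLevels(/*OptLevel=*/3, /*SizeOptLevel=*/0);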
@@ -1365,17 +1492,18 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
return llvm::InlineCost::getNever();
- // Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline or call sites
- // marked noinline.
- if (Callee->mayBeOverridden() ||
- Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
+ // Don't inline functions which can be interposed at link-time. Don't inline
+ // functions marked noinline or call sites marked noinline.
+ // Note: inlining non-exact non-interposable functions is fine, since we know
+ // we have *a* correct implementation of the source level function.
+ if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
+ CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
- << "...\n");
+ << "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
+ CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());