aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUInline.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2019-08-20 20:50:12 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2019-08-20 20:50:12 +0000
commit e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/AMDGPU/AMDGPUInline.cpp
parent 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUInline.cpp')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUInline.cpp 45
1 files changed, 31 insertions, 14 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp
index 945c9acd379a..f4df20b8f03e 100644
--- a/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
@@ -50,6 +49,12 @@ static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
cl::desc("Maximum alloca size to use for inline cost"));
+// Inliner constraint to achieve reasonable compilation time
+static cl::opt<size_t>
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+ cl::desc("Maximum BB number allowed in a function after inlining"
+ " (compile time constraint)"));
+
namespace {
class AMDGPUInliner : public LegacyInlinerBase {
@@ -112,7 +117,8 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
Callee->hasFnAttribute(Attribute::InlineHint);
if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
&& !Caller->hasFnAttribute(Attribute::MinSize))
- Thres = Params.HintThreshold.getValue();
+ Thres = Params.HintThreshold.getValue() *
+ TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();
const DataLayout &DL = Caller->getParent()->getDataLayout();
if (!Callee)
@@ -124,10 +130,11 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
uint64_t AllocaSize = 0;
SmallPtrSet<const AllocaInst *, 8> AIVisited;
for (Value *PtrArg : CS.args()) {
- Type *Ty = PtrArg->getType();
- if (!Ty->isPointerTy() ||
- Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
+ if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
+ Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
continue;
+
PtrArg = GetUnderlyingObject(PtrArg, DL);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
@@ -170,7 +177,6 @@ static bool isWrapperOnlyCall(CallSite CS) {
InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
- TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
if (!Callee || Callee->isDeclaration())
return llvm::InlineCost::getNever("undefined callee");
@@ -178,13 +184,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
if (CS.isNoInline())
return llvm::InlineCost::getNever("noinline");
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
if (!TTI.areInlineCompatible(Caller, Callee))
return llvm::InlineCost::getNever("incompatible");
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
- if (isInlineViable(*Callee))
+ auto IsViable = isInlineViable(*Callee);
+ if (IsViable)
return llvm::InlineCost::getAlways("alwaysinline viable");
- return llvm::InlineCost::getNever("alwaysinline unviable");
+ return llvm::InlineCost::getNever(IsViable.message);
}
if (isWrapperOnlyCall(CS))
@@ -206,6 +214,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Callee, LocalParams, TTI, GetAssumptionCache,
- None, PSI, RemarksEnabled ? &ORE : nullptr);
+ auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
+ LocalParams, TTI, GetAssumptionCache, None, PSI,
+ RemarksEnabled ? &ORE : nullptr);
+
+ if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
+ // A single-BB callee does not increase the total BB count, thus subtract 1
+ size_t Size = Caller->size() + Callee->size() - 1;
+ if (MaxBB && Size > MaxBB)
+ return llvm::InlineCost::getNever("max number of bb exceeded");
+ }
+ return IC;
}