summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
commit1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/Target/AMDGPU/AMDGPUSubtarget.cpp
parente6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Notes
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp64
1 files changed, 34 insertions, 30 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1eb9b83456c5..3bb6dd4571c0 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -175,6 +175,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
HasTrigReducedRange(false),
+ MaxWavesPerEU(10),
LocalMemorySize(0),
WavefrontSize(0)
{ }
@@ -261,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
LDSMisalignedBug(false),
+ HasMFMAInlineLiteralBug(false),
ScalarizeGlobal(false),
@@ -278,9 +280,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
+ MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
- RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
InstSelector.reset(new AMDGPUInstructionSelector(
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
@@ -489,28 +492,28 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const {
assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL);
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t ExplicitArgBytes = 0;
- MaxAlign = 1;
+ MaxAlign = Align::None();
for (const Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(ArgTy);
+ const Align Alignment(DL.getABITypeAlignment(ArgTy));
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
- ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
- MaxAlign = std::max(MaxAlign, Align);
+ ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
+ MaxAlign = std::max(MaxAlign, Alignment);
}
return ExplicitArgBytes;
}
unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const {
+ Align &MaxAlign) const {
uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
@@ -518,7 +521,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
if (ImplicitBytes != 0) {
- unsigned Alignment = getAlignmentForImplicitArgPtr();
+ const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}
@@ -566,7 +569,7 @@ bool GCNSubtarget::hasMadF16() const {
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
- return 10;
+ return getMaxWavesPerEU();
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
@@ -591,25 +594,12 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
}
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
- if (VGPRs <= 24)
- return 10;
- if (VGPRs <= 28)
- return 9;
- if (VGPRs <= 32)
- return 8;
- if (VGPRs <= 36)
- return 7;
- if (VGPRs <= 40)
- return 6;
- if (VGPRs <= 48)
- return 5;
- if (VGPRs <= 64)
- return 4;
- if (VGPRs <= 84)
- return 3;
- if (VGPRs <= 128)
- return 2;
- return 1;
+ unsigned MaxWaves = getMaxWavesPerEU();
+ unsigned Granule = getVGPRAllocGranule();
+ if (VGPRs < Granule)
+ return MaxWaves;
+ unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
+ return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
}
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
@@ -629,6 +619,20 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
return 2; // VCC.
}
+unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF,
+ unsigned LDSSize,
+ unsigned NumSGPRs,
+ unsigned NumVGPRs) const {
+ unsigned Occupancy =
+ std::min(getMaxWavesPerEU(),
+ getOccupancyWithLocalMemSize(LDSSize, MF.getFunction()));
+ if (NumSGPRs)
+ Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
+ if (NumVGPRs)
+ Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
+ return Occupancy;
+}
+
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -878,8 +882,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
- Mutations.push_back(llvm::make_unique<FillMFMAShadowMutation>(&InstrInfo));
+ Mutations.push_back(std::make_unique<MemOpClusterMutation>(&InstrInfo));
+ Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {