Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 220
1 file changed, 153 insertions(+), 67 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 80feaa44766f..98b49070fa99 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+/// Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
@@ -20,8 +20,10 @@
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
@@ -32,12 +34,37 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
+#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#undef AMDGPUSubtarget
+#include "R600GenSubtargetInfo.inc"
-AMDGPUSubtarget::~AMDGPUSubtarget() = default;
+GCNSubtarget::~GCNSubtarget() = default;
+
+R600Subtarget &
+R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS) {
+ SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+ FullFS += FS;
+ ParseSubtargetFeatures(GPU, FullFS);
+
+ // FIXME: I don't think Evergreen has any useful support for
+ // denormals, but this should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ FP32Denormals = false;
+ }
+
+ HasMulU24 = getGeneration() >= EVERGREEN;
+ HasMulI24 = hasCaymanISA();
+
+ return *this;
+}
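One subtlety in R600Subtarget::initializeSubtargetDependencies above: the defaults are prepended to the user feature string, so FS can still override them. SubtargetFeatures applies entries left to right with later entries winning; a short sketch of that pattern (the override in the comment is an assumed example):

    // Defaults first, user features second, so the user's string wins on conflict.
    SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
    FullFS += FS;                    // e.g. FS == "-promote-alloca" re-disables it
    ParseSubtargetFeatures(GPU, FullFS);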
-AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
+GCNSubtarget &
+GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
@@ -92,26 +119,43 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
HasMovrel = true;
}
+ HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
+ const FeatureBitset &FeatureBits) :
+ TargetTriple(TT),
+ SubtargetFeatureBits(FeatureBits),
+ Has16BitInsts(false),
+ HasMadMixInsts(false),
+ FP32Denormals(false),
+ FPExceptions(false),
+ HasSDWA(false),
+ HasVOP3PInsts(false),
+ HasMulI24(true),
+ HasMulU24(true),
+ HasFminFmaxLegacy(true),
+ EnablePromoteAlloca(false),
+ LocalMemorySize(0),
+ WavefrontSize(0)
+ { }
+
+GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const GCNTargetMachine &TM) :
+ AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ AMDGPUSubtarget(TT, getFeatureBits()),
TargetTriple(TT),
- Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+ Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
- WavefrontSize(0),
- LocalMemorySize(0),
LDSBankCount(0),
MaxPrivateElementSize(0),
FastFMAF32(false),
HalfRate64Ops(false),
- FP32Denormals(false),
FP64FP16Denormals(false),
- FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
@@ -123,57 +167,56 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
EnableXNACK(false),
TrapHandler(false),
DebuggerInsertNops(false),
- DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
EnableHugePrivateBuffer(false),
EnableVGPRSpilling(false),
- EnablePromoteAlloca(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
+ EnableDS128(false),
DumpCode(false),
FP64(false),
- FMA(false),
- IsGCN(false),
GCN3Encoding(false),
CIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
- Has16BitInsts(false),
HasIntClamp(false),
- HasVOP3PInsts(false),
- HasMadMixInsts(false),
+ HasFmaMixInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
+ HasScalarAtomics(false),
HasInv2PiInlineImm(false),
- HasSDWA(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasDLInsts(false),
+ D16PreservesUnusedBits(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
AddNoCarryInsts(false),
+ HasUnpackedD16VMem(false),
- R600ALUInst(false),
- CaymanISA(false),
- CFALUBug(false),
- HasVertexCache(false),
- TexVTXClauseSize(0),
ScalarizeGlobal(false),
FeatureDisable(false),
- InstrItins(getInstrItineraryForCPU(GPU)) {
+ InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
+ TLInfo(TM, *this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
AS = AMDGPU::getAMDGPUAS(TT);
- initializeSubtargetDependencies(TT, GPU, FS);
+ CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+ Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
+ RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ InstSelector.reset(new AMDGPUInstructionSelector(
+ *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
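The constructor rework above is the heart of the patch: AMDGPUSubtarget shrinks to a common base carrying only target-independent state, initialized from the derived class's feature bits, while GCNSubtarget inherits the old SI-specific member list. A rough sketch of the resulting hierarchy, with member lists abbreviated from the initializers shown above:

    // Sketch only; see the initializer lists above for the full member sets.
    class AMDGPUSubtarget {                       // common base, no TableGen parent
    protected:
      Triple TargetTriple;
      const FeatureBitset &SubtargetFeatureBits;  // the derived class's bits
      bool Has16BitInsts, HasMadMixInsts, FP32Denormals, FPExceptions;
      bool HasSDWA, HasVOP3PInsts, HasMulI24, HasMulU24, HasFminFmaxLegacy;
      bool EnablePromoteAlloca;
      int LocalMemorySize;
      int WavefrontSize;
    };
    class GCNSubtarget : public AMDGPUGenSubtargetInfo,  // TableGen-generated
                         public AMDGPUSubtarget { /* SI+ state, SIInstrInfo */ };
    class R600Subtarget : public R600GenSubtargetInfo,
                          public AMDGPUSubtarget { /* R600/Evergreen state */ };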
@@ -198,6 +241,12 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
return NumWaves;
}
+unsigned
+AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
+}
+
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
switch (CC) {
@@ -357,27 +406,64 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
return true;
}
-R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM) :
- AMDGPUSubtarget(TT, GPU, FS, TM),
- InstrInfo(*this),
- FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this) {}
+uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
+ unsigned &MaxAlign) const {
+ assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ F.getCallingConv() == CallingConv::SPIR_KERNEL);
-SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM)
- : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
- FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this) {
- CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
- Legalizer.reset(new AMDGPULegalizerInfo());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t ExplicitArgBytes = 0;
+ MaxAlign = 1;
- RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
- InstSelector.reset(new AMDGPUInstructionSelector(
- *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
+ for (const Argument &Arg : F.args()) {
+ Type *ArgTy = Arg.getType();
+
+ unsigned Align = DL.getABITypeAlignment(ArgTy);
+ uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
+ ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
+ MaxAlign = std::max(MaxAlign, Align);
+ }
+
+ return ExplicitArgBytes;
}
-void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
+ unsigned &MaxAlign) const {
+ uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
+
+ unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
+
+ uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
+ unsigned ImplicitBytes = getImplicitArgNumBytes(F);
+ if (ImplicitBytes != 0) {
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ }
+
+ // Being able to dereference past the end is useful for emitting scalar loads.
+ return alignTo(TotalSize, 4);
+}
+
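getExplicitKernArgSize above is a standard struct-layout walk: each argument lands at the next offset aligned to its ABI alignment, and the largest alignment seen is reported through MaxAlign. A self-contained sketch of the same computation with made-up sizes and alignments (the real values come from the module's DataLayout):

    #include <algorithm>
    #include <cstdint>

    // Round Value up to a multiple of Align, as llvm::alignTo does.
    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      struct Arg { uint64_t Size, Align; };
      Arg Args[] = {{4, 4}, {8, 8}, {2, 2}};  // e.g. kernel(int, double, short)
      uint64_t Bytes = 0, MaxAlign = 1;
      for (const Arg &A : Args) {
        Bytes = alignTo(Bytes, A.Align) + A.Size;  // pad to alignment, append
        MaxAlign = std::max(MaxAlign, A.Align);
      }
      // 0 -> 4 (int), alignTo(4,8)=8 -> 16 (double), alignTo(16,2)=16 -> 18 (short)
      // so ExplicitArgBytes == 18 and MaxAlign == 8 for this signature.
      return 0;
    }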
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ R600GenSubtargetInfo(TT, GPU, FS),
+ AMDGPUSubtarget(TT, getFeatureBits()),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ FMA(false),
+ CaymanISA(false),
+ CFALUBug(false),
+ DX10Clamp(false),
+ HasVertexCache(false),
+ R600ALUInst(false),
+ FP64(false),
+ TexVTXClauseSize(0),
+ Gen(R600),
+ TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+ InstrItins(getInstrItineraryForCPU(GPU)),
+ AS (AMDGPU::getAMDGPUAS(TT)) { }
+
+void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
// Track register pressure so the scheduler can try to decrease
// pressure once register usage is above the threshold defined by
@@ -394,22 +480,12 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
-bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
- unsigned ExplicitArgBytes) const {
- unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
- if (ImplicitBytes == 0)
- return ExplicitArgBytes;
-
- unsigned Alignment = getAlignmentForImplicitArgPtr();
- return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
-}
-
-unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
- if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+ if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
return 10;
if (SGPRs <= 88)
@@ -431,7 +507,7 @@ unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
return 5;
}
-unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
+unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
if (VGPRs <= 24)
return 10;
if (VGPRs <= 28)
@@ -453,7 +529,7 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}
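The two stepped tables above (getOccupancyWithNumSGPRs and getOccupancyWithNumVGPRs) each map a register count to the number of waves per execution unit that count permits. A hedged sketch of how a caller would combine them; the helper name is illustrative, not part of this patch:

    // The achievable occupancy is bounded by whichever register file is the
    // tighter constraint (illustrative helper, not from this patch).
    static unsigned occupancyForRegUsage(const GCNSubtarget &ST,
                                         unsigned NumSGPRs, unsigned NumVGPRs) {
      return std::min(ST.getOccupancyWithNumSGPRs(NumSGPRs),
                      ST.getOccupancyWithNumVGPRs(NumVGPRs));
    }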
-unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -467,7 +543,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
return 2; // VCC.
}
-unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -517,7 +593,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
MaxAddressableNumSGPRs);
}
-unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -532,10 +608,6 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
unsigned Requested = AMDGPU::getIntegerAttribute(
F, "amdgpu-num-vgpr", MaxNumVGPRs);
- // Make sure requested value does not violate subtarget's specifications.
- if (Requested && Requested <= getReservedNumVGPRs(MF))
- Requested = 0;
-
// Make sure requested value is compatible with values implied by
// default/requested minimum/maximum number of waves per execution unit.
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
@@ -548,7 +620,7 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
MaxNumVGPRs = Requested;
}
- return MaxNumVGPRs - getReservedNumVGPRs(MF);
+ return MaxNumVGPRs;
}
namespace {
@@ -602,7 +674,21 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
};
} // namespace
-void SISubtarget::getPostRAMutations(
+void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}
+
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
+ if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
+ return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
+ else
+ return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
+}
+
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn)
+ return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
+ else
+ return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
+}
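The two get() helpers added at the end give passes that run on both R600 and GCN a single way to reach the common subtarget, dispatching on the triple rather than templating over the concrete type. A hypothetical usage sketch (accessor names assumed from the member list above):

    // Inside some MachineFunction pass; works for both amdgcn and r600 triples.
    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
    unsigned Waves = ST.getOccupancyWithLocalMemSize(MF);  // overload added above
    if (ST.has16BitInsts()) {
      // ... decision based only on the shared feature bits ...
    }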