aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h293
1 files changed, 247 insertions, 46 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0e3cb7dc1f87..36bc2498781f 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "SIISelLowering.h"
#include "SIFrameLowering.h"
+#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
@@ -51,6 +52,7 @@ public:
SOUTHERN_ISLANDS,
SEA_ISLANDS,
VOLCANIC_ISLANDS,
+ GFX9,
};
enum {
@@ -64,6 +66,28 @@ public:
ISAVersion8_0_3,
ISAVersion8_0_4,
ISAVersion8_1_0,
+ ISAVersion9_0_0,
+ ISAVersion9_0_1
+ };
+
+ enum TrapHandlerAbi {
+ TrapHandlerAbiNone = 0,
+ TrapHandlerAbiHsa = 1
+ };
+
+ enum TrapID {
+ TrapIDHardwareReserved = 0,
+ TrapIDHSADebugTrap = 1,
+ TrapIDLLVMTrap = 2,
+ TrapIDLLVMDebugTrap = 3,
+ TrapIDDebugBreakpoint = 7,
+ TrapIDDebugReserved8 = 8,
+ TrapIDDebugReservedFE = 0xfe,
+ TrapIDDebugReservedFF = 0xff
+ };
+
+ enum TrapRegValues {
+ LLVMTrapHandlerRegValue = 1
};
protected:
@@ -81,14 +105,16 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
+ bool DX10Clamp;
bool FlatForGlobal;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
+ bool HasApertureRegs;
bool EnableXNACK;
+ bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
@@ -107,13 +133,17 @@ protected:
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasInv2PiInlineImm;
+ bool HasSDWA;
+ bool HasDPP;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
@@ -127,6 +157,7 @@ protected:
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
@@ -184,10 +215,18 @@ public:
return MaxPrivateElementSize;
}
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
bool hasHWFP64() const {
return FP64;
}
@@ -243,6 +282,10 @@ public:
return (getGeneration() >= EVERGREEN);
}
+ bool hasMed3_16() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
@@ -255,6 +298,10 @@ public:
return CaymanISA;
}
+ TrapHandlerAbi getTrapHandlerAbi() const {
+ return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -267,20 +314,22 @@ public:
return DumpCode;
}
- bool enableIEEEBit(const MachineFunction &MF) const {
- return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
- }
-
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
- unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
/// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
/// the given LDS memory size is the only constraint.
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+ unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
+ }
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -288,13 +337,21 @@ public:
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {
return FPExceptions;
}
+ bool enableDX10Clamp() const {
+ return DX10Clamp;
+ }
+
+ bool enableIEEEBit(const MachineFunction &MF) const {
+ return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -307,10 +364,22 @@ public:
return UnalignedScratchAccess;
}
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
+
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
+
bool isXNACKEnabled() const {
return EnableXNACK;
}
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
}
@@ -324,6 +393,10 @@ public:
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasFminFmaxLegacy() const {
+ return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
@@ -355,72 +428,71 @@ public:
return true;
}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return 4;
+ return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
}
/// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return getMaxWavesPerEU() * getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return getWavesPerWorkGroup(FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- // FIXME: Need to take scratch memory into account.
- return 10;
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
- getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
- return 2048;
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
}
- /// \returns Number of waves per work group given the flat work group size.
+ /// \returns Number of waves per work group supported by the subtarget and
+ /// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
+ FlatWorkGroupSize);
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
-
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -440,6 +512,9 @@ public:
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+ /// Creates value range metadata on an workitemid.* inrinsic call or load.
+ bool makeLIDRangeMetadata(Instruction *I) const;
};
class R600Subtarget final : public AMDGPUSubtarget {
@@ -482,13 +557,6 @@ public:
};
class SISubtarget final : public AMDGPUSubtarget {
-public:
- enum {
- // The closed Vulkan driver sets 96, which limits the wave count to 8 but
- // doesn't spill SGPRs as much as when 80 is set.
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
- };
-
private:
SIInstrInfo InstrInfo;
SIFrameLowering FrameLowering;
@@ -516,6 +584,21 @@ public:
return GISel->getCallLowering();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+ }
+
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -524,6 +607,11 @@ public:
this->GISel.reset(&GISel);
}
+ // XXX - Why is this here if it isn't in the default pass set?
+ bool enableEarlyIfConversion() const override {
+ return true;
+ }
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -533,10 +621,6 @@ public:
return 16;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
bool hasSMemRealTime() const {
return HasSMemRealTime;
}
@@ -549,6 +633,10 @@ public:
return HasVGPRIndexMode;
}
+ bool useVGPRIndexMode(bool UserEnable) const {
+ return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
+ }
+
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
}
@@ -561,6 +649,14 @@ public:
return HasInv2PiInlineImm;
}
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
+ bool hasDPP() const {
+ return HasDPP;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -594,6 +690,14 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ bool hasSMovFedHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0Hazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
@@ -605,10 +709,107 @@ public:
/// \returns True if waitcnt instruction is needed before barrier instruction,
/// false otherwise.
bool needWaitcntBeforeBarrier() const {
- return true;
+ return getGeneration() < GFX9;
+ }
+
+ /// \returns true if the flat_scratch register should be initialized with the
+ /// pointer to the wave's scratch memory rather than a size and offset.
+ bool flatScratchIsPointer() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns SGPR allocation granularity supported by the subtarget.
+ unsigned getSGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+ }
+
+ /// \returns SGPR encoding granularity supported by the subtarget.
+ unsigned getSGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
}
- unsigned getMaxNumSGPRs() const;
+ /// \returns Total number of SGPRs supported by the subtarget.
+ unsigned getTotalNumSGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of SGPRs supported by the subtarget.
+ unsigned getAddressableNumSGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
+ Addressable);
+ }
+
+ /// \returns Reserved number of SGPRs for given function \p MF.
+ unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns Maximum number of SGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of SGPRs explicitly
+ /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns VGPR allocation granularity supported by the subtarget.
+ unsigned getVGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
+ }
+
+ /// \returns VGPR encoding granularity supported by the subtarget.
+ unsigned getVGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+ }
+
+ /// \returns Total number of VGPRs supported by the subtarget.
+ unsigned getTotalNumVGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of VGPRs supported by the subtarget.
+ unsigned getAddressableNumVGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Reserved number of VGPRs for given function \p MF.
+ unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
+ return debuggerReserveRegs() ? 4 : 0;
+ }
+
+ /// \returns Maximum number of VGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of VGPRs explicitly
+ /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm