diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.h | 293 |
1 files changed, 247 insertions, 46 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 0e3cb7dc1f87..36bc2498781f 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -22,6 +22,7 @@ #include "SIInstrInfo.h" #include "SIISelLowering.h" #include "SIFrameLowering.h" +#include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" @@ -51,6 +52,7 @@ public: SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS, + GFX9, }; enum { @@ -64,6 +66,28 @@ public: ISAVersion8_0_3, ISAVersion8_0_4, ISAVersion8_1_0, + ISAVersion9_0_0, + ISAVersion9_0_1 + }; + + enum TrapHandlerAbi { + TrapHandlerAbiNone = 0, + TrapHandlerAbiHsa = 1 + }; + + enum TrapID { + TrapIDHardwareReserved = 0, + TrapIDHSADebugTrap = 1, + TrapIDLLVMTrap = 2, + TrapIDLLVMDebugTrap = 3, + TrapIDDebugBreakpoint = 7, + TrapIDDebugReserved8 = 8, + TrapIDDebugReservedFE = 0xfe, + TrapIDDebugReservedFF = 0xff + }; + + enum TrapRegValues { + LLVMTrapHandlerRegValue = 1 }; protected: @@ -81,14 +105,16 @@ protected: bool HalfRate64Ops; // Dynamially set bits that enable features. 
- bool FP16Denormals; bool FP32Denormals; - bool FP64Denormals; + bool FP64FP16Denormals; bool FPExceptions; + bool DX10Clamp; bool FlatForGlobal; bool UnalignedScratchAccess; bool UnalignedBufferAccess; + bool HasApertureRegs; bool EnableXNACK; + bool TrapHandler; bool DebuggerInsertNops; bool DebuggerReserveRegs; bool DebuggerEmitPrologue; @@ -107,13 +133,17 @@ protected: bool GCN1Encoding; bool GCN3Encoding; bool CIInsts; + bool GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; bool Has16BitInsts; + bool HasVOP3PInsts; bool HasMovrel; bool HasVGPRIndexMode; bool HasScalarStores; bool HasInv2PiInlineImm; + bool HasSDWA; + bool HasDPP; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; @@ -127,6 +157,7 @@ protected: InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, @@ -184,10 +215,18 @@ public: return MaxPrivateElementSize; } + AMDGPUAS getAMDGPUAS() const { + return AS; + } + bool has16BitInsts() const { return Has16BitInsts; } + bool hasVOP3PInsts() const { + return HasVOP3PInsts; + } + bool hasHWFP64() const { return FP64; } @@ -243,6 +282,10 @@ public: return (getGeneration() >= EVERGREEN); } + bool hasMed3_16() const { + return getGeneration() >= GFX9; + } + bool hasCARRY() const { return (getGeneration() >= EVERGREEN); } @@ -255,6 +298,10 @@ public: return CaymanISA; } + TrapHandlerAbi getTrapHandlerAbi() const { + return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; + } + bool isPromoteAllocaEnabled() const { return EnablePromoteAlloca; } @@ -267,20 +314,22 @@ public: return DumpCode; } - bool enableIEEEBit(const MachineFunction &MF) const { - return AMDGPU::isCompute(MF.getFunction()->getCallingConv()); - } - /// Return the amount of LDS that can be used that will not restrict the /// occupancy lower than WaveCount. 
- unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const; + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if /// the given LDS memory size is the only constraint. - unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; + unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; + + unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const { + const auto *MFI = MF.getInfo<SIMachineFunctionInfo>(); + return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction()); + } bool hasFP16Denormals() const { - return FP16Denormals; + return FP64FP16Denormals; } bool hasFP32Denormals() const { @@ -288,13 +337,21 @@ public: } bool hasFP64Denormals() const { - return FP64Denormals; + return FP64FP16Denormals; } bool hasFPExceptions() const { return FPExceptions; } + bool enableDX10Clamp() const { + return DX10Clamp; + } + + bool enableIEEEBit(const MachineFunction &MF) const { + return AMDGPU::isCompute(MF.getFunction()->getCallingConv()); + } + bool useFlatForGlobal() const { return FlatForGlobal; } @@ -307,10 +364,22 @@ public: return UnalignedScratchAccess; } + bool hasApertureRegs() const { + return HasApertureRegs; + } + + bool isTrapHandlerEnabled() const { + return TrapHandler; + } + bool isXNACKEnabled() const { return EnableXNACK; } + bool hasFlatAddressSpace() const { + return FlatAddressSpace; + } + bool isMesaKernel(const MachineFunction &MF) const { return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); } @@ -324,6 +393,10 @@ public: return isAmdHsaOS() || isMesaKernel(MF); } + bool hasFminFmaxLegacy() const { + return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. 
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { @@ -355,72 +428,71 @@ public: return true; } + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} + /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return 4; + return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits()); } /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given flat work group size. + /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 8; - return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16; + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(), + FlatWorkGroupSize); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return getMaxWavesPerEU() * getEUsPerCU(); + return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits()); } /// \returns Maximum number of waves per compute unit supported by the - /// subtarget and limited by given flat work group size. + /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return getWavesPerWorkGroup(FlatWorkGroupSize); + return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(), + FlatWorkGroupSize); } /// \returns Minimum number of waves per execution unit supported by the /// subtarget. unsigned getMinWavesPerEU() const { - return 1; + return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits()); } /// \returns Maximum number of waves per execution unit supported by the /// subtarget without any kind of limitation. 
unsigned getMaxWavesPerEU() const { - if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 8; - // FIXME: Need to take scratch memory into account. - return 10; + return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits()); } /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given flat work group size. + /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) / - getEUsPerCU(); + return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(), + FlatWorkGroupSize); } /// \returns Minimum flat work group size supported by the subtarget. unsigned getMinFlatWorkGroupSize() const { - return 1; + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits()); } /// \returns Maximum flat work group size supported by the subtarget. unsigned getMaxFlatWorkGroupSize() const { - return 2048; + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits()); } - /// \returns Number of waves per work group given the flat work group size. + /// \returns Number of waves per work group supported by the subtarget and + /// limited by given \p FlatWorkGroupSize. 
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize(); + return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(), + FlatWorkGroupSize); } - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} - bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} - /// \returns Subtarget's default pair of minimum/maximum flat work group sizes /// for function \p F, or minimum/maximum flat work group sizes explicitly /// requested using "amdgpu-flat-work-group-size" attribute attached to @@ -440,6 +512,9 @@ public: /// compatible with minimum/maximum number of waves limited by flat work group /// size, register usage, and/or lds usage. std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; + + /// Creates value range metadata on a workitemid.* intrinsic call or load. + bool makeLIDRangeMetadata(Instruction *I) const; }; class R600Subtarget final : public AMDGPUSubtarget { @@ -482,13 +557,6 @@ public: }; class SISubtarget final : public AMDGPUSubtarget { -public: - enum { - // The closed Vulkan driver sets 96, which limits the wave count to 8 but - // doesn't spill SGPRs as much as when 80 is set. 
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 96 - }; - private: SIInstrInfo InstrInfo; SIFrameLowering FrameLowering; @@ -516,6 +584,21 @@ public: return GISel->getCallLowering(); } + const InstructionSelector *getInstructionSelector() const override { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getInstructionSelector(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getLegalizerInfo(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getRegBankInfo(); + } + const SIRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } @@ -524,6 +607,11 @@ public: this->GISel.reset(&GISel); } + // XXX - Why is this here if it isn't in the default pass set? + bool enableEarlyIfConversion() const override { + return true; + } + void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; @@ -533,10 +621,6 @@ public: return 16; } - bool hasFlatAddressSpace() const { - return FlatAddressSpace; - } - bool hasSMemRealTime() const { return HasSMemRealTime; } @@ -549,6 +633,10 @@ public: return HasVGPRIndexMode; } + bool useVGPRIndexMode(bool UserEnable) const { + return !hasMovrel() || (UserEnable && hasVGPRIndexMode()); + } + bool hasScalarCompareEq64() const { return getGeneration() >= VOLCANIC_ISLANDS; } @@ -561,6 +649,14 @@ public: return HasInv2PiInlineImm; } + bool hasSDWA() const { + return HasSDWA; + } + + bool hasDPP() const { + return HasDPP; + } + bool enableSIScheduler() const { return EnableSIScheduler; } @@ -594,6 +690,14 @@ public: return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } + bool hasSMovFedHazard() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + bool hasReadM0Hazard() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + unsigned getKernArgSegmentSize(const 
MachineFunction &MF, unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs @@ -605,10 +709,107 @@ public: /// \returns True if waitcnt instruction is needed before barrier instruction, /// false otherwise. bool needWaitcntBeforeBarrier() const { - return true; + return getGeneration() < GFX9; + } + + /// \returns true if the flat_scratch register should be initialized with the + /// pointer to the wave's scratch memory rather than a size and offset. + bool flatScratchIsPointer() const { + return getGeneration() >= GFX9; + } + + /// \returns SGPR allocation granularity supported by the subtarget. + unsigned getSGPRAllocGranule() const { + return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits()); + } + + /// \returns SGPR encoding granularity supported by the subtarget. + unsigned getSGPREncodingGranule() const { + return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits()); } - unsigned getMaxNumSGPRs() const; + /// \returns Total number of SGPRs supported by the subtarget. + unsigned getTotalNumSGPRs() const { + return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits()); + } + + /// \returns Addressable number of SGPRs supported by the subtarget. + unsigned getAddressableNumSGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits()); + } + + /// \returns Minimum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumSGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU); + } + + /// \returns Maximum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { + return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU, + Addressable); + } + + /// \returns Reserved number of SGPRs for given function \p MF. 
+ unsigned getReservedNumSGPRs(const MachineFunction &MF) const; + + /// \returns Maximum number of SGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of SGPRs explicitly + /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumSGPRs(const MachineFunction &MF) const; + + /// \returns VGPR allocation granularity supported by the subtarget. + unsigned getVGPRAllocGranule() const { + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + } + + /// \returns VGPR encoding granularity supported by the subtarget. + unsigned getVGPREncodingGranule() const { + return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits()); + } + + /// \returns Total number of VGPRs supported by the subtarget. + unsigned getTotalNumVGPRs() const { + return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits()); + } + + /// \returns Addressable number of VGPRs supported by the subtarget. + unsigned getAddressableNumVGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits()); + } + + /// \returns Minimum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU); + } + + /// \returns Maximum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU); + } + + /// \returns Reserved number of VGPRs for given function \p MF. 
+ unsigned getReservedNumVGPRs(const MachineFunction &MF) const { + return debuggerReserveRegs() ? 4 : 0; + } + + /// \returns Maximum number of VGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of VGPRs explicitly + /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumVGPRs(const MachineFunction &MF) const; }; } // end namespace llvm |