diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h | 1177 |
1 files changed, 1177 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h new file mode 100644 index 000000000000..d8bc0b2df2bd --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -0,0 +1,1177 @@ +//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// AMD GCN specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H + +#include "AMDGPUCallLowering.h" +#include "AMDGPUSubtarget.h" +#include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "AMDGPUGenSubtargetInfo.inc" + +namespace llvm { + +class GCNTargetMachine; + +class GCNSubtarget final : public AMDGPUGenSubtargetInfo, + public AMDGPUSubtarget { + + using AMDGPUSubtarget::getMaxWavesPerEU; + +public: + // Following 2 enums are documented at: + // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi + enum class TrapHandlerAbi { + NONE = 0x00, + AMDHSA = 0x01, + }; + + enum class TrapID { + LLVMAMDHSATrap = 0x02, + LLVMAMDHSADebugTrap = 0x03, + }; + +private: + /// GlobalISel related APIs. + std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; + std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + +protected: + // Basic subtarget description. + Triple TargetTriple; + AMDGPU::IsaInfo::AMDGPUTargetID TargetID; + unsigned Gen; + InstrItineraryData InstrItins; + int LDSBankCount; + unsigned MaxPrivateElementSize; + + // Possibly statically set by tablegen, but may want to be overridden. + bool FastFMAF32; + bool FastDenormalF32; + bool HalfRate64Ops; + bool FullRate64Ops; + + // Dynamically set bits that enable features. + bool FlatForGlobal; + bool AutoWaitcntBeforeBarrier; + bool UnalignedScratchAccess; + bool UnalignedAccessMode; + bool HasApertureRegs; + bool SupportsXNACK; + + // This should not be used directly. 'TargetID' tracks the dynamic settings + // for XNACK. + bool EnableXNACK; + + bool EnableTgSplit; + bool EnableCuMode; + bool TrapHandler; + + // Used as options. + bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; + bool EnableSIScheduler; + bool EnableDS128; + bool EnablePRTStrictNull; + bool DumpCode; + + // Subtarget statically properties set by tablegen + bool FP64; + bool FMA; + bool MIMG_R128; + bool CIInsts; + bool GFX8Insts; + bool GFX9Insts; + bool GFX90AInsts; + bool GFX10Insts; + bool GFX10_3Insts; + bool GFX7GFX8GFX9Insts; + bool SGPRInitBug; + bool NegativeScratchOffsetBug; + bool NegativeUnalignedScratchOffsetBug; + bool HasSMemRealTime; + bool HasIntClamp; + bool HasFmaMixInsts; + bool HasMovrel; + bool HasVGPRIndexMode; + bool HasScalarStores; + bool HasScalarAtomics; + bool HasSDWAOmod; + bool HasSDWAScalar; + bool HasSDWASdst; + bool HasSDWAMac; + bool HasSDWAOutModsVOPC; + bool HasDPP; + bool HasDPP8; + bool Has64BitDPP; + bool HasPackedFP32Ops; + bool HasExtendedImageInsts; + bool HasR128A16; + bool HasGFX10A16; + bool HasG16; + bool HasNSAEncoding; + unsigned NSAMaxSize; + bool GFX10_AEncoding; + bool GFX10_BEncoding; + bool HasDLInsts; + bool HasDot1Insts; + bool HasDot2Insts; + bool HasDot3Insts; + bool HasDot4Insts; + bool HasDot5Insts; + bool HasDot6Insts; + bool HasDot7Insts; + bool HasMAIInsts; + bool HasPkFmacF16Inst; + bool HasAtomicFaddInsts; + bool SupportsSRAMECC; + + // This should not be used directly. 'TargetID' tracks the dynamic settings + // for SRAMECC. + bool EnableSRAMECC; + + bool HasNoSdstCMPX; + bool HasVscnt; + bool HasGetWaveIdInst; + bool HasSMemTimeInst; + bool HasShaderCyclesRegister; + bool HasRegisterBanking; + bool HasVOP3Literal; + bool HasNoDataDepHazard; + bool FlatAddressSpace; + bool FlatInstOffsets; + bool FlatGlobalInsts; + bool FlatScratchInsts; + bool ScalarFlatScratchInsts; + bool HasArchitectedFlatScratch; + bool AddNoCarryInsts; + bool HasUnpackedD16VMem; + bool LDSMisalignedBug; + bool HasMFMAInlineLiteralBug; + bool UnalignedBufferAccess; + bool UnalignedDSAccess; + bool HasPackedTID; + bool ScalarizeGlobal; + + bool HasVcmpxPermlaneHazard; + bool HasVMEMtoScalarWriteHazard; + bool HasSMEMtoVectorWriteHazard; + bool HasInstFwdPrefetchBug; + bool HasVcmpxExecWARHazard; + bool HasLdsBranchVmemWARHazard; + bool HasNSAtoVMEMBug; + bool HasNSAClauseBug; + bool HasOffset3fBug; + bool HasFlatSegmentOffsetBug; + bool HasImageStoreD16Bug; + bool HasImageGather4D16Bug; + + // Dummy feature to use for assembler in tablegen. + bool FeatureDisable; + + SelectionDAGTargetInfo TSInfo; +private: + SIInstrInfo InstrInfo; + SITargetLowering TLInfo; + SIFrameLowering FrameLowering; + +public: + // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. + static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); + + GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const GCNTargetMachine &TM); + ~GCNSubtarget() override; + + GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + const SIInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + + const SIFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const SITargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const SIRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InlineAsmLowering *getInlineAsmLowering() const override { + return InlineAsmLoweringInfo.get(); + } + + InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } + + const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { + return TargetID; + } + + // Nothing implemented, just prevent crashes on use. + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + Generation getGeneration() const { + return (Generation)Gen; + } + + /// Return the number of high bits known to be zero for a frame index. + unsigned getKnownHighZeroBitsForFrameIndex() const { + return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); + } + + int getLDSBankCount() const { + return LDSBankCount; + } + + unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { + return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; + } + + unsigned getConstantBusLimit(unsigned Opcode) const; + + /// Returns if the result of this instruction with a 16-bit result returned in + /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve + /// the original value. + bool zeroesHigh16BitsOfDest(unsigned Opcode) const; + + bool hasIntClamp() const { + return HasIntClamp; + } + + bool hasFP64() const { + return FP64; + } + + bool hasMIMG_R128() const { + return MIMG_R128; + } + + bool hasHWFP64() const { + return FP64; + } + + bool hasFastFMAF32() const { + return FastFMAF32; + } + + bool hasHalfRate64Ops() const { + return HalfRate64Ops; + } + + bool hasFullRate64Ops() const { + return FullRate64Ops; + } + + bool hasAddr64() const { + return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); + } + + bool hasFlat() const { + return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); + } + + // Return true if the target only has the reverse operand versions of VALU + // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). + bool hasOnlyRevVALUShifts() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasFractBug() const { + return getGeneration() == SOUTHERN_ISLANDS; + } + + bool hasBFE() const { + return true; + } + + bool hasBFI() const { + return true; + } + + bool hasBFM() const { + return hasBFE(); + } + + bool hasBCNT(unsigned Size) const { + return true; + } + + bool hasFFBL() const { + return true; + } + + bool hasFFBH() const { + return true; + } + + bool hasMed3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + bool hasMin3Max3_16() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + bool hasFmaMixInsts() const { + return HasFmaMixInsts; + } + + bool hasCARRY() const { + return true; + } + + bool hasFMA() const { + return FMA; + } + + bool hasSwap() const { + return GFX9Insts; + } + + bool hasScalarPackInsts() const { + return GFX9Insts; + } + + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + + TrapHandlerAbi getTrapHandlerAbi() const { + return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; + } + + bool supportsGetDoorbellID() const { + // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. + return getGeneration() >= GFX9; + } + + /// True if the offset field of DS instructions works as expected. On SI, the + /// offset uses a 16-bit adder and does not always wrap properly. + bool hasUsableDSOffset() const { + return getGeneration() >= SEA_ISLANDS; + } + + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + + /// Condition output from div_scale is usable. + bool hasUsableDivScaleConditionOutput() const { + return getGeneration() != SOUTHERN_ISLANDS; + } + + /// Extra wait hazard is needed in some cases before + /// s_cbranch_vccnz/s_cbranch_vccz. + bool hasReadVCCZBug() const { + return getGeneration() <= SEA_ISLANDS; + } + + /// Writes to VCC_LO/VCC_HI update the VCCZ flag. + bool partialVCCWritesUpdateVCCZ() const { + return getGeneration() >= GFX10; + } + + /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR + /// was written by a VALU instruction. + bool hasSMRDReadVALUDefHazard() const { + return getGeneration() == SOUTHERN_ISLANDS; + } + + /// A read of an SGPR by a VMEM instruction requires 5 wait states when the + /// SGPR was written by a VALU Instruction. + bool hasVMEMReadSGPRVALUDefHazard() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasRFEHazards() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. + unsigned getSetRegWaitStates() const { + return getGeneration() <= SEA_ISLANDS ? 1 : 2; + } + + bool dumpCode() const { + return DumpCode; + } + + /// Return the amount of LDS that can be used that will not restrict the + /// occupancy lower than WaveCount. + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; + + bool supportsMinMaxDenormModes() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + /// \returns If target supports S_DENORM_MODE. + bool hasDenormModeInst() const { + return getGeneration() >= AMDGPUSubtarget::GFX10; + } + + bool useFlatForGlobal() const { + return FlatForGlobal; + } + + /// \returns If target supports ds_read/write_b128 and user enables generation + /// of ds_read/write_b128. + bool useDS128() const { + return CIInsts && EnableDS128; + } + + /// \return If target supports ds_read/write_b96/128. + bool hasDS96AndDS128() const { + return CIInsts; + } + + /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 + bool haveRoundOpsF64() const { + return CIInsts; + } + + /// \returns If MUBUF instructions always perform range checking, even for + /// buffer resources used for private memory access. + bool privateMemoryResourceIsRangeChecked() const { + return getGeneration() < AMDGPUSubtarget::GFX9; + } + + /// \returns If target requires PRT Struct NULL support (zero result registers + /// for sparse texture support). + bool usePRTStrictNull() const { + return EnablePRTStrictNull; + } + + bool hasAutoWaitcntBeforeBarrier() const { + return AutoWaitcntBeforeBarrier; + } + + bool hasUnalignedBufferAccess() const { + return UnalignedBufferAccess; + } + + bool hasUnalignedBufferAccessEnabled() const { + return UnalignedBufferAccess && UnalignedAccessMode; + } + + bool hasUnalignedDSAccess() const { + return UnalignedDSAccess; + } + + bool hasUnalignedDSAccessEnabled() const { + return UnalignedDSAccess && UnalignedAccessMode; + } + + bool hasUnalignedScratchAccess() const { + return UnalignedScratchAccess; + } + + bool hasUnalignedAccessMode() const { + return UnalignedAccessMode; + } + + bool hasApertureRegs() const { + return HasApertureRegs; + } + + bool isTrapHandlerEnabled() const { + return TrapHandler; + } + + bool isXNACKEnabled() const { + return TargetID.isXnackOnOrAny(); + } + + bool isTgSplitEnabled() const { + return EnableTgSplit; + } + + bool isCuModeEnabled() const { + return EnableCuMode; + } + + bool hasFlatAddressSpace() const { + return FlatAddressSpace; + } + + bool hasFlatScrRegister() const { + return hasFlatAddressSpace(); + } + + bool hasFlatInstOffsets() const { + return FlatInstOffsets; + } + + bool hasFlatGlobalInsts() const { + return FlatGlobalInsts; + } + + bool hasFlatScratchInsts() const { + return FlatScratchInsts; + } + + // Check if target supports ST addressing mode with FLAT scratch instructions. + // The ST addressing mode means no registers are used, either VGPR or SGPR, + // but only immediate offset is swizzled and added to the FLAT scratch base. + bool hasFlatScratchSTMode() const { + return hasFlatScratchInsts() && hasGFX10_3Insts(); + } + + bool hasScalarFlatScratchInsts() const { + return ScalarFlatScratchInsts; + } + + bool hasGlobalAddTidInsts() const { + return GFX10_BEncoding; + } + + bool hasAtomicCSub() const { + return GFX10_BEncoding; + } + + bool hasMultiDwordFlatScratchAddressing() const { + return getGeneration() >= GFX9; + } + + bool hasFlatSegmentOffsetBug() const { + return HasFlatSegmentOffsetBug; + } + + bool hasFlatLgkmVMemCountInOrder() const { + return getGeneration() > GFX9; + } + + bool hasD16LoadStore() const { + return getGeneration() >= GFX9; + } + + bool d16PreservesUnusedBits() const { + return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); + } + + bool hasD16Images() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + /// Return if most LDS instructions have an m0 use that require m0 to be + /// initialized. + bool ldsRequiresM0Init() const { + return getGeneration() < GFX9; + } + + // True if the hardware rewinds and replays GWS operations if a wave is + // preempted. + // + // If this is false, a GWS operation requires testing if a nack set the + // MEM_VIOL bit, and repeating if so. + bool hasGWSAutoReplay() const { + return getGeneration() >= GFX9; + } + + /// \returns if target has ds_gws_sema_release_all instruction. + bool hasGWSSemaReleaseAll() const { + return CIInsts; + } + + /// \returns true if the target has integer add/sub instructions that do not + /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, + /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier + /// for saturation. + bool hasAddNoCarry() const { + return AddNoCarryInsts; + } + + bool hasUnpackedD16VMem() const { + return HasUnpackedD16VMem; + } + + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const Function &F) const { + return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); + } + + bool hasMad64_32() const { + return getGeneration() >= SEA_ISLANDS; + } + + bool hasSDWAOmod() const { + return HasSDWAOmod; + } + + bool hasSDWAScalar() const { + return HasSDWAScalar; + } + + bool hasSDWASdst() const { + return HasSDWASdst; + } + + bool hasSDWAMac() const { + return HasSDWAMac; + } + + bool hasSDWAOutModsVOPC() const { + return HasSDWAOutModsVOPC; + } + + bool hasDLInsts() const { + return HasDLInsts; + } + + bool hasDot1Insts() const { + return HasDot1Insts; + } + + bool hasDot2Insts() const { + return HasDot2Insts; + } + + bool hasDot3Insts() const { + return HasDot3Insts; + } + + bool hasDot4Insts() const { + return HasDot4Insts; + } + + bool hasDot5Insts() const { + return HasDot5Insts; + } + + bool hasDot6Insts() const { + return HasDot6Insts; + } + + bool hasDot7Insts() const { + return HasDot7Insts; + } + + bool hasMAIInsts() const { + return HasMAIInsts; + } + + bool hasPkFmacF16Inst() const { + return HasPkFmacF16Inst; + } + + bool hasAtomicFaddInsts() const { + return HasAtomicFaddInsts; + } + + bool hasNoSdstCMPX() const { + return HasNoSdstCMPX; + } + + bool hasVscnt() const { + return HasVscnt; + } + + bool hasGetWaveIdInst() const { + return HasGetWaveIdInst; + } + + bool hasSMemTimeInst() const { + return HasSMemTimeInst; + } + + bool hasShaderCyclesRegister() const { + return HasShaderCyclesRegister; + } + + bool hasRegisterBanking() const { + return HasRegisterBanking; + } + + bool hasVOP3Literal() const { + return HasVOP3Literal; + } + + bool hasNoDataDepHazard() const { + return HasNoDataDepHazard; + } + + bool vmemWriteNeedsExpWaitcnt() const { + return getGeneration() < SEA_ISLANDS; + } + + // Scratch is allocated in 256 dword per wave blocks for the entire + // wavefront. When viewed from the perspective of an arbitrary workitem, this + // is 4-byte aligned. + // + // Only 4-byte alignment is really needed to access anything. Transformations + // on the pointer value itself may rely on the alignment / known low bits of + // the pointer. Set this to something above the minimum to avoid needing + // dynamic realignment in common cases. + Align getStackAlignment() const { return Align(16); } + + bool enableMachineScheduler() const override { + return true; + } + + bool useAA() const override; + + bool enableSubRegLiveness() const override { + return true; + } + + void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } + bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } + + // static wrappers + static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); + + // XXX - Why is this here if it isn't in the default pass set? + bool enableEarlyIfConversion() const override { + return true; + } + + bool enableFlatScratch() const; + + void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const override; + + unsigned getMaxNumUserSGPRs() const { + return 16; + } + + bool hasSMemRealTime() const { + return HasSMemRealTime; + } + + bool hasMovrel() const { + return HasMovrel; + } + + bool hasVGPRIndexMode() const { + return HasVGPRIndexMode; + } + + bool useVGPRIndexMode() const; + + bool hasScalarCompareEq64() const { + return getGeneration() >= VOLCANIC_ISLANDS; + } + + bool hasScalarStores() const { + return HasScalarStores; + } + + bool hasScalarAtomics() const { + return HasScalarAtomics; + } + + bool hasLDSFPAtomicAdd() const { return GFX8Insts; } + + /// \returns true if the subtarget has the v_permlanex16_b32 instruction. + bool hasPermLaneX16() const { return getGeneration() >= GFX10; } + + bool hasDPP() const { + return HasDPP; + } + + bool hasDPPBroadcasts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPPWavefrontShifts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPP8() const { + return HasDPP8; + } + + bool has64BitDPP() const { + return Has64BitDPP; + } + + bool hasPackedFP32Ops() const { + return HasPackedFP32Ops; + } + + bool hasFmaakFmamkF32Insts() const { + return getGeneration() >= GFX10; + } + + bool hasExtendedImageInsts() const { + return HasExtendedImageInsts; + } + + bool hasR128A16() const { + return HasR128A16; + } + + bool hasGFX10A16() const { + return HasGFX10A16; + } + + bool hasA16() const { return hasR128A16() || hasGFX10A16(); } + + bool hasG16() const { return HasG16; } + + bool hasOffset3fBug() const { + return HasOffset3fBug; + } + + bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } + + bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } + + bool hasNSAEncoding() const { return HasNSAEncoding; } + + unsigned getNSAMaxSize() const { return NSAMaxSize; } + + bool hasGFX10_AEncoding() const { + return GFX10_AEncoding; + } + + bool hasGFX10_BEncoding() const { + return GFX10_BEncoding; + } + + bool hasGFX10_3Insts() const { + return GFX10_3Insts; + } + + bool hasMadF16() const; + + bool enableSIScheduler() const { + return EnableSIScheduler; + } + + bool loadStoreOptEnabled() const { + return EnableLoadStoreOpt; + } + + bool hasSGPRInitBug() const { + return SGPRInitBug; + } + + bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } + + bool hasNegativeUnalignedScratchOffsetBug() const { + return NegativeUnalignedScratchOffsetBug; + } + + bool hasMFMAInlineLiteralBug() const { + return HasMFMAInlineLiteralBug; + } + + bool has12DWordStoreHazard() const { + return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; + } + + // \returns true if the subtarget supports DWORDX3 load/store instructions. + bool hasDwordx3LoadStores() const { + return CIInsts; + } + + bool hasReadM0MovRelInterpHazard() const { + return getGeneration() == AMDGPUSubtarget::GFX9; + } + + bool hasReadM0SendMsgHazard() const { + return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && + getGeneration() <= AMDGPUSubtarget::GFX9; + } + + bool hasVcmpxPermlaneHazard() const { + return HasVcmpxPermlaneHazard; + } + + bool hasVMEMtoScalarWriteHazard() const { + return HasVMEMtoScalarWriteHazard; + } + + bool hasSMEMtoVectorWriteHazard() const { + return HasSMEMtoVectorWriteHazard; + } + + bool hasLDSMisalignedBug() const { + return LDSMisalignedBug && !EnableCuMode; + } + + bool hasInstFwdPrefetchBug() const { + return HasInstFwdPrefetchBug; + } + + bool hasVcmpxExecWARHazard() const { + return HasVcmpxExecWARHazard; + } + + bool hasLdsBranchVmemWARHazard() const { + return HasLdsBranchVmemWARHazard; + } + + bool hasNSAtoVMEMBug() const { + return HasNSAtoVMEMBug; + } + + bool hasNSAClauseBug() const { return HasNSAClauseBug; } + + bool hasHardClauses() const { return getGeneration() >= GFX10; } + + bool hasGFX90AInsts() const { return GFX90AInsts; } + + /// Return if operations acting on VGPR tuples require even alignment. + bool needsAlignedVGPRs() const { return GFX90AInsts; } + + bool hasPackedTID() const { return HasPackedTID; } + + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs + /// SGPRs + unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; + + /// Return the maximum number of waves per SIMD for kernels using \p VGPRs + /// VGPRs + unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + + /// Return occupancy for the given function. Used LDS and a number of + /// registers if provided. + /// Note, occupancy can be affected by the scratch allocation as well, but + /// we do not have enough information to compute it. + unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, + unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; + + /// \returns true if the flat_scratch register should be initialized with the + /// pointer to the wave's scratch memory rather than a size and offset. + bool flatScratchIsPointer() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + + /// \returns true if the flat_scratch register is initialized by the HW. + /// In this case it is readonly. + bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } + + /// \returns true if the machine has merged shaders in which s0-s7 are + /// reserved by the hardware and user SGPRs start at s8 + bool hasMergedShaders() const { + return getGeneration() >= GFX9; + } + + /// \returns SGPR allocation granularity supported by the subtarget. + unsigned getSGPRAllocGranule() const { + return AMDGPU::IsaInfo::getSGPRAllocGranule(this); + } + + /// \returns SGPR encoding granularity supported by the subtarget. + unsigned getSGPREncodingGranule() const { + return AMDGPU::IsaInfo::getSGPREncodingGranule(this); + } + + /// \returns Total number of SGPRs supported by the subtarget. + unsigned getTotalNumSGPRs() const { + return AMDGPU::IsaInfo::getTotalNumSGPRs(this); + } + + /// \returns Addressable number of SGPRs supported by the subtarget. + unsigned getAddressableNumSGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); + } + + /// \returns Minimum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumSGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); + } + + /// \returns Maximum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { + return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); + } + + /// \returns Reserved number of SGPRs. This is common + /// utility function called by MachineFunction and + /// Function variants of getReservedNumSGPRs. + unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const; + /// \returns Reserved number of SGPRs for given machine function \p MF. + unsigned getReservedNumSGPRs(const MachineFunction &MF) const; + + /// \returns Reserved number of SGPRs for given function \p F. + unsigned getReservedNumSGPRs(const Function &F) const; + + /// \returns max num SGPRs. This is the common utility + /// function called by MachineFunction and Function + /// variants of getMaxNumSGPRs. + unsigned getBaseMaxNumSGPRs(const Function &F, + std::pair<unsigned, unsigned> WavesPerEU, + unsigned PreloadedSGPRs, + unsigned ReservedNumSGPRs) const; + + /// \returns Maximum number of SGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of SGPRs explicitly + /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumSGPRs(const MachineFunction &MF) const; + + /// \returns Maximum number of SGPRs that meets number of waves per execution + /// unit requirement for function \p F, or number of SGPRs explicitly + /// requested using "amdgpu-num-sgpr" attribute attached to function \p F. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumSGPRs(const Function &F) const; + + /// \returns VGPR allocation granularity supported by the subtarget. + unsigned getVGPRAllocGranule() const { + return AMDGPU::IsaInfo::getVGPRAllocGranule(this); + } + + /// \returns VGPR encoding granularity supported by the subtarget. + unsigned getVGPREncodingGranule() const { + return AMDGPU::IsaInfo::getVGPREncodingGranule(this); + } + + /// \returns Total number of VGPRs supported by the subtarget. + unsigned getTotalNumVGPRs() const { + return AMDGPU::IsaInfo::getTotalNumVGPRs(this); + } + + /// \returns Addressable number of VGPRs supported by the subtarget. + unsigned getAddressableNumVGPRs() const { + return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); + } + + /// \returns Minimum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); + } + + /// \returns Maximum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { + return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); + } + + /// \returns max num VGPRs. This is the common utility function + /// called by MachineFunction and Function variants of getMaxNumVGPRs. + unsigned getBaseMaxNumVGPRs(const Function &F, + std::pair<unsigned, unsigned> WavesPerEU) const; + /// \returns Maximum number of VGPRs that meets number of waves per execution + /// unit requirement for function \p F, or number of VGPRs explicitly + /// requested using "amdgpu-num-vgpr" attribute attached to function \p F. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumVGPRs(const Function &F) const; + + /// \returns Maximum number of VGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of VGPRs explicitly + /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + + void getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) + const override; + + std::unique_ptr<ScheduleDAGMutation> + createFillMFMAShadowMutation(const TargetInstrInfo *TII) const; + + bool isWave32() const { + return getWavefrontSize() == 32; + } + + bool isWave64() const { + return getWavefrontSize() == 64; + } + + const TargetRegisterClass *getBoolRC() const { + return getRegisterInfo()->getBoolRC(); + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } + + void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, + SDep &Dep) const override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H |
