aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h1177
1 files changed, 1177 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
new file mode 100644
index 000000000000..d8bc0b2df2bd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -0,0 +1,1177 @@
+//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// AMD GCN specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
+
+#include "AMDGPUCallLowering.h"
+#include "AMDGPUSubtarget.h"
+#include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class GCNTargetMachine;
+
/// GCN subtarget: describes the feature set of a single GCN/RDNA GPU
/// (generation, instruction-set features, hazards/bugs, and register-file
/// limits), and owns the SelectionDAG and GlobalISel lowering objects.
class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
                           public AMDGPUSubtarget {

  // Re-expose the base-class overload set next to this class's own
  // wave-per-EU queries.
  using AMDGPUSubtarget::getMaxWavesPerEU;

public:
  // Following 2 enums are documented at:
  //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
  enum class TrapHandlerAbi {
    NONE = 0x00,
    AMDHSA = 0x01,
  };

  enum class TrapID {
    LLVMAMDHSATrap = 0x02,
    LLVMAMDHSADebugTrap = 0x03,
  };

private:
  /// GlobalISel related APIs.
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

protected:
  // Basic subtarget description.
  Triple TargetTriple;
  AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
  unsigned Gen;                     // Generation enum value; see getGeneration().
  InstrItineraryData InstrItins;
  int LDSBankCount;
  unsigned MaxPrivateElementSize;

  // Possibly statically set by tablegen, but may want to be overridden.
  bool FastFMAF32;
  bool FastDenormalF32;
  bool HalfRate64Ops;
  bool FullRate64Ops;

  // Dynamically set bits that enable features.
  bool FlatForGlobal;
  bool AutoWaitcntBeforeBarrier;
  bool UnalignedScratchAccess;
  bool UnalignedAccessMode;
  bool HasApertureRegs;
  bool SupportsXNACK;

  // This should not be used directly. 'TargetID' tracks the dynamic settings
  // for XNACK.
  bool EnableXNACK;

  bool EnableTgSplit;
  bool EnableCuMode;
  bool TrapHandler;

  // Used as options.
  bool EnableLoadStoreOpt;
  bool EnableUnsafeDSOffsetFolding;
  bool EnableSIScheduler;
  bool EnableDS128;
  bool EnablePRTStrictNull;
  bool DumpCode;

  // Subtarget properties statically set by tablegen.
  bool FP64;
  bool FMA;
  bool MIMG_R128;
  bool CIInsts;
  bool GFX8Insts;
  bool GFX9Insts;
  bool GFX90AInsts;
  bool GFX10Insts;
  bool GFX10_3Insts;
  bool GFX7GFX8GFX9Insts;
  bool SGPRInitBug;
  bool NegativeScratchOffsetBug;
  bool NegativeUnalignedScratchOffsetBug;
  bool HasSMemRealTime;
  bool HasIntClamp;
  bool HasFmaMixInsts;
  bool HasMovrel;
  bool HasVGPRIndexMode;
  bool HasScalarStores;
  bool HasScalarAtomics;
  bool HasSDWAOmod;
  bool HasSDWAScalar;
  bool HasSDWASdst;
  bool HasSDWAMac;
  bool HasSDWAOutModsVOPC;
  bool HasDPP;
  bool HasDPP8;
  bool Has64BitDPP;
  bool HasPackedFP32Ops;
  bool HasExtendedImageInsts;
  bool HasR128A16;
  bool HasGFX10A16;
  bool HasG16;
  bool HasNSAEncoding;
  unsigned NSAMaxSize;
  bool GFX10_AEncoding;
  bool GFX10_BEncoding;
  bool HasDLInsts;
  bool HasDot1Insts;
  bool HasDot2Insts;
  bool HasDot3Insts;
  bool HasDot4Insts;
  bool HasDot5Insts;
  bool HasDot6Insts;
  bool HasDot7Insts;
  bool HasMAIInsts;
  bool HasPkFmacF16Inst;
  bool HasAtomicFaddInsts;
  bool SupportsSRAMECC;

  // This should not be used directly. 'TargetID' tracks the dynamic settings
  // for SRAMECC.
  bool EnableSRAMECC;

  bool HasNoSdstCMPX;
  bool HasVscnt;
  bool HasGetWaveIdInst;
  bool HasSMemTimeInst;
  bool HasShaderCyclesRegister;
  bool HasRegisterBanking;
  bool HasVOP3Literal;
  bool HasNoDataDepHazard;
  bool FlatAddressSpace;
  bool FlatInstOffsets;
  bool FlatGlobalInsts;
  bool FlatScratchInsts;
  bool ScalarFlatScratchInsts;
  bool HasArchitectedFlatScratch;
  bool AddNoCarryInsts;
  bool HasUnpackedD16VMem;
  bool LDSMisalignedBug;
  bool HasMFMAInlineLiteralBug;
  bool UnalignedBufferAccess;
  bool UnalignedDSAccess;
  bool HasPackedTID;
  bool ScalarizeGlobal;

  // Hardware hazard/erratum flags, consumed by the hazard recognizer and
  // workaround passes.
  bool HasVcmpxPermlaneHazard;
  bool HasVMEMtoScalarWriteHazard;
  bool HasSMEMtoVectorWriteHazard;
  bool HasInstFwdPrefetchBug;
  bool HasVcmpxExecWARHazard;
  bool HasLdsBranchVmemWARHazard;
  bool HasNSAtoVMEMBug;
  bool HasNSAClauseBug;
  bool HasOffset3fBug;
  bool HasFlatSegmentOffsetBug;
  bool HasImageStoreD16Bug;
  bool HasImageGather4D16Bug;

  // Dummy feature to use for assembler in tablegen.
  bool FeatureDisable;

  SelectionDAGTargetInfo TSInfo;
private:
  SIInstrInfo InstrInfo;
  SITargetLowering TLInfo;
  SIFrameLowering FrameLowering;

public:
  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);

  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
               const GCNTargetMachine &TM);
  ~GCNSubtarget() override;

  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                StringRef GPU, StringRef FS);

  const SIInstrInfo *getInstrInfo() const override {
    return &InstrInfo;
  }

  const SIFrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const SITargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  // Register info is owned by the instruction info object.
  const SIRegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }

  const CallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }

  const InlineAsmLowering *getInlineAsmLowering() const override {
    return InlineAsmLoweringInfo.get();
  }

  InstructionSelector *getInstructionSelector() const override {
    return InstSelector.get();
  }

  const LegalizerInfo *getLegalizerInfo() const override {
    return Legalizer.get();
  }

  const RegisterBankInfo *getRegBankInfo() const override {
    return RegBankInfo.get();
  }

  /// \returns the dynamic target-id settings (XNACK / SRAMECC state).
  const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
    return TargetID;
  }

  // Nothing implemented, just prevent crashes on use.
  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  const InstrItineraryData *getInstrItineraryData() const override {
    return &InstrItins;
  }

  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  Generation getGeneration() const {
    return (Generation)Gen;
  }

  /// Return the number of high bits known to be zero for a frame index.
  unsigned getKnownHighZeroBitsForFrameIndex() const {
    return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
  }

  int getLDSBankCount() const {
    return LDSBankCount;
  }

  // Flat-scratch access is not bound by the MUBUF element-size limit, so a
  // full 16-byte element is allowed unless a buffer rsrc is requested.
  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
    return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
  }

  unsigned getConstantBusLimit(unsigned Opcode) const;

  /// Returns if the result of this instruction with a 16-bit result returned in
  /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
  /// the original value.
  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;

  bool hasIntClamp() const {
    return HasIntClamp;
  }

  bool hasFP64() const {
    return FP64;
  }

  bool hasMIMG_R128() const {
    return MIMG_R128;
  }

  bool hasHWFP64() const {
    return FP64;
  }

  bool hasFastFMAF32() const {
    return FastFMAF32;
  }

  bool hasHalfRate64Ops() const {
    return HalfRate64Ops;
  }

  bool hasFullRate64Ops() const {
    return FullRate64Ops;
  }

  // Addr64 MUBUF addressing was removed in VI (Volcanic Islands).
  bool hasAddr64() const {
    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
  }

  // FLAT instructions were introduced after SI (Southern Islands).
  bool hasFlat() const {
    return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
  }

  // Return true if the target only has the reverse operand versions of VALU
  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
  bool hasOnlyRevVALUShifts() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }

  bool hasFractBug() const {
    return getGeneration() == SOUTHERN_ISLANDS;
  }

  // Bitfield extract/insert/mask are available on all GCN targets.
  bool hasBFE() const {
    return true;
  }

  bool hasBFI() const {
    return true;
  }

  bool hasBFM() const {
    return hasBFE();
  }

  bool hasBCNT(unsigned Size) const {
    return true;
  }

  bool hasFFBL() const {
    return true;
  }

  bool hasFFBH() const {
    return true;
  }

  bool hasMed3_16() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasMin3Max3_16() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasFmaMixInsts() const {
    return HasFmaMixInsts;
  }

  bool hasCARRY() const {
    return true;
  }

  bool hasFMA() const {
    return FMA;
  }

  bool hasSwap() const {
    return GFX9Insts;
  }

  bool hasScalarPackInsts() const {
    return GFX9Insts;
  }

  bool hasScalarMulHiInsts() const {
    return GFX9Insts;
  }

  TrapHandlerAbi getTrapHandlerAbi() const {
    return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
  }

  bool supportsGetDoorbellID() const {
    // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
    return getGeneration() >= GFX9;
  }

  /// True if the offset field of DS instructions works as expected. On SI, the
  /// offset uses a 16-bit adder and does not always wrap properly.
  bool hasUsableDSOffset() const {
    return getGeneration() >= SEA_ISLANDS;
  }

  bool unsafeDSOffsetFoldingEnabled() const {
    return EnableUnsafeDSOffsetFolding;
  }

  /// Condition output from div_scale is usable.
  bool hasUsableDivScaleConditionOutput() const {
    return getGeneration() != SOUTHERN_ISLANDS;
  }

  /// Extra wait hazard is needed in some cases before
  /// s_cbranch_vccnz/s_cbranch_vccz.
  bool hasReadVCCZBug() const {
    return getGeneration() <= SEA_ISLANDS;
  }

  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
  bool partialVCCWritesUpdateVCCZ() const {
    return getGeneration() >= GFX10;
  }

  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
  /// was written by a VALU instruction.
  bool hasSMRDReadVALUDefHazard() const {
    return getGeneration() == SOUTHERN_ISLANDS;
  }

  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
  /// SGPR was written by a VALU Instruction.
  bool hasVMEMReadSGPRVALUDefHazard() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }

  bool hasRFEHazards() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }

  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
  unsigned getSetRegWaitStates() const {
    return getGeneration() <= SEA_ISLANDS ? 1 : 2;
  }

  bool dumpCode() const {
    return DumpCode;
  }

  /// Return the amount of LDS that can be used that will not restrict the
  /// occupancy lower than WaveCount.
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

  bool supportsMinMaxDenormModes() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  /// \returns If target supports S_DENORM_MODE.
  bool hasDenormModeInst() const {
    return getGeneration() >= AMDGPUSubtarget::GFX10;
  }

  bool useFlatForGlobal() const {
    return FlatForGlobal;
  }

  /// \returns If target supports ds_read/write_b128 and user enables generation
  /// of ds_read/write_b128.
  bool useDS128() const {
    return CIInsts && EnableDS128;
  }

  /// \return If target supports ds_read/write_b96/128.
  bool hasDS96AndDS128() const {
    return CIInsts;
  }

  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
  bool haveRoundOpsF64() const {
    return CIInsts;
  }

  /// \returns If MUBUF instructions always perform range checking, even for
  /// buffer resources used for private memory access.
  bool privateMemoryResourceIsRangeChecked() const {
    return getGeneration() < AMDGPUSubtarget::GFX9;
  }

  /// \returns If target requires PRT Struct NULL support (zero result registers
  /// for sparse texture support).
  bool usePRTStrictNull() const {
    return EnablePRTStrictNull;
  }

  bool hasAutoWaitcntBeforeBarrier() const {
    return AutoWaitcntBeforeBarrier;
  }

  bool hasUnalignedBufferAccess() const {
    return UnalignedBufferAccess;
  }

  // "Enabled" variants additionally require the unaligned-access-mode
  // feature to be switched on, not just hardware support.
  bool hasUnalignedBufferAccessEnabled() const {
    return UnalignedBufferAccess && UnalignedAccessMode;
  }

  bool hasUnalignedDSAccess() const {
    return UnalignedDSAccess;
  }

  bool hasUnalignedDSAccessEnabled() const {
    return UnalignedDSAccess && UnalignedAccessMode;
  }

  bool hasUnalignedScratchAccess() const {
    return UnalignedScratchAccess;
  }

  bool hasUnalignedAccessMode() const {
    return UnalignedAccessMode;
  }

  bool hasApertureRegs() const {
    return HasApertureRegs;
  }

  bool isTrapHandlerEnabled() const {
    return TrapHandler;
  }

  // Queries the dynamic TargetID state, not the static EnableXNACK flag.
  bool isXNACKEnabled() const {
    return TargetID.isXnackOnOrAny();
  }

  bool isTgSplitEnabled() const {
    return EnableTgSplit;
  }

  bool isCuModeEnabled() const {
    return EnableCuMode;
  }

  bool hasFlatAddressSpace() const {
    return FlatAddressSpace;
  }

  bool hasFlatScrRegister() const {
    return hasFlatAddressSpace();
  }

  bool hasFlatInstOffsets() const {
    return FlatInstOffsets;
  }

  bool hasFlatGlobalInsts() const {
    return FlatGlobalInsts;
  }

  bool hasFlatScratchInsts() const {
    return FlatScratchInsts;
  }

  // Check if target supports ST addressing mode with FLAT scratch instructions.
  // The ST addressing mode means no registers are used, either VGPR or SGPR,
  // but only immediate offset is swizzled and added to the FLAT scratch base.
  bool hasFlatScratchSTMode() const {
    return hasFlatScratchInsts() && hasGFX10_3Insts();
  }

  bool hasScalarFlatScratchInsts() const {
    return ScalarFlatScratchInsts;
  }

  bool hasGlobalAddTidInsts() const {
    return GFX10_BEncoding;
  }

  bool hasAtomicCSub() const {
    return GFX10_BEncoding;
  }

  bool hasMultiDwordFlatScratchAddressing() const {
    return getGeneration() >= GFX9;
  }

  bool hasFlatSegmentOffsetBug() const {
    return HasFlatSegmentOffsetBug;
  }

  bool hasFlatLgkmVMemCountInOrder() const {
    return getGeneration() > GFX9;
  }

  bool hasD16LoadStore() const {
    return getGeneration() >= GFX9;
  }

  // D16 preservation of unused bits is lost when SRAMECC is (or may be) on.
  bool d16PreservesUnusedBits() const {
    return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
  }

  bool hasD16Images() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }

  /// Return if most LDS instructions have an m0 use that require m0 to be
  /// initialized.
  bool ldsRequiresM0Init() const {
    return getGeneration() < GFX9;
  }

  // True if the hardware rewinds and replays GWS operations if a wave is
  // preempted.
  //
  // If this is false, a GWS operation requires testing if a nack set the
  // MEM_VIOL bit, and repeating if so.
  bool hasGWSAutoReplay() const {
    return getGeneration() >= GFX9;
  }

  /// \returns if target has ds_gws_sema_release_all instruction.
  bool hasGWSSemaReleaseAll() const {
    return CIInsts;
  }

  /// \returns true if the target has integer add/sub instructions that do not
  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
  /// for saturation.
  bool hasAddNoCarry() const {
    return AddNoCarryInsts;
  }

  bool hasUnpackedD16VMem() const {
    return HasUnpackedD16VMem;
  }

  // Covers VS/PS/CS graphics shaders
  bool isMesaGfxShader(const Function &F) const {
    return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
  }

  bool hasMad64_32() const {
    return getGeneration() >= SEA_ISLANDS;
  }

  bool hasSDWAOmod() const {
    return HasSDWAOmod;
  }

  bool hasSDWAScalar() const {
    return HasSDWAScalar;
  }

  bool hasSDWASdst() const {
    return HasSDWASdst;
  }

  bool hasSDWAMac() const {
    return HasSDWAMac;
  }

  bool hasSDWAOutModsVOPC() const {
    return HasSDWAOutModsVOPC;
  }

  bool hasDLInsts() const {
    return HasDLInsts;
  }

  bool hasDot1Insts() const {
    return HasDot1Insts;
  }

  bool hasDot2Insts() const {
    return HasDot2Insts;
  }

  bool hasDot3Insts() const {
    return HasDot3Insts;
  }

  bool hasDot4Insts() const {
    return HasDot4Insts;
  }

  bool hasDot5Insts() const {
    return HasDot5Insts;
  }

  bool hasDot6Insts() const {
    return HasDot6Insts;
  }

  bool hasDot7Insts() const {
    return HasDot7Insts;
  }

  bool hasMAIInsts() const {
    return HasMAIInsts;
  }

  bool hasPkFmacF16Inst() const {
    return HasPkFmacF16Inst;
  }

  bool hasAtomicFaddInsts() const {
    return HasAtomicFaddInsts;
  }

  bool hasNoSdstCMPX() const {
    return HasNoSdstCMPX;
  }

  bool hasVscnt() const {
    return HasVscnt;
  }

  bool hasGetWaveIdInst() const {
    return HasGetWaveIdInst;
  }

  bool hasSMemTimeInst() const {
    return HasSMemTimeInst;
  }

  bool hasShaderCyclesRegister() const {
    return HasShaderCyclesRegister;
  }

  bool hasRegisterBanking() const {
    return HasRegisterBanking;
  }

  bool hasVOP3Literal() const {
    return HasVOP3Literal;
  }

  bool hasNoDataDepHazard() const {
    return HasNoDataDepHazard;
  }

  bool vmemWriteNeedsExpWaitcnt() const {
    return getGeneration() < SEA_ISLANDS;
  }

  // Scratch is allocated in 256 dword per wave blocks for the entire
  // wavefront. When viewed from the perspective of an arbitrary workitem, this
  // is 4-byte aligned.
  //
  // Only 4-byte alignment is really needed to access anything. Transformations
  // on the pointer value itself may rely on the alignment / known low bits of
  // the pointer. Set this to something above the minimum to avoid needing
  // dynamic realignment in common cases.
  Align getStackAlignment() const { return Align(16); }

  bool enableMachineScheduler() const override {
    return true;
  }

  bool useAA() const override;

  bool enableSubRegLiveness() const override {
    return true;
  }

  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }

  // static wrappers
  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);

  // XXX - Why is this here if it isn't in the default pass set?
  bool enableEarlyIfConversion() const override {
    return true;
  }

  bool enableFlatScratch() const;

  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override;

  unsigned getMaxNumUserSGPRs() const {
    return 16;
  }

  bool hasSMemRealTime() const {
    return HasSMemRealTime;
  }

  bool hasMovrel() const {
    return HasMovrel;
  }

  bool hasVGPRIndexMode() const {
    return HasVGPRIndexMode;
  }

  bool useVGPRIndexMode() const;

  bool hasScalarCompareEq64() const {
    return getGeneration() >= VOLCANIC_ISLANDS;
  }

  bool hasScalarStores() const {
    return HasScalarStores;
  }

  bool hasScalarAtomics() const {
    return HasScalarAtomics;
  }

  bool hasLDSFPAtomicAdd() const { return GFX8Insts; }

  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }

  bool hasDPP() const {
    return HasDPP;
  }

  // Row broadcast / wavefront-shift DPP controls were removed in GFX10.
  bool hasDPPBroadcasts() const {
    return HasDPP && getGeneration() < GFX10;
  }

  bool hasDPPWavefrontShifts() const {
    return HasDPP && getGeneration() < GFX10;
  }

  bool hasDPP8() const {
    return HasDPP8;
  }

  bool has64BitDPP() const {
    return Has64BitDPP;
  }

  bool hasPackedFP32Ops() const {
    return HasPackedFP32Ops;
  }

  bool hasFmaakFmamkF32Insts() const {
    return getGeneration() >= GFX10;
  }

  bool hasExtendedImageInsts() const {
    return HasExtendedImageInsts;
  }

  bool hasR128A16() const {
    return HasR128A16;
  }

  bool hasGFX10A16() const {
    return HasGFX10A16;
  }

  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }

  bool hasG16() const { return HasG16; }

  bool hasOffset3fBug() const {
    return HasOffset3fBug;
  }

  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }

  bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }

  bool hasNSAEncoding() const { return HasNSAEncoding; }

  unsigned getNSAMaxSize() const { return NSAMaxSize; }

  bool hasGFX10_AEncoding() const {
    return GFX10_AEncoding;
  }

  bool hasGFX10_BEncoding() const {
    return GFX10_BEncoding;
  }

  bool hasGFX10_3Insts() const {
    return GFX10_3Insts;
  }

  bool hasMadF16() const;

  bool enableSIScheduler() const {
    return EnableSIScheduler;
  }

  bool loadStoreOptEnabled() const {
    return EnableLoadStoreOpt;
  }

  bool hasSGPRInitBug() const {
    return SGPRInitBug;
  }

  bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }

  bool hasNegativeUnalignedScratchOffsetBug() const {
    return NegativeUnalignedScratchOffsetBug;
  }

  bool hasMFMAInlineLiteralBug() const {
    return HasMFMAInlineLiteralBug;
  }

  bool has12DWordStoreHazard() const {
    return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
  }

  // \returns true if the subtarget supports DWORDX3 load/store instructions.
  bool hasDwordx3LoadStores() const {
    return CIInsts;
  }

  bool hasReadM0MovRelInterpHazard() const {
    return getGeneration() == AMDGPUSubtarget::GFX9;
  }

  bool hasReadM0SendMsgHazard() const {
    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
           getGeneration() <= AMDGPUSubtarget::GFX9;
  }

  bool hasVcmpxPermlaneHazard() const {
    return HasVcmpxPermlaneHazard;
  }

  bool hasVMEMtoScalarWriteHazard() const {
    return HasVMEMtoScalarWriteHazard;
  }

  bool hasSMEMtoVectorWriteHazard() const {
    return HasSMEMtoVectorWriteHazard;
  }

  // The misaligned-LDS bug only applies when CU mode is disabled.
  bool hasLDSMisalignedBug() const {
    return LDSMisalignedBug && !EnableCuMode;
  }

  bool hasInstFwdPrefetchBug() const {
    return HasInstFwdPrefetchBug;
  }

  bool hasVcmpxExecWARHazard() const {
    return HasVcmpxExecWARHazard;
  }

  bool hasLdsBranchVmemWARHazard() const {
    return HasLdsBranchVmemWARHazard;
  }

  bool hasNSAtoVMEMBug() const {
    return HasNSAtoVMEMBug;
  }

  bool hasNSAClauseBug() const { return HasNSAClauseBug; }

  bool hasHardClauses() const { return getGeneration() >= GFX10; }

  bool hasGFX90AInsts() const { return GFX90AInsts; }

  /// Return if operations acting on VGPR tuples require even alignment.
  bool needsAlignedVGPRs() const { return GFX90AInsts; }

  bool hasPackedTID() const { return HasPackedTID; }

  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
  /// SGPRs
  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
  /// VGPRs
  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;

  /// Return occupancy for the given function. Used LDS and a number of
  /// registers if provided.
  /// Note, occupancy can be affected by the scratch allocation as well, but
  /// we do not have enough information to compute it.
  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
                            unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;

  /// \returns true if the flat_scratch register should be initialized with the
  /// pointer to the wave's scratch memory rather than a size and offset.
  bool flatScratchIsPointer() const {
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  /// \returns true if the flat_scratch register is initialized by the HW.
  /// In this case it is readonly.
  bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }

  /// \returns true if the machine has merged shaders in which s0-s7 are
  /// reserved by the hardware and user SGPRs start at s8
  bool hasMergedShaders() const {
    return getGeneration() >= GFX9;
  }

  /// \returns SGPR allocation granularity supported by the subtarget.
  unsigned getSGPRAllocGranule() const {
    return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
  }

  /// \returns SGPR encoding granularity supported by the subtarget.
  unsigned getSGPREncodingGranule() const {
    return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
  }

  /// \returns Total number of SGPRs supported by the subtarget.
  unsigned getTotalNumSGPRs() const {
    return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
  }

  /// \returns Addressable number of SGPRs supported by the subtarget.
  unsigned getAddressableNumSGPRs() const {
    return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
  }

  /// \returns Minimum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
  }

  /// \returns Maximum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
    return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
  }

  /// \returns Reserved number of SGPRs. This is common
  /// utility function called by MachineFunction and
  /// Function variants of getReservedNumSGPRs.
  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
  /// \returns Reserved number of SGPRs for given machine function \p MF.
  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;

  /// \returns Reserved number of SGPRs for given function \p F.
  unsigned getReservedNumSGPRs(const Function &F) const;

  /// \returns max num SGPRs. This is the common utility
  /// function called by MachineFunction and Function
  /// variants of getMaxNumSGPRs.
  unsigned getBaseMaxNumSGPRs(const Function &F,
                              std::pair<unsigned, unsigned> WavesPerEU,
                              unsigned PreloadedSGPRs,
                              unsigned ReservedNumSGPRs) const;

  /// \returns Maximum number of SGPRs that meets number of waves per execution
  /// unit requirement for function \p MF, or number of SGPRs explicitly
  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
  ///
  /// \returns Value that meets number of waves per execution unit requirement
  /// if explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement.
  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;

  /// \returns Maximum number of SGPRs that meets number of waves per execution
  /// unit requirement for function \p F, or number of SGPRs explicitly
  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
  ///
  /// \returns Value that meets number of waves per execution unit requirement
  /// if explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement.
  unsigned getMaxNumSGPRs(const Function &F) const;

  /// \returns VGPR allocation granularity supported by the subtarget.
  unsigned getVGPRAllocGranule() const {
    return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
  }

  /// \returns VGPR encoding granularity supported by the subtarget.
  unsigned getVGPREncodingGranule() const {
    return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
  }

  /// \returns Total number of VGPRs supported by the subtarget.
  unsigned getTotalNumVGPRs() const {
    return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
  }

  /// \returns Addressable number of VGPRs supported by the subtarget.
  unsigned getAddressableNumVGPRs() const {
    return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
  }

  /// \returns Minimum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
  }

  /// \returns Maximum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
  }

  /// \returns max num VGPRs. This is the common utility function
  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
  unsigned getBaseMaxNumVGPRs(const Function &F,
                              std::pair<unsigned, unsigned> WavesPerEU) const;
  /// \returns Maximum number of VGPRs that meets number of waves per execution
  /// unit requirement for function \p F, or number of VGPRs explicitly
  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
  ///
  /// \returns Value that meets number of waves per execution unit requirement
  /// if explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement.
  unsigned getMaxNumVGPRs(const Function &F) const;

  /// \returns Maximum number of VGPRs that meets number of waves per execution
  /// unit requirement for function \p MF, or number of VGPRs explicitly
  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
  ///
  /// \returns Value that meets number of waves per execution unit requirement
  /// if explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement.
  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;

  void getPostRAMutations(
      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
      const override;

  std::unique_ptr<ScheduleDAGMutation>
  createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;

  bool isWave32() const {
    return getWavefrontSize() == 32;
  }

  bool isWave64() const {
    return getWavefrontSize() == 64;
  }

  const TargetRegisterClass *getBoolRC() const {
    return getRegisterInfo()->getBoolRC();
  }

  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
  }

  /// \returns Minimum flat work group size supported by the subtarget.
  unsigned getMinFlatWorkGroupSize() const override {
    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
  }

  /// \returns Maximum flat work group size supported by the subtarget.
  unsigned getMaxFlatWorkGroupSize() const override {
    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
  }

  /// \returns Number of waves per execution unit required to support the given
  /// \p FlatWorkGroupSize.
  unsigned
  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
    return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
  }

  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget.
  unsigned getMinWavesPerEU() const override {
    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
  }

  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep) const override;
};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H