diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 172 |
1 files changed, 102 insertions, 70 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 19a240800ba14..c833bfbcf9366 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUCallLowering.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600FrameLowering.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" @@ -24,6 +25,7 @@ #include "SIInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -65,8 +67,8 @@ private: protected: bool Has16BitInsts; bool HasMadMixInsts; - bool FP32Denormals; - bool FPExceptions; + bool HasMadMacF32Insts; + bool HasDsSrc2Insts; bool HasSDWA; bool HasVOP3PInsts; bool HasMulI24; @@ -77,7 +79,7 @@ protected: bool HasTrigReducedRange; unsigned MaxWavesPerEU; int LocalMemorySize; - unsigned WavefrontSize; + char WavefrontSizeLog2; public: AMDGPUSubtarget(const Triple &TT); @@ -140,6 +142,10 @@ public: return isAmdHsaOS() || isMesaKernel(F); } + bool isGCN() const { + return TargetTriple.getArch() == Triple::amdgcn; + } + bool has16BitInsts() const { return Has16BitInsts; } @@ -148,17 +154,12 @@ public: return HasMadMixInsts; } - bool hasFP32Denormals(const Function &F) const { - // FIXME: This should not be a property of the subtarget. This should be a - // property with a default set by the calling convention which can be - // overridden by attributes. For now, use the subtarget feature as a - // placeholder attribute. The function arguments only purpose is to - // discourage use without a function context until this is removed. - return FP32Denormals; + bool hasMadMacF32Insts() const { + return HasMadMacF32Insts || !isGCN(); } - bool hasFPExceptions() const { - return FPExceptions; + bool hasDsSrc2Insts() const { + return HasDsSrc2Insts; } bool hasSDWA() const { @@ -194,7 +195,11 @@ public: } unsigned getWavefrontSize() const { - return WavefrontSize; + return 1 << WavefrontSizeLog2; + } + + unsigned getWavefrontSizeLog2() const { + return WavefrontSizeLog2; } int getLocalMemorySize() const { @@ -221,9 +226,10 @@ public: /// \returns Maximum flat work group size supported by the subtarget. virtual unsigned getMaxFlatWorkGroupSize() const = 0; - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0; + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + virtual unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum number of waves per execution unit supported by the /// subtarget. @@ -246,6 +252,13 @@ public: uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; + /// \returns Corresponsing DWARF register number mapping flavour for the + /// \p WavefrontSize. + AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const { + return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 + : AMDGPUDwarfFlavour::Wave64; + } + virtual ~AMDGPUSubtarget() {} }; @@ -278,6 +291,7 @@ public: private: /// GlobalISel related APIs. std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; + std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; std::unique_ptr<InstructionSelector> InstSelector; std::unique_ptr<LegalizerInfo> Legalizer; std::unique_ptr<RegisterBankInfo> RegBankInfo; @@ -292,10 +306,10 @@ protected: // Possibly statically set by tablegen, but may want to be overridden. bool FastFMAF32; + bool FastDenormalF32; bool HalfRate64Ops; // Dynamially set bits that enable features. - bool FP64FP16Denormals; bool FlatForGlobal; bool AutoWaitcntBeforeBarrier; bool CodeObjectV3; @@ -325,6 +339,7 @@ protected: bool GFX8Insts; bool GFX9Insts; bool GFX10Insts; + bool GFX10_3Insts; bool GFX7GFX8GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; @@ -342,7 +357,10 @@ protected: bool HasDPP; bool HasDPP8; bool HasR128A16; + bool HasGFX10A16; + bool HasG16; bool HasNSAEncoding; + bool GFX10_BEncoding; bool HasDLInsts; bool HasDot1Insts; bool HasDot2Insts; @@ -357,6 +375,8 @@ protected: bool DoesNotSupportSRAMECC; bool HasNoSdstCMPX; bool HasVscnt; + bool HasGetWaveIdInst; + bool HasSMemTimeInst; bool HasRegisterBanking; bool HasVOP3Literal; bool HasNoDataDepHazard; @@ -426,6 +446,10 @@ public: return CallLoweringInfo.get(); } + const InlineAsmLowering *getInlineAsmLowering() const override { + return InlineAsmLoweringInfo.get(); + } + InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } @@ -453,10 +477,6 @@ public: return (Generation)Gen; } - unsigned getWavefrontSizeLog2() const { - return Log2_32(WavefrontSize); - } - /// Return the number of high bits known to be zero fror a frame index. unsigned getKnownHighZeroBitsForFrameIndex() const { return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); @@ -506,6 +526,10 @@ public: return getGeneration() >= VOLCANIC_ISLANDS; } + bool hasFractBug() const { + return getGeneration() == SOUTHERN_ISLANDS; + } + bool hasBFE() const { return true; } @@ -587,6 +611,11 @@ public: return getGeneration() <= SEA_ISLANDS; } + /// Writes to VCC_LO/VCC_HI update the VCCZ flag. + bool partialVCCWritesUpdateVCCZ() const { + return getGeneration() >= GFX10; + } + /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR /// was written by a VALU instruction. bool hasSMRDReadVALUDefHazard() const { @@ -617,20 +646,6 @@ public: unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const; - /// Alias for hasFP64FP16Denormals - bool hasFP16Denormals(const Function &F) const { - return FP64FP16Denormals; - } - - /// Alias for hasFP64FP16Denormals - bool hasFP64Denormals(const Function &F) const { - return FP64FP16Denormals; - } - - bool hasFP64FP16Denormals(const Function &F) const { - return FP64FP16Denormals; - } - bool supportsMinMaxDenormModes() const { return getGeneration() >= AMDGPUSubtarget::GFX9; } @@ -724,6 +739,18 @@ public: return ScalarFlatScratchInsts; } + bool hasGlobalAddTidInsts() const { + return GFX10_BEncoding; + } + + bool hasAtomicCSub() const { + return GFX10_BEncoding; + } + + bool hasMultiDwordFlatScratchAddressing() const { + return getGeneration() >= GFX9; + } + bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; } @@ -853,6 +880,14 @@ public: return HasVscnt; } + bool hasGetWaveIdInst() const { + return HasGetWaveIdInst; + } + + bool hasSMemTimeInst() const { + return HasSMemTimeInst; + } + bool hasRegisterBanking() const { return HasRegisterBanking; } @@ -890,30 +925,6 @@ public: void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } - /// \returns Number of execution units per compute unit supported by the - /// subtarget. - unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(this); - } - - /// \returns Maximum number of waves per compute unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(this); - } - - /// \returns Maximum number of waves per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); - } - - /// \returns Number of waves per work group supported by the subtarget and - /// limited by given \p FlatWorkGroupSize. - unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize); - } - // static wrappers static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); @@ -979,6 +990,14 @@ public: return HasR128A16; } + bool hasGFX10A16() const { + return HasGFX10A16; + } + + bool hasA16() const { return hasR128A16() || hasGFX10A16(); } + + bool hasG16() const { return HasG16; } + bool hasOffset3fBug() const { return HasOffset3fBug; } @@ -987,6 +1006,14 @@ public: return HasNSAEncoding; } + bool hasGFX10_BEncoding() const { + return GFX10_BEncoding; + } + + bool hasGFX10_3Insts() const { + return GFX10_3Insts; + } + bool hasMadF16() const; bool enableSIScheduler() const { @@ -1059,6 +1086,8 @@ public: return HasNSAtoVMEMBug; } + bool hasHardClauses() const { return getGeneration() >= GFX10; } + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; @@ -1071,7 +1100,7 @@ public: /// registers if provided. /// Note, occupancy can be affected by the scratch allocation as well, but /// we do not have enough information to compute it. - unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0, + unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; /// \returns true if the flat_scratch register should be initialized with the @@ -1178,7 +1207,7 @@ public: const override; bool isWave32() const { - return WavefrontSize == 32; + return getWavefrontSize() == 32; } const TargetRegisterClass *getBoolRC() const { @@ -1201,10 +1230,11 @@ public: return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); } - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); } /// \returns Minimum number of waves per execution unit supported by the @@ -1213,7 +1243,8 @@ public: return AMDGPU::IsaInfo::getMinWavesPerEU(this); } - void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override; + void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, + SDep &Dep) const override; }; class R600Subtarget final : public R600GenSubtargetInfo, @@ -1338,10 +1369,11 @@ public: return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); } - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); } /// \returns Minimum number of waves per execution unit supported by the |