Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 410
1 file changed, 343 insertions(+), 67 deletions(-)
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 54c866bdc63c..e90f40e6abea 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPU.h"
 #include "SIDefines.h"
+#include "AMDGPUAsmUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -85,7 +85,9 @@ unsigned getExpcntBitWidth() { return 3; }
 unsigned getLgkmcntBitShift() { return 8; }
 
 /// \returns Lgkmcnt bit width.
-unsigned getLgkmcntBitWidth() { return 4; }
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+  return (VersionMajor >= 10) ? 6 : 4;
+}
 
 /// \returns Vmcnt bit shift (higher bits).
 unsigned getVmcntBitShiftHi() { return 14; }
@@ -99,18 +101,11 @@ namespace llvm {
 
 namespace AMDGPU {
 
-struct MIMGInfo {
-  uint16_t Opcode;
-  uint16_t BaseOpcode;
-  uint8_t MIMGEncoding;
-  uint8_t VDataDwords;
-  uint8_t VAddrDwords;
-};
-
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -120,6 +115,11 @@ int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
   return Info ? Info->Opcode : -1;
 }
 
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+  const MIMGInfo *Info = getMIMGInfo(Opc);
+  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
   const MIMGInfo *NewInfo =
@@ -230,7 +230,8 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
 
 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                                unsigned FlatWorkGroupSize) {
-  if (!STI->getFeatureBits().test(FeatureGCN))
+  assert(FlatWorkGroupSize != 0);
+  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
     return 8;
   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
   if (N == 1)
@@ -279,6 +280,8 @@ unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
 
 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return getAddressableNumSGPRs(STI);
   if (Version.Major >= 8)
     return 16;
   return 8;
@@ -300,6 +303,8 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 106;
   if (Version.Major >= 8)
     return 102;
   return 104;
@@ -308,6 +313,10 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   assert(WavesPerEU != 0);
 
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 0;
+
   if (WavesPerEU >= getMaxWavesPerEU())
     return 0;
 
@@ -322,8 +331,10 @@ unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                         bool Addressable) {
   assert(WavesPerEU != 0);
 
-  IsaVersion Version = getIsaVersion(STI->getCPU());
   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return Addressable ? AddressableNumSGPRs : 108;
   if (Version.Major >= 8 && !Addressable)
     AddressableNumSGPRs = 112;
   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
@@ -340,6 +351,9 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
     ExtraSGPRs = 2;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return ExtraSGPRs;
+
   if (Version.Major < 8) {
     if (FlatScrUsed)
       ExtraSGPRs = 4;
@@ -366,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
 }
 
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
-  return 4;
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+                             Optional<bool> EnableWavefrontSize32) {
+  bool IsWave32 = EnableWavefrontSize32 ?
+      *EnableWavefrontSize32 :
+      STI->getFeatureBits().test(FeatureWavefrontSize32);
+  return IsWave32 ? 8 : 4;
 }
 
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
-  return getVGPRAllocGranule(STI);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+                                Optional<bool> EnableWavefrontSize32) {
+  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
 }
 
 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
@@ -402,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 }
 
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
-  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+                          Optional<bool> EnableWavefrontSize32) {
+  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
+                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
   // VGPRBlocks is actual number of VGPR blocks minus 1.
-  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
+  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
 }
 
 } // end namespace IsaInfo
@@ -423,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.amd_machine_version_minor = Version.Minor;
   Header.amd_machine_version_stepping = Version.Stepping;
   Header.kernel_code_entry_byte_offset = sizeof(Header);
-
   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
   Header.wavefront_size = 6;
 
   // If the code object does not support indirect functions, then the value must
@@ -435,11 +455,25 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.kernarg_segment_alignment = 4;
   Header.group_segment_alignment = 4;
   Header.private_segment_alignment = 4;
+
+  if (Version.Major >= 10) {
+    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+      Header.wavefront_size = 5;
+      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+    }
+    Header.compute_pgm_resource_registers |=
+        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+        S_00B848_MEM_ORDERED(1);
+  }
 }
 
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+    const MCSubtargetInfo *STI) {
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+
   amdhsa::kernel_descriptor_t KD;
   memset(&KD, 0, sizeof(KD));
+
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
@@ -449,6 +483,16 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+  if (Version.Major >= 10) {
+    AMDHSA_BITS_SET(KD.kernel_code_properties,
+                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+  }
   return KD;
 }
@@ -523,13 +567,14 @@ unsigned getExpcntBitMask(const IsaVersion &Version) {
 }
 
 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
-  return (1 << getLgkmcntBitWidth()) - 1;
+  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
 }
 
 unsigned getWaitcntBitMask(const IsaVersion &Version) {
   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
-  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+                                getLgkmcntBitWidth(Version.Major));
   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
   if (Version.Major < 9)
     return Waitcnt;
@@ -555,7 +600,8 @@ unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
 }
 
 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
-  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return unpackBits(Waitcnt, getLgkmcntBitShift(),
+                    getLgkmcntBitWidth(Version.Major));
 }
 
 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
@@ -591,7 +637,8 @@ unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
 
 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                        unsigned Lgkmcnt) {
-  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+                  getLgkmcntBitWidth(Version.Major));
 }
 
 unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -607,6 +654,181 @@ unsigned encodeWaitcnt(const IsaVersion &Version,
 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
   return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
 }
 
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
+
+namespace Hwreg {
+
+int64_t getHwregId(const StringRef Name) {
+  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
+    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
+      return Id;
+  }
+  return ID_UNKNOWN_;
+}
+
+static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
+  if (isSI(STI) || isCI(STI) || isVI(STI))
+    return ID_SYMBOLIC_FIRST_GFX9_;
+  else if (isGFX9(STI))
+    return ID_SYMBOLIC_FIRST_GFX10_;
+  else
+    return ID_SYMBOLIC_LAST_;
+}
+
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
+  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+         IdSymbolic[Id];
+}
+
+bool isValidHwreg(int64_t Id) {
+  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
+}
+
+bool isValidHwregOffset(int64_t Offset) {
+  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
+}
+
+bool isValidHwregWidth(int64_t Width) {
+  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
+}
+
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
+  return (Id << ID_SHIFT_) |
+         (Offset << OFFSET_SHIFT_) |
+         ((Width - 1) << WIDTH_M1_SHIFT_);
+}
+
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
+  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
+}
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
+  Id = (Val & ID_MASK_) >> ID_SHIFT_;
+  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
+  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
+}
+
+} // namespace Hwreg
+
+//===----------------------------------------------------------------------===//
+// SendMsg
+//===----------------------------------------------------------------------===//
+
+namespace SendMsg {
+
+int64_t getMsgId(const StringRef Name) {
+  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
+    if (IdSymbolic[i] && Name == IdSymbolic[i])
+      return i;
+  }
+  return ID_UNKNOWN_;
+}
+
+static bool isValidMsgId(int64_t MsgId) {
+  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
+}
+
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
+  if (Strict) {
+    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
+      return isGFX9(STI) || isGFX10(STI);
+    else
+      return isValidMsgId(MsgId);
+  } else {
+    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+  }
+}
+
+StringRef getMsgName(int64_t MsgId) {
+  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
+}
+
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
+  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
+  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
+  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
+  for (int i = F; i < L; ++i) {
+    if (Name == S[i]) {
+      return i;
+    }
+  }
+  return OP_UNKNOWN_;
+}
+
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
+
+  if (!Strict)
+    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
+
+  switch(MsgId)
+  {
+  case ID_GS:
+    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
+  case ID_GS_DONE:
+    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
+  case ID_SYSMSG:
+    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
+  default:
+    return OpId == OP_NONE_;
+  }
+}
+
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
+  assert(msgRequiresOp(MsgId));
+  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
+}
+
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
+
+  if (!Strict)
+    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
+
+  switch(MsgId)
+  {
+  case ID_GS:
+    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
+  case ID_GS_DONE:
+    return (OpId == OP_GS_NOP)?
+        (StreamId == STREAM_ID_NONE_) :
+        (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
+  default:
+    return StreamId == STREAM_ID_NONE_;
+  }
+}
+
+bool msgRequiresOp(int64_t MsgId) {
+  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
+}
+
+bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
+  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
+}
+
+void decodeMsg(unsigned Val,
+               uint16_t &MsgId,
+               uint16_t &OpId,
+               uint16_t &StreamId) {
+  MsgId = Val & ID_MASK_;
+  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
+  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+}
+
+uint64_t encodeMsg(uint64_t MsgId,
+                   uint64_t OpId,
+                   uint64_t StreamId) {
+  return (MsgId << ID_SHIFT_) |
+         (OpId << OP_SHIFT_) |
+         (StreamId << STREAM_ID_SHIFT_);
+}
+
+} // namespace SendMsg
+
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
 unsigned getInitialPSInputAddr(const Function &F) {
   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
@@ -679,6 +901,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 }
 
+bool isGFX10(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 }
@@ -704,46 +930,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
   CASE_CI_VI(FLAT_SCR) \
   CASE_CI_VI(FLAT_SCR_LO) \
   CASE_CI_VI(FLAT_SCR_HI) \
-  CASE_VI_GFX9(TTMP0) \
-  CASE_VI_GFX9(TTMP1) \
-  CASE_VI_GFX9(TTMP2) \
-  CASE_VI_GFX9(TTMP3) \
-  CASE_VI_GFX9(TTMP4) \
-  CASE_VI_GFX9(TTMP5) \
-  CASE_VI_GFX9(TTMP6) \
-  CASE_VI_GFX9(TTMP7) \
-  CASE_VI_GFX9(TTMP8) \
-  CASE_VI_GFX9(TTMP9) \
-  CASE_VI_GFX9(TTMP10) \
-  CASE_VI_GFX9(TTMP11) \
-  CASE_VI_GFX9(TTMP12) \
-  CASE_VI_GFX9(TTMP13) \
-  CASE_VI_GFX9(TTMP14) \
-  CASE_VI_GFX9(TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1) \
-  CASE_VI_GFX9(TTMP2_TTMP3) \
-  CASE_VI_GFX9(TTMP4_TTMP5) \
-  CASE_VI_GFX9(TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP8_TTMP9) \
-  CASE_VI_GFX9(TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP12_TTMP13) \
-  CASE_VI_GFX9(TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
-  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0) \
+  CASE_VI_GFX9_GFX10(TTMP1) \
+  CASE_VI_GFX9_GFX10(TTMP2) \
+  CASE_VI_GFX9_GFX10(TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4) \
+  CASE_VI_GFX9_GFX10(TTMP5) \
+  CASE_VI_GFX9_GFX10(TTMP6) \
+  CASE_VI_GFX9_GFX10(TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8) \
+  CASE_VI_GFX9_GFX10(TTMP9) \
+  CASE_VI_GFX9_GFX10(TTMP10) \
+  CASE_VI_GFX9_GFX10(TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12) \
+  CASE_VI_GFX9_GFX10(TTMP13) \
+  CASE_VI_GFX9_GFX10(TTMP14) \
+  CASE_VI_GFX9_GFX10(TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
+  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
+  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
+  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
+  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
 }
 
 #define CASE_CI_VI(node) \
   assert(!isSI(STI)); \
   case node: return isCI(STI) ? node##_ci : node##_vi;
 
-#define CASE_VI_GFX9(node) \
-  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
+#define CASE_VI_GFX9_GFX10(node) \
+  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
 
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
   if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -752,17 +978,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
     return Reg;
   MAP_REG2REG
 }
 
 #undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
 
 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
-#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
+#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
 
 unsigned mc2PseudoReg(unsigned Reg) {
   MAP_REG2REG
 }
 
 #undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
 #undef MAP_REG2REG
 
 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -779,10 +1005,17 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
     return true;
   default:
     return false;
@@ -802,28 +1035,46 @@ unsigned getRegBitWidth(unsigned RCID) {
   switch (RCID) {
   case AMDGPU::SGPR_32RegClassID:
   case AMDGPU::VGPR_32RegClassID:
+  case AMDGPU::VRegOrLds_32RegClassID:
+  case AMDGPU::AGPR_32RegClassID:
   case AMDGPU::VS_32RegClassID:
+  case AMDGPU::AV_32RegClassID:
   case AMDGPU::SReg_32RegClassID:
   case AMDGPU::SReg_32_XM0RegClassID:
+  case AMDGPU::SRegOrLds_32RegClassID:
     return 32;
   case AMDGPU::SGPR_64RegClassID:
   case AMDGPU::VS_64RegClassID:
+  case AMDGPU::AV_64RegClassID:
   case AMDGPU::SReg_64RegClassID:
   case AMDGPU::VReg_64RegClassID:
+  case AMDGPU::AReg_64RegClassID:
   case AMDGPU::SReg_64_XEXECRegClassID:
     return 64;
+  case AMDGPU::SGPR_96RegClassID:
+  case AMDGPU::SReg_96RegClassID:
   case AMDGPU::VReg_96RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
   case AMDGPU::VReg_128RegClassID:
+  case AMDGPU::AReg_128RegClassID:
     return 128;
+  case AMDGPU::SGPR_160RegClassID:
+  case AMDGPU::SReg_160RegClassID:
+  case AMDGPU::VReg_160RegClassID:
+    return 160;
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
     return 256;
   case AMDGPU::SReg_512RegClassID:
   case AMDGPU::VReg_512RegClassID:
+  case AMDGPU::AReg_512RegClassID:
     return 512;
+  case AMDGPU::SReg_1024RegClassID:
+  case AMDGPU::VReg_1024RegClassID:
+  case AMDGPU::AReg_1024RegClassID:
+    return 1024;
   default:
     llvm_unreachable("Unexpected register class");
   }
@@ -905,6 +1156,13 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   assert(HasInv2Pi);
 
+  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+    int16_t Trunc = static_cast<int16_t>(Literal);
+    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+  }
+  if (!(Literal & 0xffff))
+    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
   int16_t Lo16 = static_cast<int16_t>(Literal);
   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
@@ -936,15 +1194,19 @@ bool isArgPassedInSGPR(const Argument *A) {
   }
 }
 
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+  return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  if (isGCN3Encoding(ST))
+  if (hasSMEMByteOffset(ST))
     return ByteOffset;
   return ByteOffset >> 2;
 }
 
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return isGCN3Encoding(ST) ?
+  return (hasSMEMByteOffset(ST)) ?
     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 }
@@ -994,6 +1256,19 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
   return true;
 }
 
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+  *this = getDefaultForCallingConv(F.getCallingConv());
+
+  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+  if (!IEEEAttr.empty())
+    IEEE = IEEEAttr == "true";
+
+  StringRef DX10ClampAttr
+    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+  if (!DX10ClampAttr.empty())
+    DX10Clamp = DX10ClampAttr == "true";
+}
+
 namespace {
 
 struct SourceOfDivergence {
@@ -1009,5 +1284,6 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
   return lookupSourceOfDivergence(IntrID);
 }
+
 } // namespace AMDGPU
 } // namespace llvm
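The central waitcnt change in this patch is that lgkmcnt grows from 4 to 6 bits when the ISA major version is 10 or newer, so counts up to 63 survive an encode/decode round trip that older targets truncate. Below is a minimal standalone sketch of that pack/unpack arithmetic; getBitMask, packBits, and unpackBits are local stand-ins written for illustration, not the static helpers from AMDGPUBaseInfo.cpp itself.

#include <cassert>
#include <cstdio>

// Field layout used by the patch: lgkmcnt lives at bit 8 and is
// 4 bits wide before GFX10, 6 bits wide from GFX10 (Major >= 10).
unsigned getLgkmcntBitShift() { return 8; }
unsigned getLgkmcntBitWidth(unsigned Major) { return Major >= 10 ? 6 : 4; }

// Local stand-ins for the file's getBitMask/packBits/unpackBits helpers.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return (Dst & ~Mask) | ((Src << Shift) & Mask);
}
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

int main() {
  // lgkmcnt = 35 needs 6 bits; it only round-trips with the GFX10 width.
  unsigned W10 = packBits(35, 0, getLgkmcntBitShift(), getLgkmcntBitWidth(10));
  assert(unpackBits(W10, getLgkmcntBitShift(), getLgkmcntBitWidth(10)) == 35);
  unsigned W9 = packBits(35, 0, getLgkmcntBitShift(), getLgkmcntBitWidth(9));
  assert(unpackBits(W9, getLgkmcntBitShift(), getLgkmcntBitWidth(9)) == 3);
  printf("gfx10 waitcnt=0x%x gfx9 waitcnt=0x%x (gfx9 value truncated)\n",
         W10, W9);
}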
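Likewise, the wave32 support doubles the VGPR allocation/encoding granule from 4 to 8 registers, which changes the block count that IsaInfo::getNumVGPRBlocks encodes into the program resource registers. A small self-contained sketch of that arithmetic, with the MCSubtargetInfo feature query replaced by an explicit IsWave32 flag (an illustration-only simplification):

#include <cassert>

// Same arithmetic as the patched IsaInfo::getNumVGPRBlocks.
unsigned alignTo(unsigned V, unsigned A) { return (V + A - 1) / A * A; }

unsigned getNumVGPRBlocks(unsigned NumVGPRs, bool IsWave32) {
  unsigned Granule = IsWave32 ? 8 : 4;  // getVGPREncodingGranule
  NumVGPRs = alignTo(NumVGPRs ? NumVGPRs : 1, Granule);
  return NumVGPRs / Granule - 1;        // encoded as blocks minus 1
}

int main() {
  // 41 VGPRs: wave64 rounds up to 44 (11 blocks, encoded 10);
  // wave32 rounds up to 48 (6 blocks, encoded 5).
  assert(getNumVGPRBlocks(41, /*IsWave32=*/false) == 10);
  assert(getNumVGPRBlocks(41, /*IsWave32=*/true) == 5);
}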
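The new Hwreg helpers are plain shift-and-mask packing of the s_getreg_b32/s_setreg_b32 immediate. The sketch below reproduces the encodeHwreg/decodeHwreg arithmetic with the field constants written out; the exact shift and width values (id in bits 5:0, offset in bits 10:6, width-1 in bits 15:11) are assumptions taken from SIDefines.h, not from this patch, and should be checked there.

#include <cassert>
#include <cstdint>

// Assumed field layout of the hwreg() immediate (see SIDefines.h).
constexpr unsigned ID_SHIFT_ = 0, ID_WIDTH_ = 6;
constexpr unsigned OFFSET_SHIFT_ = 6, OFFSET_WIDTH_ = 5;
constexpr unsigned WIDTH_M1_SHIFT_ = 11, WIDTH_M1_WIDTH_ = 5;
constexpr unsigned mask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}

// Same arithmetic as the patch's encodeHwreg/decodeHwreg.
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset,
                 unsigned &Width) {
  Id = (Val & mask(ID_SHIFT_, ID_WIDTH_)) >> ID_SHIFT_;
  Offset = (Val & mask(OFFSET_SHIFT_, OFFSET_WIDTH_)) >> OFFSET_SHIFT_;
  Width = ((Val & mask(WIDTH_M1_SHIFT_, WIDTH_M1_WIDTH_)) >> WIDTH_M1_SHIFT_) + 1;
}

int main() {
  // hwreg(id=6, offset=8, width=16) must round-trip through the encoding.
  unsigned Id, Offset, Width;
  decodeHwreg(encodeHwreg(6, 8, 16), Id, Offset, Width);
  assert(Id == 6 && Offset == 8 && Width == 16);
}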
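The SendMsg helpers follow the same pattern for the s_sendmsg immediate. Another hedged sketch: the field layout (msg in bits 3:0, op in bits 6:4, stream in bits 9:8) and the message/op id values used in main() are assumed from SIDefines.h rather than taken from this patch.

#include <cassert>
#include <cstdint>

// Assumed field layout of the sendmsg() immediate (see SIDefines.h).
constexpr unsigned ID_SHIFT_ = 0,        ID_MASK_ = 0xF;
constexpr unsigned OP_SHIFT_ = 4,        OP_MASK_ = 0x70;
constexpr unsigned STREAM_ID_SHIFT_ = 8, STREAM_ID_MASK_ = 0x300;

// Same arithmetic as the patch's encodeMsg/decodeMsg.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) | (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

int main() {
  // sendmsg(MSG_GS = 2, GS_OP_EMIT = 2, stream 1) must round-trip.
  uint16_t MsgId, OpId, StreamId;
  decodeMsg(encodeMsg(2, 2, 1), MsgId, OpId, StreamId);
  assert(MsgId == 2 && OpId == 2 && StreamId == 1);
}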