Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r-- | lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 268
1 file changed, 193 insertions, 75 deletions
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 67ad904ca9723..819a7add0be4e 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -39,7 +40,9 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
 #define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
 #undef GET_INSTRINFO_NAMED_OPS
 
 namespace {
@@ -100,15 +103,76 @@ static cl::opt<bool> EnablePackedInlinableLiterals(
 
 namespace AMDGPU {
 
+LLVM_READNONE
+static inline Channels indexToChannel(unsigned Channel) {
+  switch (Channel) {
+  case 1:
+    return AMDGPU::Channels_1;
+  case 2:
+    return AMDGPU::Channels_2;
+  case 3:
+    return AMDGPU::Channels_3;
+  case 4:
+    return AMDGPU::Channels_4;
+  default:
+    llvm_unreachable("invalid MIMG channel");
+  }
+}
+
+
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+  switch (RCID) {
+  case AMDGPU::VGPR_32RegClassID:
+    return 1;
+  case AMDGPU::VReg_64RegClassID:
+    return 2;
+  case AMDGPU::VReg_96RegClassID:
+    return 3;
+  case AMDGPU::VReg_128RegClassID:
+    return 4;
+  default:
+    llvm_unreachable("invalid MIMG register class");
+  }
+}
+
+int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
+  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
+  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
+  if (NewChannels == OrigChannels)
+    return Opc;
+
+  switch (OrigChannels) {
+  case 1:
+    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+  case 2:
+    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+  case 3:
+    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+  case 4:
+    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+  default:
+    llvm_unreachable("invalid MIMG channel");
+  }
+}
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
+}
+
 namespace IsaInfo {
 
 IsaVersion getIsaVersion(const FeatureBitset &Features) {
-  // SI.
+  // GCN GFX6 (Southern Islands (SI)).
   if (Features.test(FeatureISAVersion6_0_0))
     return {6, 0, 0};
   if (Features.test(FeatureISAVersion6_0_1))
     return {6, 0, 1};
-  // CI.
+
+  // GCN GFX7 (Sea Islands (CI)).
   if (Features.test(FeatureISAVersion7_0_0))
     return {7, 0, 0};
   if (Features.test(FeatureISAVersion7_0_1))
@@ -117,8 +181,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {7, 0, 2};
   if (Features.test(FeatureISAVersion7_0_3))
     return {7, 0, 3};
+  if (Features.test(FeatureISAVersion7_0_4))
+    return {7, 0, 4};
 
-  // VI.
+  // GCN GFX8 (Volcanic Islands (VI)).
   if (Features.test(FeatureISAVersion8_0_0))
     return {8, 0, 0};
   if (Features.test(FeatureISAVersion8_0_1))
@@ -127,26 +193,39 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {8, 0, 2};
   if (Features.test(FeatureISAVersion8_0_3))
     return {8, 0, 3};
-  if (Features.test(FeatureISAVersion8_0_4))
-    return {8, 0, 4};
   if (Features.test(FeatureISAVersion8_1_0))
     return {8, 1, 0};
 
-  // GFX9.
+  // GCN GFX9.
   if (Features.test(FeatureISAVersion9_0_0))
     return {9, 0, 0};
-  if (Features.test(FeatureISAVersion9_0_1))
-    return {9, 0, 1};
   if (Features.test(FeatureISAVersion9_0_2))
     return {9, 0, 2};
-  if (Features.test(FeatureISAVersion9_0_3))
-    return {9, 0, 3};
 
   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
     return {0, 0, 0};
 
   return {7, 0, 0};
 }
 
+void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
+  auto TargetTriple = STI->getTargetTriple();
+  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+
+  Stream << TargetTriple.getArchName() << '-'
+         << TargetTriple.getVendorName() << '-'
+         << TargetTriple.getOSName() << '-'
+         << TargetTriple.getEnvironmentName() << '-'
+         << "gfx"
+         << ISAVersion.Major
+         << ISAVersion.Minor
+         << ISAVersion.Stepping;
+  Stream.flush();
+}
+
+bool hasCodeObjectV3(const FeatureBitset &Features) {
+  return Features.test(FeatureCodeObjectV3);
+}
+
 unsigned getWavefrontSize(const FeatureBitset &Features) {
   if (Features.test(FeatureWavefrontSize16))
     return 16;
@@ -337,16 +416,16 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.private_segment_alignment = 4;
 }
 
-bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
-  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV) {
+  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }
 
-bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
-  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV) {
+  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }
 
-bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
-  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV) {
+  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 }
 
 bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -486,7 +565,9 @@ unsigned getInitialPSInputAddr(const Function &F) {
 bool isShader(CallingConv::ID cc) {
   switch(cc) {
     case CallingConv::AMDGPU_VS:
+    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
+    case CallingConv::AMDGPU_ES:
     case CallingConv::AMDGPU_GS:
     case CallingConv::AMDGPU_PS:
     case CallingConv::AMDGPU_CS:
@@ -508,7 +589,9 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_ES:
   case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
     return true;
   default:
     return false;
@@ -531,6 +614,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 }
 
+bool isGCN3Encoding(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
+}
+
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
@@ -545,44 +632,68 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
   return false;
 }
 
-unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+#define MAP_REG2REG \
+  using namespace AMDGPU; \
+  switch(Reg) { \
+  default: return Reg; \
+  CASE_CI_VI(FLAT_SCR) \
+  CASE_CI_VI(FLAT_SCR_LO) \
+  CASE_CI_VI(FLAT_SCR_HI) \
+  CASE_VI_GFX9(TTMP0) \
+  CASE_VI_GFX9(TTMP1) \
+  CASE_VI_GFX9(TTMP2) \
+  CASE_VI_GFX9(TTMP3) \
+  CASE_VI_GFX9(TTMP4) \
+  CASE_VI_GFX9(TTMP5) \
+  CASE_VI_GFX9(TTMP6) \
+  CASE_VI_GFX9(TTMP7) \
+  CASE_VI_GFX9(TTMP8) \
+  CASE_VI_GFX9(TTMP9) \
+  CASE_VI_GFX9(TTMP10) \
+  CASE_VI_GFX9(TTMP11) \
+  CASE_VI_GFX9(TTMP12) \
+  CASE_VI_GFX9(TTMP13) \
+  CASE_VI_GFX9(TTMP14) \
+  CASE_VI_GFX9(TTMP15) \
+  CASE_VI_GFX9(TTMP0_TTMP1) \
+  CASE_VI_GFX9(TTMP2_TTMP3) \
+  CASE_VI_GFX9(TTMP4_TTMP5) \
+  CASE_VI_GFX9(TTMP6_TTMP7) \
+  CASE_VI_GFX9(TTMP8_TTMP9) \
+  CASE_VI_GFX9(TTMP10_TTMP11) \
+  CASE_VI_GFX9(TTMP12_TTMP13) \
+  CASE_VI_GFX9(TTMP14_TTMP15) \
+  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
+  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
+  }
 
-  switch(Reg) {
-  default: break;
-  case AMDGPU::FLAT_SCR:
-    assert(!isSI(STI));
-    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
+#define CASE_CI_VI(node) \
+  assert(!isSI(STI)); \
+  case node: return isCI(STI) ? node##_ci : node##_vi;
 
-  case AMDGPU::FLAT_SCR_LO:
-    assert(!isSI(STI));
-    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
+#define CASE_VI_GFX9(node) \
+  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
 
-  case AMDGPU::FLAT_SCR_HI:
-    assert(!isSI(STI));
-    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
-  }
-  return Reg;
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+  MAP_REG2REG
 }
 
-unsigned mc2PseudoReg(unsigned Reg) {
-  switch (Reg) {
-  case AMDGPU::FLAT_SCR_ci:
-  case AMDGPU::FLAT_SCR_vi:
-    return FLAT_SCR;
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9
 
-  case AMDGPU::FLAT_SCR_LO_ci:
-  case AMDGPU::FLAT_SCR_LO_vi:
-    return AMDGPU::FLAT_SCR_LO;
+#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
+#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
 
-  case AMDGPU::FLAT_SCR_HI_ci:
-  case AMDGPU::FLAT_SCR_HI_vi:
-    return AMDGPU::FLAT_SCR_HI;
-
-  default:
-    return Reg;
-  }
+unsigned mc2PseudoReg(unsigned Reg) {
+  MAP_REG2REG
 }
 
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9
+#undef MAP_REG2REG
+
 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   assert(OpNo < Desc.NumOperands);
   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
@@ -730,59 +841,66 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 }
 
+bool isArgPassedInSGPR(const Argument *A) {
+  const Function *F = A->getParent();
+
+  // Arguments to compute shaders are never a source of divergence.
+  CallingConv::ID CC = F->getCallingConv();
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+    return true;
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+    // Everything else is in VGPRs.
+    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+  default:
+    // TODO: Should calls support inreg for SGPR inputs?
+    return false;
+  }
+}
+
+// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
 bool isUniformMMO(const MachineMemOperand *MMO) {
   const Value *Ptr = MMO->getValue();
   // UndefValue means this is a load of a kernel input. These are uniform.
   // Sometimes LDS instructions have constant pointers.
   // If Ptr is null, then that means this mem operand contains a
   // PseudoSourceValue like GOT.
-  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+  if (!Ptr || isa<UndefValue>(Ptr) || isa<Constant>(Ptr) ||
       isa<GlobalValue>(Ptr))
     return true;
 
+  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
+    return isArgPassedInSGPR(Arg);
+
   const Instruction *I = dyn_cast<Instruction>(Ptr);
   return I && I->getMetadata("amdgpu.uniform");
 }
 
 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  if (isSI(ST) || isCI(ST))
-    return ByteOffset >> 2;
-
-  return ByteOffset;
+  if (isGCN3Encoding(ST))
+    return ByteOffset;
+  return ByteOffset >> 2;
 }
 
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
-    isUInt<20>(EncodedOffset);
+  return isGCN3Encoding(ST) ?
+    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 }
 
+
 } // end namespace AMDGPU
 } // end namespace llvm
 
-const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
-const unsigned AMDGPUAS::GLOBAL_ADDRESS;
-const unsigned AMDGPUAS::LOCAL_ADDRESS;
-const unsigned AMDGPUAS::PARAM_D_ADDRESS;
-const unsigned AMDGPUAS::PARAM_I_ADDRESS;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
-const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
-const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
-
 namespace llvm {
 namespace AMDGPU {
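Note on the new streamIsaVersion above: the emitted string is just the target triple components joined with '-', followed by "gfx" and the Major/Minor/Stepping digits with no separators. A minimal sketch of the same formatting with plain strings (isaVersionString is a hypothetical helper, not an LLVM API):

    #include <sstream>
    #include <string>

    // Mirrors streamIsaVersion's formatting with plain strings:
    // <arch>-<vendor>-<os>-<env>-gfx<Major><Minor><Stepping>.
    static std::string isaVersionString(const std::string &Arch,
                                        const std::string &Vendor,
                                        const std::string &OSName,
                                        const std::string &Env,
                                        unsigned Major, unsigned Minor,
                                        unsigned Stepping) {
      std::ostringstream OS;
      OS << Arch << '-' << Vendor << '-' << OSName << '-' << Env << '-'
         << "gfx" << Major << Minor << Stepping;
      return OS.str();
    }

    // isaVersionString("amdgcn", "amd", "amdhsa", "", 9, 0, 0)
    //   == "amdgcn-amd-amdhsa--gfx900"

The environment component is written unconditionally, so an empty environment yields the doubled dash seen in "amdgcn-amd-amdhsa--gfx900".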
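Note on the getMCReg/mc2PseudoReg rewrite: the patch replaces two hand-maintained switches with an X-macro, MAP_REG2REG, that spells the register case list once and is expanded twice under different CASE_CI_VI/CASE_VI_GFX9 definitions. A standalone sketch of the same pattern with a toy register enum and boolean subtarget flags (getMCRegSketch/mc2PseudoRegSketch are illustrative names, not the patch's code):

    // Toy registers: a pseudo register plus its per-subtarget MC encodings.
    enum Reg : unsigned { FLAT_SCR, FLAT_SCR_ci, FLAT_SCR_vi,
                          TTMP0, TTMP0_vi, TTMP0_gfx9 };

    // The case list is spelled once; CASE_CI_VI/CASE_VI_GFX9 stay undefined
    // here and are supplied differently at each expansion site.
    #define MAP_REG2REG                          \
      switch (Reg) {                             \
      default: return Reg;                       \
      CASE_CI_VI(FLAT_SCR)                       \
      CASE_VI_GFX9(TTMP0)                        \
      }

    // First expansion: pseudo register -> subtarget-specific MC register.
    #define CASE_CI_VI(node) case node: return IsCI ? node##_ci : node##_vi;
    #define CASE_VI_GFX9(node) case node: return IsGFX9 ? node##_gfx9 : node##_vi;
    unsigned getMCRegSketch(unsigned Reg, bool IsCI, bool IsGFX9) { MAP_REG2REG }
    #undef CASE_CI_VI
    #undef CASE_VI_GFX9

    // Second expansion: the inverse mapping, from the same case list.
    #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
    #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
    unsigned mc2PseudoRegSketch(unsigned Reg) { MAP_REG2REG }
    #undef CASE_CI_VI
    #undef CASE_VI_GFX9
    #undef MAP_REG2REG

The macro deliberately hard-codes the enclosing function's parameter name Reg (and, in the patch, STI), which is why the patch #undefs everything immediately after the second expansion.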
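Note on the SMRD change: swapping isSI(ST) || isCI(ST) for isGCN3Encoding(ST) keys the offset rule on the encoding itself. Pre-GCN3 subtargets encode the SMRD immediate as a dword offset in an 8-bit field; the GCN3 encoding takes a 20-bit byte offset. A self-contained restatement of that rule (isLegalSMRDImmOffsetSketch is a hypothetical helper, not the LLVM API):

    #include <cstdint>

    // Pre-GCN3 (SI/CI): offset in dwords, 8-bit field.
    // GCN3 encoding (VI+): offset in bytes, 20-bit field.
    static bool isLegalSMRDImmOffsetSketch(bool HasGCN3Encoding,
                                           int64_t ByteOffset) {
      const int64_t Encoded = HasGCN3Encoding ? ByteOffset : ByteOffset >> 2;
      const int Bits = HasGCN3Encoding ? 20 : 8;
      return Encoded >= 0 && Encoded < (int64_t(1) << Bits); // i.e. isUInt<Bits>
    }

For example, a 1024-byte offset encodes as dword offset 256 on SI/CI, which does not fit in 8 bits and is rejected, but the same 1024-byte offset fits easily in the GCN3 encoding's 20-bit field.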