diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 119 |
1 files changed, 84 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 99a01ca3a2fd..d078fc147a36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -8,6 +8,8 @@
 
 #include "AMDGPU.h"
 #include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIRegisterInfo.h"
 #include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,6 +45,10 @@ char AMDGPUArgumentUsageInfo::ID = 0;
 
 const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
 
+// Hardcoded registers from fixed function ABI
+const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
+  = AMDGPUFunctionArgInfo::fixedABILayout();
+
 bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
   return false;
 }
@@ -77,59 +83,102 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
   }
 }
 
-std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
 AMDGPUFunctionArgInfo::getPreloadedValue(
-  AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
   switch (Value) {
   case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
-    return std::make_pair(
-      PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
-      &AMDGPU::SGPR_128RegClass);
+    return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
+                                                : nullptr,
+                           &AMDGPU::SGPR_128RegClass, LLT::vector(4, 32));
   }
   case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
-    return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
-    return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
-
+    return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
-    return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
-    return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
-                          &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
+                           &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
-    return std::make_pair(
-      PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
-      &AMDGPU::SGPR_32RegClass);
+    return std::make_tuple(
+        PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
+        &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
-    return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
-    return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::DISPATCH_ID:
-    return std::make_pair(DispatchID ? &DispatchID : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(DispatchID ? &DispatchID : nullptr,
+                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
   case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
-    return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
+                           &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
   case AMDGPUFunctionArgInfo::DISPATCH_PTR:
-    return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::QUEUE_PTR:
-    return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
-                          &AMDGPU::SGPR_64RegClass);
+    return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
+                           &AMDGPU::SGPR_64RegClass,
+                           LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
-    return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
-    return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
-    return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
-                          &AMDGPU::VGPR_32RegClass);
+    return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
+                           &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
   }
   llvm_unreachable("unexpected preloaded value type");
 }
+
+constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
+  AMDGPUFunctionArgInfo AI;
+  AI.PrivateSegmentBuffer
+    = ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
+  AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
+  AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);
+
+  // Do not pass kernarg segment pointer, only pass increment version in its
+  // place.
+  AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
+  AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);
+
+  // Skip FlatScratchInit/PrivateSegmentSize
+  AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
+  AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
+  AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
+
+  const unsigned Mask = 0x3ff;
+  AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
+  AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
+  AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
+  return AI;
+}
+
+const AMDGPUFunctionArgInfo &
+AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
+  auto I = ArgInfoMap.find(&F);
+  if (I == ArgInfoMap.end()) {
+    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
+      return FixedABIFunctionInfo;
+
+    // Without the fixed ABI, we assume no function has special inputs.
+    assert(F.isDeclaration());
+    return ExternFunctionInfo;
+  }
+
+  return I->second;
+}