aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-12-20 19:53:05 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-12-20 19:53:05 +0000
commit0b57cec536236d46e3dba9bd041533462f33dbb7 (patch)
tree56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
parent718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff)
Notes
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp465
1 files changed, 0 insertions, 465 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
deleted file mode 100644
index b107c357196d7..0000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ /dev/null
@@ -1,465 +0,0 @@
-//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file implements the lowering of LLVM calls to machine code calls for
-/// GlobalISel.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUCallLowering.h"
-#include "AMDGPU.h"
-#include "AMDGPUISelLowering.h"
-#include "AMDGPUSubtarget.h"
-#include "SIISelLowering.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-
-using namespace llvm;
-
-namespace {
-
-/// Value handler for outgoing values that are passed in physical registers.
-///
-/// Each assigned value is copied into its physical register and that register
-/// is added as a use operand of \c MIB. Passing values through the stack is
-/// not implemented.
-struct OutgoingArgHandler : public CallLowering::ValueHandler {
-  /// Instruction that collects one use operand per assigned register.
-  MachineInstrBuilder MIB;
-
-  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
-      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
-
-  /// Forward the location decision to the calling-convention function.
-  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
-                 CCValAssign::LocInfo LocInfo,
-                 const CallLowering::ArgInfo &Info,
-                 CCState &State) override {
-    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
-  }
-
-  /// Record \p PhysReg as a use of \c MIB, then copy \p ValVReg into it.
-  void assignValueToReg(Register ValVReg, Register PhysReg,
-                        CCValAssign &VA) override {
-    MIB.addUse(PhysReg);
-    MIRBuilder.buildCopy(PhysReg, ValVReg);
-  }
-
-  /// Stack locations are not supported by this handler.
-  Register getStackAddress(uint64_t Size, int64_t Offset,
-                           MachinePointerInfo &MPO) override {
-    llvm_unreachable("not implemented");
-  }
-
-  /// Stack locations are not supported by this handler.
-  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
-                            MachinePointerInfo &MPO, CCValAssign &VA) override {
-    llvm_unreachable("not implemented");
-  }
-};
-
-} // end anonymous namespace
-
-/// Construct the call lowering, handing \p TLI to the CallLowering base.
-AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
-    : CallLowering(&TLI) {}
-
-/// Lower a return of \p Val, whose parts live in \p VRegs.
-///
-/// A void return (\p Val is null) is emitted as S_ENDPGM. A value return is
-/// only lowered for shader calling conventions: the value is split into its
-/// component types, each part is assigned to a register and the function ends
-/// with SI_RETURN_TO_EPILOG.
-///
-/// \returns true on success, false if the return could not be lowered.
-bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                     const Value *Val,
-                                     ArrayRef<Register> VRegs) const {
-  MachineFunction &MF = MIRBuilder.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  const bool ReturnsVoid = !Val;
-  MF.getInfo<SIMachineFunctionInfo>()->setIfReturnsVoid(ReturnsVoid);
-
-  if (ReturnsVoid) {
-    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
-    return true;
-  }
-
-  Register FirstVReg = VRegs[0];
-
-  const Function &F = MF.getFunction();
-  auto &DL = F.getParent()->getDataLayout();
-  if (!AMDGPU::isShader(F.getCallingConv()))
-    return false;
-
-  // Describe the unsplit return value so its flags can be shared by all parts.
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
-  ArgInfo OrigArg{FirstVReg, Val->getType()};
-  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
-
-  SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-
-  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
-
-  // One ArgInfo per component, each paired with the matching input register.
-  SmallVector<ArgInfo, 8> SplitArgs;
-  const unsigned NumParts = Offsets.size();
-  for (unsigned Part = 0; Part < NumParts; ++Part) {
-    Type *PartTy = SplitVTs[Part].getTypeForEVT(F.getContext());
-    SplitArgs.push_back({VRegs[Part], PartTy, OrigArg.Flags, OrigArg.IsFixed});
-  }
-
-  // Build the return detached so the register copies are emitted before it.
-  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
-  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
-  const bool Assigned = handleAssignments(MIRBuilder, SplitArgs, Handler);
-  if (Assigned)
-    MIRBuilder.insertInstr(RetInstr);
-
-  return Assigned;
-}
-
-/// Emit code computing the address of a kernel argument.
-///
-/// The address is the kernarg segment pointer advanced by \p Offset, produced
-/// as a constant-address-space pointer to \p ParamTy.
-///
-/// \returns the virtual register holding the computed pointer.
-Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
-                                               Type *ParamTy,
-                                               uint64_t Offset) const {
-  MachineFunction &MF = MIRBuilder.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
-
-  // Destination register, typed as a pointer into the constant address space.
-  PointerType *ArgPtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
-  Register ResultReg =
-      MRI.createGenericVirtualRegister(getLLTForType(*ArgPtrTy, DL));
-
-  // Virtual register bound to the preloaded kernarg segment pointer.
-  Register KernArgPhysReg =
-      FuncInfo->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-  Register KernArgBase = MRI.getLiveInVirtReg(KernArgPhysReg);
-
-  // Materialize the offset as a 64-bit constant and add it to the base.
-  Register ByteOffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
-  MIRBuilder.buildConstant(ByteOffsetReg, Offset);
-  MIRBuilder.buildGEP(ResultReg, KernArgBase, ByteOffsetReg);
-
-  return ResultReg;
-}
-
-/// Emit a load of the kernel argument of type \p ParamTy into \p DstReg.
-///
-/// The argument is read from \p Offset relative to the kernarg segment
-/// pointer. The memory operand covers the store size of \p ParamTy, uses
-/// alignment \p Align and is flagged as a non-temporal, invariant load.
-void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
-                                        Type *ParamTy, uint64_t Offset,
-                                        unsigned Align,
-                                        Register DstReg) const {
-  MachineFunction &MF = MIRBuilder.getMF();
-  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
-
-  PointerType *ArgPtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
-  MachinePointerInfo PtrInfo(UndefValue::get(ArgPtrTy));
-  unsigned LoadSize = DL.getTypeStoreSize(ParamTy);
-
-  // Address computation is emitted ahead of the load itself.
-  Register AddrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
-
-  const auto LoadFlags = MachineMemOperand::MOLoad |
-                         MachineMemOperand::MONonTemporal |
-                         MachineMemOperand::MOInvariant;
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PtrInfo, LoadFlags, LoadSize, Align);
-
-  MIRBuilder.buildLoad(DstReg, AddrReg, *MMO);
-}
-
-/// Scan upwards from SGPR0 and return the first 32-bit SGPR that \p CCInfo
-/// has not allocated yet. Running out of registers is treated as unreachable.
-static Register findFirstFreeSGPR(CCState &CCInfo) {
-  const unsigned SGPRCount = AMDGPU::SGPR_32RegClass.getNumRegs();
-  for (unsigned Idx = 0; Idx != SGPRCount; ++Idx) {
-    const unsigned Candidate = AMDGPU::SGPR0 + Idx;
-    if (CCInfo.isAllocated(Candidate))
-      continue;
-    return Candidate;
-  }
-  llvm_unreachable("Cannot allocate sgpr");
-}
-
-/// Reserve the VGPRs that carry the work-item IDs requested by \p Info.
-///
-/// The X, Y and Z IDs use VGPR0, VGPR1 and VGPR2 respectively. Each requested
-/// register becomes a 32-bit function live-in, is marked allocated in
-/// \p CCInfo and is recorded in \p Info.
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
-                                           MachineFunction &MF,
-                                           const SIRegisterInfo &TRI,
-                                           SIMachineFunctionInfo &Info) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const LLT S32 = LLT::scalar(32);
-
-  // Shared bookkeeping for one work-item ID register.
-  auto ClaimVGPR = [&](Register Reg) {
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  };
-
-  if (Info.hasWorkItemIDX()) {
-    const Register IDXReg = AMDGPU::VGPR0;
-    ClaimVGPR(IDXReg);
-    Info.setWorkItemIDX(ArgDescriptor::createRegister(IDXReg));
-  }
-
-  if (Info.hasWorkItemIDY()) {
-    const Register IDYReg = AMDGPU::VGPR1;
-    ClaimVGPR(IDYReg);
-    Info.setWorkItemIDY(ArgDescriptor::createRegister(IDYReg));
-  }
-
-  if (Info.hasWorkItemIDZ()) {
-    const Register IDZReg = AMDGPU::VGPR2;
-    ClaimVGPR(IDZReg);
-    Info.setWorkItemIDZ(ArgDescriptor::createRegister(IDZReg));
-  }
-}
-
-// Allocate special inputs passed in user SGPRs.
-//
-// For every input requested by \p Info the register is obtained from \p Info,
-// made live-in and marked allocated in \p CCInfo. The kernarg segment pointer
-// is additionally copied into a fresh virtual register.
-static void allocateHSAUserSGPRs(CCState &CCInfo,
-                                 MachineIRBuilder &MIRBuilder,
-                                 MachineFunction &MF,
-                                 const SIRegisterInfo &TRI,
-                                 SIMachineFunctionInfo &Info) {
-  // Common tail shared by the inputs that only need a function live-in.
-  auto ClaimUserSGPR = [&](unsigned Reg, const TargetRegisterClass *RC) {
-    MF.addLiveIn(Reg, RC);
-    CCInfo.AllocateReg(Reg);
-  };
-
-  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
-  if (Info.hasPrivateSegmentBuffer())
-    ClaimUserSGPR(Info.addPrivateSegmentBuffer(TRI),
-                  &AMDGPU::SGPR_128RegClass);
-
-  if (Info.hasDispatchPtr())
-    ClaimUserSGPR(Info.addDispatchPtr(TRI), &AMDGPU::SGPR_64RegClass);
-
-  if (Info.hasQueuePtr())
-    ClaimUserSGPR(Info.addQueuePtr(TRI), &AMDGPU::SGPR_64RegClass);
-
-  if (Info.hasKernargSegmentPtr()) {
-    // The kernarg pointer gets a typed virtual register bound to the physical
-    // input, so it is handled separately from the other inputs.
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-    Register KernArgPhysReg = Info.addKernargSegmentPtr(TRI);
-    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
-    Register KernArgVReg = MRI.createGenericVirtualRegister(P4);
-    MRI.addLiveIn(KernArgPhysReg, KernArgVReg);
-    MIRBuilder.getMBB().addLiveIn(KernArgPhysReg);
-    MIRBuilder.buildCopy(KernArgVReg, KernArgPhysReg);
-    CCInfo.AllocateReg(KernArgPhysReg);
-  }
-
-  if (Info.hasDispatchID())
-    ClaimUserSGPR(Info.addDispatchID(TRI), &AMDGPU::SGPR_64RegClass);
-
-  if (Info.hasFlatScratchInit())
-    ClaimUserSGPR(Info.addFlatScratchInit(TRI), &AMDGPU::SGPR_64RegClass);
-
-  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
-  // these from the dispatch pointer.
-}
-
-/// Reserve the system SGPRs requested by \p Info.
-///
-/// Work-group IDs and the work-group info register become 32-bit live-ins and
-/// are marked allocated in \p CCInfo. The private segment wave byte offset is
-/// handled last; for shaders without a fixed location it takes the first free
-/// SGPR. \p CallConv is not consulted by this function.
-static void allocateSystemSGPRs(CCState &CCInfo,
-                                MachineFunction &MF,
-                                SIMachineFunctionInfo &Info,
-                                CallingConv::ID CallConv,
-                                bool IsShader) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const LLT S32 = LLT::scalar(32);
-
-  // Shared bookkeeping for one 32-bit system SGPR input.
-  auto ClaimSystemSGPR = [&](unsigned Reg) {
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  };
-
-  if (Info.hasWorkGroupIDX())
-    ClaimSystemSGPR(Info.addWorkGroupIDX());
-
-  if (Info.hasWorkGroupIDY())
-    ClaimSystemSGPR(Info.addWorkGroupIDY());
-
-  if (Info.hasWorkGroupIDZ())
-    ClaimSystemSGPR(Info.addWorkGroupIDZ());
-
-  if (Info.hasWorkGroupInfo())
-    ClaimSystemSGPR(Info.addWorkGroupInfo());
-
-  if (!Info.hasPrivateSegmentWaveByteOffset())
-    return;
-
-  // Scratch wave offset passed in system SGPR.
-  unsigned WaveOffsetReg;
-  if (!IsShader) {
-    WaveOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-  } else {
-    WaveOffsetReg = Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
-    // This is true if the scratch wave byte offset doesn't have a fixed
-    // location.
-    if (WaveOffsetReg == AMDGPU::NoRegister) {
-      WaveOffsetReg = findFirstFreeSGPR(CCInfo);
-      Info.setPrivateSegmentWaveByteOffset(WaveOffsetReg);
-    }
-  }
-
-  MF.addLiveIn(WaveOffsetReg, &AMDGPU::SGPR_32RegClass);
-  CCInfo.AllocateReg(WaveOffsetReg);
-}
-
-/// Lower the formal arguments of kernel \p F.
-///
-/// Every argument with a non-zero allocation size is loaded from the kernarg
-/// segment at its ABI-aligned offset. Arguments spread over several registers
-/// are loaded into one temporary register and then unpacked into \p VRegs.
-/// The special user SGPRs, work-item VGPRs and system SGPRs are allocated
-/// around the argument loads.
-///
-/// \returns true (this path has no failure case).
-bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
-    MachineIRBuilder &MIRBuilder, const Function &F,
-    ArrayRef<ArrayRef<Register>> VRegs) const {
-  MachineFunction &MF = MIRBuilder.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIRegisterInfo &TRI = *ST.getRegisterInfo();
-  SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
-
-  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, TRI, FuncInfo);
-
-  const unsigned KernArgBaseAlign = 16;
-  const unsigned BaseOffset = ST.getExplicitKernelArgOffset(F);
-  uint64_t ExplicitArgOffset = 0;
-  // Index into VRegs; only advanced for arguments that are actually lowered.
-  unsigned VRegIdx = 0;
-
-  // TODO: Align down to dword alignment and extract bits for extending loads.
-  for (auto &Arg : F.args()) {
-    Type *ArgTy = Arg.getType();
-    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-    if (AllocSize == 0)
-      continue;
-
-    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
-
-    // Place the argument at the next ABI-aligned slot and advance past it.
-    const uint64_t AlignedStart = alignTo(ExplicitArgOffset, ABIAlign);
-    uint64_t ArgOffset = AlignedStart + BaseOffset;
-    ExplicitArgOffset = AlignedStart + AllocSize;
-
-    ArrayRef<Register> OrigArgRegs = VRegs[VRegIdx++];
-    const bool HasSingleReg = OrigArgRegs.size() == 1;
-    Register ArgReg =
-        HasSingleReg
-            ? OrigArgRegs[0]
-            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
-
-    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
-    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
-    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
-
-    if (OrigArgRegs.size() > 1)
-      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
-  }
-
-  allocateSpecialEntryInputVGPRs(CCInfo, MF, TRI, FuncInfo);
-  allocateSystemSGPRs(CCInfo, MF, FuncInfo, F.getCallingConv(), false);
-  return true;
-}
-
-/// Lower the incoming formal arguments of \p F into the registers in \p VRegs.
-///
-/// Kernels are forwarded to lowerFormalArgumentsKernel(). AMDGPU_GS and
-/// AMDGPU_HS are rejected up front. For the remaining calling conventions
-/// every argument is run through the calling-convention assignment function,
-/// but only AMDGPU_VS and AMDGPU_PS then have their arguments copied out of
-/// the assigned physical registers; any other convention returns false.
-///
-/// For AMDGPU_PS, unused inputs that are not already allocated are skipped and
-/// the allocated/enabled PS input state in the function info is updated.
-///
-/// \returns true if the arguments were lowered, false otherwise.
-bool AMDGPUCallLowering::lowerFormalArguments(
-    MachineIRBuilder &MIRBuilder, const Function &F,
-    ArrayRef<ArrayRef<Register>> VRegs) const {
-  // The infrastructure for normal calling convention lowering is essentially
-  // useless for kernels. We want to avoid any kind of legalization or argument
-  // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
-    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
-
-  // AMDGPU_GS and AMDGPU_HS are not supported yet.
-  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
-      F.getCallingConv() == CallingConv::AMDGPU_HS)
-    return false;
-
-  MachineFunction &MF = MIRBuilder.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
-  bool IsShader = AMDGPU::isShader(F.getCallingConv());
-
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
-
-  if (Info->hasImplicitBufferPtr()) {
-    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
-    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
-    CCInfo.AllocateReg(ImplicitBufferPtrReg);
-  }
-
-  unsigned NumArgs = F.arg_size();
-  Function::const_arg_iterator CurOrigArg = F.arg_begin();
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
-  unsigned PSInputNum = 0;
-  // Original argument indices that were dropped as unused PS inputs.
-  BitVector Skipped(NumArgs);
-  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
-    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
-    // We can only handle simple value types at the moment.
-    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
-    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
-    setArgFlags(OrigArg, i + 1, DL, F);
-
-    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
-        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
-        PSInputNum <= 15) {
-      // An unused PS input that nobody allocated yet needs no register.
-      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
-        Skipped.set(i);
-        ++PSInputNum;
-        continue;
-      }
-
-      Info->markPSInputAllocated(PSInputNum);
-      if (!CurOrigArg->use_empty())
-        Info->markPSInputEnabled(PSInputNum);
-
-      ++PSInputNum;
-    }
-
-    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
-                                             /*IsVarArg=*/false);
-
-    // getSimpleVT() is only valid on simple types, so reject everything else
-    // before any MVT is requested.
-    if (!ValEVT.isSimple())
-      return false;
-
-    if (ValEVT.isVector()) {
-      // Vectors are assigned using their element type.
-      MVT ValVT = ValEVT.getVectorElementType().getSimpleVT();
-      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
-                          OrigArg.Flags, CCInfo);
-      // NOTE(review): this bails out when AssignFn returns false, which is the
-      // opposite of the scalar path below where a true result means failure.
-      // Kept as-is to preserve behavior; confirm which polarity is intended.
-      if (!Res)
-        return false;
-    } else {
-      MVT ValVT = ValEVT.getSimpleVT();
-      bool Res =
-          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
-      // Fail if we don't know how to handle this type.
-      if (Res)
-        return false;
-    }
-  }
-
-  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
-      F.getCallingConv() == CallingConv::AMDGPU_PS) {
-    // Walk the original arguments and the assigned locations in lockstep;
-    // skipped arguments have no entry in ArgLocs.
-    for (unsigned i = 0, OrigArgIdx = 0;
-         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++OrigArgIdx) {
-      if (Skipped.test(OrigArgIdx))
-        continue;
-      assert(VRegs[OrigArgIdx].size() == 1 &&
-             "Can't lower into more than 1 reg");
-      CCValAssign &VA = ArgLocs[i++];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
-      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
-    }
-
-    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
-    return true;
-  }
-
-  return false;
-}