diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/AMDGPU/SIMachineFunctionInfo.h | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Notes
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.h')
| -rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.h | 227 |
1 files changed, 144 insertions, 83 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 4c7f38a09a48..5dde72910ee3 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// +//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -14,23 +14,32 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H +#include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUMachineFunction.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include <array> #include <cassert> -#include <map> #include <utility> +#include <vector> namespace llvm { +class MachineFrameInfo; +class MachineFunction; +class TargetRegisterClass; + class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: - explicit AMDGPUImagePseudoSourceValue() : - PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -44,7 +53,7 @@ public: return false; } - bool mayAlias(const MachineFrameInfo*) const override { + bool mayAlias(const MachineFrameInfo *) const override { // FIXME: If we ever change image intrinsics to accept fat pointers, then // this could be true for some cases. return false; @@ -53,8 +62,8 @@ public: class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { public: - explicit AMDGPUBufferPseudoSourceValue() : - PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) : + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -68,7 +77,7 @@ public: return false; } - bool mayAlias(const MachineFrameInfo*) const override { + bool mayAlias(const MachineFrameInfo *) const override { // FIXME: If we ever change image intrinsics to accept fat pointers, then // this could be true for some cases. return false; @@ -78,86 +87,68 @@ public: /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo final : public AMDGPUMachineFunction { - // FIXME: This should be removed and getPreloadedValue moved here. - friend class SIRegisterInfo; - - unsigned TIDReg; + unsigned TIDReg = AMDGPU::NoRegister; // Registers that may be reserved for spilling purposes. These may be the same // as the input registers. - unsigned ScratchRSrcReg; - unsigned ScratchWaveOffsetReg; + unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; + unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; // This is the current function's incremented size from the kernel's scratch // wave offset register. For an entry function, this is exactly the same as // the ScratchWaveOffsetReg. - unsigned FrameOffsetReg; + unsigned FrameOffsetReg = AMDGPU::FP_REG; // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. - unsigned StackPtrOffsetReg; - - // Input registers for non-HSA ABI - unsigned ImplicitBufferPtrUserSGPR; - - // Input registers setup for the HSA ABI. - // User SGPRs in allocation order. - unsigned PrivateSegmentBufferUserSGPR; - unsigned DispatchPtrUserSGPR; - unsigned QueuePtrUserSGPR; - unsigned KernargSegmentPtrUserSGPR; - unsigned DispatchIDUserSGPR; - unsigned FlatScratchInitUserSGPR; - unsigned PrivateSegmentSizeUserSGPR; - unsigned GridWorkGroupCountXUserSGPR; - unsigned GridWorkGroupCountYUserSGPR; - unsigned GridWorkGroupCountZUserSGPR; - - // System SGPRs in allocation order. - unsigned WorkGroupIDXSystemSGPR; - unsigned WorkGroupIDYSystemSGPR; - unsigned WorkGroupIDZSystemSGPR; - unsigned WorkGroupInfoSystemSGPR; - unsigned PrivateSegmentWaveByteOffsetSystemSGPR; + unsigned StackPtrOffsetReg = AMDGPU::SP_REG; - // VGPR inputs. These are always v0, v1 and v2 for entry functions. - unsigned WorkItemIDXVGPR; - unsigned WorkItemIDYVGPR; - unsigned WorkItemIDZVGPR; + AMDGPUFunctionArgInfo ArgInfo; // Graphics info. - unsigned PSInputAddr; - unsigned PSInputEnable; + unsigned PSInputAddr = 0; + unsigned PSInputEnable = 0; - bool ReturnsVoid; + /// Number of bytes of arguments this function has on the stack. If the callee + /// is expected to restore the argument stack this should be a multiple of 16, + /// all usable during a tail call. + /// + /// The alternative would forbid tail call optimisation in some cases: if we + /// want to transfer control from a function with 8-bytes of stack-argument + /// space to a function with 16-bytes then misalignment of this value would + /// make a stack adjustment necessary, which could not be undone by the + /// callee. + unsigned BytesInStackArgArea = 0; + + bool ReturnsVoid = true; // A pair of default/requested minimum/maximum flat work group sizes. // Minimum - first, maximum - second. - std::pair<unsigned, unsigned> FlatWorkGroupSizes; + std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; // A pair of default/requested minimum/maximum number of waves per execution // unit. Minimum - first, maximum - second. - std::pair<unsigned, unsigned> WavesPerEU; + std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; // Stack object indices for work group IDs. - std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; + std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; + // Stack object indices for work item IDs. - std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; + std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; AMDGPUImagePseudoSourceValue ImagePSV; private: - unsigned LDSWaveSpillSize; - unsigned ScratchOffsetReg; - unsigned NumUserSGPRs; - unsigned NumSystemSGPRs; + unsigned LDSWaveSpillSize = 0; + unsigned NumUserSGPRs = 0; + unsigned NumSystemSGPRs = 0; - bool HasSpilledSGPRs; - bool HasSpilledVGPRs; - bool HasNonSpillStackObjects; + bool HasSpilledSGPRs = false; + bool HasSpilledVGPRs = false; + bool HasNonSpillStackObjects = false; - unsigned NumSpilledSGPRs; - unsigned NumSpilledVGPRs; + unsigned NumSpilledSGPRs = 0; + unsigned NumSpilledVGPRs = 0; // Feature bits required for inputs passed in user SGPRs. bool PrivateSegmentBuffer : 1; @@ -186,6 +177,15 @@ private: // Other shaders indirect 64-bits at sgpr[0:1] bool ImplicitBufferPtr : 1; + // Pointer to where the ABI inserts special kernel arguments separate from the + // user arguments. This is an offset from the KernargSegmentPtr. + bool ImplicitArgPtr : 1; + + // The hard-wired high half of the address of the global information table + // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since + // current hardware only allows a 16 bit value. + unsigned GITPtrHigh; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -201,24 +201,34 @@ public: int Lane = -1; SpilledReg() = default; - SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } + SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} bool hasLane() { return Lane != -1;} bool hasReg() { return VGPR != AMDGPU::NoRegister;} }; + struct SGPRSpillVGPRCSR { + // VGPR used for SGPR spills + unsigned VGPR; + + // If the VGPR is a CSR, the stack slot used to save/restore it in the + // prolog/epilog. + Optional<int> FI; + + SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} + }; + private: // SGPR->VGPR spilling support. - typedef std::pair<unsigned, unsigned> SpillRegMask; + using SpillRegMask = std::pair<unsigned, unsigned>; // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; unsigned NumVGPRSpillLanes = 0; - SmallVector<unsigned, 2> SpillVGPRs; + SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; public: - SIMachineFunctionInfo(const MachineFunction &MF); ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { @@ -227,13 +237,25 @@ public: ArrayRef<SpilledReg>() : makeArrayRef(I->second); } + ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { + return SpillVGPRs; + } + bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); - bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; - unsigned getTIDReg() const { return TIDReg; }; + bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; } + unsigned getTIDReg() const { return TIDReg; } void setTIDReg(unsigned Reg) { TIDReg = Reg; } + unsigned getBytesInStackArgArea() const { + return BytesInStackArgArea; + } + + void setBytesInStackArgArea(unsigned Bytes) { + BytesInStackArgArea = Bytes; + } + // Add user SGPRs. unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); unsigned addDispatchPtr(const SIRegisterInfo &TRI); @@ -245,37 +267,51 @@ public: // Add system SGPRs. unsigned addWorkGroupIDX() { - WorkGroupIDXSystemSGPR = getNextSystemSGPR(); + ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; - return WorkGroupIDXSystemSGPR; + return ArgInfo.WorkGroupIDX.getRegister(); } unsigned addWorkGroupIDY() { - WorkGroupIDYSystemSGPR = getNextSystemSGPR(); + ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; - return WorkGroupIDYSystemSGPR; + return ArgInfo.WorkGroupIDY.getRegister(); } unsigned addWorkGroupIDZ() { - WorkGroupIDZSystemSGPR = getNextSystemSGPR(); + ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; - return WorkGroupIDZSystemSGPR; + return ArgInfo.WorkGroupIDZ.getRegister(); } unsigned addWorkGroupInfo() { - WorkGroupInfoSystemSGPR = getNextSystemSGPR(); + ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; - return WorkGroupInfoSystemSGPR; + return ArgInfo.WorkGroupInfo.getRegister(); + } + + // Add special VGPR inputs + void setWorkItemIDX(ArgDescriptor Arg) { + ArgInfo.WorkItemIDX = Arg; + } + + void setWorkItemIDY(ArgDescriptor Arg) { + ArgInfo.WorkItemIDY = Arg; + } + + void setWorkItemIDZ(ArgDescriptor Arg) { + ArgInfo.WorkItemIDZ = Arg; } unsigned addPrivateSegmentWaveByteOffset() { - PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); + ArgInfo.PrivateSegmentWaveByteOffset + = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; - return PrivateSegmentWaveByteOffsetSystemSGPR; + return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); } void setPrivateSegmentWaveByteOffset(unsigned Reg) { - PrivateSegmentWaveByteOffsetSystemSGPR = Reg; + ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); } bool hasPrivateSegmentBuffer() const { @@ -346,10 +382,35 @@ public: return WorkItemIDZ; } + bool hasImplicitArgPtr() const { + return ImplicitArgPtr; + } + bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; } + AMDGPUFunctionArgInfo &getArgInfo() { + return ArgInfo; + } + + const AMDGPUFunctionArgInfo &getArgInfo() const { + return ArgInfo; + } + + std::pair<const ArgDescriptor *, const TargetRegisterClass *> + getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { + return ArgInfo.getPreloadedValue(Value); + } + + unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { + return ArgInfo.getPreloadedValue(Value).first->getRegister(); + } + + unsigned getGITPtrHigh() const { + return GITPtrHigh; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } @@ -359,7 +420,7 @@ public: } unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { - return PrivateSegmentWaveByteOffsetSystemSGPR; + return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); } /// \brief Returns the physical register reserved for use as the resource @@ -401,11 +462,11 @@ public: } unsigned getQueuePtrUserSGPR() const { - return QueuePtrUserSGPR; + return ArgInfo.QueuePtr.getRegister(); } unsigned getImplicitBufferPtrUserSGPR() const { - return ImplicitBufferPtrUserSGPR; + return ArgInfo.ImplicitBufferPtr.getRegister(); } bool hasSpilledSGPRs() const { @@ -537,13 +598,13 @@ public: switch (Dim) { case 0: assert(hasWorkGroupIDX()); - return WorkGroupIDXSystemSGPR; + return ArgInfo.WorkGroupIDX.getRegister(); case 1: assert(hasWorkGroupIDY()); - return WorkGroupIDYSystemSGPR; + return ArgInfo.WorkGroupIDY.getRegister(); case 2: assert(hasWorkGroupIDZ()); - return WorkGroupIDZSystemSGPR; + return ArgInfo.WorkGroupIDZ.getRegister(); } llvm_unreachable("unexpected dimension"); } |
