aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineFunctionInfo.h
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-12-18 20:10:56 +0000
commit: 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree: 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/AMDGPU/SIMachineFunctionInfo.h
parent: eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Notes
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h227
1 files changed, 144 insertions, 83 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a48..5dde72910ee3 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -14,23 +14,32 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
+#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
-#include <map>
#include <utility>
+#include <vector>
namespace llvm {
+class MachineFrameInfo;
+class MachineFunction;
+class TargetRegisterClass;
+
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
- explicit AMDGPUImagePseudoSourceValue() :
- PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+ explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
@@ -44,7 +53,7 @@ public:
return false;
}
- bool mayAlias(const MachineFrameInfo*) const override {
+ bool mayAlias(const MachineFrameInfo *) const override {
// FIXME: If we ever change image intrinsics to accept fat pointers, then
// this could be true for some cases.
return false;
@@ -53,8 +62,8 @@ public:
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
- explicit AMDGPUBufferPseudoSourceValue() :
- PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+ explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most buffers, but we will start by being
@@ -68,7 +77,7 @@ public:
return false;
}
- bool mayAlias(const MachineFrameInfo*) const override {
+ bool mayAlias(const MachineFrameInfo *) const override {
// FIXME: If we ever change buffer intrinsics to accept fat pointers, then
// this could be true for some cases.
return false;
@@ -78,86 +87,68 @@ public:
/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
- // FIXME: This should be removed and getPreloadedValue moved here.
- friend class SIRegisterInfo;
-
- unsigned TIDReg;
+ unsigned TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
- unsigned ScratchRSrcReg;
- unsigned ScratchWaveOffsetReg;
+ unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
+ unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
// This is the current function's incremented size from the kernel's scratch
// wave offset register. For an entry function, this is exactly the same as
// the ScratchWaveOffsetReg.
- unsigned FrameOffsetReg;
+ unsigned FrameOffsetReg = AMDGPU::FP_REG;
// Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
- unsigned StackPtrOffsetReg;
-
- // Input registers for non-HSA ABI
- unsigned ImplicitBufferPtrUserSGPR;
-
- // Input registers setup for the HSA ABI.
- // User SGPRs in allocation order.
- unsigned PrivateSegmentBufferUserSGPR;
- unsigned DispatchPtrUserSGPR;
- unsigned QueuePtrUserSGPR;
- unsigned KernargSegmentPtrUserSGPR;
- unsigned DispatchIDUserSGPR;
- unsigned FlatScratchInitUserSGPR;
- unsigned PrivateSegmentSizeUserSGPR;
- unsigned GridWorkGroupCountXUserSGPR;
- unsigned GridWorkGroupCountYUserSGPR;
- unsigned GridWorkGroupCountZUserSGPR;
-
- // System SGPRs in allocation order.
- unsigned WorkGroupIDXSystemSGPR;
- unsigned WorkGroupIDYSystemSGPR;
- unsigned WorkGroupIDZSystemSGPR;
- unsigned WorkGroupInfoSystemSGPR;
- unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
+ unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
- // VGPR inputs. These are always v0, v1 and v2 for entry functions.
- unsigned WorkItemIDXVGPR;
- unsigned WorkItemIDYVGPR;
- unsigned WorkItemIDZVGPR;
+ AMDGPUFunctionArgInfo ArgInfo;
// Graphics info.
- unsigned PSInputAddr;
- unsigned PSInputEnable;
+ unsigned PSInputAddr = 0;
+ unsigned PSInputEnable = 0;
- bool ReturnsVoid;
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+ /// The alternative would forbid tail call optimisation in some cases: if we
+ /// want to transfer control from a function with 8 bytes of stack-argument
+ /// space to a function with 16 bytes, then misalignment of this value would
+ /// make a stack adjustment necessary, which could not be undone by the
+ /// callee.
+ unsigned BytesInStackArgArea = 0;
+
+ bool ReturnsVoid = true;
// A pair of default/requested minimum/maximum flat work group sizes.
// Minimum - first, maximum - second.
- std::pair<unsigned, unsigned> FlatWorkGroupSizes;
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
// A pair of default/requested minimum/maximum number of waves per execution
// unit. Minimum - first, maximum - second.
- std::pair<unsigned, unsigned> WavesPerEU;
+ std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
// Stack object indices for work group IDs.
- std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
+ std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
+
// Stack object indices for work item IDs.
- std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
+ std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV;
AMDGPUImagePseudoSourceValue ImagePSV;
private:
- unsigned LDSWaveSpillSize;
- unsigned ScratchOffsetReg;
- unsigned NumUserSGPRs;
- unsigned NumSystemSGPRs;
+ unsigned LDSWaveSpillSize = 0;
+ unsigned NumUserSGPRs = 0;
+ unsigned NumSystemSGPRs = 0;
- bool HasSpilledSGPRs;
- bool HasSpilledVGPRs;
- bool HasNonSpillStackObjects;
+ bool HasSpilledSGPRs = false;
+ bool HasSpilledVGPRs = false;
+ bool HasNonSpillStackObjects = false;
- unsigned NumSpilledSGPRs;
- unsigned NumSpilledVGPRs;
+ unsigned NumSpilledSGPRs = 0;
+ unsigned NumSpilledVGPRs = 0;
// Feature bits required for inputs passed in user SGPRs.
bool PrivateSegmentBuffer : 1;
@@ -186,6 +177,15 @@ private:
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
+ // Pointer to where the ABI inserts special kernel arguments separate from the
+ // user arguments. This is an offset from the KernargSegmentPtr.
+ bool ImplicitArgPtr : 1;
+
+ // The hard-wired high half of the address of the global information table
+ // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
+ // current hardware only allows a 16-bit value.
+ unsigned GITPtrHigh;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -201,24 +201,34 @@ public:
int Lane = -1;
SpilledReg() = default;
- SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+ SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
+ struct SGPRSpillVGPRCSR {
+ // VGPR used for SGPR spills
+ unsigned VGPR;
+
+ // If the VGPR is a CSR, the stack slot used to save/restore it in the
+ // prolog/epilog.
+ Optional<int> FI;
+
+ SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
+ };
+
private:
// SGPR->VGPR spilling support.
- typedef std::pair<unsigned, unsigned> SpillRegMask;
+ using SpillRegMask = std::pair<unsigned, unsigned>;
// Track the VGPR and lane index for each subregister of the SGPR spilled to
// the frame index used as the key.
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
unsigned NumVGPRSpillLanes = 0;
- SmallVector<unsigned, 2> SpillVGPRs;
+ SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
public:
-
SIMachineFunctionInfo(const MachineFunction &MF);
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
@@ -227,13 +237,25 @@ public:
ArrayRef<SpilledReg>() : makeArrayRef(I->second);
}
+ ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
+ return SpillVGPRs;
+ }
+
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
- bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
- unsigned getTIDReg() const { return TIDReg; };
+ bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
+ unsigned getTIDReg() const { return TIDReg; }
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ unsigned getBytesInStackArgArea() const {
+ return BytesInStackArgArea;
+ }
+
+ void setBytesInStackArgArea(unsigned Bytes) {
+ BytesInStackArgArea = Bytes;
+ }
+
// Add user SGPRs.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
unsigned addDispatchPtr(const SIRegisterInfo &TRI);
@@ -245,37 +267,51 @@ public:
// Add system SGPRs.
unsigned addWorkGroupIDX() {
- WorkGroupIDXSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDXSystemSGPR;
+ return ArgInfo.WorkGroupIDX.getRegister();
}
unsigned addWorkGroupIDY() {
- WorkGroupIDYSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDYSystemSGPR;
+ return ArgInfo.WorkGroupIDY.getRegister();
}
unsigned addWorkGroupIDZ() {
- WorkGroupIDZSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupIDZSystemSGPR;
+ return ArgInfo.WorkGroupIDZ.getRegister();
}
unsigned addWorkGroupInfo() {
- WorkGroupInfoSystemSGPR = getNextSystemSGPR();
+ ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return WorkGroupInfoSystemSGPR;
+ return ArgInfo.WorkGroupInfo.getRegister();
+ }
+
+ // Add special VGPR inputs
+ void setWorkItemIDX(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDX = Arg;
+ }
+
+ void setWorkItemIDY(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDY = Arg;
+ }
+
+ void setWorkItemIDZ(ArgDescriptor Arg) {
+ ArgInfo.WorkItemIDZ = Arg;
}
unsigned addPrivateSegmentWaveByteOffset() {
- PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
+ ArgInfo.PrivateSegmentWaveByteOffset
+ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
- return PrivateSegmentWaveByteOffsetSystemSGPR;
+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
void setPrivateSegmentWaveByteOffset(unsigned Reg) {
- PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
+ ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
bool hasPrivateSegmentBuffer() const {
@@ -346,10 +382,35 @@ public:
return WorkItemIDZ;
}
+ bool hasImplicitArgPtr() const {
+ return ImplicitArgPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}
+ AMDGPUFunctionArgInfo &getArgInfo() {
+ return ArgInfo;
+ }
+
+ const AMDGPUFunctionArgInfo &getArgInfo() const {
+ return ArgInfo;
+ }
+
+ std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ return ArgInfo.getPreloadedValue(Value);
+ }
+
+ unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ return ArgInfo.getPreloadedValue(Value).first->getRegister();
+ }
+
+ unsigned getGITPtrHigh() const {
+ return GITPtrHigh;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -359,7 +420,7 @@ public:
}
unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
- return PrivateSegmentWaveByteOffsetSystemSGPR;
+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
/// \brief Returns the physical register reserved for use as the resource
@@ -401,11 +462,11 @@ public:
}
unsigned getQueuePtrUserSGPR() const {
- return QueuePtrUserSGPR;
+ return ArgInfo.QueuePtr.getRegister();
}
unsigned getImplicitBufferPtrUserSGPR() const {
- return ImplicitBufferPtrUserSGPR;
+ return ArgInfo.ImplicitBufferPtr.getRegister();
}
bool hasSpilledSGPRs() const {
@@ -537,13 +598,13 @@ public:
switch (Dim) {
case 0:
assert(hasWorkGroupIDX());
- return WorkGroupIDXSystemSGPR;
+ return ArgInfo.WorkGroupIDX.getRegister();
case 1:
assert(hasWorkGroupIDY());
- return WorkGroupIDYSystemSGPR;
+ return ArgInfo.WorkGroupIDY.getRegister();
case 2:
assert(hasWorkGroupIDZ());
- return WorkGroupIDZSystemSGPR;
+ return ArgInfo.WorkGroupIDZ.getRegister();
}
llvm_unreachable("unexpected dimension");
}