aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h142
1 files changed, 77 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef0186f7d57f..cf1629fda0af 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,23 +236,29 @@ template <> struct MappingTraits<SIArgumentInfo> {
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
- bool FP32Denormals = true;
- bool FP64FP16Denormals = true;
+ bool FP32InputDenormals = true;
+ bool FP32OutputDenormals = true;
+ bool FP64FP16InputDenormals = true;
+ bool FP64FP16OutputDenormals = true;
SIMode() = default;
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
- FP32Denormals = Mode.FP32Denormals;
- FP64FP16Denormals = Mode.FP64FP16Denormals;
+ FP32InputDenormals = Mode.FP32InputDenormals;
+ FP32OutputDenormals = Mode.FP32OutputDenormals;
+ FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
+ FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
}
bool operator ==(const SIMode Other) const {
return IEEE == Other.IEEE &&
DX10Clamp == Other.DX10Clamp &&
- FP32Denormals == Other.FP32Denormals &&
- FP64FP16Denormals == Other.FP64FP16Denormals;
+ FP32InputDenormals == Other.FP32InputDenormals &&
+ FP32OutputDenormals == Other.FP32OutputDenormals &&
+ FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
+ FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
}
};
@@ -260,8 +266,10 @@ template <> struct MappingTraits<SIMode> {
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
- YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
- YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
+ YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
+ YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
+ YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
+ YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
}
};
@@ -276,7 +284,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint32_t HighBitsOf32BitAddress = 0;
StringValue ScratchRSrcReg = "$private_rsrc_reg";
- StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
@@ -303,8 +310,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
StringValue("$private_rsrc_reg"));
- YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
- StringValue("$scratch_wave_offset_reg"));
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
StringValue("$fp_reg"));
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
@@ -323,20 +328,20 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
friend class GCNTargetMachine;
- unsigned TIDReg = AMDGPU::NoRegister;
+ Register TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
- unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
- unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
+ Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
- // This is the current function's incremented size from the kernel's scratch
- // wave offset register. For an entry function, this is exactly the same as
- // the ScratchWaveOffsetReg.
- unsigned FrameOffsetReg = AMDGPU::FP_REG;
+ // This is the the unswizzled offset from the current dispatch's scratch wave
+ // base to the beginning of the current function's frame.
+ Register FrameOffsetReg = AMDGPU::FP_REG;
- // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
- unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
+ // This is an ABI register used in the non-entry calling convention to
+ // communicate the unswizzled offset from the current dispatch's scratch wave
+ // base to the beginning of the new function's frame.
+ Register StackPtrOffsetReg = AMDGPU::SP_REG;
AMDGPUFunctionArgInfo ArgInfo;
@@ -429,11 +434,11 @@ private:
public:
struct SpilledReg {
- unsigned VGPR = 0;
+ Register VGPR;
int Lane = -1;
SpilledReg() = default;
- SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
+ SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != 0;}
@@ -441,13 +446,13 @@ public:
struct SGPRSpillVGPRCSR {
// VGPR used for SGPR spills
- unsigned VGPR;
+ Register VGPR;
// If the VGPR is a CSR, the stack slot used to save/restore it in the
// prolog/epilog.
Optional<int> FI;
- SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
+ SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
};
struct VGPRSpillToAGPR {
@@ -457,12 +462,9 @@ public:
SparseBitVector<> WWMReservedRegs;
- void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+ void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }
private:
- // SGPR->VGPR spilling support.
- using SpillRegMask = std::pair<unsigned, unsigned>;
-
// Track VGPR + wave index for each subregister of the SGPR spilled to
// frameindex key.
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
@@ -480,9 +482,17 @@ private:
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
- unsigned SGPRForFPSaveRestoreCopy = 0;
+ Register SGPRForFPSaveRestoreCopy;
Optional<int> FramePointerSaveIndex;
+ /// If this is set, an SGPR used for save/restore of the register used for the
+ /// base pointer.
+ Register SGPRForBPSaveRestoreCopy;
+ Optional<int> BasePointerSaveIndex;
+
+ Register VGPRReservedForSGPRSpill;
+ bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
+
public:
SIMachineFunctionInfo(const MachineFunction &MF);
@@ -498,6 +508,14 @@ public:
return SpillVGPRs;
}
+ void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
+ SpillVGPRs[Index].VGPR = NewVGPR;
+ SpillVGPRs[Index].FI = newFI;
+ VGPRReservedForSGPRSpill = NewVGPR;
+ }
+
+ bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
+
ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
return SpillAGPR;
}
@@ -515,12 +533,13 @@ public:
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ bool reserveVGPRforSGPRSpills(MachineFunction &MF);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
- unsigned getTIDReg() const { return TIDReg; };
- void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ Register getTIDReg() const { return TIDReg; };
+ void setTIDReg(Register Reg) { TIDReg = Reg; }
unsigned getBytesInStackArgArea() const {
return BytesInStackArgArea;
@@ -531,34 +550,34 @@ public:
}
// Add user SGPRs.
- unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
- unsigned addDispatchPtr(const SIRegisterInfo &TRI);
- unsigned addQueuePtr(const SIRegisterInfo &TRI);
- unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
- unsigned addDispatchID(const SIRegisterInfo &TRI);
- unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
- unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
+ Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
+ Register addDispatchPtr(const SIRegisterInfo &TRI);
+ Register addQueuePtr(const SIRegisterInfo &TRI);
+ Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
+ Register addDispatchID(const SIRegisterInfo &TRI);
+ Register addFlatScratchInit(const SIRegisterInfo &TRI);
+ Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
- unsigned addWorkGroupIDX() {
+ Register addWorkGroupIDX() {
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDX.getRegister();
}
- unsigned addWorkGroupIDY() {
+ Register addWorkGroupIDY() {
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDY.getRegister();
}
- unsigned addWorkGroupIDZ() {
+ Register addWorkGroupIDZ() {
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDZ.getRegister();
}
- unsigned addWorkGroupInfo() {
+ Register addWorkGroupInfo() {
ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupInfo.getRegister();
@@ -577,14 +596,14 @@ public:
ArgInfo.WorkItemIDZ = Arg;
}
- unsigned addPrivateSegmentWaveByteOffset() {
+ Register addPrivateSegmentWaveByteOffset() {
ArgInfo.PrivateSegmentWaveByteOffset
= ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
- void setPrivateSegmentWaveByteOffset(unsigned Reg) {
+ void setPrivateSegmentWaveByteOffset(Register Reg) {
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
@@ -660,13 +679,13 @@ public:
return ArgInfo;
}
- std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
return ArgInfo.getPreloadedValue(Value);
}
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
- auto Arg = ArgInfo.getPreloadedValue(Value).first;
+ auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
return Arg ? Arg->getRegister() : Register();
}
@@ -674,6 +693,8 @@ public:
return GITPtrHigh;
}
+ Register getGITPtrLoReg(const MachineFunction &MF) const;
+
uint32_t get32BitAddressHighBits() const {
return HighBitsOf32BitAddress;
}
@@ -690,35 +711,31 @@ public:
return NumUserSGPRs + NumSystemSGPRs;
}
- unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
+ Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
/// Returns the physical register reserved for use as the resource
/// descriptor for scratch accesses.
- unsigned getScratchRSrcReg() const {
+ Register getScratchRSrcReg() const {
return ScratchRSrcReg;
}
- void setScratchRSrcReg(unsigned Reg) {
+ void setScratchRSrcReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
ScratchRSrcReg = Reg;
}
- unsigned getScratchWaveOffsetReg() const {
- return ScratchWaveOffsetReg;
- }
-
- unsigned getFrameOffsetReg() const {
+ Register getFrameOffsetReg() const {
return FrameOffsetReg;
}
- void setFrameOffsetReg(unsigned Reg) {
+ void setFrameOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
FrameOffsetReg = Reg;
}
- void setStackPtrOffsetReg(unsigned Reg) {
+ void setStackPtrOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
StackPtrOffsetReg = Reg;
}
@@ -727,20 +744,15 @@ public:
// NoRegister. This is mostly a workaround for MIR tests where state that
// can't be directly computed from the function is not preserved in serialized
// MIR.
- unsigned getStackPtrOffsetReg() const {
+ Register getStackPtrOffsetReg() const {
return StackPtrOffsetReg;
}
- void setScratchWaveOffsetReg(unsigned Reg) {
- assert(Reg != 0 && "Should never be unset");
- ScratchWaveOffsetReg = Reg;
- }
-
- unsigned getQueuePtrUserSGPR() const {
+ Register getQueuePtrUserSGPR() const {
return ArgInfo.QueuePtr.getRegister();
}
- unsigned getImplicitBufferPtrUserSGPR() const {
+ Register getImplicitBufferPtrUserSGPR() const {
return ArgInfo.ImplicitBufferPtr.getRegister();
}
@@ -853,7 +865,7 @@ public:
}
/// \returns SGPR used for \p Dim's work group ID.
- unsigned getWorkGroupIDSGPR(unsigned Dim) const {
+ Register getWorkGroupIDSGPR(unsigned Dim) const {
switch (Dim) {
case 0:
assert(hasWorkGroupIDX());