diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 142 |
1 files changed, 77 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ef0186f7d57f..cf1629fda0af 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -236,23 +236,29 @@ template <> struct MappingTraits<SIArgumentInfo> { struct SIMode { bool IEEE = true; bool DX10Clamp = true; - bool FP32Denormals = true; - bool FP64FP16Denormals = true; + bool FP32InputDenormals = true; + bool FP32OutputDenormals = true; + bool FP64FP16InputDenormals = true; + bool FP64FP16OutputDenormals = true; SIMode() = default; SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { IEEE = Mode.IEEE; DX10Clamp = Mode.DX10Clamp; - FP32Denormals = Mode.FP32Denormals; - FP64FP16Denormals = Mode.FP64FP16Denormals; + FP32InputDenormals = Mode.FP32InputDenormals; + FP32OutputDenormals = Mode.FP32OutputDenormals; + FP64FP16InputDenormals = Mode.FP64FP16InputDenormals; + FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals; } bool operator ==(const SIMode Other) const { return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && - FP32Denormals == Other.FP32Denormals && - FP64FP16Denormals == Other.FP64FP16Denormals; + FP32InputDenormals == Other.FP32InputDenormals && + FP32OutputDenormals == Other.FP32OutputDenormals && + FP64FP16InputDenormals == Other.FP64FP16InputDenormals && + FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; } }; @@ -260,8 +266,10 @@ template <> struct MappingTraits<SIMode> { static void mapping(IO &YamlIO, SIMode &Mode) { YamlIO.mapOptional("ieee", Mode.IEEE, true); YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); - YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true); - YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true); + YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true); + YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true); + YamlIO.mapOptional("fp64-fp16-input-denormals", 
Mode.FP64FP16InputDenormals, true); + YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true); } }; @@ -276,7 +284,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { uint32_t HighBitsOf32BitAddress = 0; StringValue ScratchRSrcReg = "$private_rsrc_reg"; - StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg"; StringValue FrameOffsetReg = "$fp_reg"; StringValue StackPtrOffsetReg = "$sp_reg"; @@ -303,8 +310,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false); YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg, StringValue("$private_rsrc_reg")); - YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg, - StringValue("$scratch_wave_offset_reg")); YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg, StringValue("$fp_reg")); YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, @@ -323,20 +328,20 @@ template <> struct MappingTraits<SIMachineFunctionInfo> { class SIMachineFunctionInfo final : public AMDGPUMachineFunction { friend class GCNTargetMachine; - unsigned TIDReg = AMDGPU::NoRegister; + Register TIDReg = AMDGPU::NoRegister; // Registers that may be reserved for spilling purposes. These may be the same // as the input registers. - unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; - unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; + Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; - // This is the current function's incremented size from the kernel's scratch - // wave offset register. For an entry function, this is exactly the same as - // the ScratchWaveOffsetReg. - unsigned FrameOffsetReg = AMDGPU::FP_REG; + // This is the unswizzled offset from the current dispatch's scratch wave + // base to the beginning of the current function's frame. + Register FrameOffsetReg = AMDGPU::FP_REG; - // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 
- unsigned StackPtrOffsetReg = AMDGPU::SP_REG; + // This is an ABI register used in the non-entry calling convention to + // communicate the unswizzled offset from the current dispatch's scratch wave + // base to the beginning of the new function's frame. + Register StackPtrOffsetReg = AMDGPU::SP_REG; AMDGPUFunctionArgInfo ArgInfo; @@ -429,11 +434,11 @@ private: public: struct SpilledReg { - unsigned VGPR = 0; + Register VGPR; int Lane = -1; SpilledReg() = default; - SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} + SpilledReg(Register R, int L) : VGPR (R), Lane (L) {} bool hasLane() { return Lane != -1;} bool hasReg() { return VGPR != 0;} @@ -441,13 +446,13 @@ public: struct SGPRSpillVGPRCSR { // VGPR used for SGPR spills - unsigned VGPR; + Register VGPR; // If the VGPR is a CSR, the stack slot used to save/restore it in the // prolog/epilog. Optional<int> FI; - SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} + SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {} }; struct VGPRSpillToAGPR { @@ -457,12 +462,9 @@ public: SparseBitVector<> WWMReservedRegs; - void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); } + void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); } private: - // SGPR->VGPR spilling support. - using SpillRegMask = std::pair<unsigned, unsigned>; - // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; @@ -480,9 +482,17 @@ private: public: // FIXME /// If this is set, an SGPR used for save/restore of the register used for the /// frame pointer. - unsigned SGPRForFPSaveRestoreCopy = 0; + Register SGPRForFPSaveRestoreCopy; Optional<int> FramePointerSaveIndex; + /// If this is set, an SGPR used for save/restore of the register used for the + /// base pointer. 
+ Register SGPRForBPSaveRestoreCopy; + Optional<int> BasePointerSaveIndex; + + Register VGPRReservedForSGPRSpill; + bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); + public: SIMachineFunctionInfo(const MachineFunction &MF); @@ -498,6 +508,14 @@ public: return SpillVGPRs; } + void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) { + SpillVGPRs[Index].VGPR = NewVGPR; + SpillVGPRs[Index].FI = newFI; + VGPRReservedForSGPRSpill = NewVGPR; + } + + bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF); + ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const { return SpillAGPR; } @@ -515,12 +533,13 @@ public: bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); + bool reserveVGPRforSGPRSpills(MachineFunction &MF); bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); void removeDeadFrameIndices(MachineFrameInfo &MFI); bool hasCalculatedTID() const { return TIDReg != 0; }; - unsigned getTIDReg() const { return TIDReg; }; - void setTIDReg(unsigned Reg) { TIDReg = Reg; } + Register getTIDReg() const { return TIDReg; }; + void setTIDReg(Register Reg) { TIDReg = Reg; } unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; @@ -531,34 +550,34 @@ public: } // Add user SGPRs. 
- unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); - unsigned addDispatchPtr(const SIRegisterInfo &TRI); - unsigned addQueuePtr(const SIRegisterInfo &TRI); - unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); - unsigned addDispatchID(const SIRegisterInfo &TRI); - unsigned addFlatScratchInit(const SIRegisterInfo &TRI); - unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); + Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI); + Register addDispatchPtr(const SIRegisterInfo &TRI); + Register addQueuePtr(const SIRegisterInfo &TRI); + Register addKernargSegmentPtr(const SIRegisterInfo &TRI); + Register addDispatchID(const SIRegisterInfo &TRI); + Register addFlatScratchInit(const SIRegisterInfo &TRI); + Register addImplicitBufferPtr(const SIRegisterInfo &TRI); // Add system SGPRs. - unsigned addWorkGroupIDX() { + Register addWorkGroupIDX() { ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDX.getRegister(); } - unsigned addWorkGroupIDY() { + Register addWorkGroupIDY() { ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDY.getRegister(); } - unsigned addWorkGroupIDZ() { + Register addWorkGroupIDZ() { ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; return ArgInfo.WorkGroupIDZ.getRegister(); } - unsigned addWorkGroupInfo() { + Register addWorkGroupInfo() { ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; return ArgInfo.WorkGroupInfo.getRegister(); @@ -577,14 +596,14 @@ public: ArgInfo.WorkItemIDZ = Arg; } - unsigned addPrivateSegmentWaveByteOffset() { + Register addPrivateSegmentWaveByteOffset() { ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(getNextSystemSGPR()); NumSystemSGPRs += 1; return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); } - void 
setPrivateSegmentWaveByteOffset(unsigned Reg) { + void setPrivateSegmentWaveByteOffset(Register Reg) { ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); } @@ -660,13 +679,13 @@ public: return ArgInfo; } - std::pair<const ArgDescriptor *, const TargetRegisterClass *> + std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT> getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { return ArgInfo.getPreloadedValue(Value); } Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { - auto Arg = ArgInfo.getPreloadedValue(Value).first; + auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value)); return Arg ? Arg->getRegister() : Register(); } @@ -674,6 +693,8 @@ public: return GITPtrHigh; } + Register getGITPtrLoReg(const MachineFunction &MF) const; + uint32_t get32BitAddressHighBits() const { return HighBitsOf32BitAddress; } @@ -690,35 +711,31 @@ public: return NumUserSGPRs + NumSystemSGPRs; } - unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { + Register getPrivateSegmentWaveByteOffsetSystemSGPR() const { return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); } /// Returns the physical register reserved for use as the resource /// descriptor for scratch accesses. 
- unsigned getScratchRSrcReg() const { + Register getScratchRSrcReg() const { return ScratchRSrcReg; } - void setScratchRSrcReg(unsigned Reg) { + void setScratchRSrcReg(Register Reg) { assert(Reg != 0 && "Should never be unset"); ScratchRSrcReg = Reg; } - unsigned getScratchWaveOffsetReg() const { - return ScratchWaveOffsetReg; - } - - unsigned getFrameOffsetReg() const { + Register getFrameOffsetReg() const { return FrameOffsetReg; } - void setFrameOffsetReg(unsigned Reg) { + void setFrameOffsetReg(Register Reg) { assert(Reg != 0 && "Should never be unset"); FrameOffsetReg = Reg; } - void setStackPtrOffsetReg(unsigned Reg) { + void setStackPtrOffsetReg(Register Reg) { assert(Reg != 0 && "Should never be unset"); StackPtrOffsetReg = Reg; } @@ -727,20 +744,15 @@ public: // NoRegister. This is mostly a workaround for MIR tests where state that // can't be directly computed from the function is not preserved in serialized // MIR. - unsigned getStackPtrOffsetReg() const { + Register getStackPtrOffsetReg() const { return StackPtrOffsetReg; } - void setScratchWaveOffsetReg(unsigned Reg) { - assert(Reg != 0 && "Should never be unset"); - ScratchWaveOffsetReg = Reg; - } - - unsigned getQueuePtrUserSGPR() const { + Register getQueuePtrUserSGPR() const { return ArgInfo.QueuePtr.getRegister(); } - unsigned getImplicitBufferPtrUserSGPR() const { + Register getImplicitBufferPtrUserSGPR() const { return ArgInfo.ImplicitBufferPtr.getRegister(); } @@ -853,7 +865,7 @@ public: } /// \returns SGPR used for \p Dim's work group ID. - unsigned getWorkGroupIDSGPR(unsigned Dim) const { + Register getWorkGroupIDSGPR(unsigned Dim) const { switch (Dim) { case 0: assert(hasWorkGroupIDX()); |