Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r--   llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h   927
1 file changed, 927 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
new file mode 100644
index 000000000000..7d70c786b594
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -0,0 +1,927 @@
+//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
+
+#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <array>
+#include <cassert>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class MachineFrameInfo;
+class MachineFunction;
+class TargetRegisterClass;
+
+class AMDGPUPseudoSourceValue : public PseudoSourceValue {
+public:
+  enum AMDGPUPSVKind : unsigned {
+    PSVBuffer = PseudoSourceValue::TargetCustom,
+    PSVImage,
+    GWSResource
+  };
+
+protected:
+  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+      : PseudoSourceValue(Kind, TII) {}
+
+public:
+  bool isConstant(const MachineFrameInfo *) const override {
+    // This should probably be true for most images, but we will start by being
+    // conservative.
+    return false;
+  }
+
+  bool isAliased(const MachineFrameInfo *) const override {
+    return true;
+  }
+
+  bool mayAlias(const MachineFrameInfo *) const override {
+    return true;
+  }
+};
+
+class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVBuffer;
+  }
+};
+
+class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  // TODO: Is the img rsrc useful?
+  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVImage;
+  }
+};
+
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GWSResource;
+  }
+
+  // These are inaccessible memory from IR.
+  bool isAliased(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  // These are inaccessible memory from IR.
+  bool mayAlias(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  void printCustom(raw_ostream &OS) const override {
+    OS << "GWSResource";
+  }
+};
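
The classof() overloads above are what hook these classes into LLVM's custom
RTTI, letting a pass recover the concrete kind from the opaque
PseudoSourceValue attached to a MachineMemOperand. A minimal consumer sketch,
not part of this patch (assumes a MachineMemOperand *MMO is in scope):

    const PseudoSourceValue *PSV = MMO->getPseudoValue();
    if (PSV && isa<AMDGPUGWSResourcePseudoSourceValue>(PSV)) {
      // GWS accesses never alias IR-visible memory; see the isAliased() and
      // mayAlias() overrides above.
    }
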
+
+namespace yaml {
+
+struct SIArgument {
+  bool IsRegister;
+  union {
+    StringValue RegisterName;
+    unsigned StackOffset;
+  };
+  Optional<unsigned> Mask;
+
+  // Default constructor, which creates a stack argument.
+  SIArgument() : IsRegister(false), StackOffset(0) {}
+  SIArgument(const SIArgument &Other) {
+    IsRegister = Other.IsRegister;
+    if (IsRegister) {
+      ::new ((void *)std::addressof(RegisterName))
+          StringValue(Other.RegisterName);
+    } else
+      StackOffset = Other.StackOffset;
+    Mask = Other.Mask;
+  }
+  SIArgument &operator=(const SIArgument &Other) {
+    IsRegister = Other.IsRegister;
+    if (IsRegister) {
+      ::new ((void *)std::addressof(RegisterName))
+          StringValue(Other.RegisterName);
+    } else
+      StackOffset = Other.StackOffset;
+    Mask = Other.Mask;
+    return *this;
+  }
+  ~SIArgument() {
+    if (IsRegister)
+      RegisterName.~StringValue();
+  }
+
+  // Helper to create a register or stack argument.
+  static inline SIArgument createArgument(bool IsReg) {
+    if (IsReg)
+      return SIArgument(IsReg);
+    return SIArgument();
+  }
+
+private:
+  // Construct a register argument.
+  SIArgument(bool) : IsRegister(true), RegisterName() {}
+};
+
+template <> struct MappingTraits<SIArgument> {
+  static void mapping(IO &YamlIO, SIArgument &A) {
+    if (YamlIO.outputting()) {
+      if (A.IsRegister)
+        YamlIO.mapRequired("reg", A.RegisterName);
+      else
+        YamlIO.mapRequired("offset", A.StackOffset);
+    } else {
+      auto Keys = YamlIO.keys();
+      if (is_contained(Keys, "reg")) {
+        A = SIArgument::createArgument(true);
+        YamlIO.mapRequired("reg", A.RegisterName);
+      } else if (is_contained(Keys, "offset"))
+        YamlIO.mapRequired("offset", A.StackOffset);
+      else
+        YamlIO.setError("missing required key 'reg' or 'offset'");
+    }
+    YamlIO.mapOptional("mask", A.Mask);
+  }
+  static const bool flow = true;
+};
+
+struct SIArgumentInfo {
+  Optional<SIArgument> PrivateSegmentBuffer;
+  Optional<SIArgument> DispatchPtr;
+  Optional<SIArgument> QueuePtr;
+  Optional<SIArgument> KernargSegmentPtr;
+  Optional<SIArgument> DispatchID;
+  Optional<SIArgument> FlatScratchInit;
+  Optional<SIArgument> PrivateSegmentSize;
+
+  Optional<SIArgument> WorkGroupIDX;
+  Optional<SIArgument> WorkGroupIDY;
+  Optional<SIArgument> WorkGroupIDZ;
+  Optional<SIArgument> WorkGroupInfo;
+  Optional<SIArgument> PrivateSegmentWaveByteOffset;
+
+  Optional<SIArgument> ImplicitArgPtr;
+  Optional<SIArgument> ImplicitBufferPtr;
+
+  Optional<SIArgument> WorkItemIDX;
+  Optional<SIArgument> WorkItemIDY;
+  Optional<SIArgument> WorkItemIDZ;
+};
+
+template <> struct MappingTraits<SIArgumentInfo> {
+  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
+    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
+    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
+    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
+    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
+    YamlIO.mapOptional("dispatchID", AI.DispatchID);
+    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
+    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+
+    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
+    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
+    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
+    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+    YamlIO.mapOptional("privateSegmentWaveByteOffset",
+                       AI.PrivateSegmentWaveByteOffset);
+
+    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
+    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
+
+    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
+    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
+    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
+  }
+};
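
Because MappingTraits<SIArgument> sets flow = true, each argument serializes
as an inline mapping holding exactly one of 'reg' or 'offset', plus the
optional 'mask'. An illustrative MIR fragment (register assignments and the
10-bit mask value are hypothetical, not taken from this patch):

    argumentInfo:
      privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
      kernargSegmentPtr:    { reg: '$sgpr4_sgpr5' }
      workGroupIDX:         { reg: '$sgpr6' }
      workItemIDX:          { reg: '$vgpr0', mask: 1023 }

An argument passed on the stack would instead appear as, e.g., { offset: 4 }.
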
+
+// Default to default mode for default calling convention.
+struct SIMode {
+  bool IEEE = true;
+  bool DX10Clamp = true;
+
+  SIMode() = default;
+
+  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+    IEEE = Mode.IEEE;
+    DX10Clamp = Mode.DX10Clamp;
+  }
+
+  bool operator ==(const SIMode Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+  }
+};
+
+template <> struct MappingTraits<SIMode> {
+  static void mapping(IO &YamlIO, SIMode &Mode) {
+    YamlIO.mapOptional("ieee", Mode.IEEE, true);
+    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+  }
+};
+
+struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+  uint64_t ExplicitKernArgSize = 0;
+  unsigned MaxKernArgAlign = 0;
+  unsigned LDSSize = 0;
+  bool IsEntryFunction = false;
+  bool NoSignedZerosFPMath = false;
+  bool MemoryBound = false;
+  bool WaveLimiter = false;
+  uint32_t HighBitsOf32BitAddress = 0;
+
+  StringValue ScratchRSrcReg = "$private_rsrc_reg";
+  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
+  StringValue FrameOffsetReg = "$fp_reg";
+  StringValue StackPtrOffsetReg = "$sp_reg";
+
+  Optional<SIArgumentInfo> ArgInfo;
+  SIMode Mode;
+
+  SIMachineFunctionInfo() = default;
+  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
+                        const TargetRegisterInfo &TRI);
+
+  void mappingImpl(yaml::IO &YamlIO) override;
+  ~SIMachineFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<SIMachineFunctionInfo> {
+  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
+    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
+                       UINT64_C(0));
+    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
+    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
+    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
+                       StringValue("$private_rsrc_reg"));
+    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
+                       StringValue("$scratch_wave_offset_reg"));
+    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
+                       StringValue("$fp_reg"));
+    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
+                       StringValue("$sp_reg"));
+    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+    YamlIO.mapOptional("highBitsOf32BitAddress",
+                       MFI.HighBitsOf32BitAddress, 0u);
+  }
+};
+
+} // end namespace yaml
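
Taken together, these traits serialize a machineFunctionInfo block like the
following in a MIR file (all field values here are illustrative; fields equal
to their defaults, such as scratchRSrcReg = '$private_rsrc_reg' or a
default-valued mode key, are omitted on output by mapOptional):

    machineFunctionInfo:
      explicitKernArgSize: 8
      maxKernArgAlign:     8
      isEntryFunction:     true
      scratchRSrcReg:      '$sgpr96_sgpr97_sgpr98_sgpr99'
      stackPtrOffsetReg:   '$sgpr32'
      argumentInfo:
        kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
      mode:
        ieee: false
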
+
+/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+  friend class GCNTargetMachine;
+
+  unsigned TIDReg = AMDGPU::NoRegister;
+
+  // Registers that may be reserved for spilling purposes. These may be the same
+  // as the input registers.
+  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
+  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
+
+  // This is the current function's incremented size from the kernel's scratch
+  // wave offset register. For an entry function, this is exactly the same as
+  // the ScratchWaveOffsetReg.
+  unsigned FrameOffsetReg = AMDGPU::FP_REG;
+
+  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
+  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
+
+  AMDGPUFunctionArgInfo ArgInfo;
+
+  // State of MODE register, assumed FP mode.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
+  // Graphics info.
+  unsigned PSInputAddr = 0;
+  unsigned PSInputEnable = 0;
+
+  /// Number of bytes of arguments this function has on the stack. If the callee
+  /// is expected to restore the argument stack this should be a multiple of 16,
+  /// all usable during a tail call.
+  ///
+  /// The alternative would forbid tail call optimisation in some cases: if we
+  /// want to transfer control from a function with 8-bytes of stack-argument
+  /// space to a function with 16-bytes then misalignment of this value would
+  /// make a stack adjustment necessary, which could not be undone by the
+  /// callee.
+  unsigned BytesInStackArgArea = 0;
+
+  bool ReturnsVoid = true;
+
+  // A pair of default/requested minimum/maximum flat work group sizes.
+  // Minimum - first, maximum - second.
+  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
+
+  // A pair of default/requested minimum/maximum number of waves per execution
+  // unit. Minimum - first, maximum - second.
+  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
+
+  DenseMap<const Value *,
+           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
+  DenseMap<const Value *,
+           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
+
+private:
+  unsigned LDSWaveSpillSize = 0;
+  unsigned NumUserSGPRs = 0;
+  unsigned NumSystemSGPRs = 0;
+
+  bool HasSpilledSGPRs = false;
+  bool HasSpilledVGPRs = false;
+  bool HasNonSpillStackObjects = false;
+  bool IsStackRealigned = false;
+
+  unsigned NumSpilledSGPRs = 0;
+  unsigned NumSpilledVGPRs = 0;
+
+  // Feature bits required for inputs passed in user SGPRs.
+  bool PrivateSegmentBuffer : 1;
+  bool DispatchPtr : 1;
+  bool QueuePtr : 1;
+  bool KernargSegmentPtr : 1;
+  bool DispatchID : 1;
+  bool FlatScratchInit : 1;
+
+  // Feature bits required for inputs passed in system SGPRs.
+  bool WorkGroupIDX : 1; // Always initialized.
+  bool WorkGroupIDY : 1;
+  bool WorkGroupIDZ : 1;
+  bool WorkGroupInfo : 1;
+  bool PrivateSegmentWaveByteOffset : 1;
+
+  bool WorkItemIDX : 1; // Always initialized.
+  bool WorkItemIDY : 1;
+  bool WorkItemIDZ : 1;
+
+  // Private memory buffer
+  // Compute directly in sgpr[0:1]
+  // Other shaders indirect 64-bits at sgpr[0:1]
+  bool ImplicitBufferPtr : 1;
+
+  // Pointer to where the ABI inserts special kernel arguments separate from the
+  // user arguments. This is an offset from the KernargSegmentPtr.
+  bool ImplicitArgPtr : 1;
+
+  // The hard-wired high half of the address of the global information table
+  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
+  // current hardware only allows a 16 bit value.
+  unsigned GITPtrHigh;
+
+  unsigned HighBitsOf32BitAddress;
+  unsigned GDSSize;
+
+  // Current recorded maximum possible occupancy.
+  unsigned Occupancy;
+
+  MCPhysReg getNextUserSGPR() const;
+
+  MCPhysReg getNextSystemSGPR() const;
+
+public:
+  struct SpilledReg {
+    unsigned VGPR = 0;
+    int Lane = -1;
+
+    SpilledReg() = default;
+    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
+
+    bool hasLane() { return Lane != -1;}
+    bool hasReg() { return VGPR != 0;}
+  };
+
+  struct SGPRSpillVGPRCSR {
+    // VGPR used for SGPR spills
+    unsigned VGPR;
+
+    // If the VGPR is a CSR, the stack slot used to save/restore it in the
+    // prolog/epilog.
+    Optional<int> FI;
+
+    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
+  };
+
+  struct VGPRSpillToAGPR {
+    SmallVector<MCPhysReg, 32> Lanes;
+    bool FullyAllocated = false;
+  };
+
+  SparseBitVector<> WWMReservedRegs;
+
+  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+
+private:
+  // SGPR->VGPR spilling support.
+  using SpillRegMask = std::pair<unsigned, unsigned>;
+
+  // Track VGPR + wave index for each subregister of the SGPR spilled to
+  // frameindex key.
+  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+  unsigned NumVGPRSpillLanes = 0;
+  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
+
+  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+  // AGPRs used for VGPR spills.
+  SmallVector<MCPhysReg, 32> SpillAGPR;
+
+  // VGPRs used for AGPR spills.
+  SmallVector<MCPhysReg, 32> SpillVGPR;
+
+public: // FIXME
+  /// If this is set, an SGPR used for save/restore of the register used for the
+  /// frame pointer.
+  unsigned SGPRForFPSaveRestoreCopy = 0;
+  Optional<int> FramePointerSaveIndex;
+
+public:
+  SIMachineFunctionInfo(const MachineFunction &MF);
+
+  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
+
+  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+    auto I = SGPRToVGPRSpills.find(FrameIndex);
+    return (I == SGPRToVGPRSpills.end()) ?
+      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+  }
+
+  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
+    return SpillVGPRs;
+  }
+
+  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+    return SpillAGPR;
+  }
+
+  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+    return SpillVGPR;
+  }
+
+  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+    auto I = VGPRToAGPRSpills.find(FrameIndex);
+    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+                                         : I->second.Lanes[Lane];
+  }
+
+  AMDGPU::SIModeRegisterDefaults getMode() const {
+    return Mode;
+  }
+
+  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+                                 unsigned NumLane) const;
+  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+  void removeDeadFrameIndices(MachineFrameInfo &MFI);
+
+  bool hasCalculatedTID() const { return TIDReg != 0; };
+  unsigned getTIDReg() const { return TIDReg; };
+  void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+
+  unsigned getBytesInStackArgArea() const {
+    return BytesInStackArgArea;
+  }
+
+  void setBytesInStackArgArea(unsigned Bytes) {
+    BytesInStackArgArea = Bytes;
+  }
+
+  // Add user SGPRs.
+  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
+  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
+  unsigned addQueuePtr(const SIRegisterInfo &TRI);
+  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
+  unsigned addDispatchID(const SIRegisterInfo &TRI);
+  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
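
A sketch of how frame lowering might walk the per-frame-index spill table
exposed by getSGPRToVGPRSpills() above (assumes MF and a frame index FI are in
scope; emitReadLane is a hypothetical helper standing in for the real
lane-copy emission):

    const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
    for (SIMachineFunctionInfo::SpilledReg Spill :
         FuncInfo->getSGPRToVGPRSpills(FI)) {
      // Each 32-bit SGPR subregister occupies one lane of a spill VGPR.
      if (Spill.hasReg() && Spill.hasLane())
        emitReadLane(Spill.VGPR, Spill.Lane);
    }
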
+
+  // Add system SGPRs.
+  unsigned addWorkGroupIDX() {
+    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
+    NumSystemSGPRs += 1;
+    return ArgInfo.WorkGroupIDX.getRegister();
+  }
+
+  unsigned addWorkGroupIDY() {
+    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
+    NumSystemSGPRs += 1;
+    return ArgInfo.WorkGroupIDY.getRegister();
+  }
+
+  unsigned addWorkGroupIDZ() {
+    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
+    NumSystemSGPRs += 1;
+    return ArgInfo.WorkGroupIDZ.getRegister();
+  }
+
+  unsigned addWorkGroupInfo() {
+    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
+    NumSystemSGPRs += 1;
+    return ArgInfo.WorkGroupInfo.getRegister();
+  }
+
+  // Add special VGPR inputs
+  void setWorkItemIDX(ArgDescriptor Arg) {
+    ArgInfo.WorkItemIDX = Arg;
+  }
+
+  void setWorkItemIDY(ArgDescriptor Arg) {
+    ArgInfo.WorkItemIDY = Arg;
+  }
+
+  void setWorkItemIDZ(ArgDescriptor Arg) {
+    ArgInfo.WorkItemIDZ = Arg;
+  }
+
+  unsigned addPrivateSegmentWaveByteOffset() {
+    ArgInfo.PrivateSegmentWaveByteOffset
+      = ArgDescriptor::createRegister(getNextSystemSGPR());
+    NumSystemSGPRs += 1;
+    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
+  }
+
+  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
+    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
+  }
+
+  bool hasPrivateSegmentBuffer() const {
+    return PrivateSegmentBuffer;
+  }
+
+  bool hasDispatchPtr() const {
+    return DispatchPtr;
+  }
+
+  bool hasQueuePtr() const {
+    return QueuePtr;
+  }
+
+  bool hasKernargSegmentPtr() const {
+    return KernargSegmentPtr;
+  }
+
+  bool hasDispatchID() const {
+    return DispatchID;
+  }
+
+  bool hasFlatScratchInit() const {
+    return FlatScratchInit;
+  }
+
+  bool hasWorkGroupIDX() const {
+    return WorkGroupIDX;
+  }
+
+  bool hasWorkGroupIDY() const {
+    return WorkGroupIDY;
+  }
+
+  bool hasWorkGroupIDZ() const {
+    return WorkGroupIDZ;
+  }
+
+  bool hasWorkGroupInfo() const {
+    return WorkGroupInfo;
+  }
+
+  bool hasPrivateSegmentWaveByteOffset() const {
+    return PrivateSegmentWaveByteOffset;
+  }
+
+  bool hasWorkItemIDX() const {
+    return WorkItemIDX;
+  }
+
+  bool hasWorkItemIDY() const {
+    return WorkItemIDY;
+  }
+
+  bool hasWorkItemIDZ() const {
+    return WorkItemIDZ;
+  }
+
+  bool hasImplicitArgPtr() const {
+    return ImplicitArgPtr;
+  }
+
+  bool hasImplicitBufferPtr() const {
+    return ImplicitBufferPtr;
+  }
+
+  AMDGPUFunctionArgInfo &getArgInfo() {
+    return ArgInfo;
+  }
+
+  const AMDGPUFunctionArgInfo &getArgInfo() const {
+    return ArgInfo;
+  }
+
+  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    return ArgInfo.getPreloadedValue(Value);
+  }
+
+  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    auto Arg = ArgInfo.getPreloadedValue(Value).first;
+    return Arg ? Arg->getRegister() : Register();
+  }
+
+  unsigned getGITPtrHigh() const {
+    return GITPtrHigh;
+  }
+
+  uint32_t get32BitAddressHighBits() const {
+    return HighBitsOf32BitAddress;
+  }
+
+  unsigned getGDSSize() const {
+    return GDSSize;
+  }
+
+  unsigned getNumUserSGPRs() const {
+    return NumUserSGPRs;
+  }
+
+  unsigned getNumPreloadedSGPRs() const {
+    return NumUserSGPRs + NumSystemSGPRs;
+  }
+
+  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
+    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
+  }
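
getPreloadedReg() above is the convenience form of getPreloadedValue(): it
collapses the (descriptor, register class) pair into just the register, and
yields an invalid Register when the value was not preloaded. A hedged usage
sketch (FuncInfo as in the earlier sketch):

    // Fetch the SGPR pair holding the kernarg segment base, if any.
    Register KernargPtr =
        FuncInfo->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!KernargPtr) {
      // Not preloaded for this function; the caller must materialize it.
    }
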
+
+  /// Returns the physical register reserved for use as the resource
+  /// descriptor for scratch accesses.
+  unsigned getScratchRSrcReg() const {
+    return ScratchRSrcReg;
+  }
+
+  void setScratchRSrcReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    ScratchRSrcReg = Reg;
+  }
+
+  unsigned getScratchWaveOffsetReg() const {
+    return ScratchWaveOffsetReg;
+  }
+
+  unsigned getFrameOffsetReg() const {
+    return FrameOffsetReg;
+  }
+
+  void setFrameOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    FrameOffsetReg = Reg;
+  }
+
+  void setStackPtrOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    StackPtrOffsetReg = Reg;
+  }
+
+  // Note the unset value for this is AMDGPU::SP_REG rather than
+  // NoRegister. This is mostly a workaround for MIR tests where state that
+  // can't be directly computed from the function is not preserved in serialized
+  // MIR.
+  unsigned getStackPtrOffsetReg() const {
+    return StackPtrOffsetReg;
+  }
+
+  void setScratchWaveOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    ScratchWaveOffsetReg = Reg;
+  }
+
+  unsigned getQueuePtrUserSGPR() const {
+    return ArgInfo.QueuePtr.getRegister();
+  }
+
+  unsigned getImplicitBufferPtrUserSGPR() const {
+    return ArgInfo.ImplicitBufferPtr.getRegister();
+  }
+
+  bool hasSpilledSGPRs() const {
+    return HasSpilledSGPRs;
+  }
+
+  void setHasSpilledSGPRs(bool Spill = true) {
+    HasSpilledSGPRs = Spill;
+  }
+
+  bool hasSpilledVGPRs() const {
+    return HasSpilledVGPRs;
+  }
+
+  void setHasSpilledVGPRs(bool Spill = true) {
+    HasSpilledVGPRs = Spill;
+  }
+
+  bool hasNonSpillStackObjects() const {
+    return HasNonSpillStackObjects;
+  }
+
+  void setHasNonSpillStackObjects(bool StackObject = true) {
+    HasNonSpillStackObjects = StackObject;
+  }
+
+  bool isStackRealigned() const {
+    return IsStackRealigned;
+  }
+
+  void setIsStackRealigned(bool Realigned = true) {
+    IsStackRealigned = Realigned;
+  }
+
+  unsigned getNumSpilledSGPRs() const {
+    return NumSpilledSGPRs;
+  }
+
+  unsigned getNumSpilledVGPRs() const {
+    return NumSpilledVGPRs;
+  }
+
+  void addToSpilledSGPRs(unsigned num) {
+    NumSpilledSGPRs += num;
+  }
+
+  void addToSpilledVGPRs(unsigned num) {
+    NumSpilledVGPRs += num;
+  }
+
+  unsigned getPSInputAddr() const {
+    return PSInputAddr;
+  }
+
+  unsigned getPSInputEnable() const {
+    return PSInputEnable;
+  }
+
+  bool isPSInputAllocated(unsigned Index) const {
+    return PSInputAddr & (1 << Index);
+  }
+
+  void markPSInputAllocated(unsigned Index) {
+    PSInputAddr |= 1 << Index;
+  }
+
+  void markPSInputEnabled(unsigned Index) {
+    PSInputEnable |= 1 << Index;
+  }
+
+  bool returnsVoid() const {
+    return ReturnsVoid;
+  }
+
+  void setIfReturnsVoid(bool Value) {
+    ReturnsVoid = Value;
+  }
+
+  /// \returns A pair of default/requested minimum/maximum flat work group sizes
+  /// for this function.
+  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
+    return FlatWorkGroupSizes;
+  }
+
+  /// \returns Default/requested minimum flat work group size for this function.
+  unsigned getMinFlatWorkGroupSize() const {
+    return FlatWorkGroupSizes.first;
+  }
+
+  /// \returns Default/requested maximum flat work group size for this function.
+  unsigned getMaxFlatWorkGroupSize() const {
+    return FlatWorkGroupSizes.second;
+  }
+
+  /// \returns A pair of default/requested minimum/maximum number of waves per
+  /// execution unit.
+  std::pair<unsigned, unsigned> getWavesPerEU() const {
+    return WavesPerEU;
+  }
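
The PSInputAddr/PSInputEnable accessors above maintain one bit per
pixel-shader input slot, mirroring the hardware SPI_PS_INPUT_ADDR and
SPI_PS_INPUT_ENA config registers. A small sketch (treating slot 0 as
PERSP_SAMPLE_ENA follows the usual hardware layout, stated here as an
assumption):

    FuncInfo->markPSInputAllocated(0);             // PSInputAddr   |= 1 << 0
    FuncInfo->markPSInputEnabled(0);               // PSInputEnable |= 1 << 0
    bool InUse = FuncInfo->isPSInputAllocated(0);  // PSInputAddr & (1 << 0)
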
+
+  /// \returns Default/requested minimum number of waves per execution unit.
+  unsigned getMinWavesPerEU() const {
+    return WavesPerEU.first;
+  }
+
+  /// \returns Default/requested maximum number of waves per execution unit.
+  unsigned getMaxWavesPerEU() const {
+    return WavesPerEU.second;
+  }
+
+  /// \returns SGPR used for \p Dim's work group ID.
+  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
+    switch (Dim) {
+    case 0:
+      assert(hasWorkGroupIDX());
+      return ArgInfo.WorkGroupIDX.getRegister();
+    case 1:
+      assert(hasWorkGroupIDY());
+      return ArgInfo.WorkGroupIDY.getRegister();
+    case 2:
+      assert(hasWorkGroupIDZ());
+      return ArgInfo.WorkGroupIDZ.getRegister();
+    }
+    llvm_unreachable("unexpected dimension");
+  }
+
+  unsigned getLDSWaveSpillSize() const {
+    return LDSWaveSpillSize;
+  }
+
+  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
+                                                    const Value *BufferRsrc) {
+    assert(BufferRsrc);
+    auto PSV = BufferPSVs.try_emplace(
+      BufferRsrc,
+      std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
+    return PSV.first->second.get();
+  }
+
+  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
+                                                  const Value *ImgRsrc) {
+    assert(ImgRsrc);
+    auto PSV = ImagePSVs.try_emplace(
+      ImgRsrc,
+      std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+    return PSV.first->second.get();
+  }
+
+  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+    if (!GWSResourcePSV) {
+      GWSResourcePSV =
+          std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+    }
+
+    return GWSResourcePSV.get();
+  }
+
+  unsigned getOccupancy() const {
+    return Occupancy;
+  }
+
+  unsigned getMinAllowedOccupancy() const {
+    if (!isMemoryBound() && !needsWaveLimiter())
+      return Occupancy;
+    return (Occupancy < 4) ? Occupancy : 4;
+  }
+
+  void limitOccupancy(const MachineFunction &MF);
+
+  void limitOccupancy(unsigned Limit) {
+    if (Occupancy > Limit)
+      Occupancy = Limit;
+  }
+
+  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
+    if (Occupancy < Limit)
+      Occupancy = Limit;
+    limitOccupancy(MF);
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
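
The occupancy hooks at the end of the class keep a running upper bound on
waves per execution unit that later passes may only tighten; increaseOccupancy()
is the one way back up, and it immediately re-clamps against function-level
limits via limitOccupancy(MF). An illustrative sequence (assumes a non-const
MachineFunction MF is in scope):

    SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();
    FuncInfo.limitOccupancy(8);          // cap at 8 waves per EU
    FuncInfo.increaseOccupancy(MF, 10);  // raise to 10, then re-clamp
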