Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.h | 377 |
1 file changed, 321 insertions, 56 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef91d1e43075..f19b20ceb5da 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,13 +15,16 @@
 
 #include "AMDGPUArgumentUsageInfo.h"
 #include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -38,12 +40,19 @@ class MachineFrameInfo;
 class MachineFunction;
 class TargetRegisterClass;
 
-class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
+class AMDGPUPseudoSourceValue : public PseudoSourceValue {
 public:
-  // TODO: Is the img rsrc useful?
-  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
+  enum AMDGPUPSVKind : unsigned {
+    PSVBuffer = PseudoSourceValue::TargetCustom,
+    PSVImage,
+    GWSResource
+  };
+
+protected:
+  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+      : PseudoSourceValue(Kind, TII) {}
 
+public:
   bool isConstant(const MachineFrameInfo *) const override {
     // This should probably be true for most images, but we will start by being
     // conservative.
@@ -59,29 +68,250 @@ public:
   }
 };
 
-class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
 public:
-  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
 
-  bool isConstant(const MachineFrameInfo *) const override {
-    // This should probably be true for most images, but we will start by being
-    // conservative.
-    return false;
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVBuffer;
   }
+};
 
+class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  // TODO: Is the img rsrc useful?
+  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVImage;
+  }
+};
+
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GWSResource;
+  }
+
+  // These are inaccessible memory from IR.
   bool isAliased(const MachineFrameInfo *) const override {
-    return true;
+    return false;
   }
 
+  // These are inaccessible memory from IR.
   bool mayAlias(const MachineFrameInfo *) const override {
-    return true;
+    return false;
+  }
+
+  void printCustom(raw_ostream &OS) const override {
+    OS << "GWSResource";
+  }
+};
+
+namespace yaml {
+
+struct SIArgument {
+  bool IsRegister;
+  union {
+    StringValue RegisterName;
+    unsigned StackOffset;
+  };
+  Optional<unsigned> Mask;
+
+  // Default constructor, which creates a stack argument.
+  SIArgument() : IsRegister(false), StackOffset(0) {}
+  SIArgument(const SIArgument &Other) {
+    IsRegister = Other.IsRegister;
+    if (IsRegister) {
+      ::new ((void *)std::addressof(RegisterName))
+          StringValue(Other.RegisterName);
+    } else
+      StackOffset = Other.StackOffset;
+    Mask = Other.Mask;
+  }
+  SIArgument &operator=(const SIArgument &Other) {
+    IsRegister = Other.IsRegister;
+    if (IsRegister) {
+      ::new ((void *)std::addressof(RegisterName))
+          StringValue(Other.RegisterName);
+    } else
+      StackOffset = Other.StackOffset;
+    Mask = Other.Mask;
+    return *this;
+  }
+  ~SIArgument() {
+    if (IsRegister)
+      RegisterName.~StringValue();
+  }
+
+  // Helper to create a register or stack argument.
+  static inline SIArgument createArgument(bool IsReg) {
+    if (IsReg)
+      return SIArgument(IsReg);
+    return SIArgument();
+  }
+
+private:
+  // Construct a register argument.
+  SIArgument(bool) : IsRegister(true), RegisterName() {}
+};
+
+template <> struct MappingTraits<SIArgument> {
+  static void mapping(IO &YamlIO, SIArgument &A) {
+    if (YamlIO.outputting()) {
+      if (A.IsRegister)
+        YamlIO.mapRequired("reg", A.RegisterName);
+      else
+        YamlIO.mapRequired("offset", A.StackOffset);
+    } else {
+      auto Keys = YamlIO.keys();
+      if (is_contained(Keys, "reg")) {
+        A = SIArgument::createArgument(true);
+        YamlIO.mapRequired("reg", A.RegisterName);
+      } else if (is_contained(Keys, "offset"))
+        YamlIO.mapRequired("offset", A.StackOffset);
+      else
+        YamlIO.setError("missing required key 'reg' or 'offset'");
+    }
+    YamlIO.mapOptional("mask", A.Mask);
+  }
+  static const bool flow = true;
+};
+
+struct SIArgumentInfo {
+  Optional<SIArgument> PrivateSegmentBuffer;
+  Optional<SIArgument> DispatchPtr;
+  Optional<SIArgument> QueuePtr;
+  Optional<SIArgument> KernargSegmentPtr;
+  Optional<SIArgument> DispatchID;
+  Optional<SIArgument> FlatScratchInit;
+  Optional<SIArgument> PrivateSegmentSize;
+
+  Optional<SIArgument> WorkGroupIDX;
+  Optional<SIArgument> WorkGroupIDY;
+  Optional<SIArgument> WorkGroupIDZ;
+  Optional<SIArgument> WorkGroupInfo;
+  Optional<SIArgument> PrivateSegmentWaveByteOffset;
+
+  Optional<SIArgument> ImplicitArgPtr;
+  Optional<SIArgument> ImplicitBufferPtr;
+
+  Optional<SIArgument> WorkItemIDX;
+  Optional<SIArgument> WorkItemIDY;
+  Optional<SIArgument> WorkItemIDZ;
+};
+
+template <> struct MappingTraits<SIArgumentInfo> {
+  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
+    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
+    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
+    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
+    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
+    YamlIO.mapOptional("dispatchID", AI.DispatchID);
+    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
+    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+
+    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
+    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
+    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
+    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+    YamlIO.mapOptional("privateSegmentWaveByteOffset",
+                       AI.PrivateSegmentWaveByteOffset);
+
+    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
+    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
+
+    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
+    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
+    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
+  }
+};
+
+// Default to default mode for default calling convention.
+struct SIMode {
+  bool IEEE = true;
+  bool DX10Clamp = true;
+
+  SIMode() = default;
+
+
+  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+    IEEE = Mode.IEEE;
+    DX10Clamp = Mode.DX10Clamp;
   }
+
+  bool operator ==(const SIMode Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+  }
+};
+
+template <> struct MappingTraits<SIMode> {
+  static void mapping(IO &YamlIO, SIMode &Mode) {
+    YamlIO.mapOptional("ieee", Mode.IEEE, true);
+    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+  }
+};
+
+struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+  uint64_t ExplicitKernArgSize = 0;
+  unsigned MaxKernArgAlign = 0;
+  unsigned LDSSize = 0;
+  bool IsEntryFunction = false;
+  bool NoSignedZerosFPMath = false;
+  bool MemoryBound = false;
+  bool WaveLimiter = false;
+
+  StringValue ScratchRSrcReg = "$private_rsrc_reg";
+  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
+  StringValue FrameOffsetReg = "$fp_reg";
+  StringValue StackPtrOffsetReg = "$sp_reg";
+
+  Optional<SIArgumentInfo> ArgInfo;
+  SIMode Mode;
+
+  SIMachineFunctionInfo() = default;
+  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
+                        const TargetRegisterInfo &TRI);
+
+  void mappingImpl(yaml::IO &YamlIO) override;
+  ~SIMachineFunctionInfo() = default;
 };
 
+template <> struct MappingTraits<SIMachineFunctionInfo> {
+  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
+    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
+                       UINT64_C(0));
+    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
+    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
+    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
+                       StringValue("$private_rsrc_reg"));
+    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
+                       StringValue("$scratch_wave_offset_reg"));
+    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
+                       StringValue("$fp_reg"));
+    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
+                       StringValue("$sp_reg"));
+    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+  }
+};
+
+} // end namespace yaml
+
 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 /// tells the hardware which interpolation parameters to load.
 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+  friend class GCNTargetMachine;
+
   unsigned TIDReg = AMDGPU::NoRegister;
 
   // Registers that may be reserved for spilling purposes. These may be the same
@@ -99,6 +329,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 
   AMDGPUFunctionArgInfo ArgInfo;
 
+  // State of MODE register, assumed FP mode.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
   // Graphics info.
   unsigned PSInputAddr = 0;
   unsigned PSInputEnable = 0;
@@ -124,16 +357,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // unit. Minimum - first, maximum - second.
   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 
-  // Stack object indices for work group IDs.
-  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
-
-  // Stack object indices for work item IDs.
-  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
-
   DenseMap<const Value *, std::unique_ptr<const AMDGPUBufferPseudoSourceValue>>
     BufferPSVs;
   DenseMap<const Value *, std::unique_ptr<const AMDGPUImagePseudoSourceValue>>
     ImagePSVs;
+  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 
 private:
   unsigned LDSWaveSpillSize = 0;
@@ -182,6 +410,7 @@ private:
   unsigned GITPtrHigh;
 
   unsigned HighBitsOf32BitAddress;
+  unsigned GDSSize;
 
   // Current recorded maximum possible occupancy.
   unsigned Occupancy;
@@ -213,6 +442,15 @@ public:
     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
   };
 
+  struct VGPRSpillToAGPR {
+    SmallVector<MCPhysReg, 32> Lanes;
+    bool FullyAllocated = false;
+  };
+
+  SparseBitVector<> WWMReservedRegs;
+
+  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+
 private:
   // SGPR->VGPR spilling support.
   using SpillRegMask = std::pair<unsigned, unsigned>;
@@ -223,9 +461,25 @@ private:
   unsigned NumVGPRSpillLanes = 0;
   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
 
+  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+  // AGPRs used for VGPR spills.
+  SmallVector<MCPhysReg, 32> SpillAGPR;
+
+  // VGPRs used for AGPR spills.
+  SmallVector<MCPhysReg, 32> SpillVGPR;
+
+public: // FIXME
+  /// If this is set, an SGPR used for save/restore of the register used for the
+  /// frame pointer.
+  unsigned SGPRForFPSaveRestoreCopy = 0;
+  Optional<int> FramePointerSaveIndex;
+
 public:
   SIMachineFunctionInfo(const MachineFunction &MF);
 
+  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
+
   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
     auto I = SGPRToVGPRSpills.find(FrameIndex);
     return (I == SGPRToVGPRSpills.end()) ?
@@ -236,8 +490,29 @@ public:
     return SpillVGPRs;
   }
 
+  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+    return SpillAGPR;
+  }
+
+  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+    return SpillVGPR;
+  }
+
+  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+    auto I = VGPRToAGPRSpills.find(FrameIndex);
+    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+                                         : I->second.Lanes[Lane];
+  }
+
+  AMDGPU::SIModeRegisterDefaults getMode() const {
+    return Mode;
+  }
+
+  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+                                 unsigned NumLane) const;
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
-  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+  void removeDeadFrameIndices(MachineFrameInfo &MFI);
 
   bool hasCalculatedTID() const { return TIDReg != 0; };
   unsigned getTIDReg() const { return TIDReg; };
@@ -386,8 +661,9 @@ public:
     return ArgInfo.getPreloadedValue(Value);
   }
 
-  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
-    return ArgInfo.getPreloadedValue(Value).first->getRegister();
+  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    auto Arg = ArgInfo.getPreloadedValue(Value).first;
+    return Arg ? Arg->getRegister() : Register();
   }
 
   unsigned getGITPtrHigh() const {
     return GITPtrHigh;
@@ -398,6 +674,10 @@
     return HighBitsOf32BitAddress;
   }
 
+  unsigned getGDSSize() const {
+    return GDSSize;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -429,6 +709,11 @@ public:
     return FrameOffsetReg;
   }
 
+  void setFrameOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    FrameOffsetReg = Reg;
+  }
+
   void setStackPtrOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     StackPtrOffsetReg = Reg;
@@ -445,8 +730,6 @@ public:
   void setScratchWaveOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     ScratchWaveOffsetReg = Reg;
-    if (isEntryFunction())
-      FrameOffsetReg = ScratchWaveOffsetReg;
   }
 
   unsigned getQueuePtrUserSGPR() const {
@@ -565,30 +848,6 @@ public:
     return WavesPerEU.second;
   }
 
-  /// \returns Stack object index for \p Dim's work group ID.
-  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkGroupIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
-  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
-  /// \returns Stack object index for \p Dim's work item ID.
-  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkItemIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
-  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
   /// \returns SGPR used for \p Dim's work group ID.
   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     switch (Dim) {
@@ -605,9 +864,6 @@ public:
     llvm_unreachable("unexpected dimension");
   }
 
-  /// \returns VGPR used for \p Dim' work item ID.
-  unsigned getWorkItemIDVGPR(unsigned Dim) const;
-
   unsigned getLDSWaveSpillSize() const {
     return LDSWaveSpillSize;
  }
@@ -630,6 +886,15 @@
     return PSV.first->second.get();
   }
 
+  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+    if (!GWSResourcePSV) {
+      GWSResourcePSV =
+          llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+    }
+
+    return GWSResourcePSV.get();
+  }
+
   unsigned getOccupancy() const {
     return Occupancy;
   }
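A practical consequence of the new AMDGPUPseudoSourceValue hierarchy is that the buffer, image, and GWS pseudo source values can now be told apart through LLVM's usual isa/dyn_cast machinery, since each subclass overrides classof() against its AMDGPUPSVKind. Below is a minimal sketch of how backend code could test a MachineMemOperand for the GWS resource; the helper name memOperandIsGWS is hypothetical and not part of this commit.

// Sketch only: relies on the classof() overrides added in this patch; the
// helper itself (memOperandIsGWS) is illustrative, not something the commit adds.
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static bool memOperandIsGWS(const MachineMemOperand &MMO) {
  // getPseudoValue() is null for ordinary IR-value memory operands.
  const PseudoSourceValue *PSV = MMO.getPseudoValue();
  // classof() compares PseudoSourceValue::kind() against GWSResource, so
  // isa<> resolves the concrete AMDGPU pseudo source value class.
  return PSV && isa<AMDGPUGWSResourcePseudoSourceValue>(PSV);
}

The yaml::SIMachineFunctionInfo mapping in the same patch is the hook the MIR printer and parser use for the machineFunctionInfo: block of .mir files, and initializeBaseYamlFields() copies the parsed fields back into the in-memory SIMachineFunctionInfo.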