aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineFunctionInfo.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.h')
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h377
1 files changed, 321 insertions, 56 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef91d1e43075..f19b20ceb5da 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,13 +15,16 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -38,12 +40,19 @@ class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;
-class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
+class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
- // TODO: Is the img rsrc useful?
- explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
+ enum AMDGPUPSVKind : unsigned {
+ PSVBuffer = PseudoSourceValue::TargetCustom,
+ PSVImage,
+ GWSResource
+ };
+
+protected:
+ AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+ : PseudoSourceValue(Kind, TII) {}
+public:
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
// conservative.
@@ -59,29 +68,250 @@ public:
}
};
-class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
- explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+ explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
- bool isConstant(const MachineFrameInfo *) const override {
- // This should probably be true for most images, but we will start by being
- // conservative.
- return false;
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == PSVBuffer;
}
+};
+class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+ // TODO: Is the img rsrc useful?
+ explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == PSVImage;
+ }
+};
+
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+ explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == GWSResource;
+ }
+
+ // These are inaccessible memory from IR.
bool isAliased(const MachineFrameInfo *) const override {
- return true;
+ return false;
}
+ // These are inaccessible memory from IR.
bool mayAlias(const MachineFrameInfo *) const override {
- return true;
+ return false;
+ }
+
+ void printCustom(raw_ostream &OS) const override {
+ OS << "GWSResource";
+ }
+};
+
+namespace yaml {
+
+struct SIArgument {
+ bool IsRegister;
+ union {
+ StringValue RegisterName;
+ unsigned StackOffset;
+ };
+ Optional<unsigned> Mask;
+
+ // Default constructor, which creates a stack argument.
+ SIArgument() : IsRegister(false), StackOffset(0) {}
+ SIArgument(const SIArgument &Other) {
+ IsRegister = Other.IsRegister;
+ if (IsRegister) {
+ ::new ((void *)std::addressof(RegisterName))
+ StringValue(Other.RegisterName);
+ } else
+ StackOffset = Other.StackOffset;
+ Mask = Other.Mask;
+ }
+ SIArgument &operator=(const SIArgument &Other) {
+ IsRegister = Other.IsRegister;
+ if (IsRegister) {
+ ::new ((void *)std::addressof(RegisterName))
+ StringValue(Other.RegisterName);
+ } else
+ StackOffset = Other.StackOffset;
+ Mask = Other.Mask;
+ return *this;
+ }
+ ~SIArgument() {
+ if (IsRegister)
+ RegisterName.~StringValue();
+ }
+
+ // Helper to create a register or stack argument.
+ static inline SIArgument createArgument(bool IsReg) {
+ if (IsReg)
+ return SIArgument(IsReg);
+ return SIArgument();
+ }
+
+private:
+ // Construct a register argument.
+ SIArgument(bool) : IsRegister(true), RegisterName() {}
+};
+
+template <> struct MappingTraits<SIArgument> {
+ static void mapping(IO &YamlIO, SIArgument &A) {
+ if (YamlIO.outputting()) {
+ if (A.IsRegister)
+ YamlIO.mapRequired("reg", A.RegisterName);
+ else
+ YamlIO.mapRequired("offset", A.StackOffset);
+ } else {
+ auto Keys = YamlIO.keys();
+ if (is_contained(Keys, "reg")) {
+ A = SIArgument::createArgument(true);
+ YamlIO.mapRequired("reg", A.RegisterName);
+ } else if (is_contained(Keys, "offset"))
+ YamlIO.mapRequired("offset", A.StackOffset);
+ else
+ YamlIO.setError("missing required key 'reg' or 'offset'");
+ }
+ YamlIO.mapOptional("mask", A.Mask);
+ }
+ static const bool flow = true;
+};
+
+struct SIArgumentInfo {
+ Optional<SIArgument> PrivateSegmentBuffer;
+ Optional<SIArgument> DispatchPtr;
+ Optional<SIArgument> QueuePtr;
+ Optional<SIArgument> KernargSegmentPtr;
+ Optional<SIArgument> DispatchID;
+ Optional<SIArgument> FlatScratchInit;
+ Optional<SIArgument> PrivateSegmentSize;
+
+ Optional<SIArgument> WorkGroupIDX;
+ Optional<SIArgument> WorkGroupIDY;
+ Optional<SIArgument> WorkGroupIDZ;
+ Optional<SIArgument> WorkGroupInfo;
+ Optional<SIArgument> PrivateSegmentWaveByteOffset;
+
+ Optional<SIArgument> ImplicitArgPtr;
+ Optional<SIArgument> ImplicitBufferPtr;
+
+ Optional<SIArgument> WorkItemIDX;
+ Optional<SIArgument> WorkItemIDY;
+ Optional<SIArgument> WorkItemIDZ;
+};
+
+template <> struct MappingTraits<SIArgumentInfo> {
+ static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
+ YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
+ YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
+ YamlIO.mapOptional("queuePtr", AI.QueuePtr);
+ YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
+ YamlIO.mapOptional("dispatchID", AI.DispatchID);
+ YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
+ YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+
+ YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
+ YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
+ YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
+ YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+ YamlIO.mapOptional("privateSegmentWaveByteOffset",
+ AI.PrivateSegmentWaveByteOffset);
+
+ YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
+ YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
+
+ YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
+ YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
+ YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
+ }
+};
+
+// Default to default mode for default calling convention.
+struct SIMode {
+ bool IEEE = true;
+ bool DX10Clamp = true;
+
+ SIMode() = default;
+
+
+ SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+ IEEE = Mode.IEEE;
+ DX10Clamp = Mode.DX10Clamp;
}
+
+ bool operator ==(const SIMode Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ }
+};
+
+template <> struct MappingTraits<SIMode> {
+ static void mapping(IO &YamlIO, SIMode &Mode) {
+ YamlIO.mapOptional("ieee", Mode.IEEE, true);
+ YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+ }
+};
+
+struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+ uint64_t ExplicitKernArgSize = 0;
+ unsigned MaxKernArgAlign = 0;
+ unsigned LDSSize = 0;
+ bool IsEntryFunction = false;
+ bool NoSignedZerosFPMath = false;
+ bool MemoryBound = false;
+ bool WaveLimiter = false;
+
+ StringValue ScratchRSrcReg = "$private_rsrc_reg";
+ StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
+ StringValue FrameOffsetReg = "$fp_reg";
+ StringValue StackPtrOffsetReg = "$sp_reg";
+
+ Optional<SIArgumentInfo> ArgInfo;
+ SIMode Mode;
+
+ SIMachineFunctionInfo() = default;
+ SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
+ const TargetRegisterInfo &TRI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~SIMachineFunctionInfo() = default;
};
+template <> struct MappingTraits<SIMachineFunctionInfo> {
+ static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
+ YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
+ UINT64_C(0));
+ YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+ YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+ YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
+ YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
+ YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+ YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
+ StringValue("$private_rsrc_reg"));
+ YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
+ StringValue("$scratch_wave_offset_reg"));
+ YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
+ StringValue("$fp_reg"));
+ YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
+ StringValue("$sp_reg"));
+ YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+ YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+ }
+};
+
+} // end namespace yaml
+
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+ friend class GCNTargetMachine;
+
unsigned TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
@@ -99,6 +329,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUFunctionArgInfo ArgInfo;
+ // State of MODE register, assumed FP mode.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
// Graphics info.
unsigned PSInputAddr = 0;
unsigned PSInputEnable = 0;
@@ -124,16 +357,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// unit. Minimum - first, maximum - second.
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
- // Stack object indices for work group IDs.
- std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
-
- // Stack object indices for work item IDs.
- std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
-
DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+ std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
unsigned LDSWaveSpillSize = 0;
@@ -182,6 +410,7 @@ private:
unsigned GITPtrHigh;
unsigned HighBitsOf32BitAddress;
+ unsigned GDSSize;
// Current recorded maximum possible occupancy.
unsigned Occupancy;
@@ -213,6 +442,15 @@ public:
SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
};
+ struct VGPRSpillToAGPR {
+ SmallVector<MCPhysReg, 32> Lanes;
+ bool FullyAllocated = false;
+ };
+
+ SparseBitVector<> WWMReservedRegs;
+
+ void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+
private:
// SGPR->VGPR spilling support.
using SpillRegMask = std::pair<unsigned, unsigned>;
@@ -223,9 +461,25 @@ private:
unsigned NumVGPRSpillLanes = 0;
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
+ DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+ // AGPRs used for VGPR spills.
+ SmallVector<MCPhysReg, 32> SpillAGPR;
+
+ // VGPRs used for AGPR spills.
+ SmallVector<MCPhysReg, 32> SpillVGPR;
+
+public: // FIXME
+ /// If this is set, an SGPR used for save/restore of the register used for the
+ /// frame pointer.
+ unsigned SGPRForFPSaveRestoreCopy = 0;
+ Optional<int> FramePointerSaveIndex;
+
public:
SIMachineFunctionInfo(const MachineFunction &MF);
+ bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
+
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
auto I = SGPRToVGPRSpills.find(FrameIndex);
return (I == SGPRToVGPRSpills.end()) ?
@@ -236,8 +490,29 @@ public:
return SpillVGPRs;
}
+ ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+ return SpillAGPR;
+ }
+
+ ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+ return SpillVGPR;
+ }
+
+ MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+ auto I = VGPRToAGPRSpills.find(FrameIndex);
+ return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+ : I->second.Lanes[Lane];
+ }
+
+ AMDGPU::SIModeRegisterDefaults getMode() const {
+ return Mode;
+ }
+
+ bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+ unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
- void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+ bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+ void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
unsigned getTIDReg() const { return TIDReg; };
@@ -386,8 +661,9 @@ public:
return ArgInfo.getPreloadedValue(Value);
}
- unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
- return ArgInfo.getPreloadedValue(Value).first->getRegister();
+ Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ auto Arg = ArgInfo.getPreloadedValue(Value).first;
+ return Arg ? Arg->getRegister() : Register();
}
unsigned getGITPtrHigh() const {
@@ -398,6 +674,10 @@ public:
return HighBitsOf32BitAddress;
}
+ unsigned getGDSSize() const {
+ return GDSSize;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -429,6 +709,11 @@ public:
return FrameOffsetReg;
}
+ void setFrameOffsetReg(unsigned Reg) {
+ assert(Reg != 0 && "Should never be unset");
+ FrameOffsetReg = Reg;
+ }
+
void setStackPtrOffsetReg(unsigned Reg) {
assert(Reg != 0 && "Should never be unset");
StackPtrOffsetReg = Reg;
@@ -445,8 +730,6 @@ public:
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != 0 && "Should never be unset");
ScratchWaveOffsetReg = Reg;
- if (isEntryFunction())
- FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
@@ -565,30 +848,6 @@ public:
return WavesPerEU.second;
}
- /// \returns Stack object index for \p Dim's work group ID.
- int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
- assert(Dim < 3);
- return DebuggerWorkGroupIDStackObjectIndices[Dim];
- }
-
- /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
- void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
- assert(Dim < 3);
- DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
- }
-
- /// \returns Stack object index for \p Dim's work item ID.
- int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
- assert(Dim < 3);
- return DebuggerWorkItemIDStackObjectIndices[Dim];
- }
-
- /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
- void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
- assert(Dim < 3);
- DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
- }
-
/// \returns SGPR used for \p Dim's work group ID.
unsigned getWorkGroupIDSGPR(unsigned Dim) const {
switch (Dim) {
@@ -605,9 +864,6 @@ public:
llvm_unreachable("unexpected dimension");
}
- /// \returns VGPR used for \p Dim' work item ID.
- unsigned getWorkItemIDVGPR(unsigned Dim) const;
-
unsigned getLDSWaveSpillSize() const {
return LDSWaveSpillSize;
}
@@ -630,6 +886,15 @@ public:
return PSV.first->second.get();
}
+ const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+ if (!GWSResourcePSV) {
+ GWSResourcePSV =
+ llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+ }
+
+ return GWSResourcePSV.get();
+ }
+
unsigned getOccupancy() const {
return Occupancy;
}