aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUSubtarget.h | 67
1 files changed, 47 insertions, 20 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 78c3b823946d..936feb00c62b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -75,6 +75,7 @@ protected:
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
bool HasTrigReducedRange;
+ unsigned MaxWavesPerEU;
int LocalMemorySize;
unsigned WavefrontSize;
@@ -195,8 +196,8 @@ public:
return LocalMemorySize;
}
- unsigned getAlignmentForImplicitArgPtr() const {
- return isAmdHsaOS() ? 8 : 4;
+ Align getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? Align(8) : Align(4);
}
/// Returns the offset in bytes from the start of the input buffer
@@ -223,7 +224,9 @@ public:
/// subtarget.
virtual unsigned getMinWavesPerEU() const = 0;
- unsigned getMaxWavesPerEU() const { return 10; }
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget without any kind of limitation.
+ unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
/// Creates value range metadata on a workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
@@ -235,16 +238,17 @@ public:
return 16;
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
}
- uint64_t getExplicitKernArgSize(const Function &F,
- unsigned &MaxAlign) const;
- unsigned getKernArgSegmentSize(const Function &F,
- unsigned &MaxAlign) const;
+ uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
+ unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
virtual ~AMDGPUSubtarget() {}
};
class GCNSubtarget : public AMDGPUGenSubtargetInfo,
public AMDGPUSubtarget {
+
+ using AMDGPUSubtarget::getMaxWavesPerEU;
+
public:
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
@@ -362,6 +366,7 @@ protected:
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
+ bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
@@ -416,7 +421,7 @@ public:
return CallLoweringInfo.get();
}
- const InstructionSelector *getInstructionSelector() const override {
+ InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
@@ -544,6 +549,14 @@ public:
return GFX9Insts;
}
+ bool hasScalarPackInsts() const {
+ return GFX9Insts;
+ }
+
+ bool hasScalarMulHiInsts() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -611,6 +624,11 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
+ /// \returns true if the target supports S_DENORM_MODE.
+ bool hasDenormModeInst() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX10;
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -848,9 +866,7 @@ public:
// on the pointer value itself may rely on the alignment / known low bits of
// the pointer. Set this to something above the minimum to avoid needing
// dynamic realignment in common cases.
- unsigned getStackAlignment() const {
- return 16;
- }
+ Align getStackAlignment() const { return Align(16); }
bool enableMachineScheduler() const override {
return true;
@@ -881,12 +897,6 @@ public:
return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget without any kind of limitation.
- unsigned getMaxWavesPerEU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU();
- }
-
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
@@ -944,6 +954,14 @@ public:
return HasDPP;
}
+ bool hasDPPBroadcasts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
+ bool hasDPPWavefrontShifts() const {
+ return HasDPP && getGeneration() < GFX10;
+ }
+
bool hasDPP8() const {
return HasDPP8;
}
@@ -974,6 +992,10 @@ public:
return SGPRInitBug;
}
+ bool hasMFMAInlineLiteralBug() const {
+ return HasMFMAInlineLiteralBug;
+ }
+
bool has12DWordStoreHazard() const {
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
@@ -1036,6 +1058,13 @@ public:
/// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+ /// Return occupancy for the given function. Takes the LDS size and register
+ /// counts into account if provided.
+ /// Note, occupancy can be affected by the scratch allocation as well, but
+ /// we do not have enough information to compute it.
+ unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
+ unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
+
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
@@ -1226,9 +1255,7 @@ public:
return Gen;
}
- unsigned getStackAlignment() const {
- return 4;
- }
+ Align getStackAlignment() const { return Align(4); }
R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);