summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h172
1 files changed, 102 insertions, 70 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 19a240800ba14..c833bfbcf9366 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600FrameLowering.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
@@ -24,6 +25,7 @@
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -65,8 +67,8 @@ private:
protected:
bool Has16BitInsts;
bool HasMadMixInsts;
- bool FP32Denormals;
- bool FPExceptions;
+ bool HasMadMacF32Insts;
+ bool HasDsSrc2Insts;
bool HasSDWA;
bool HasVOP3PInsts;
bool HasMulI24;
@@ -77,7 +79,7 @@ protected:
bool HasTrigReducedRange;
unsigned MaxWavesPerEU;
int LocalMemorySize;
- unsigned WavefrontSize;
+ char WavefrontSizeLog2;
public:
AMDGPUSubtarget(const Triple &TT);
@@ -140,6 +142,10 @@ public:
return isAmdHsaOS() || isMesaKernel(F);
}
+ bool isGCN() const {
+ return TargetTriple.getArch() == Triple::amdgcn;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
@@ -148,17 +154,12 @@ public:
return HasMadMixInsts;
}
- bool hasFP32Denormals(const Function &F) const {
- // FIXME: This should not be a property of the subtarget. This should be a
- // property with a default set by the calling convention which can be
- // overridden by attributes. For now, use the subtarget feature as a
- // placeholder attribute. The function arguments only purpose is to
- // discourage use without a function context until this is removed.
- return FP32Denormals;
+ bool hasMadMacF32Insts() const {
+ return HasMadMacF32Insts || !isGCN();
}
- bool hasFPExceptions() const {
- return FPExceptions;
+ bool hasDsSrc2Insts() const {
+ return HasDsSrc2Insts;
}
bool hasSDWA() const {
@@ -194,7 +195,11 @@ public:
}
unsigned getWavefrontSize() const {
- return WavefrontSize;
+ return 1 << WavefrontSizeLog2;
+ }
+
+ unsigned getWavefrontSizeLog2() const {
+ return WavefrontSizeLog2;
}
int getLocalMemorySize() const {
@@ -221,9 +226,10 @@ public:
/// \returns Maximum flat work group size supported by the subtarget.
virtual unsigned getMaxFlatWorkGroupSize() const = 0;
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ virtual unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
@@ -246,6 +252,13 @@ public:
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
+ /// \returns Corresponsing DWARF register number mapping flavour for the
+ /// \p WavefrontSize.
+ AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const {
+ return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
+ : AMDGPUDwarfFlavour::Wave64;
+ }
+
virtual ~AMDGPUSubtarget() {}
};
@@ -278,6 +291,7 @@ public:
private:
/// GlobalISel related APIs.
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
@@ -292,10 +306,10 @@ protected:
// Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
+ bool FastDenormalF32;
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP64FP16Denormals;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
@@ -325,6 +339,7 @@ protected:
bool GFX8Insts;
bool GFX9Insts;
bool GFX10Insts;
+ bool GFX10_3Insts;
bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -342,7 +357,10 @@ protected:
bool HasDPP;
bool HasDPP8;
bool HasR128A16;
+ bool HasGFX10A16;
+ bool HasG16;
bool HasNSAEncoding;
+ bool GFX10_BEncoding;
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
@@ -357,6 +375,8 @@ protected:
bool DoesNotSupportSRAMECC;
bool HasNoSdstCMPX;
bool HasVscnt;
+ bool HasGetWaveIdInst;
+ bool HasSMemTimeInst;
bool HasRegisterBanking;
bool HasVOP3Literal;
bool HasNoDataDepHazard;
@@ -426,6 +446,10 @@ public:
return CallLoweringInfo.get();
}
+ const InlineAsmLowering *getInlineAsmLowering() const override {
+ return InlineAsmLoweringInfo.get();
+ }
+
InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
@@ -453,10 +477,6 @@ public:
return (Generation)Gen;
}
- unsigned getWavefrontSizeLog2() const {
- return Log2_32(WavefrontSize);
- }
-
/// Return the number of high bits known to be zero fror a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
@@ -506,6 +526,10 @@ public:
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasFractBug() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
bool hasBFE() const {
return true;
}
@@ -587,6 +611,11 @@ public:
return getGeneration() <= SEA_ISLANDS;
}
+ /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
+ bool partialVCCWritesUpdateVCCZ() const {
+ return getGeneration() >= GFX10;
+ }
+
/// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
/// was written by a VALU instruction.
bool hasSMRDReadVALUDefHazard() const {
@@ -617,20 +646,6 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- /// Alias for hasFP64FP16Denormals
- bool hasFP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- /// Alias for hasFP64FP16Denormals
- bool hasFP64Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- bool hasFP64FP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
bool supportsMinMaxDenormModes() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -724,6 +739,18 @@ public:
return ScalarFlatScratchInsts;
}
+ bool hasGlobalAddTidInsts() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasAtomicCSub() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasMultiDwordFlatScratchAddressing() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasFlatSegmentOffsetBug() const {
return HasFlatSegmentOffsetBug;
}
@@ -853,6 +880,14 @@ public:
return HasVscnt;
}
+ bool hasGetWaveIdInst() const {
+ return HasGetWaveIdInst;
+ }
+
+ bool hasSMemTimeInst() const {
+ return HasSMemTimeInst;
+ }
+
bool hasRegisterBanking() const {
return HasRegisterBanking;
}
@@ -890,30 +925,6 @@ public:
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
- /// \returns Number of execution units per compute unit supported by the
- /// subtarget.
- unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(this);
- }
-
- /// \returns Maximum number of waves per compute unit supported by the
- /// subtarget without any kind of limitation.
- unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
- }
-
- /// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Number of waves per work group supported by the subtarget and
- /// limited by given \p FlatWorkGroupSize.
- unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
- }
-
// static wrappers
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
@@ -979,6 +990,14 @@ public:
return HasR128A16;
}
+ bool hasGFX10A16() const {
+ return HasGFX10A16;
+ }
+
+ bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
+
+ bool hasG16() const { return HasG16; }
+
bool hasOffset3fBug() const {
return HasOffset3fBug;
}
@@ -987,6 +1006,14 @@ public:
return HasNSAEncoding;
}
+ bool hasGFX10_BEncoding() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasGFX10_3Insts() const {
+ return GFX10_3Insts;
+ }
+
bool hasMadF16() const;
bool enableSIScheduler() const {
@@ -1059,6 +1086,8 @@ public:
return HasNSAtoVMEMBug;
}
+ bool hasHardClauses() const { return getGeneration() >= GFX10; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
@@ -1071,7 +1100,7 @@ public:
/// registers if provided.
/// Note, occupancy can be affected by the scratch allocation as well, but
/// we do not have enough information to compute it.
- unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
+ unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
/// \returns true if the flat_scratch register should be initialized with the
@@ -1178,7 +1207,7 @@ public:
const override;
bool isWave32() const {
- return WavefrontSize == 32;
+ return getWavefrontSize() == 32;
}
const TargetRegisterClass *getBoolRC() const {
@@ -1201,10 +1230,11 @@ public:
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
@@ -1213,7 +1243,8 @@ public:
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
}
- void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override;
+ void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
+ SDep &Dep) const override;
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -1338,10 +1369,11 @@ public:
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the