summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h127
1 files changed, 101 insertions, 26 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index d4b6a5fe8020b..cf4a691d4b58b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
+#include "AMDGPUCallLowering.h"
#include "R600FrameLowering.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
@@ -25,7 +26,9 @@
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
@@ -63,16 +66,14 @@ public:
ISAVersion7_0_1,
ISAVersion7_0_2,
ISAVersion7_0_3,
+ ISAVersion7_0_4,
ISAVersion8_0_0,
ISAVersion8_0_1,
ISAVersion8_0_2,
ISAVersion8_0_3,
- ISAVersion8_0_4,
ISAVersion8_1_0,
ISAVersion9_0_0,
- ISAVersion9_0_1,
- ISAVersion9_0_2,
- ISAVersion9_0_3
+ ISAVersion9_0_2
};
enum TrapHandlerAbi {
@@ -116,6 +117,7 @@ protected:
bool DX10Clamp;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
+ bool CodeObjectV3;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool HasApertureRegs;
@@ -126,6 +128,7 @@ protected:
bool DebuggerEmitPrologue;
// Used as options.
+ bool EnableHugePrivateBuffer;
bool EnableVGPRSpilling;
bool EnablePromoteAlloca;
bool EnableLoadStoreOpt;
@@ -135,15 +138,17 @@ protected:
// Subtarget statically properties set by tablegen
bool FP64;
+ bool FMA;
bool IsGCN;
- bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasIntClamp;
bool HasVOP3PInsts;
+ bool HasMadMixInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
@@ -159,6 +164,7 @@ protected:
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
+ bool AddNoCarryInsts;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
@@ -210,6 +216,10 @@ public:
TargetTriple.getEnvironmentName() == "amdgizcl";
}
+ bool isAmdPalOS() const {
+ return TargetTriple.getOS() == Triple::AMDPAL;
+ }
+
Generation getGeneration() const {
return Gen;
}
@@ -218,6 +228,10 @@ public:
return WavefrontSize;
}
+ unsigned getWavefrontSizeLog2() const {
+ return Log2_32(WavefrontSize);
+ }
+
int getLocalMemorySize() const {
return LocalMemorySize;
}
@@ -238,11 +252,15 @@ public:
return Has16BitInsts;
}
+ bool hasIntClamp() const {
+ return HasIntClamp;
+ }
+
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
- bool hasHWFP64() const {
+ bool hasFP64() const {
return FP64;
}
@@ -305,6 +323,18 @@ public:
return getGeneration() >= GFX9;
}
+ bool hasMadMixInsts() const {
+ return HasMadMixInsts;
+ }
+
+ bool hasSBufferLoadStoreAtomicDwordxN() const {
+ // Only use the "x1" variants on GFX9 or don't use the buffer variants.
+ // For x2 and higher variants, if the accessed region spans 2 VM pages and
+ // the second page is unmapped, the hw hangs.
+ // TODO: There is one future GFX9 chip that doesn't have this bug.
+ return getGeneration() != GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
@@ -317,10 +347,18 @@ public:
return CaymanISA;
}
+ bool hasFMA() const {
+ return FMA;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
+ bool enableHugePrivateBuffer() const {
+ return EnableHugePrivateBuffer;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -344,7 +382,7 @@ public:
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
- return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
+ return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}
bool hasFP16Denormals() const {
@@ -372,17 +410,27 @@ public:
}
bool enableIEEEBit(const MachineFunction &MF) const {
- return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+ return AMDGPU::isCompute(MF.getFunction().getCallingConv());
}
bool useFlatForGlobal() const {
return FlatForGlobal;
}
+ /// \returns True if MUBUF instructions always perform range checking, even
+ /// for buffer resources used for private memory access.
+ bool privateMemoryResourceIsRangeChecked() const {
+ return getGeneration() < AMDGPUSubtarget::GFX9;
+ }
+
bool hasAutoWaitcntBeforeBarrier() const {
return AutoWaitcntBeforeBarrier;
}
+ bool hasCodeObjectV3() const {
+ return CodeObjectV3;
+ }
+
bool hasUnalignedBufferAccess() const {
return UnalignedBufferAccess;
}
@@ -419,19 +467,37 @@ public:
return FlatScratchInsts;
}
+ bool hasD16LoadStore() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// Return true if most LDS instructions have an m0 use that requires m0 to
+ /// be initialized.
+ bool ldsRequiresM0Init() const {
+ return getGeneration() < GFX9;
+ }
+
+ bool hasAddNoCarry() const {
+ return AddNoCarryInsts;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
- return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction().getCallingConv());
}
// Covers VS/PS/CS graphics shaders
bool isMesaGfxShader(const MachineFunction &MF) const {
- return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ return isMesa3DOS() && AMDGPU::isShader(MF.getFunction().getCallingConv());
}
bool isAmdCodeObjectV2(const MachineFunction &MF) const {
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasMad64_32() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
+
bool hasFminFmaxLegacy() const {
return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
@@ -558,6 +624,9 @@ public:
FlatWorkGroupSize);
}
+ /// \returns Default range of flat work group sizes for a calling convention.
+ std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -626,7 +695,12 @@ private:
SIInstrInfo InstrInfo;
SIFrameLowering FrameLowering;
SITargetLowering TLInfo;
- std::unique_ptr<GISelAccessor> GISel;
+
+ /// GlobalISel related APIs.
+ std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
public:
SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -645,33 +719,25 @@ public:
}
const CallLowering *getCallLowering() const override {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getCallLowering();
+ return CallLoweringInfo.get();
}
const InstructionSelector *getInstructionSelector() const override {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getInstructionSelector();
+ return InstSelector.get();
}
const LegalizerInfo *getLegalizerInfo() const override {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getLegalizerInfo();
+ return Legalizer.get();
}
const RegisterBankInfo *getRegBankInfo() const override {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getRegBankInfo();
+ return RegBankInfo.get();
}
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- void setGISelAccessor(GISelAccessor &GISel) {
- this->GISel.reset(&GISel);
- }
-
// XXX - Why is this here if it isn't in the default pass set?
bool enableEarlyIfConversion() const override {
return true;
@@ -755,11 +821,16 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool hasReadM0Hazard() const {
+ bool hasReadM0MovRelInterpHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+ bool hasReadM0SendMsgHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ }
+
+ unsigned getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
@@ -865,6 +936,10 @@ public:
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+
+ void getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
+ const override;
};
} // end namespace llvm