diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 220 |
1 file changed, 153 insertions, 67 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 80feaa44766f..98b49070fa99 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief Implements the AMDGPU specific subclass of TargetSubtarget. +/// Implements the AMDGPU specific subclass of TargetSubtarget. // //===----------------------------------------------------------------------===// @@ -20,8 +20,10 @@ #include "AMDGPULegalizerInfo.h" #include "AMDGPURegisterBankInfo.h" #include "SIMachineFunctionInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/IR/MDBuilder.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include <algorithm> @@ -32,12 +34,37 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR +#define AMDGPUSubtarget GCNSubtarget #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#undef AMDGPUSubtarget +#include "R600GenSubtargetInfo.inc" -AMDGPUSubtarget::~AMDGPUSubtarget() = default; +GCNSubtarget::~GCNSubtarget() = default; + +R600Subtarget & +R600Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS) { + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,"); + FullFS += FS; + ParseSubtargetFeatures(GPU, FullFS); + + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? 
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { + FP32Denormals = false; + } + + HasMulU24 = getGeneration() >= EVERGREEN; + HasMulI24 = hasCaymanISA(); + + return *this; +} -AMDGPUSubtarget & -AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, +GCNSubtarget & +GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { // Determine default and user-specified characteristics // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be @@ -92,26 +119,43 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, HasMovrel = true; } + HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + return *this; } -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) - : AMDGPUGenSubtargetInfo(TT, GPU, FS), +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, + const FeatureBitset &FeatureBits) : + TargetTriple(TT), + SubtargetFeatureBits(FeatureBits), + Has16BitInsts(false), + HasMadMixInsts(false), + FP32Denormals(false), + FPExceptions(false), + HasSDWA(false), + HasVOP3PInsts(false), + HasMulI24(true), + HasMulU24(true), + HasFminFmaxLegacy(true), + EnablePromoteAlloca(false), + LocalMemorySize(0), + WavefrontSize(0) + { } + +GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, + const GCNTargetMachine &TM) : + AMDGPUGenSubtargetInfo(TT, GPU, FS), + AMDGPUSubtarget(TT, getFeatureBits()), TargetTriple(TT), - Gen(TT.getArch() == Triple::amdgcn ? 
SOUTHERN_ISLANDS : R600), + Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), - WavefrontSize(0), - LocalMemorySize(0), LDSBankCount(0), MaxPrivateElementSize(0), FastFMAF32(false), HalfRate64Ops(false), - FP32Denormals(false), FP64FP16Denormals(false), - FPExceptions(false), DX10Clamp(false), FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), @@ -123,57 +167,56 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, EnableXNACK(false), TrapHandler(false), DebuggerInsertNops(false), - DebuggerReserveRegs(false), DebuggerEmitPrologue(false), EnableHugePrivateBuffer(false), EnableVGPRSpilling(false), - EnablePromoteAlloca(false), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), EnableSIScheduler(false), + EnableDS128(false), DumpCode(false), FP64(false), - FMA(false), - IsGCN(false), GCN3Encoding(false), CIInsts(false), GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), - Has16BitInsts(false), HasIntClamp(false), - HasVOP3PInsts(false), - HasMadMixInsts(false), + HasFmaMixInsts(false), HasMovrel(false), HasVGPRIndexMode(false), HasScalarStores(false), + HasScalarAtomics(false), HasInv2PiInlineImm(false), - HasSDWA(false), HasSDWAOmod(false), HasSDWAScalar(false), HasSDWASdst(false), HasSDWAMac(false), HasSDWAOutModsVOPC(false), HasDPP(false), + HasDLInsts(false), + D16PreservesUnusedBits(false), FlatAddressSpace(false), FlatInstOffsets(false), FlatGlobalInsts(false), FlatScratchInsts(false), AddNoCarryInsts(false), + HasUnpackedD16VMem(false), - R600ALUInst(false), - CaymanISA(false), - CFALUBug(false), - HasVertexCache(false), - TexVTXClauseSize(0), ScalarizeGlobal(false), FeatureDisable(false), - InstrItins(getInstrItineraryForCPU(GPU)) { + InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), + TLInfo(TM, *this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { AS = AMDGPU::getAMDGPUAS(TT); - initializeSubtargetDependencies(TT, GPU, FS); + CallLoweringInfo.reset(new 
AMDGPUCallLowering(*getTargetLowering())); + Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + InstSelector.reset(new AMDGPUInstructionSelector( + *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM)); } unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, @@ -198,6 +241,12 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, return NumWaves; } +unsigned +AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const { + const auto *MFI = MF.getInfo<SIMachineFunctionInfo>(); + return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction()); +} + std::pair<unsigned, unsigned> AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { switch (CC) { @@ -357,27 +406,64 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { return true; } -R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) : - AMDGPUSubtarget(TT, GPU, FS, TM), - InstrInfo(*this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) {} +uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, + unsigned &MaxAlign) const { + assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || + F.getCallingConv() == CallingConv::SPIR_KERNEL); -SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) - : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) { - CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); - Legalizer.reset(new AMDGPULegalizerInfo()); + const DataLayout &DL = F.getParent()->getDataLayout(); + uint64_t ExplicitArgBytes = 0; + MaxAlign = 1; - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); - InstSelector.reset(new 
AMDGPUInstructionSelector( - *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()))); + for (const Argument &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + + unsigned Align = DL.getABITypeAlignment(ArgTy); + uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); + ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize; + MaxAlign = std::max(MaxAlign, Align); + } + + return ExplicitArgBytes; } -void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, +unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, + unsigned &MaxAlign) const { + uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign); + + unsigned ExplicitOffset = getExplicitKernelArgOffset(F); + + uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes; + unsigned ImplicitBytes = getImplicitArgNumBytes(F); + if (ImplicitBytes != 0) { + unsigned Alignment = getAlignmentForImplicitArgPtr(); + TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; + } + + // Being able to dereference past the end is useful for emitting scalar loads. 
+ return alignTo(TotalSize, 4); +} + +R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) : + R600GenSubtargetInfo(TT, GPU, FS), + AMDGPUSubtarget(TT, getFeatureBits()), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + FMA(false), + CaymanISA(false), + CFALUBug(false), + DX10Clamp(false), + HasVertexCache(false), + R600ALUInst(false), + FP64(false), + TexVTXClauseSize(0), + Gen(R600), + TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), + InstrItins(getInstrItineraryForCPU(GPU)), + AS (AMDGPU::getAMDGPUAS(TT)) { } + +void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { // Track register pressure so the scheduler can try to decrease // pressure once register usage is above the threshold defined by @@ -394,22 +480,12 @@ void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.ShouldTrackLaneMasks = true; } -bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { +bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, - unsigned ExplicitArgBytes) const { - unsigned ImplicitBytes = getImplicitArgNumBytes(MF); - if (ImplicitBytes == 0) - return ExplicitArgBytes; - - unsigned Alignment = getAlignmentForImplicitArgPtr(); - return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; -} - -unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { - if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { +unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { + if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { if (SGPRs <= 80) return 10; if (SGPRs <= 88) @@ -431,7 +507,7 @@ unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { return 5; } -unsigned 
SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { +unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { if (VGPRs <= 24) return 10; if (VGPRs <= 28) @@ -453,7 +529,7 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { return 1; } -unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { +unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); if (MFI.hasFlatScratchInit()) { if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) @@ -467,7 +543,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { return 2; // VCC. } -unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { +unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); @@ -517,7 +593,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { MaxAddressableNumSGPRs); } -unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { +unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); @@ -532,10 +608,6 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { unsigned Requested = AMDGPU::getIntegerAttribute( F, "amdgpu-num-vgpr", MaxNumVGPRs); - // Make sure requested value does not violate subtarget's specifications. - if (Requested && Requested <= getReservedNumVGPRs(MF)) - Requested = 0; - // Make sure requested value is compatible with values implied by // default/requested minimum/maximum number of waves per execution unit. 
if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) @@ -548,7 +620,7 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { MaxNumVGPRs = Requested; } - return MaxNumVGPRs - getReservedNumVGPRs(MF); + return MaxNumVGPRs; } namespace { @@ -602,7 +674,21 @@ struct MemOpClusterMutation : ScheduleDAGMutation { }; } // namespace -void SISubtarget::getPostRAMutations( +void GCNSubtarget::getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo)); } + +const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) { + if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn) + return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>()); + else + return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>()); +} + +const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) { + if (TM.getTargetTriple().getArch() == Triple::amdgcn) + return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F)); + else + return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F)); +} |