diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
| -rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 172 |
1 files changed, 117 insertions, 55 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 7796176290108..80feaa44766f9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -15,17 +15,15 @@ #include "AMDGPUSubtarget.h" #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL #include "AMDGPUCallLowering.h" #include "AMDGPUInstructionSelector.h" #include "AMDGPULegalizerInfo.h" #include "AMDGPURegisterBankInfo.h" -#endif #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <algorithm> using namespace llvm; @@ -50,14 +48,27 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,"); + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,"); + if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + + // FIXME: I don't think think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + FullFS += "+fp64-fp16-denormals,"; + } else { + FullFS += "-fp32-denormals,"; + } FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); + // We don't support FP64 for EG/NI atm. + assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es // on VI and newer hardware to avoid assertion failures due to missing ADDR64 // variants of MUBUF instructions. @@ -65,45 +76,24 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, FlatForGlobal = true; } - // FIXME: I don't think think Evergreen has any useful support for - // denormals, but should be checked. Should we issue a warning somewhere - // if someone tries to enable these? - if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - FP64FP16Denormals = false; - FP32Denormals = false; - } - // Set defaults if needed. if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; - return *this; -} + if (LDSBankCount == 0) + LDSBankCount = 32; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { + if (TT.getArch() == Triple::amdgcn) { + if (LocalMemorySize == 0) + LocalMemorySize = 32768; -struct SIGISelActualAccessor : public GISelAccessor { - std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - const AMDGPUCallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); + // Do something sensible for unspecified target. + if (!HasMovrel && !HasVGPRIndexMode) + HasMovrel = true; } - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; -} // end anonymous namespace -#endif + return *this; +} AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) @@ -111,7 +101,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, TargetTriple(TT), Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), IsaVersion(ISAVersion0_0_0), - WavefrontSize(64), + WavefrontSize(0), LocalMemorySize(0), LDSBankCount(0), MaxPrivateElementSize(0), @@ -125,6 +115,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DX10Clamp(false), FlatForGlobal(false), AutoWaitcntBeforeBarrier(false), + CodeObjectV3(false), UnalignedScratchAccess(false), UnalignedBufferAccess(false), @@ -135,6 +126,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DebuggerReserveRegs(false), DebuggerEmitPrologue(false), + EnableHugePrivateBuffer(false), EnableVGPRSpilling(false), EnablePromoteAlloca(false), EnableLoadStoreOpt(false), @@ -143,15 +135,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DumpCode(false), FP64(false), + FMA(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), Has16BitInsts(false), + HasIntClamp(false), HasVOP3PInsts(false), + HasMadMixInsts(false), HasMovrel(false), HasVGPRIndexMode(false), HasScalarStores(false), @@ -167,6 +161,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FlatInstOffsets(false), FlatGlobalInsts(false), FlatScratchInsts(false), + AddNoCarryInsts(false), R600ALUInst(false), CaymanISA(false), @@ -203,14 +198,31 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, return NumWaves; } +std::pair<unsigned, unsigned> +AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { + switch (CC) { + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4); + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_LS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + return std::make_pair(1, getWavefrontSize()); + default: + return std::make_pair(1, 16 * getWavefrontSize()); + } +} + std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes( const Function &F) const { + // FIXME: 1024 if function. // Default minimum/maximum flat work group sizes. std::pair<unsigned, unsigned> Default = - AMDGPU::isCompute(F.getCallingConv()) ? - std::pair<unsigned, unsigned>(getWavefrontSize() * 2, - getWavefrontSize() * 4) : - std::pair<unsigned, unsigned>(1, getWavefrontSize()); + getDefaultFlatWorkGroupSize(F.getCallingConv()); // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa // starts using "amdgpu-flat-work-group-size" attribute. @@ -357,18 +369,12 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), TLInfo(TM, *this) { -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); - GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); - GISel->Legalizer.reset(new AMDGPULegalizerInfo()); + CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); + Legalizer.reset(new AMDGPULegalizerInfo()); - GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); - GISel->InstSelector.reset(new AMDGPUInstructionSelector( - *this, *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get()))); -#endif - setGISelAccessor(*GISel); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + InstSelector.reset(new AMDGPUInstructionSelector( + *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()))); } void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, @@ -462,7 +468,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { } unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); // Compute maximum number of SGPRs function can use using default/requested @@ -512,7 +518,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { } unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); // Compute maximum number of VGPRs function can use using default/requested @@ -544,3 +550,59 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { return MaxNumVGPRs - getReservedNumVGPRs(MF); } + +namespace { +struct MemOpClusterMutation : ScheduleDAGMutation { + const SIInstrInfo *TII; + + MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {} + + void apply(ScheduleDAGInstrs *DAGInstrs) override { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); + + SUnit *SUa = nullptr; + // Search for two consequent memory operations and link them + // to prevent scheduler from moving them apart. + // In DAG pre-process SUnits are in the original order of + // the instructions before scheduling. + for (SUnit &SU : DAG->SUnits) { + MachineInstr &MI2 = *SU.getInstr(); + if (!MI2.mayLoad() && !MI2.mayStore()) { + SUa = nullptr; + continue; + } + if (!SUa) { + SUa = &SU; + continue; + } + + MachineInstr &MI1 = *SUa->getInstr(); + if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) || + (TII->isFLAT(MI1) && TII->isFLAT(MI2)) || + (TII->isSMRD(MI1) && TII->isSMRD(MI2)) || + (TII->isDS(MI1) && TII->isDS(MI2))) { + SU.addPredBarrier(SUa); + + for (const SDep &SI : SU.Preds) { + if (SI.getSUnit() != SUa) + SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial)); + } + + if (&SU != &DAG->ExitSU) { + for (const SDep &SI : SUa->Succs) { + if (SI.getSUnit() != &SU) + SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial)); + } + } + } + + SUa = &SU; + } + } +}; +} // namespace + +void SISubtarget::getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo)); +} |
