summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp172
1 files changed, 117 insertions, 55 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 7796176290108..80feaa44766f9 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -15,17 +15,15 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
-#endif
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
using namespace llvm;
@@ -50,14 +48,27 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
+
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+ FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+
+ // FIXME: I don't think Evergreen has any useful support for
+ // denormals, but should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ FullFS += "+fp64-fp16-denormals,";
+ } else {
+ FullFS += "-fp32-denormals,";
+ }
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
+ // We don't support FP64 for EG/NI at the moment.
+ assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
+
// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
// on VI and newer hardware to avoid assertion failures due to missing ADDR64
// variants of MUBUF instructions.
@@ -65,45 +76,24 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
FlatForGlobal = true;
}
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere
- // if someone tries to enable these?
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP64FP16Denormals = false;
- FP32Denormals = false;
- }
-
// Set defaults if needed.
if (MaxPrivateElementSize == 0)
MaxPrivateElementSize = 4;
- return *this;
-}
+ if (LDSBankCount == 0)
+ LDSBankCount = 32;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
+ if (TT.getArch() == Triple::amdgcn) {
+ if (LocalMemorySize == 0)
+ LocalMemorySize = 32768;
-struct SIGISelActualAccessor : public GISelAccessor {
- std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- const AMDGPUCallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
+ // Do something sensible for unspecified target.
+ if (!HasMovrel && !HasVGPRIndexMode)
+ HasMovrel = true;
}
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-} // end anonymous namespace
-#endif
+ return *this;
+}
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM)
@@ -111,7 +101,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
TargetTriple(TT),
Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
IsaVersion(ISAVersion0_0_0),
- WavefrontSize(64),
+ WavefrontSize(0),
LocalMemorySize(0),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -125,6 +115,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DX10Clamp(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
+ CodeObjectV3(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
@@ -135,6 +126,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
+ EnableHugePrivateBuffer(false),
EnableVGPRSpilling(false),
EnablePromoteAlloca(false),
EnableLoadStoreOpt(false),
@@ -143,15 +135,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DumpCode(false),
FP64(false),
+ FMA(false),
IsGCN(false),
- GCN1Encoding(false),
GCN3Encoding(false),
CIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasIntClamp(false),
HasVOP3PInsts(false),
+ HasMadMixInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
@@ -167,6 +161,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
+ AddNoCarryInsts(false),
R600ALUInst(false),
CaymanISA(false),
@@ -203,14 +198,31 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
return NumWaves;
}
+std::pair<unsigned, unsigned>
+AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ return std::make_pair(1, getWavefrontSize());
+ default:
+ return std::make_pair(1, 16 * getWavefrontSize());
+ }
+}
+
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
const Function &F) const {
+ // FIXME: 1024 if function.
// Default minimum/maximum flat work group sizes.
std::pair<unsigned, unsigned> Default =
- AMDGPU::isCompute(F.getCallingConv()) ?
- std::pair<unsigned, unsigned>(getWavefrontSize() * 2,
- getWavefrontSize() * 4) :
- std::pair<unsigned, unsigned>(1, getWavefrontSize());
+ getDefaultFlatWorkGroupSize(F.getCallingConv());
// TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
// starts using "amdgpu-flat-work-group-size" attribute.
@@ -357,18 +369,12 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
: AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
TLInfo(TM, *this) {
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
- GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
- GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+ CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+ Legalizer.reset(new AMDGPULegalizerInfo());
- GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
- GISel->InstSelector.reset(new AMDGPUInstructionSelector(
- *this, *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
-#endif
- setGISelAccessor(*GISel);
+ RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ InstSelector.reset(new AMDGPUInstructionSelector(
+ *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}
void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
@@ -462,7 +468,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
}
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of SGPRs function can use using default/requested
@@ -512,7 +518,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
}
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
// Compute maximum number of VGPRs function can use using default/requested
@@ -544,3 +550,59 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return MaxNumVGPRs - getReservedNumVGPRs(MF);
}
+
+namespace {
+struct MemOpClusterMutation : ScheduleDAGMutation {
+ const SIInstrInfo *TII;
+
+ MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ SUnit *SUa = nullptr;
+ // Search for two consecutive memory operations and link them
+ // to prevent the scheduler from moving them apart.
+ // During DAG pre-processing, SUnits are still in the original order of
+ // the instructions before scheduling.
+ for (SUnit &SU : DAG->SUnits) {
+ MachineInstr &MI2 = *SU.getInstr();
+ if (!MI2.mayLoad() && !MI2.mayStore()) {
+ SUa = nullptr;
+ continue;
+ }
+ if (!SUa) {
+ SUa = &SU;
+ continue;
+ }
+
+ MachineInstr &MI1 = *SUa->getInstr();
+ if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
+ (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
+ (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
+ (TII->isDS(MI1) && TII->isDS(MI2))) {
+ SU.addPredBarrier(SUa);
+
+ for (const SDep &SI : SU.Preds) {
+ if (SI.getSUnit() != SUa)
+ SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
+ }
+
+ if (&SU != &DAG->ExitSU) {
+ for (const SDep &SI : SUa->Succs) {
+ if (SI.getSUnit() != &SU)
+ SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
+ }
+ }
+ }
+
+ SUa = &SU;
+ }
+ }
+};
+} // namespace
+
+void SISubtarget::getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
+}