about summary refs log tree commit diff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-04 19:20:19 +0000
committerDimitry Andric <dim@FreeBSD.org>2023-02-08 19:02:26 +0000
commit81ad626541db97eb356e2c1d4a20eb2a26a766ab (patch)
tree311b6a8987c32b1e1dcbab65c54cfac3fdb56175 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent5fff09660e06a66bed6482da9c70df328e16bbb6 (diff)
parent145449b1e420787bb99721a429341fa6be3adfb6 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 88
1 files changed, 79 insertions, 9 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a2c61f9da8da..1c6b9d35695a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUExportClustering.h"
+#include "AMDGPUIGroupLP.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
@@ -27,6 +28,7 @@
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -56,6 +58,7 @@
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace {
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
@@ -269,12 +272,22 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// Enable GFX11+ s_delay_alu insertion
+static cl::opt<bool>
+ EnableInsertDelayAlu("amdgpu-enable-delay-alu",
+ cl::desc("Enable s_delay_alu insertion"),
+ cl::init(true), cl::Hidden);
+
// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
cl::init(true), cl::Hidden,
cl::desc("Enable machine DCE inside regalloc"));
+static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
+ cl::desc("Adjust wave priority"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> EnableScalarIRPasses(
"amdgpu-scalar-ir-passes",
cl::desc("Enable scalar IR passes"),
@@ -330,7 +343,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSIOptimizeVGPRLiveRangePass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
- initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUCtorDtorLoweringPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAttributorPass(*PR);
@@ -357,6 +369,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeAMDGPUReleaseVGPRsPass(*PR);
+ initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
@@ -390,9 +404,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createSchedBarrierDAGMutation());
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -400,9 +419,12 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -413,9 +435,12 @@ static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_ILP);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;
}
@@ -801,6 +826,23 @@ AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);
}
+unsigned
+AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
+ switch (Kind) {
+ case PseudoSourceValue::Stack:
+ case PseudoSourceValue::FixedStack:
+ return AMDGPUAS::PRIVATE_ADDRESS;
+ case PseudoSourceValue::ConstantPool:
+ case PseudoSourceValue::GOT:
+ case PseudoSourceValue::JumpTable:
+ case PseudoSourceValue::GlobalValueCallEntry:
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ case PseudoSourceValue::TargetCustom:
+ return AMDGPUAS::CONSTANT_ADDRESS;
+ }
+ return AMDGPUAS::FLAT_ADDRESS;
+}
+
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
@@ -836,7 +878,7 @@ GCNTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-GCNTargetMachine::getTargetTransformInfo(const Function &F) {
+GCNTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(GCNTTIImpl(this, F));
}
@@ -873,7 +915,11 @@ public:
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
+ DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createSchedBarrierDAGMutation());
return DAG;
}
@@ -953,10 +999,6 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPUPrintfRuntimeBinding());
addPass(createAMDGPUCtorDtorLoweringPass());
- // This must occur before inlining, as the inliner will not look through
- // bitcast calls.
- addPass(createAMDGPUFixFunctionBitcastsPass());
-
// A call to propagate attributes pass in the backend in case opt was not run.
addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
@@ -967,7 +1009,7 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAlwaysInlinerLegacyPass());
// We need to add the barrier noop pass, otherwise adding the function
// inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a nodule with two
+ // one function at a time, which means if we have a module with two
// functions, then we will generate code for the first function
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
@@ -1079,8 +1121,11 @@ bool AMDGPUPassConfig::addGCPasses() {
llvm::ScheduleDAGInstrs *
AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -1363,6 +1408,8 @@ void GCNPassConfig::addPreEmitPass() {
addPass(&SIInsertHardClausesID);
addPass(&SILateBranchLoweringPassID);
+ if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
+ addPass(createAMDGPUSetWavePriorityPass());
if (getOptLevel() > CodeGenOpt::None)
addPass(&SIPreEmitPeepholeID);
// The hazard recognizer that runs as part of the post-ra scheduler does not
@@ -1374,6 +1421,13 @@ void GCNPassConfig::addPreEmitPass() {
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
addPass(&PostRAHazardRecognizerID);
+
+ if (getOptLevel() > CodeGenOpt::Less)
+ addPass(&AMDGPUReleaseVGPRsID);
+
+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
+ addPass(&AMDGPUInsertDelayAluID);
+
addPass(&BranchRelaxationPassID);
}
@@ -1396,7 +1450,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange) const {
const yaml::SIMachineFunctionInfo &YamlMFI =
- reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
+ static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
MachineFunction &MF = PFS.MF;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1420,6 +1474,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return false;
};
+ auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
+ Register &RegVal) {
+ return !RegName.Value.empty() && parseRegister(RegName, RegVal);
+ };
+
+ if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
+ return true;
+
auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
// Create a diagnostic for a the register string literal.
const MemoryBuffer &Buffer =
@@ -1452,6 +1514,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
}
+ for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
+ Register ParsedReg;
+ if (parseRegister(YamlReg, ParsedReg))
+ return true;
+
+ MFI->reserveWWMRegister(ParsedReg);
+ }
+
auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
const TargetRegisterClass &RC,
ArgDescriptor &Arg, unsigned UserSGPRs,
@@ -1473,7 +1543,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
Arg = ArgDescriptor::createStack(A->StackOffset);
// Check and apply the optional mask.
if (A->Mask)
- Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
+ Arg = ArgDescriptor::createArg(Arg, *A->Mask);
MFI->NumUserSGPRs += UserSGPRs;
MFI->NumSystemSGPRs += SystemSGPRs;