src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2022-07-04 19:20:19 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2023-02-08 19:02:26 +0000
commit	81ad626541db97eb356e2c1d4a20eb2a26a766ab (patch)
tree	311b6a8987c32b1e1dcbab65c54cfac3fdb56175 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent	5fff09660e06a66bed6482da9c70df328e16bbb6 (diff)
parent	145449b1e420787bb99721a429341fa6be3adfb6 (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

1 files changed, 79 insertions, 9 deletions

diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a2c61f9da8da..1c6b9d35695a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

@@ -16,6 +16,7 @@

#include "AMDGPU.h"

#include "AMDGPUAliasAnalysis.h"

#include "AMDGPUExportClustering.h"

+#include "AMDGPUIGroupLP.h"

#include "AMDGPUMacroFusion.h"

#include "AMDGPUTargetObjectFile.h"

#include "AMDGPUTargetTransformInfo.h"

@@ -27,6 +28,7 @@

#include "SIMachineScheduler.h"

#include "TargetInfo/AMDGPUTargetInfo.h"

#include "llvm/Analysis/CGSCCPassManager.h"

+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"

#include "llvm/CodeGen/GlobalISel/IRTranslator.h"

#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"

#include "llvm/CodeGen/GlobalISel/Legalizer.h"

@@ -56,6 +58,7 @@

#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

+using namespace llvm::PatternMatch;

namespace {

class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {

@@ -269,12 +272,22 @@ static cl::opt<bool> EnableSIModeRegisterPass(

cl::init(true),

cl::Hidden);

+// Enable GFX11+ s_delay_alu insertion

+static cl::opt<bool>

+ EnableInsertDelayAlu("amdgpu-enable-delay-alu",

+ cl::desc("Enable s_delay_alu insertion"),

+ cl::init(true), cl::Hidden);

// Option is used in lit tests to prevent deadcoding of patterns inspected.

static cl::opt<bool>

EnableDCEInRA("amdgpu-dce-in-ra",

cl::init(true), cl::Hidden,

cl::desc("Enable machine DCE inside regalloc"));

+static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",

+ cl::desc("Adjust wave priority"),

+ cl::init(false), cl::Hidden);

static cl::opt<bool> EnableScalarIRPasses(

"amdgpu-scalar-ir-passes",

cl::desc("Enable scalar IR passes"),

@@ -330,7 +343,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {

initializeSIOptimizeExecMaskingPreRAPass(*PR);

initializeSIOptimizeVGPRLiveRangePass(*PR);

initializeSILoadStoreOptimizerPass(*PR);

- initializeAMDGPUFixFunctionBitcastsPass(*PR);

initializeAMDGPUCtorDtorLoweringPass(*PR);

initializeAMDGPUAlwaysInlinePass(*PR);

initializeAMDGPUAttributorPass(*PR);

@@ -357,6 +369,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {

initializeAMDGPURewriteOutArgumentsPass(*PR);

initializeAMDGPUUnifyMetadataPass(*PR);

initializeSIAnnotateControlFlowPass(*PR);

+ initializeAMDGPUReleaseVGPRsPass(*PR);

+ initializeAMDGPUInsertDelayAluPass(*PR);

initializeSIInsertHardClausesPass(*PR);

initializeSIInsertWaitcntsPass(*PR);

initializeSIModeRegisterPass(*PR);

@@ -390,9 +404,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {

static ScheduleDAGInstrs *

createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {

+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();

ScheduleDAGMILive *DAG =

new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));

DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

+ if (ST.shouldClusterStores())

+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));

+ DAG->addMutation(createIGroupLPDAGMutation());

+ DAG->addMutation(createSchedBarrierDAGMutation());

DAG->addMutation(createAMDGPUMacroFusionDAGMutation());

DAG->addMutation(createAMDGPUExportClusteringDAGMutation());

return DAG;

@@ -400,9 +419,12 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {

static ScheduleDAGInstrs *

createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {

+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();

auto DAG = new GCNIterativeScheduler(C,

GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);

DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

+ if (ST.shouldClusterStores())

+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));

return DAG;

}

@@ -413,9 +435,12 @@ static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {

static ScheduleDAGInstrs *

createIterativeILPMachineScheduler(MachineSchedContext *C) {

+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();

auto DAG = new GCNIterativeScheduler(C,

GCNIterativeScheduler::SCHEDULE_ILP);

DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

+ if (ST.shouldClusterStores())

+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));

DAG->addMutation(createAMDGPUMacroFusionDAGMutation());

return DAG;

}

@@ -801,6 +826,23 @@ AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {

return std::make_pair(nullptr, -1);

}

+unsigned

+AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {

+ switch (Kind) {

+ case PseudoSourceValue::Stack:

+ case PseudoSourceValue::FixedStack:

+ return AMDGPUAS::PRIVATE_ADDRESS;

+ case PseudoSourceValue::ConstantPool:

+ case PseudoSourceValue::GOT:

+ case PseudoSourceValue::JumpTable:

+ case PseudoSourceValue::GlobalValueCallEntry:

+ case PseudoSourceValue::ExternalSymbolCallEntry:

+ case PseudoSourceValue::TargetCustom:

+ return AMDGPUAS::CONSTANT_ADDRESS;

+ }

+ return AMDGPUAS::FLAT_ADDRESS;

+}

//===----------------------------------------------------------------------===//

// GCN Target Machine (SI+)

//===----------------------------------------------------------------------===//

@@ -836,7 +878,7 @@ GCNTargetMachine::getSubtargetImpl(const Function &F) const {

}

TargetTransformInfo

-GCNTargetMachine::getTargetTransformInfo(const Function &F) {

+GCNTargetMachine::getTargetTransformInfo(const Function &F) const {

return TargetTransformInfo(GCNTTIImpl(this, F));

}

@@ -873,7 +915,11 @@ public:

ScheduleDAGMI *DAG = createGenericSchedPostRA(C);

const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();

DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

+ if (ST.shouldClusterStores())

+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));

DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));

+ DAG->addMutation(createIGroupLPDAGMutation());

+ DAG->addMutation(createSchedBarrierDAGMutation());

return DAG;

}

@@ -953,10 +999,6 @@ void AMDGPUPassConfig::addIRPasses() {

addPass(createAMDGPUPrintfRuntimeBinding());

addPass(createAMDGPUCtorDtorLoweringPass());

- // This must occur before inlining, as the inliner will not look through

- // bitcast calls.

- addPass(createAMDGPUFixFunctionBitcastsPass());

// A call to propagate attributes pass in the backend in case opt was not run.

addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));

@@ -967,7 +1009,7 @@ void AMDGPUPassConfig::addIRPasses() {

addPass(createAlwaysInlinerLegacyPass());

// We need to add the barrier noop pass, otherwise adding the function

// inlining pass will cause all of the PassConfigs passes to be run

- // one function at a time, which means if we have a nodule with two

+ // one function at a time, which means if we have a module with two

// functions, then we will generate code for the first function

// without ever running any passes on the second.

addPass(createBarrierNoopPass());

@@ -1079,8 +1121,11 @@ bool AMDGPUPassConfig::addGCPasses() {

llvm::ScheduleDAGInstrs *

AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {

+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();

ScheduleDAGMILive *DAG = createGenericSchedLive(C);

DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

+ if (ST.shouldClusterStores())

+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));

return DAG;

}

@@ -1363,6 +1408,8 @@ void GCNPassConfig::addPreEmitPass() {

addPass(&SIInsertHardClausesID);

addPass(&SILateBranchLoweringPassID);

+ if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))

+ addPass(createAMDGPUSetWavePriorityPass());

if (getOptLevel() > CodeGenOpt::None)

addPass(&SIPreEmitPeepholeID);

// The hazard recognizer that runs as part of the post-ra scheduler does not

@@ -1374,6 +1421,13 @@ void GCNPassConfig::addPreEmitPass() {

// Here we add a stand-alone hazard recognizer pass which can handle all

// cases.

addPass(&PostRAHazardRecognizerID);

+ if (getOptLevel() > CodeGenOpt::Less)

+ addPass(&AMDGPUReleaseVGPRsID);

+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))

+ addPass(&AMDGPUInsertDelayAluID);

addPass(&BranchRelaxationPassID);

}

@@ -1396,7 +1450,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(

const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,

SMDiagnostic &Error, SMRange &SourceRange) const {

const yaml::SIMachineFunctionInfo &YamlMFI =

- reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);

+ static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);

MachineFunction &MF = PFS.MF;

SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

@@ -1420,6 +1474,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(

return false;

};

+ auto parseOptionalRegister = [&](const yaml::StringValue &RegName,

+ Register &RegVal) {

+ return !RegName.Value.empty() && parseRegister(RegName, RegVal);

+ };

+ if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))

+ return true;

auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {

// Create a diagnostic for a the register string literal.

const MemoryBuffer &Buffer =

@@ -1452,6 +1514,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(

return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);

}

+ for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {

+ Register ParsedReg;

+ if (parseRegister(YamlReg, ParsedReg))

+ return true;

+ MFI->reserveWWMRegister(ParsedReg);

+ }

auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,

const TargetRegisterClass &RC,

ArgDescriptor &Arg, unsigned UserSGPRs,

@@ -1473,7 +1543,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(

Arg = ArgDescriptor::createStack(A->StackOffset);

// Check and apply the optional mask.

if (A->Mask)

- Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());

+ Arg = ArgDescriptor::createArg(Arg, *A->Mask);

MFI->NumUserSGPRs += UserSGPRs;

MFI->NumSystemSGPRs += SystemSGPRs;