diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-04 19:20:19 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-08 19:02:26 +0000 |
commit | 81ad626541db97eb356e2c1d4a20eb2a26a766ab (patch) | |
tree | 311b6a8987c32b1e1dcbab65c54cfac3fdb56175 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | |
parent | 5fff09660e06a66bed6482da9c70df328e16bbb6 (diff) | |
parent | 145449b1e420787bb99721a429341fa6be3adfb6 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 88 |
1 files changed, 79 insertions, 9 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a2c61f9da8da..1c6b9d35695a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" #include "AMDGPUExportClustering.h" +#include "AMDGPUIGroupLP.h" #include "AMDGPUMacroFusion.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" @@ -27,6 +28,7 @@ #include "SIMachineScheduler.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -56,6 +58,7 @@ #include "llvm/Transforms/Vectorize.h" using namespace llvm; +using namespace llvm::PatternMatch; namespace { class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> { @@ -269,12 +272,22 @@ static cl::opt<bool> EnableSIModeRegisterPass( cl::init(true), cl::Hidden); +// Enable GFX11+ s_delay_alu insertion +static cl::opt<bool> + EnableInsertDelayAlu("amdgpu-enable-delay-alu", + cl::desc("Enable s_delay_alu insertion"), + cl::init(true), cl::Hidden); + // Option is used in lit tests to prevent deadcoding of patterns inspected. static cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")); +static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority", + cl::desc("Adjust wave priority"), + cl::init(false), cl::Hidden); + static cl::opt<bool> EnableScalarIRPasses( "amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), @@ -330,7 +343,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); initializeSILoadStoreOptimizerPass(*PR); - initializeAMDGPUFixFunctionBitcastsPass(*PR); initializeAMDGPUCtorDtorLoweringPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAttributorPass(*PR); @@ -357,6 +369,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURewriteOutArgumentsPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowPass(*PR); + initializeAMDGPUReleaseVGPRsPass(*PR); + initializeAMDGPUInsertDelayAluPass(*PR); initializeSIInsertHardClausesPass(*PR); initializeSIInsertWaitcntsPass(*PR); initializeSIModeRegisterPass(*PR); @@ -390,9 +404,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>(); ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createIGroupLPDAGMutation()); + DAG->addMutation(createSchedBarrierDAGMutation()); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); DAG->addMutation(createAMDGPUExportClusteringDAGMutation()); return DAG; @@ -400,9 +419,12 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>(); auto DAG = new GCNIterativeScheduler(C, GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -413,9 +435,12 @@ static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C) { + const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>(); auto DAG = new GCNIterativeScheduler(C, GCNIterativeScheduler::SCHEDULE_ILP); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); return DAG; } @@ -801,6 +826,23 @@ AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); } +unsigned +AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const { + switch (Kind) { + case PseudoSourceValue::Stack: + case PseudoSourceValue::FixedStack: + return AMDGPUAS::PRIVATE_ADDRESS; + case PseudoSourceValue::ConstantPool: + case PseudoSourceValue::GOT: + case PseudoSourceValue::JumpTable: + case PseudoSourceValue::GlobalValueCallEntry: + case PseudoSourceValue::ExternalSymbolCallEntry: + case PseudoSourceValue::TargetCustom: + return AMDGPUAS::CONSTANT_ADDRESS; + } + return AMDGPUAS::FLAT_ADDRESS; +} + //===----------------------------------------------------------------------===// // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// @@ -836,7 +878,7 @@ GCNTargetMachine::getSubtargetImpl(const Function &F) const { } TargetTransformInfo -GCNTargetMachine::getTargetTransformInfo(const Function &F) { +GCNTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(GCNTTIImpl(this, F)); } @@ -873,7 +915,11 @@ public: ScheduleDAGMI *DAG = createGenericSchedPostRA(C); const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>(); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII)); + DAG->addMutation(createIGroupLPDAGMutation()); + DAG->addMutation(createSchedBarrierDAGMutation()); return DAG; } @@ -953,10 +999,6 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPUPrintfRuntimeBinding()); addPass(createAMDGPUCtorDtorLoweringPass()); - // This must occur before inlining, as the inliner will not look through - // bitcast calls. - addPass(createAMDGPUFixFunctionBitcastsPass()); - // A call to propagate attributes pass in the backend in case opt was not run. addPass(createAMDGPUPropagateAttributesEarlyPass(&TM)); @@ -967,7 +1009,7 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAlwaysInlinerLegacyPass()); // We need to add the barrier noop pass, otherwise adding the function // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a nodule with two + // one function at a time, which means if we have a module with two // functions, then we will generate code for the first function // without ever running any passes on the second. addPass(createBarrierNoopPass()); @@ -1079,8 +1121,11 @@ bool AMDGPUPassConfig::addGCPasses() { llvm::ScheduleDAGInstrs * AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const { + const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + if (ST.shouldClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -1363,6 +1408,8 @@ void GCNPassConfig::addPreEmitPass() { addPass(&SIInsertHardClausesID); addPass(&SILateBranchLoweringPassID); + if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less)) + addPass(createAMDGPUSetWavePriorityPass()); if (getOptLevel() > CodeGenOpt::None) addPass(&SIPreEmitPeepholeID); // The hazard recognizer that runs as part of the post-ra scheduler does not @@ -1374,6 +1421,13 @@ void GCNPassConfig::addPreEmitPass() { // Here we add a stand-alone hazard recognizer pass which can handle all // cases. addPass(&PostRAHazardRecognizerID); + + if (getOptLevel() > CodeGenOpt::Less) + addPass(&AMDGPUReleaseVGPRsID); + + if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less)) + addPass(&AMDGPUInsertDelayAluID); + addPass(&BranchRelaxationPassID); } @@ -1396,7 +1450,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { const yaml::SIMachineFunctionInfo &YamlMFI = - reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_); + static_cast<const yaml::SIMachineFunctionInfo &>(MFI_); MachineFunction &MF = PFS.MF; SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); @@ -1420,6 +1474,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return false; }; + auto parseOptionalRegister = [&](const yaml::StringValue &RegName, + Register &RegVal) { + return !RegName.Value.empty() && parseRegister(RegName, RegVal); + }; + + if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy)) + return true; + auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) { // Create a diagnostic for a the register string literal. const MemoryBuffer &Buffer = @@ -1452,6 +1514,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg); } + for (const auto &YamlReg : YamlMFI.WWMReservedRegs) { + Register ParsedReg; + if (parseRegister(YamlReg, ParsedReg)) + return true; + + MFI->reserveWWMRegister(ParsedReg); + } + auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A, const TargetRegisterClass &RC, ArgDescriptor &Arg, unsigned UserSGPRs, @@ -1473,7 +1543,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( Arg = ArgDescriptor::createStack(A->StackOffset); // Check and apply the optional mask. if (A->Mask) - Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue()); + Arg = ArgDescriptor::createArg(Arg, *A->Mask); MFI->NumUserSGPRs += UserSGPRs; MFI->NumSystemSGPRs += SystemSGPRs; |