diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
| commit | 145449b1e420787bb99721a429341fa6be3adfb6 (patch) | |
| tree | 1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | |
| parent | ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff) | |
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 27 |
1 file changed, 21 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 6e2b5dc471bc..35922341de26 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -14,7 +14,7 @@ // known address. AMDGPUMachineFunction allocates the LDS global. // // Local variables with constant annotation or non-undef initializer are passed -// through unchanged for simplication or error diagnostics in later passes. +// through unchanged for simplification or error diagnostics in later passes. // // To reduce the memory overhead variables that are only used by kernels are // excluded from this transform. The analysis to determine whether a variable @@ -28,8 +28,9 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "Utils/AMDGPULDSUtils.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -163,9 +164,10 @@ public: } bool runOnModule(Module &M) override { + CallGraph CG = CallGraph(M); UsedList = getUsedList(M); bool Changed = superAlignLDSGlobals(M); - Changed |= processUsedLDS(M); + Changed |= processUsedLDS(CG, M); for (Function &F : M.functions()) { if (F.isDeclaration()) @@ -174,7 +176,7 @@ public: // Only lower compute kernels' LDS. 
if (!AMDGPU::isKernel(F.getCallingConv())) continue; - Changed |= processUsedLDS(M, &F); + Changed |= processUsedLDS(CG, M, &F); } UsedList.clear(); @@ -226,7 +228,7 @@ private: return Changed; } - bool processUsedLDS(Module &M, Function *F = nullptr) { + bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); @@ -374,7 +376,20 @@ private: IRBuilder<> Builder(Ctx); for (Function &Func : M.functions()) { if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) { - markUsedByKernel(Builder, &Func, SGV); + const CallGraphNode *N = CG[&Func]; + const bool CalleesRequireModuleLDS = N->size() > 0; + + if (CalleesRequireModuleLDS) { + // If a function this kernel might call requires module LDS, + // annotate the kernel to let later passes know it will allocate + // this structure, even if not apparent from the IR. + markUsedByKernel(Builder, &Func, SGV); + } else { + // However if we are certain this kernel cannot call a function that + // requires module LDS, annotate the kernel so the backend can elide + // the allocation without repeating callgraph walks. + Func.addFnAttr("amdgpu-elide-module-lds"); + } } } } |
