aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp27
1 file changed, 21 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 6e2b5dc471bc..35922341de26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -14,7 +14,7 @@
// known address. AMDGPUMachineFunction allocates the LDS global.
//
// Local variables with constant annotation or non-undef initializer are passed
-// through unchanged for simplication or error diagnostics in later passes.
+// through unchanged for simplification or error diagnostics in later passes.
//
// To reduce the memory overhead variables that are only used by kernels are
// excluded from this transform. The analysis to determine whether a variable
@@ -28,8 +28,9 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDGPULDSUtils.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@@ -163,9 +164,10 @@ public:
}
bool runOnModule(Module &M) override {
+ CallGraph CG = CallGraph(M);
UsedList = getUsedList(M);
bool Changed = superAlignLDSGlobals(M);
- Changed |= processUsedLDS(M);
+ Changed |= processUsedLDS(CG, M);
for (Function &F : M.functions()) {
if (F.isDeclaration())
@@ -174,7 +176,7 @@ public:
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
- Changed |= processUsedLDS(M, &F);
+ Changed |= processUsedLDS(CG, M, &F);
}
UsedList.clear();
@@ -226,7 +228,7 @@ private:
return Changed;
}
- bool processUsedLDS(Module &M, Function *F = nullptr) {
+ bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
@@ -374,7 +376,20 @@ private:
IRBuilder<> Builder(Ctx);
for (Function &Func : M.functions()) {
if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
- markUsedByKernel(Builder, &Func, SGV);
+ const CallGraphNode *N = CG[&Func];
+ const bool CalleesRequireModuleLDS = N->size() > 0;
+
+ if (CalleesRequireModuleLDS) {
+ // If a function this kernel might call requires module LDS,
+ // annotate the kernel to let later passes know it will allocate
+ // this structure, even if not apparent from the IR.
+ markUsedByKernel(Builder, &Func, SGV);
+ } else {
+ // However if we are certain this kernel cannot call a function that
+ // requires module LDS, annotate the kernel so the backend can elide
+ // the allocation without repeating callgraph walks.
+ Func.addFnAttr("amdgpu-elide-module-lds");
+ }
}
}
}