src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2024-07-27 23:34:35 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2024-10-23 18:26:01 +0000
commit	0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583 (patch)
tree	6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
parent	6b9f7133aba44189d9625c352bc2c2a59baf18ef (diff)
parent	ac9a064cb179f3425b310fa2847f8764ac970a4d (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp

242

1 files changed, 228 insertions, 14 deletions

diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 25e628e5cbc5..4cda8b281370 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp

@@ -9,39 +9,36 @@

#include "AMDGPUMemoryUtils.h"

#include "AMDGPU.h"

#include "AMDGPUBaseInfo.h"

+#include "llvm/ADT/SetOperations.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/Analysis/AliasAnalysis.h"

+#include "llvm/Analysis/CallGraph.h"

#include "llvm/Analysis/MemorySSA.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

+#include "llvm/IR/Operator.h"

#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

-namespace llvm {

+namespace llvm::AMDGPU {

-namespace AMDGPU {

-Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {

+Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {

return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),

GV->getValueType());

}

bool isDynamicLDS(const GlobalVariable &GV) {

- // external zero size addrspace(3) without initializer implies cuda/hip extern

- // __shared__ the semantics for such a variable appears to be that all extern

- // __shared__ variables alias one another. This hits different handling.

+ // external zero size addrspace(3) without initializer is dynlds.

const Module *M = GV.getParent();

const DataLayout &DL = M->getDataLayout();

- if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {

+ if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)

return false;

- }

- uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());

- return GV.hasExternalLinkage() && AllocSize == 0;

+ return DL.getTypeAllocSize(GV.getValueType()) == 0;

}

bool isLDSVariableToLower(const GlobalVariable &GV) {

@@ -65,6 +62,225 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {

return true;

}

+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {

+ // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS

+ // global may have uses from multiple different functions as a result.

+ // This pass specialises LDS variables with respect to the kernel that

+ // allocates them.

+ // This is semantically equivalent to (the unimplemented as slow):

+ // for (auto &F : M.functions())

+ // for (auto &BB : F)

+ // for (auto &I : BB)

+ // for (Use &Op : I.operands())

+ // if (constantExprUsesLDS(Op))

+ // replaceConstantExprInFunction(I, Op);

+ SmallVector<Constant *> LDSGlobals;

+ for (auto &GV : M.globals())

+ if (AMDGPU::isLDSVariableToLower(GV))

+ LDSGlobals.push_back(&GV);

+ return convertUsersOfConstantsToInstructions(LDSGlobals);

+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,

+ FunctionVariableMap &kernels,

+ FunctionVariableMap &Functions) {

+ // Get uses from the current function, excluding uses by called Functions

+ // Two output variables to avoid walking the globals list twice

+ for (auto &GV : M.globals()) {

+ if (!AMDGPU::isLDSVariableToLower(GV))

+ continue;

+ for (User *V : GV.users()) {

+ if (auto *I = dyn_cast<Instruction>(V)) {

+ Function *F = I->getFunction();

+ if (isKernelLDS(F))

+ kernels[F].insert(&GV);

+ else

+ Functions[F].insert(&GV);

+ }

+bool isKernelLDS(const Function *F) {

+ // Some weirdness here. AMDGPU::isKernelCC does not call into

+ // AMDGPU::isKernel with the calling conv, it instead calls into

+ // isModuleEntryFunction which returns true for more calling conventions

+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.

+ // There's also a test that checks that the LDS lowering does not hit on

+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.

+ // Putting LDS in the name of the function to draw attention to this.

+ return AMDGPU::isKernel(F->getCallingConv());

+LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {

+ FunctionVariableMap DirectMapKernel;

+ FunctionVariableMap DirectMapFunction;

+ getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);

+ // Collect variables that are used by functions whose address has escaped

+ DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;

+ for (Function &F : M.functions()) {

+ if (!isKernelLDS(&F))

+ if (F.hasAddressTaken(nullptr,

+ /* IgnoreCallbackUses */ false,

+ /* IgnoreAssumeLikeCalls */ false,

+ /* IgnoreLLVMUsed */ true,

+ /* IgnoreArcAttachedCall */ false)) {

+ set_union(VariablesReachableThroughFunctionPointer,

+ DirectMapFunction[&F]);

+ }

+ auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {

+ assert(!F->isDeclaration());

+ for (const CallGraphNode::CallRecord &R : *CG[F]) {

+ if (!R.second->getFunction())

+ return true;

+ }

+ return false;

+ };

+ // Work out which variables are reachable through function calls

+ FunctionVariableMap TransitiveMapFunction = DirectMapFunction;

+ // If the function makes any unknown call, assume the worst case that it can

+ // access all variables accessed by functions whose address escaped

+ for (Function &F : M.functions()) {

+ if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {

+ if (!isKernelLDS(&F)) {

+ set_union(TransitiveMapFunction[&F],

+ VariablesReachableThroughFunctionPointer);

+ }

+ // Direct implementation of collecting all variables reachable from each

+ // function

+ for (Function &Func : M.functions()) {

+ if (Func.isDeclaration() || isKernelLDS(&Func))

+ continue;

+ DenseSet<Function *> seen; // catches cycles

+ SmallVector<Function *, 4> wip = {&Func};

+ while (!wip.empty()) {

+ Function *F = wip.pop_back_val();

+ // Can accelerate this by referring to transitive map for functions that

+ // have already been computed, with more care than this

+ set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);

+ for (const CallGraphNode::CallRecord &R : *CG[F]) {

+ Function *Ith = R.second->getFunction();

+ if (Ith) {

+ if (!seen.contains(Ith)) {

+ seen.insert(Ith);

+ wip.push_back(Ith);

+ }

+ // DirectMapKernel lists which variables are used by the kernel

+ // find the variables which are used through a function call

+ FunctionVariableMap IndirectMapKernel;

+ for (Function &Func : M.functions()) {

+ if (Func.isDeclaration() || !isKernelLDS(&Func))

+ continue;

+ for (const CallGraphNode::CallRecord &R : *CG[&Func]) {

+ Function *Ith = R.second->getFunction();

+ if (Ith) {

+ set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);

+ } else {

+ set_union(IndirectMapKernel[&Func],

+ VariablesReachableThroughFunctionPointer);

+ }

+ // Verify that we fall into one of 2 cases:

+ // - All variables are either absolute

+ // or direct mapped dynamic LDS that is not lowered.

+ // this is a re-run of the pass

+ // so we don't have anything to do.

+ // - No variables are absolute.

+ std::optional<bool> HasAbsoluteGVs;

+ for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {

+ for (auto &[Fn, GVs] : Map) {

+ for (auto *GV : GVs) {

+ bool IsAbsolute = GV->isAbsoluteSymbolRef();

+ bool IsDirectMapDynLDSGV = AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);

+ if (IsDirectMapDynLDSGV)

+ continue;

+ if (HasAbsoluteGVs.has_value()) {

+ if (*HasAbsoluteGVs != IsAbsolute) {

+ report_fatal_error(

+ "Module cannot mix absolute and non-absolute LDS GVs");

+ }

+ } else

+ HasAbsoluteGVs = IsAbsolute;

+ }

+ // If we only had absolute GVs, we have nothing to do, return an empty

+ // result.

+ if (HasAbsoluteGVs && *HasAbsoluteGVs)

+ return {FunctionVariableMap(), FunctionVariableMap()};

+ return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};

+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,

+ ArrayRef<StringRef> FnAttrs) {

+ for (StringRef Attr : FnAttrs)

+ KernelRoot->removeFnAttr(Attr);

+ SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};

+ SmallPtrSet<Function *, 8> Visited;

+ bool SeenUnknownCall = false;

+ while (!WorkList.empty()) {

+ Function *F = WorkList.pop_back_val();

+ for (auto &CallRecord : *CG[F]) {

+ if (!CallRecord.second)

+ continue;

+ Function *Callee = CallRecord.second->getFunction();

+ if (!Callee) {

+ if (!SeenUnknownCall) {

+ SeenUnknownCall = true;

+ // If we see any indirect calls, assume nothing about potential

+ // targets.

+ // TODO: This could be refined to possible LDS global users.

+ for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {

+ Function *PotentialCallee =

+ ExternalCallRecord.second->getFunction();

+ assert(PotentialCallee);

+ if (!isKernelLDS(PotentialCallee)) {

+ for (StringRef Attr : FnAttrs)

+ PotentialCallee->removeFnAttr(Attr);

+ }

+ } else {

+ for (StringRef Attr : FnAttrs)

+ Callee->removeFnAttr(Attr);

+ if (Visited.insert(Callee).second)

+ WorkList.push_back(Callee);

+ }

bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {

Instruction *DefInst = Def->getMemoryInst();

@@ -153,6 +369,4 @@ bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,

return false;

}

-} // end namespace AMDGPU

-} // end namespace llvm

+} // end namespace llvm::AMDGPU