author     Dimitry Andric <dim@FreeBSD.org>  2024-07-27 23:34:35 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2024-10-23 18:26:01 +0000
commit     0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583 (patch)
tree       6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
parent     6b9f7133aba44189d9625c352bc2c2a59baf18ef (diff)
parent     ac9a064cb179f3425b310fa2847f8764ac970a4d (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp  242
1 file changed, 228 insertions(+), 14 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 25e628e5cbc5..4cda8b281370 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -9,39 +9,36 @@
#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/ReplaceConstant.h"
#define DEBUG_TYPE "amdgpu-memory-utils"
using namespace llvm;
-namespace llvm {
+namespace llvm::AMDGPU {
-namespace AMDGPU {
-
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
+Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
GV->getValueType());
}
bool isDynamicLDS(const GlobalVariable &GV) {
- // external zero size addrspace(3) without initializer implies cuda/hip extern
- // __shared__ the semantics for such a variable appears to be that all extern
- // __shared__ variables alias one another. This hits different handling.
+ // An external, zero-size addrspace(3) variable without an initializer
+ // is dynamic LDS.
const Module *M = GV.getParent();
const DataLayout &DL = M->getDataLayout();
- if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+ if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
return false;
- }
- uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
- return GV.hasExternalLinkage() && AllocSize == 0;
+ return DL.getTypeAllocSize(GV.getValueType()) == 0;
}
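
For illustration, dynamic LDS usually originates from an unsized extern __shared__ array in HIP/CUDA source. A minimal, hedged HIP sketch (DynSmem, kernel, and Out are invented names):

    #include <hip/hip_runtime.h>

    // An unsized extern __shared__ array lowers to an external, zero-size
    // addrspace(3) global with no initializer, the shape isDynamicLDS
    // detects; its size is supplied at kernel launch time.
    extern __shared__ int DynSmem[];

    __global__ void kernel(int *Out) {
      DynSmem[threadIdx.x] = (int)threadIdx.x;
      __syncthreads();
      Out[threadIdx.x] = DynSmem[threadIdx.x];
    }
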
bool isLDSVariableToLower(const GlobalVariable &GV) {
@@ -65,6 +62,225 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
return true;
}
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+ // Constants are uniqued within LLVM. A ConstantExpr referring to an LDS
+ // global may have uses from multiple different functions as a result.
+ // This pass specialises LDS variables with respect to the kernel that
+ // allocates them.
+
+ // This is semantically equivalent to the following (not implemented
+ // this way because it would be slow):
+ // for (auto &F : M.functions())
+ // for (auto &BB : F)
+ // for (auto &I : BB)
+ // for (Use &Op : I.operands())
+ // if (constantExprUsesLDS(Op))
+ // replaceConstantExprInFunction(I, Op);
+
+ SmallVector<Constant *> LDSGlobals;
+ for (auto &GV : M.globals())
+ if (AMDGPU::isLDSVariableToLower(GV))
+ LDSGlobals.push_back(&GV);
+ return convertUsersOfConstantsToInstructions(LDSGlobals);
+}
+
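
A minimal usage sketch, assuming only a parsed Module (prepareLDSUses is an invented name):

    #include "AMDGPUMemoryUtils.h"
    #include "llvm/IR/Module.h"

    // After this returns, every remaining use of a lowered LDS global is an
    // Instruction with a known parent function, so per-kernel rewriting no
    // longer has to reason about ConstantExprs shared across functions.
    static bool prepareLDSUses(llvm::Module &M) {
      return llvm::AMDGPU::eliminateConstantExprUsesOfLDSFromAllInstructions(M);
    }
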
+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &Functions) {
+ // Get uses from the current function, excluding uses by called functions.
+ // Two output variables are used to avoid walking the globals list twice.
+ for (auto &GV : M.globals()) {
+ if (!AMDGPU::isLDSVariableToLower(GV))
+ continue;
+ for (User *V : GV.users()) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *F = I->getFunction();
+ if (isKernelLDS(F))
+ kernels[F].insert(&GV);
+ else
+ Functions[F].insert(&GV);
+ }
+ }
+ }
+}
+
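
A hedged sketch of consuming the two output maps (dumpDirectLDSUses is an invented name; FunctionVariableMap is assumed to be the DenseMap alias from this utility's header):

    #include "AMDGPUMemoryUtils.h"
    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/Support/raw_ostream.h"

    static void dumpDirectLDSUses(const llvm::CallGraph &CG, llvm::Module &M) {
      llvm::AMDGPU::FunctionVariableMap Kernels, Functions;
      llvm::AMDGPU::getUsesOfLDSByFunction(CG, M, Kernels, Functions);
      for (auto &[F, GVs] : Kernels)
        for (const llvm::GlobalVariable *GV : GVs)
          llvm::errs() << "kernel " << F->getName() << " directly uses "
                       << GV->getName() << "\n";
    }
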
+bool isKernelLDS(const Function *F) {
+ // Some weirdness here. AMDGPU::isKernelCC does not call into
+ // AMDGPU::isKernel with the calling convention; it instead calls into
+ // isModuleEntryFunction, which returns true for more calling conventions
+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+ // There's also a test that checks that the LDS lowering does not hit on
+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+ // LDS is in this function's name to draw attention to that distinction.
+ return AMDGPU::isKernel(F->getCallingConv());
+}
+
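
For reference, a hedged restatement of the accepted set (isLDSKernelCC is an invented name; the exact convention list is an assumption based on AMDGPU::isKernel):

    #include "llvm/IR/CallingConv.h"

    // Compute entry conventions only: graphics entry points such as
    // amdgpu_ps fail this test even though isModuleEntryFunction() would
    // accept them.
    static bool isLDSKernelCC(llvm::CallingConv::ID CC) {
      return CC == llvm::CallingConv::AMDGPU_KERNEL ||
             CC == llvm::CallingConv::SPIR_KERNEL;
    }
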
+LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
+
+ FunctionVariableMap DirectMapKernel;
+ FunctionVariableMap DirectMapFunction;
+ getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);
+
+ // Collect variables that are used by functions whose address has escaped
+ DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+ for (Function &F : M.functions()) {
+ if (!isKernelLDS(&F))
+ if (F.hasAddressTaken(nullptr,
+ /* IgnoreCallbackUses */ false,
+ /* IgnoreAssumeLikeCalls */ false,
+ /* IgnoreLLVMUsed */ true,
+ /* IgnoreArcAttachedCall */ false)) {
+ set_union(VariablesReachableThroughFunctionPointer,
+ DirectMapFunction[&F]);
+ }
+ }
+
+ auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
+ assert(!F->isDeclaration());
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ if (!R.second->getFunction())
+ return true;
+ }
+ return false;
+ };
+
+ // Work out which variables are reachable through function calls
+ FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
+
+ // If the function makes any unknown call, assume the worst case that it can
+ // access all variables accessed by functions whose address escaped
+ for (Function &F : M.functions()) {
+ if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
+ if (!isKernelLDS(&F)) {
+ set_union(TransitiveMapFunction[&F],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Direct implementation of collecting all variables reachable from each
+ // function
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || isKernelLDS(&Func))
+ continue;
+
+ DenseSet<Function *> seen; // catches cycles
+ SmallVector<Function *, 4> wip = {&Func};
+
+ while (!wip.empty()) {
+ Function *F = wip.pop_back_val();
+
+ // This could be accelerated by consulting the transitive map for
+ // functions that have already been computed, with some care.
+ set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
+
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ if (!seen.contains(Ith)) {
+ seen.insert(Ith);
+ wip.push_back(Ith);
+ }
+ }
+ }
+ }
+ }
+
+ // DirectMapKernel lists which variables are used by each kernel directly;
+ // now find the variables which are used through a function call.
+ FunctionVariableMap IndirectMapKernel;
+
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
+
+ for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
+ } else {
+ set_union(IndirectMapKernel[&Func],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Verify that we fall into one of two cases:
+ //    - All variables are either absolute or direct-mapped dynamic LDS
+ //      that is not lowered. This is a re-run of the pass, so there is
+ //      nothing to do.
+ //    - No variables are absolute.
+ std::optional<bool> HasAbsoluteGVs;
+ for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
+ for (auto &[Fn, GVs] : Map) {
+ for (auto *GV : GVs) {
+ bool IsAbsolute = GV->isAbsoluteSymbolRef();
+ bool IsDirectMapDynLDSGV =
+     AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
+ if (IsDirectMapDynLDSGV)
+ continue;
+ if (HasAbsoluteGVs.has_value()) {
+ if (*HasAbsoluteGVs != IsAbsolute) {
+ report_fatal_error(
+ "Module cannot mix absolute and non-absolute LDS GVs");
+ }
+ } else
+ HasAbsoluteGVs = IsAbsolute;
+ }
+ }
+ }
+
+ // If we only had absolute GVs, we have nothing to do; return an empty
+ // result.
+ if (HasAbsoluteGVs && *HasAbsoluteGVs)
+ return {FunctionVariableMap(), FunctionVariableMap()};
+
+ return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
+}
+
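
A hedged consumer sketch (dumpTransitiveLDSUses is an invented name; the direct_access/indirect_access field names are assumed to match LDSUsesInfoTy in this utility's header):

    #include "AMDGPUMemoryUtils.h"
    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/Support/raw_ostream.h"

    static void dumpTransitiveLDSUses(llvm::Module &M) {
      llvm::CallGraph CG(M);
      llvm::AMDGPU::LDSUsesInfoTy Info =
          llvm::AMDGPU::getTransitiveUsesOfLDS(CG, M);
      for (auto &[Kernel, GVs] : Info.indirect_access)
        for (const llvm::GlobalVariable *GV : GVs)
          llvm::errs() << Kernel->getName() << " reaches " << GV->getName()
                       << " through a call\n";
    }
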
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+ ArrayRef<StringRef> FnAttrs) {
+ for (StringRef Attr : FnAttrs)
+ KernelRoot->removeFnAttr(Attr);
+
+ SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
+ SmallPtrSet<Function *, 8> Visited;
+ bool SeenUnknownCall = false;
+
+ while (!WorkList.empty()) {
+ Function *F = WorkList.pop_back_val();
+
+ for (auto &CallRecord : *CG[F]) {
+ if (!CallRecord.second)
+ continue;
+
+ Function *Callee = CallRecord.second->getFunction();
+ if (!Callee) {
+ if (!SeenUnknownCall) {
+ SeenUnknownCall = true;
+
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee =
+ ExternalCallRecord.second->getFunction();
+ assert(PotentialCallee);
+ if (!isKernelLDS(PotentialCallee)) {
+ for (StringRef Attr : FnAttrs)
+ PotentialCallee->removeFnAttr(Attr);
+ }
+ }
+ }
+ } else {
+ for (StringRef Attr : FnAttrs)
+ Callee->removeFnAttr(Attr);
+ if (Visited.insert(Callee).second)
+ WorkList.push_back(Callee);
+ }
+ }
+ }
+}
+
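
A usage sketch (clearNoLdsKernelId is an invented name; the attribute string is the one the LDS lowering manages, stated here as an assumption):

    #include "AMDGPUMemoryUtils.h"
    #include "llvm/Analysis/CallGraph.h"

    // Once a kernel gains users of the LDS kernel-id intrinsic, the
    // corresponding "no" attribute must be cleared from everything the
    // kernel can reach.
    static void clearNoLdsKernelId(llvm::CallGraph &CG, llvm::Function *Kernel) {
      llvm::AMDGPU::removeFnAttrFromReachable(CG, Kernel,
                                              {"amdgpu-no-lds-kernel-id"});
    }
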
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
Instruction *DefInst = Def->getMemoryInst();
@@ -153,6 +369,4 @@ bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
return false;
}
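
A hedged caller sketch for the clobber query (loadIsUnclobbered is an invented name; MSSA and AA would come from a pass's analysis results):

    #include "AMDGPUMemoryUtils.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/IR/Instructions.h"

    // True when MemorySSA shows no real clobber of the load's pointer
    // between function entry and the load itself.
    static bool loadIsUnclobbered(const llvm::LoadInst *Load,
                                  llvm::MemorySSA *MSSA,
                                  llvm::AAResults *AA) {
      return !llvm::AMDGPU::isClobberedInFunction(Load, MSSA, AA);
    }
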
-} // end namespace AMDGPU
-
-} // end namespace llvm
+} // end namespace llvm::AMDGPU