diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-07-27 23:34:35 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-10-23 18:26:01 +0000 |
commit | 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583 (patch) | |
tree | 6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | |
parent | 6b9f7133aba44189d9625c352bc2c2a59baf18ef (diff) | |
parent | ac9a064cb179f3425b310fa2847f8764ac970a4d (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 242 |
1 files changed, 228 insertions, 14 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp index 25e628e5cbc5..4cda8b281370 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp @@ -9,39 +9,36 @@ #include "AMDGPUMemoryUtils.h" #include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ReplaceConstant.h" #define DEBUG_TYPE "amdgpu-memory-utils" using namespace llvm; -namespace llvm { +namespace llvm::AMDGPU { -namespace AMDGPU { - -Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { +Align getAlign(const DataLayout &DL, const GlobalVariable *GV) { return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), GV->getValueType()); } bool isDynamicLDS(const GlobalVariable &GV) { - // external zero size addrspace(3) without initializer implies cuda/hip extern - // __shared__ the semantics for such a variable appears to be that all extern - // __shared__ variables alias one another. This hits different handling. + // external zero size addrspace(3) without initializer is dynlds. const Module *M = GV.getParent(); const DataLayout &DL = M->getDataLayout(); - if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { + if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) return false; - } - uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); - return GV.hasExternalLinkage() && AllocSize == 0; + return DL.getTypeAllocSize(GV.getValueType()) == 0; } bool isLDSVariableToLower(const GlobalVariable &GV) { @@ -65,6 +62,225 @@ bool isLDSVariableToLower(const GlobalVariable &GV) { return true; } +bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) { + // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS + // global may have uses from multiple different functions as a result. + // This pass specialises LDS variables with respect to the kernel that + // allocates them. + + // This is semantically equivalent to (the unimplemented as slow): + // for (auto &F : M.functions()) + // for (auto &BB : F) + // for (auto &I : BB) + // for (Use &Op : I.operands()) + // if (constantExprUsesLDS(Op)) + // replaceConstantExprInFunction(I, Op); + + SmallVector<Constant *> LDSGlobals; + for (auto &GV : M.globals()) + if (AMDGPU::isLDSVariableToLower(GV)) + LDSGlobals.push_back(&GV); + return convertUsersOfConstantsToInstructions(LDSGlobals); +} + +void getUsesOfLDSByFunction(const CallGraph &CG, Module &M, + FunctionVariableMap &kernels, + FunctionVariableMap &Functions) { + // Get uses from the current function, excluding uses by called Functions + // Two output variables to avoid walking the globals list twice + for (auto &GV : M.globals()) { + if (!AMDGPU::isLDSVariableToLower(GV)) + continue; + for (User *V : GV.users()) { + if (auto *I = dyn_cast<Instruction>(V)) { + Function *F = I->getFunction(); + if (isKernelLDS(F)) + kernels[F].insert(&GV); + else + Functions[F].insert(&GV); + } + } + } +} + +bool isKernelLDS(const Function *F) { + // Some weirdness here. AMDGPU::isKernelCC does not call into + // AMDGPU::isKernel with the calling conv, it instead calls into + // isModuleEntryFunction which returns true for more calling conventions + // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel. + // There's also a test that checks that the LDS lowering does not hit on + // a graphics shader, denoted amdgpu_ps, so stay with the limited case. + // Putting LDS in the name of the function to draw attention to this. + return AMDGPU::isKernel(F->getCallingConv()); +} + +LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) { + + FunctionVariableMap DirectMapKernel; + FunctionVariableMap DirectMapFunction; + getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction); + + // Collect variables that are used by functions whose address has escaped + DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer; + for (Function &F : M.functions()) { + if (!isKernelLDS(&F)) + if (F.hasAddressTaken(nullptr, + /* IgnoreCallbackUses */ false, + /* IgnoreAssumeLikeCalls */ false, + /* IgnoreLLVMUsed */ true, + /* IgnoreArcAttachedCall */ false)) { + set_union(VariablesReachableThroughFunctionPointer, + DirectMapFunction[&F]); + } + } + + auto FunctionMakesUnknownCall = [&](const Function *F) -> bool { + assert(!F->isDeclaration()); + for (const CallGraphNode::CallRecord &R : *CG[F]) { + if (!R.second->getFunction()) + return true; + } + return false; + }; + + // Work out which variables are reachable through function calls + FunctionVariableMap TransitiveMapFunction = DirectMapFunction; + + // If the function makes any unknown call, assume the worst case that it can + // access all variables accessed by functions whose address escaped + for (Function &F : M.functions()) { + if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) { + if (!isKernelLDS(&F)) { + set_union(TransitiveMapFunction[&F], + VariablesReachableThroughFunctionPointer); + } + } + } + + // Direct implementation of collecting all variables reachable from each + // function + for (Function &Func : M.functions()) { + if (Func.isDeclaration() || isKernelLDS(&Func)) + continue; + + DenseSet<Function *> seen; // catches cycles + SmallVector<Function *, 4> wip = {&Func}; + + while (!wip.empty()) { + Function *F = wip.pop_back_val(); + + // Can accelerate this by referring to transitive map for functions that + // have already been computed, with more care than this + set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]); + + for (const CallGraphNode::CallRecord &R : *CG[F]) { + Function *Ith = R.second->getFunction(); + if (Ith) { + if (!seen.contains(Ith)) { + seen.insert(Ith); + wip.push_back(Ith); + } + } + } + } + } + + // DirectMapKernel lists which variables are used by the kernel + // find the variables which are used through a function call + FunctionVariableMap IndirectMapKernel; + + for (Function &Func : M.functions()) { + if (Func.isDeclaration() || !isKernelLDS(&Func)) + continue; + + for (const CallGraphNode::CallRecord &R : *CG[&Func]) { + Function *Ith = R.second->getFunction(); + if (Ith) { + set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]); + } else { + set_union(IndirectMapKernel[&Func], + VariablesReachableThroughFunctionPointer); + } + } + } + + // Verify that we fall into one of 2 cases: + // - All variables are either absolute + // or direct mapped dynamic LDS that is not lowered. + // this is a re-run of the pass + // so we don't have anything to do. + // - No variables are absolute. + std::optional<bool> HasAbsoluteGVs; + for (auto &Map : {DirectMapKernel, IndirectMapKernel}) { + for (auto &[Fn, GVs] : Map) { + for (auto *GV : GVs) { + bool IsAbsolute = GV->isAbsoluteSymbolRef(); + bool IsDirectMapDynLDSGV = AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn); + if (IsDirectMapDynLDSGV) + continue; + if (HasAbsoluteGVs.has_value()) { + if (*HasAbsoluteGVs != IsAbsolute) { + report_fatal_error( + "Module cannot mix absolute and non-absolute LDS GVs"); + } + } else + HasAbsoluteGVs = IsAbsolute; + } + } + } + + // If we only had absolute GVs, we have nothing to do, return an empty + // result. + if (HasAbsoluteGVs && *HasAbsoluteGVs) + return {FunctionVariableMap(), FunctionVariableMap()}; + + return {std::move(DirectMapKernel), std::move(IndirectMapKernel)}; +} + +void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, + ArrayRef<StringRef> FnAttrs) { + for (StringRef Attr : FnAttrs) + KernelRoot->removeFnAttr(Attr); + + SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()}; + SmallPtrSet<Function *, 8> Visited; + bool SeenUnknownCall = false; + + while (!WorkList.empty()) { + Function *F = WorkList.pop_back_val(); + + for (auto &CallRecord : *CG[F]) { + if (!CallRecord.second) + continue; + + Function *Callee = CallRecord.second->getFunction(); + if (!Callee) { + if (!SeenUnknownCall) { + SeenUnknownCall = true; + + // If we see any indirect calls, assume nothing about potential + // targets. + // TODO: This could be refined to possible LDS global users. + for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) { + Function *PotentialCallee = + ExternalCallRecord.second->getFunction(); + assert(PotentialCallee); + if (!isKernelLDS(PotentialCallee)) { + for (StringRef Attr : FnAttrs) + PotentialCallee->removeFnAttr(Attr); + } + } + } + } else { + for (StringRef Attr : FnAttrs) + Callee->removeFnAttr(Attr); + if (Visited.insert(Callee).second) + WorkList.push_back(Callee); + } + } + } +} + bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) { Instruction *DefInst = Def->getMemoryInst(); @@ -153,6 +369,4 @@ bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, return false; } -} // end namespace AMDGPU - -} // end namespace llvm +} // end namespace llvm::AMDGPU |