diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:03:47 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:04:23 +0000 |
| commit | 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch) | |
| tree | 27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | |
| parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) | |
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 211 |
1 files changed, 97 insertions, 114 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index a6a32b98f44c..44bbfe6f13d9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -10,8 +10,11 @@ #include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -41,6 +44,18 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, // Assume the attribute allocates before any known GDS globals. StaticGDSSize = GDSSize; + // Second value, if present, is the maximum value that can be assigned. + // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics + // during codegen. + std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( + F, "amdgpu-lds-size", {0, UINT32_MAX}, true); + + // The two separate variables are only profitable when the LDS module lowering + // pass is disabled. If graphics does not use dynamic LDS, this is never + // profitable. Leaving cleanup for a later change. + LDSSize = LDSSizeRange.first; + StaticLDSSize = LDSSize; + CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); @@ -63,6 +78,42 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, unsigned Offset; if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + + std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); + if (MaybeAbs) { + // Absolute address LDS variables that exist prior to the LDS lowering + // pass raise a fatal error in that pass. These failure modes are only + // reachable if that lowering pass is disabled or broken. If/when adding + // support for absolute addresses on user specified variables, the + // alignment check moves to the lowering pass and the frame calculation + // needs to take the user variables into consideration. + + uint32_t ObjectStart = *MaybeAbs; + + if (ObjectStart != alignTo(ObjectStart, Alignment)) { + report_fatal_error("Absolute address LDS variable inconsistent with " + "variable alignment"); + } + + if (isModuleEntryFunction()) { + // If this is a module entry function, we can also sanity check against + // the static frame. Strictly it would be better to check against the + // attribute, i.e. that the variable is within the always-allocated + // section, and not within some other non-absolute-address object + // allocated here, but the extra error detection is minimal and we would + // have to pass the Function around or cache the attribute value. + uint32_t ObjectEnd = + ObjectStart + DL.getTypeAllocSize(GV.getValueType()); + if (ObjectEnd > StaticLDSSize) { + report_fatal_error( + "Absolute address LDS variable outside of static frame"); + } + } + + Entry.first->second = ObjectStart; + return ObjectStart; + } + /// TODO: We should sort these to minimize wasted space due to alignment /// padding. Currently the padding is decided by the first encountered use /// during lowering. @@ -87,135 +138,54 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, return Offset; } -static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds"; - -bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) { - auto name = GV.getName(); - return (name == ModuleLDSName) || - (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds")); -} - -const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal( - const GlobalVariable &GV) { - const Module &M = *GV.getParent(); - StringRef N(GV.getName()); - if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) { - return M.getFunction(N); - } - return nullptr; -} - -const GlobalVariable * -AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { +static const GlobalVariable * +getKernelDynLDSGlobalFromFunction(const Function &F) { const Module *M = F.getParent(); - std::string KernelLDSName = "llvm.amdgcn.kernel."; - KernelLDSName += F.getName(); - KernelLDSName += ".lds"; - return M->getNamedGlobal(KernelLDSName); + std::string KernelDynLDSName = "llvm.amdgcn."; + KernelDynLDSName += F.getName(); + KernelDynLDSName += ".dynlds"; + return M->getNamedGlobal(KernelDynLDSName); } -// This kernel calls no functions that require the module lds struct -static bool canElideModuleLDS(const Function &F) { - return F.hasFnAttribute("amdgpu-elide-module-lds"); -} - -unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal( - const GlobalVariable &GV) { - // module.lds, then alignment padding, then kernel.lds, then other variables - // if any - - assert(isKnownAddressLDSGlobal(GV)); - unsigned Offset = 0; - - if (GV.getName() == ModuleLDSName) { - return 0; - } - - const Module *M = GV.getParent(); - const DataLayout &DL = M->getDataLayout(); - - const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName); - const Function *f = getKernelLDSFunctionFromGlobal(GV); - - // Account for module.lds if allocated for this function - if (GVM && f && !canElideModuleLDS(*f)) { - // allocator aligns this to var align, but it's zero to begin with - Offset += DL.getTypeAllocSize(GVM->getValueType()); +std::optional<uint32_t> +AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { + // TODO: Would be more consistent with the abs symbols to use a range + MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); + if (MD && MD->getNumOperands() == 1) { + if (ConstantInt *KnownSize = + mdconst::extract<ConstantInt>(MD->getOperand(0))) { + uint64_t ZExt = KnownSize->getZExtValue(); + if (ZExt <= UINT32_MAX) { + return ZExt; + } + } } - - // No dynamic LDS alignment done by allocateModuleLDSGlobal - Offset = alignTo( - Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType())); - - return Offset; + return {}; } -void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { - const Module *M = F.getParent(); - - // This function is called before allocating any other LDS so that it can - // reliably put values at known addresses. Consequently, dynamic LDS, if - // present, will not yet have been allocated - - assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); - - if (isModuleEntryFunction()) { - - // Pointer values start from zero, memory allocated per-kernel-launch - // Variables can be grouped into a module level struct and a struct per - // kernel function by AMDGPULowerModuleLDSPass. If that is done, they - // are allocated at statically computable addresses here. - // - // Address 0 - // { - // llvm.amdgcn.module.lds - // } - // alignment padding - // { - // llvm.amdgcn.kernel.some-name.lds - // } - // other variables, e.g. dynamic lds, allocated after this call +std::optional<uint32_t> +AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { + if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + return {}; - const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName); - const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); + std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); + if (!AbsSymRange) + return {}; - if (GV && !canElideModuleLDS(F)) { - assert(isKnownAddressLDSGlobal(*GV)); - unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); - (void)Offset; - assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) && - "Module LDS expected to be allocated before other LDS"); - } - - if (KV) { - // The per-kernel offset is deterministic because it is allocated - // before any other non-module LDS variables. - assert(isKnownAddressLDSGlobal(*KV)); - unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); - (void)Offset; - assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) && - "Kernel LDS expected to be immediately after module LDS"); + if (const APInt *V = AbsSymRange->getSingleElement()) { + std::optional<uint64_t> ZExt = V->tryZExtValue(); + if (ZExt && (*ZExt <= UINT32_MAX)) { + return *ZExt; } } -} -std::optional<uint32_t> -AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { - auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); - if (MD && MD->getNumOperands() == 1) { - ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); - if (KnownSize) { - uint64_t V = KnownSize->getZExtValue(); - if (V <= UINT32_MAX) { - return V; - } - } - } return {}; } -void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, +void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, const GlobalVariable &GV) { + const Module *M = F.getParent(); + const DataLayout &DL = M->getDataLayout(); assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); Align Alignment = @@ -225,4 +195,17 @@ void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, LDSSize = alignTo(StaticLDSSize, Alignment); DynLDSAlign = Alignment; + + // If there is a dynamic LDS variable associated with this function F, every + // further dynamic LDS instance (allocated by calling setDynLDSAlign) must + // map to the same address. This holds because no LDS is allocated after the + // lowering pass if there are dynamic LDS variables present. + const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); + if (Dyn) { + unsigned Offset = LDSSize; // return this? + std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); + if (!Expect || (Offset != *Expect)) { + report_fatal_error("Inconsistent metadata on dynamic LDS variable"); + } + } } |
