about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2023-07-26 19:03:47 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2023-07-26 19:04:23 +0000
commit 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree 27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
parent e3b557809604d036af6e00c60f012c2025b59a5e (diff)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp 211
1 files changed, 97 insertions, 114 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index a6a32b98f44c..44bbfe6f13d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -10,8 +10,11 @@
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -41,6 +44,18 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
// Assume the attribute allocates before any known GDS globals.
StaticGDSSize = GDSSize;
+ // Second value, if present, is the maximum value that can be assigned.
+ // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
+ // during codegen.
+ std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
+ F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
+
+ // The two separate variables are only profitable when the LDS module lowering
+ // pass is disabled. If graphics does not use dynamic LDS, this is never
+ // profitable. Leaving cleanup for a later change.
+ LDSSize = LDSSizeRange.first;
+ StaticLDSSize = LDSSize;
+
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
@@ -63,6 +78,42 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
unsigned Offset;
if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+
+ std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
+ if (MaybeAbs) {
+ // Absolute address LDS variables that exist prior to the LDS lowering
+ // pass raise a fatal error in that pass. These failure modes are only
+ // reachable if that lowering pass is disabled or broken. If/when adding
+ // support for absolute addresses on user specified variables, the
+ // alignment check moves to the lowering pass and the frame calculation
+ // needs to take the user variables into consideration.
+
+ uint32_t ObjectStart = *MaybeAbs;
+
+ if (ObjectStart != alignTo(ObjectStart, Alignment)) {
+ report_fatal_error("Absolute address LDS variable inconsistent with "
+ "variable alignment");
+ }
+
+ if (isModuleEntryFunction()) {
+ // If this is a module entry function, we can also sanity check against
+ // the static frame. Strictly it would be better to check against the
+ // attribute, i.e. that the variable is within the always-allocated
+ // section, and not within some other non-absolute-address object
+ // allocated here, but the extra error detection is minimal and we would
+ // have to pass the Function around or cache the attribute value.
+ uint32_t ObjectEnd =
+ ObjectStart + DL.getTypeAllocSize(GV.getValueType());
+ if (ObjectEnd > StaticLDSSize) {
+ report_fatal_error(
+ "Absolute address LDS variable outside of static frame");
+ }
+ }
+
+ Entry.first->second = ObjectStart;
+ return ObjectStart;
+ }
+
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
@@ -87,135 +138,54 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
return Offset;
}
-static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
-
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
- auto name = GV.getName();
- return (name == ModuleLDSName) ||
- (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
- const GlobalVariable &GV) {
- const Module &M = *GV.getParent();
- StringRef N(GV.getName());
- if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
- return M.getFunction(N);
- }
- return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *
+getKernelDynLDSGlobalFromFunction(const Function &F) {
const Module *M = F.getParent();
- std::string KernelLDSName = "llvm.amdgcn.kernel.";
- KernelLDSName += F.getName();
- KernelLDSName += ".lds";
- return M->getNamedGlobal(KernelLDSName);
+ std::string KernelDynLDSName = "llvm.amdgcn.";
+ KernelDynLDSName += F.getName();
+ KernelDynLDSName += ".dynlds";
+ return M->getNamedGlobal(KernelDynLDSName);
}
-// This kernel calls no functions that require the module lds struct
-static bool canElideModuleLDS(const Function &F) {
- return F.hasFnAttribute("amdgpu-elide-module-lds");
-}
-
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
- const GlobalVariable &GV) {
- // module.lds, then alignment padding, then kernel.lds, then other variables
- // if any
-
- assert(isKnownAddressLDSGlobal(GV));
- unsigned Offset = 0;
-
- if (GV.getName() == ModuleLDSName) {
- return 0;
- }
-
- const Module *M = GV.getParent();
- const DataLayout &DL = M->getDataLayout();
-
- const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
- const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
- // Account for module.lds if allocated for this function
- if (GVM && f && !canElideModuleLDS(*f)) {
- // allocator aligns this to var align, but it's zero to begin with
- Offset += DL.getTypeAllocSize(GVM->getValueType());
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
+ // TODO: Would be more consistent with the abs symbols to use a range
+ MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+ if (MD && MD->getNumOperands() == 1) {
+ if (ConstantInt *KnownSize =
+ mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+ uint64_t ZExt = KnownSize->getZExtValue();
+ if (ZExt <= UINT32_MAX) {
+ return ZExt;
+ }
+ }
}
-
- // No dynamic LDS alignment done by allocateModuleLDSGlobal
- Offset = alignTo(
- Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
- return Offset;
+ return {};
}
-void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
- const Module *M = F.getParent();
-
- // This function is called before allocating any other LDS so that it can
- // reliably put values at known addresses. Consequently, dynamic LDS, if
- // present, will not yet have been allocated
-
- assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
-
- if (isModuleEntryFunction()) {
-
- // Pointer values start from zero, memory allocated per-kernel-launch
- // Variables can be grouped into a module level struct and a struct per
- // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
- // are allocated at statically computable addresses here.
- //
- // Address 0
- // {
- // llvm.amdgcn.module.lds
- // }
- // alignment padding
- // {
- // llvm.amdgcn.kernel.some-name.lds
- // }
- // other variables, e.g. dynamic lds, allocated after this call
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+ if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ return {};
- const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
- const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
+ std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+ if (!AbsSymRange)
+ return {};
- if (GV && !canElideModuleLDS(F)) {
- assert(isKnownAddressLDSGlobal(*GV));
- unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
- "Module LDS expected to be allocated before other LDS");
- }
-
- if (KV) {
- // The per-kernel offset is deterministic because it is allocated
- // before any other non-module LDS variables.
- assert(isKnownAddressLDSGlobal(*KV));
- unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
- "Kernel LDS expected to be immediately after module LDS");
+ if (const APInt *V = AbsSymRange->getSingleElement()) {
+ std::optional<uint64_t> ZExt = V->tryZExtValue();
+ if (ZExt && (*ZExt <= UINT32_MAX)) {
+ return *ZExt;
}
}
-}
-std::optional<uint32_t>
-AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
- auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
- if (MD && MD->getNumOperands() == 1) {
- ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
- if (KnownSize) {
- uint64_t V = KnownSize->getZExtValue();
- if (V <= UINT32_MAX) {
- return V;
- }
- }
- }
return {};
}
-void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
+void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
const GlobalVariable &GV) {
+ const Module *M = F.getParent();
+ const DataLayout &DL = M->getDataLayout();
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
Align Alignment =
@@ -225,4 +195,17 @@ void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
LDSSize = alignTo(StaticLDSSize, Alignment);
DynLDSAlign = Alignment;
+
+ // If there is a dynamic LDS variable associated with this function F, every
+ // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
+ // map to the same address. This holds because no LDS is allocated after the
+ // lowering pass if there are dynamic LDS variables present.
+ const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
+ if (Dyn) {
+ unsigned Offset = LDSSize; // return this?
+ std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
+ if (!Expect || (Offset != *Expect)) {
+ report_fatal_error("Inconsistent metadata on dynamic LDS variable");
+ }
+ }
}