aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2022-03-20 11:40:34 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2022-05-14 11:43:05 +0000
commit 349cc55c9796c4596a5b9904cd3281af295f878f (patch)
tree 410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
parent cb2ae6163174b90e999326ecec3699ee093a5d43 (diff)
parent c0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp 75
1 files changed, 60 insertions, 15 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 70ecea8dbc3e..12d6d35a6917 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -118,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
// that might call a function which accesses a field within it. This is
// presently approximated to 'all kernels' if there are any such functions
- // in the module. This implicit use is reified as an explicit use here so
+ // in the module. This implicit use is redefined as an explicit use here so
// that later passes, specifically PromoteAlloca, account for the required
// memory without any knowledge of this transform.
@@ -162,6 +163,9 @@ public:
bool Changed = processUsedLDS(M);
for (Function &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
+
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
@@ -282,6 +286,21 @@ private:
// so remove the variables from these lists before replaceAllUsesWith
removeFromUsedLists(M, LocalVars);
+ // Create one alias scope per field, plus the matching noalias lists. No
+ // field in the new structure aliases any other field.
+ SmallVector<MDNode *> AliasScopes;
+ SmallVector<Metadata *> NoAliasList;
+ if (LocalVars.size() > 1) {
+ MDBuilder MDB(Ctx);
+ AliasScopes.reserve(LocalVars.size());
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
+ for (size_t I = 0; I < LocalVars.size(); I++) {
+ MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
+ AliasScopes.push_back(Scope);
+ }
+ NoAliasList.append(&AliasScopes[1], AliasScopes.end());
+ }
+
// Replace uses of ith variable with a constantexpr to the ith field of the
// instance that will be allocated by AMDGPUMachineFunction
Type *I32 = Type::getInt32Ty(Ctx);
@@ -313,7 +332,15 @@ private:
uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
Align A = commonAlignment(StructAlign, Off);
- refineUsesAlignment(GEP, A, DL);
+
+ if (I)
+ NoAliasList[I - 1] = AliasScopes[I - 1];
+ MDNode *NoAlias =
+ NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
+ MDNode *AliasScope =
+ AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
+
+ refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
}
// Mark kernels with asm that reads the address of the allocated structure
@@ -323,23 +350,39 @@ private:
if (!F) {
IRBuilder<> Builder(Ctx);
SmallPtrSet<Function *, 32> Kernels;
- for (auto &I : M.functions()) {
- Function *Func = &I;
- if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
- markUsedByKernel(Builder, Func, SGV);
- Kernels.insert(Func);
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration())
+ continue;
+
+ if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
+ markUsedByKernel(Builder, &Func, SGV);
+ Kernels.insert(&Func);
}
}
}
return true;
}
- void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
- unsigned MaxDepth = 5) {
- if (!MaxDepth || A == 1)
+ void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
+ MDNode *AliasScope, MDNode *NoAlias,
+ unsigned MaxDepth = 5) {
+ if (!MaxDepth || (A == 1 && !AliasScope))
return;
for (User *U : Ptr->users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (AliasScope && I->mayReadOrWriteMemory()) {
+ MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
+ AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
+ : AliasScope);
+ I->setMetadata(LLVMContext::MD_alias_scope, AS);
+
+ MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
+ NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
+ I->setMetadata(LLVMContext::MD_noalias, NA);
+ }
+ }
+
if (auto *LI = dyn_cast<LoadInst>(U)) {
LI->setAlignment(std::max(A, LI->getAlign()));
continue;
@@ -364,17 +407,19 @@ private:
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt Off(BitWidth, 0);
- if (GEP->getPointerOperand() == Ptr &&
- GEP->accumulateConstantOffset(DL, Off)) {
- Align GA = commonAlignment(A, Off.getLimitedValue());
- refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
+ if (GEP->getPointerOperand() == Ptr) {
+ Align GA;
+ if (GEP->accumulateConstantOffset(DL, Off))
+ GA = commonAlignment(A, Off.getLimitedValue());
+ refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
+ MaxDepth - 1);
}
continue;
}
if (auto *I = dyn_cast<Instruction>(U)) {
if (I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::AddrSpaceCast)
- refineUsesAlignment(I, A, DL, MaxDepth - 1);
+ refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
}
}
}