author     Dimitry Andric <dim@FreeBSD.org>    2022-03-20 11:40:34 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-05-14 11:43:05 +0000
commit     349cc55c9796c4596a5b9904cd3281af295f878f (patch)
tree       410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
parent     cb2ae6163174b90e999326ecec3699ee093a5d43 (diff)
parent     c0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp  75
1 file changed, 60 insertions, 15 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 70ecea8dbc3e..12d6d35a6917 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -118,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
     // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
     // that might call a function which accesses a field within it. This is
     // presently approximated to 'all kernels' if there are any such functions
-    // in the module. This implicit use is reified as an explicit use here so
+    // in the module. This implicit use is redefined as an explicit use here so
     // that later passes, specifically PromoteAlloca, account for the required
     // memory without any knowledge of this transform.
@@ -162,6 +163,9 @@ public:
     bool Changed = processUsedLDS(M);
 
     for (Function &F : M.functions()) {
+      if (F.isDeclaration())
+        continue;
+
       // Only lower compute kernels' LDS.
       if (!AMDGPU::isKernel(F.getCallingConv()))
         continue;
@@ -282,6 +286,21 @@ private:
     // so remove the variables from these lists before replaceAllUsesWith
     removeFromUsedLists(M, LocalVars);
 
+    // Create alias.scope and their lists. Each field in the new structure
+    // does not alias with all other fields.
+    SmallVector<MDNode *> AliasScopes;
+    SmallVector<Metadata *> NoAliasList;
+    if (LocalVars.size() > 1) {
+      MDBuilder MDB(Ctx);
+      AliasScopes.reserve(LocalVars.size());
+      MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
+      for (size_t I = 0; I < LocalVars.size(); I++) {
+        MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
+        AliasScopes.push_back(Scope);
+      }
+      NoAliasList.append(&AliasScopes[1], AliasScopes.end());
+    }
+
     // Replace uses of ith variable with a constantexpr to the ith field of the
     // instance that will be allocated by AMDGPUMachineFunction
     Type *I32 = Type::getInt32Ty(Ctx);
@@ -313,7 +332,15 @@ private:
 
       uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
       Align A = commonAlignment(StructAlign, Off);
-      refineUsesAlignment(GEP, A, DL);
+
+      if (I)
+        NoAliasList[I - 1] = AliasScopes[I - 1];
+      MDNode *NoAlias =
+          NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
+      MDNode *AliasScope =
+          AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
+
+      refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
     }
 
     // Mark kernels with asm that reads the address of the allocated structure
@@ -323,23 +350,39 @@ private:
     if (!F) {
       IRBuilder<> Builder(Ctx);
       SmallPtrSet<Function *, 32> Kernels;
-      for (auto &I : M.functions()) {
-        Function *Func = &I;
-        if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
-          markUsedByKernel(Builder, Func, SGV);
-          Kernels.insert(Func);
+      for (Function &Func : M.functions()) {
+        if (Func.isDeclaration())
+          continue;
+
+        if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
+          markUsedByKernel(Builder, &Func, SGV);
+          Kernels.insert(&Func);
         }
       }
     }
     return true;
   }
 
-  void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
-                           unsigned MaxDepth = 5) {
-    if (!MaxDepth || A == 1)
+  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
+                                MDNode *AliasScope, MDNode *NoAlias,
+                                unsigned MaxDepth = 5) {
+    if (!MaxDepth || (A == 1 && !AliasScope))
       return;
 
     for (User *U : Ptr->users()) {
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (AliasScope && I->mayReadOrWriteMemory()) {
+          MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
+          AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
+                   : AliasScope);
+          I->setMetadata(LLVMContext::MD_alias_scope, AS);
+
+          MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
+          NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
+          I->setMetadata(LLVMContext::MD_noalias, NA);
+        }
+      }
+
       if (auto *LI = dyn_cast<LoadInst>(U)) {
         LI->setAlignment(std::max(A, LI->getAlign()));
         continue;
@@ -364,17 +407,19 @@ private:
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
         unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
         APInt Off(BitWidth, 0);
-        if (GEP->getPointerOperand() == Ptr &&
-            GEP->accumulateConstantOffset(DL, Off)) {
-          Align GA = commonAlignment(A, Off.getLimitedValue());
-          refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
+        if (GEP->getPointerOperand() == Ptr) {
+          Align GA;
+          if (GEP->accumulateConstantOffset(DL, Off))
+            GA = commonAlignment(A, Off.getLimitedValue());
+          refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
+                                   MaxDepth - 1);
         }
         continue;
       }
       if (auto *I = dyn_cast<Instruction>(U)) {
         if (I->getOpcode() == Instruction::BitCast ||
            I->getOpcode() == Instruction::AddrSpaceCast)
-          refineUsesAlignment(I, A, DL, MaxDepth - 1);
+          refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
      }
    }
  }
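For context on the technique the patch applies (this note and the sketch below are not part of the commit): the pass creates one anonymous alias scope per field of the packed LDS struct, all in a single domain, then tags every memory access derived from a field with !alias.scope naming its own scope and !noalias naming all the other fields' scopes, merging conservatively with any metadata the instruction already carries. The standalone sketch below reproduces that MDBuilder pattern under those assumptions; the helper names createFieldScopes and tagFieldAccess are illustrative only, not LLVM API.

// Minimal sketch of the alias-scope tagging used in the patch above.
// createFieldScopes/tagFieldAccess are hypothetical helpers for illustration.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// One anonymous alias scope per struct field, all members of a common domain.
static SmallVector<MDNode *, 8> createFieldScopes(LLVMContext &Ctx,
                                                  unsigned NumFields) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("lds-struct");
  SmallVector<MDNode *, 8> Scopes;
  Scopes.reserve(NumFields);
  for (unsigned I = 0; I < NumFields; ++I)
    Scopes.push_back(MDB.createAnonymousAliasScope(Domain));
  return Scopes;
}

// Tag a memory access to field FieldIdx: it lives in its own scope and does
// not alias any other field's scope. Existing metadata is merged with the
// same getMostGenericAliasScope/intersect calls the pass uses.
static void tagFieldAccess(Instruction &I, ArrayRef<MDNode *> Scopes,
                           unsigned FieldIdx) {
  if (!I.mayReadOrWriteMemory() || Scopes.size() < 2)
    return;
  LLVMContext &Ctx = I.getContext();

  MDNode *Own = MDNode::get(Ctx, {Scopes[FieldIdx]});
  SmallVector<Metadata *, 8> Others;
  for (unsigned J = 0; J < Scopes.size(); ++J)
    if (J != FieldIdx)
      Others.push_back(Scopes[J]);
  MDNode *NoAlias = MDNode::get(Ctx, Others);

  MDNode *AS = I.getMetadata(LLVMContext::MD_alias_scope);
  I.setMetadata(LLVMContext::MD_alias_scope,
                AS ? MDNode::getMostGenericAliasScope(AS, Own) : Own);

  MDNode *NA = I.getMetadata(LLVMContext::MD_noalias);
  I.setMetadata(LLVMContext::MD_noalias,
                NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
}

With this metadata in place, passes that query alias analysis can treat loads and stores to different fields of the single merged LDS allocation as non-overlapping, which is the point of emitting the scopes during lowering.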