diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 77 |
1 files changed, 53 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index e3a645977f92..d2a02143e4e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -177,6 +177,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/BitVector.h" @@ -184,8 +185,8 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -204,7 +205,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include <tuple> #include <vector> #include <cstdio> @@ -252,7 +252,8 @@ template <typename T> std::vector<T> sortByName(std::vector<T> &&V) { return {std::move(V)}; } -class AMDGPULowerModuleLDS : public ModulePass { +class AMDGPULowerModuleLDS { + const AMDGPUTargetMachine &TM; static void removeLocalVarsFromUsedLists(Module &M, @@ -291,7 +292,8 @@ class AMDGPULowerModuleLDS : public ModulePass { // equivalent target specific intrinsic which lasts until immediately after // codegen would suffice for that, but one would still need to ensure that // the variables are allocated in the anticpated order. - IRBuilder<> Builder(Func->getEntryBlock().getFirstNonPHI()); + BasicBlock *Entry = &Func->getEntryBlock(); + IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt()); Function *Decl = Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {}); @@ -326,11 +328,7 @@ class AMDGPULowerModuleLDS : public ModulePass { } public: - static char ID; - - AMDGPULowerModuleLDS() : ModulePass(ID) { - initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry()); - } + AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {} using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>; @@ -854,7 +852,7 @@ public: appendToCompilerUsed(M, {static_cast<GlobalValue *>( ConstantExpr::getPointerBitCastOrAddrSpaceCast( cast<Constant>(ModuleScopeReplacement.SGV), - Type::getInt8PtrTy(Ctx)))}); + PointerType::getUnqual(Ctx)))}); // module.lds will be allocated at zero in any kernel that allocates it recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0); @@ -1089,7 +1087,7 @@ public: return KernelToCreatedDynamicLDS; } - bool runOnModule(Module &M) override { + bool runOnModule(Module &M) { CallGraph CG = CallGraph(M); bool Changed = superAlignLDSGlobals(M); @@ -1241,6 +1239,7 @@ public: } if (Offset != 0) { + (void)TM; // TODO: Account for target maximum LDS std::string Buffer; raw_string_ostream SS{Buffer}; SS << format("%u", Offset); @@ -1367,9 +1366,9 @@ private: Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding); LocalVars.push_back(new GlobalVariable( - M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy), - "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, - false)); + M, ATy, false, GlobalValue::InternalLinkage, + PoisonValue::get(ATy), "", nullptr, GlobalValue::NotThreadLocal, + AMDGPUAS::LOCAL_ADDRESS, false)); IsPaddingField.push_back(true); CurrentOffset += Padding; } @@ -1391,7 +1390,7 @@ private: Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]); GlobalVariable *SGV = new GlobalVariable( - M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy), + M, LDSTy, false, GlobalValue::InternalLinkage, PoisonValue::get(LDSTy), VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false); SGV->setAlignment(StructAlign); @@ -1530,21 +1529,51 @@ private: } }; +class AMDGPULowerModuleLDSLegacy : public ModulePass { +public: + const AMDGPUTargetMachine *TM; + static char ID; + + AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr) + : ModulePass(ID), TM(TM_) { + initializeAMDGPULowerModuleLDSLegacyPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + if (!TM) + AU.addRequired<TargetPassConfig>(); + } + + bool runOnModule(Module &M) override { + if (!TM) { + auto &TPC = getAnalysis<TargetPassConfig>(); + TM = &TPC.getTM<AMDGPUTargetMachine>(); + } + + return AMDGPULowerModuleLDS(*TM).runOnModule(M); + } +}; + } // namespace -char AMDGPULowerModuleLDS::ID = 0; +char AMDGPULowerModuleLDSLegacy::ID = 0; -char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID; +char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID; -INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, - "Lower uses of LDS variables from non-kernel functions", false, - false) +INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE, + "Lower uses of LDS variables from non-kernel functions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE, + "Lower uses of LDS variables from non-kernel functions", + false, false) -ModulePass *llvm::createAMDGPULowerModuleLDSPass() { - return new AMDGPULowerModuleLDS(); +ModulePass * +llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) { + return new AMDGPULowerModuleLDSLegacy(TM); } PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M, ModuleAnalysisManager &) { - return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none() - : PreservedAnalyses::all(); + return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); } |
