aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp77
1 files changed, 53 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index e3a645977f92..d2a02143e4e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -177,6 +177,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/BitVector.h"
@@ -184,8 +185,8 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@@ -204,7 +205,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include <tuple>
#include <vector>
#include <cstdio>
@@ -252,7 +252,8 @@ template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
return {std::move(V)};
}
-class AMDGPULowerModuleLDS : public ModulePass {
+class AMDGPULowerModuleLDS {
+ const AMDGPUTargetMachine &TM;
static void
removeLocalVarsFromUsedLists(Module &M,
@@ -291,7 +292,8 @@ class AMDGPULowerModuleLDS : public ModulePass {
// equivalent target specific intrinsic which lasts until immediately after
// codegen would suffice for that, but one would still need to ensure that
// the variables are allocated in the anticpated order.
- IRBuilder<> Builder(Func->getEntryBlock().getFirstNonPHI());
+ BasicBlock *Entry = &Func->getEntryBlock();
+ IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
Function *Decl =
Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
@@ -326,11 +328,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
public:
- static char ID;
-
- AMDGPULowerModuleLDS() : ModulePass(ID) {
- initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
@@ -854,7 +852,7 @@ public:
appendToCompilerUsed(M, {static_cast<GlobalValue *>(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
cast<Constant>(ModuleScopeReplacement.SGV),
- Type::getInt8PtrTy(Ctx)))});
+ PointerType::getUnqual(Ctx)))});
// module.lds will be allocated at zero in any kernel that allocates it
recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
@@ -1089,7 +1087,7 @@ public:
return KernelToCreatedDynamicLDS;
}
- bool runOnModule(Module &M) override {
+ bool runOnModule(Module &M) {
CallGraph CG = CallGraph(M);
bool Changed = superAlignLDSGlobals(M);
@@ -1241,6 +1239,7 @@ public:
}
if (Offset != 0) {
+ (void)TM; // TODO: Account for target maximum LDS
std::string Buffer;
raw_string_ostream SS{Buffer};
SS << format("%u", Offset);
@@ -1367,9 +1366,9 @@ private:
Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
LocalVars.push_back(new GlobalVariable(
- M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
- "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
- false));
+ M, ATy, false, GlobalValue::InternalLinkage,
+ PoisonValue::get(ATy), "", nullptr, GlobalValue::NotThreadLocal,
+ AMDGPUAS::LOCAL_ADDRESS, false));
IsPaddingField.push_back(true);
CurrentOffset += Padding;
}
@@ -1391,7 +1390,7 @@ private:
Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]);
GlobalVariable *SGV = new GlobalVariable(
- M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
+ M, LDSTy, false, GlobalValue::InternalLinkage, PoisonValue::get(LDSTy),
VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
false);
SGV->setAlignment(StructAlign);
@@ -1530,21 +1529,51 @@ private:
}
};
+class AMDGPULowerModuleLDSLegacy : public ModulePass { // Legacy pass-manager wrapper that forwards to AMDGPULowerModuleLDS.
+public:
+ const AMDGPUTargetMachine *TM; // May be null after construction; runOnModule fills it in on first use.
+ static char ID; // Passed to the ModulePass base constructor as this pass's identifier.
+
+ AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr)
+ : ModulePass(ID), TM(TM_) {
+ initializeAMDGPULowerModuleLDSLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (!TM) // No target machine was supplied, so TargetPassConfig is required to look one up.
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ bool runOnModule(Module &M) override {
+ if (!TM) { // Resolve the target machine through TargetPassConfig and cache it in TM.
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ TM = &TPC.getTM<AMDGPUTargetMachine>();
+ }
+
+ return AMDGPULowerModuleLDS(*TM).runOnModule(M); // Delegate the actual work to the pass-manager-independent class.
+ }
+};
+
} // namespace
-char AMDGPULowerModuleLDS::ID = 0;
+char AMDGPULowerModuleLDSLegacy::ID = 0;
-char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID;
+char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID;
-INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
- "Lower uses of LDS variables from non-kernel functions", false,
- false)
+INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+ "Lower uses of LDS variables from non-kernel functions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
+ "Lower uses of LDS variables from non-kernel functions",
+ false, false)
-ModulePass *llvm::createAMDGPULowerModuleLDSPass() {
- return new AMDGPULowerModuleLDS();
+ModulePass *
+llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
+ return new AMDGPULowerModuleLDSLegacy(TM); // A null TM is accepted; the pass then obtains it from TargetPassConfig.
}
PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
ModuleAnalysisManager &) {
- return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
+ return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none() // true result: report no analyses preserved
+ : PreservedAnalyses::all(); // false result: report all analyses preserved
}