summaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--clang/lib/CodeGen/CGCUDANV.cpp148
1 files changed, 94 insertions, 54 deletions
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 5c5cbaff0252..baf2c79cc2b6 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -50,7 +50,7 @@ private:
struct VarInfo {
llvm::GlobalVariable *Var;
const VarDecl *D;
- unsigned Flag;
+ DeviceVarFlags Flags;
};
llvm::SmallVector<VarInfo, 16> DeviceVars;
/// Keeps track of variable containing handle of GPU binary. Populated by
@@ -117,23 +117,38 @@ private:
void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
- std::string getDeviceSideName(const Decl *ND);
+ std::string getDeviceSideName(const NamedDecl *ND) override;
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
- unsigned Flags) override {
- DeviceVars.push_back({&Var, VD, Flags});
+ bool Extern, bool Constant) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Variable, Extern, Constant,
+ /*Normalized*/ false, /*Type*/ 0}});
+ }
+ void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
+ /*Normalized*/ false, Type}});
+ }
+ void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
+ bool Extern, int Type, bool Normalized) override {
+ DeviceVars.push_back({&Var,
+ VD,
+ {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
+ Normalized, Type}});
}
/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function
llvm::Function *makeModuleDtorFunction() override;
- /// Construct and return the stub name of a kernel.
- std::string getDeviceStubName(llvm::StringRef Name) const override;
};
}
@@ -204,40 +219,30 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
return llvm::FunctionType::get(VoidTy, Params, false);
}
-std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) {
- auto *ND = cast<const NamedDecl>(D);
+std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
+ GlobalDecl GD;
+ // D could be either a kernel or a variable.
+ if (auto *FD = dyn_cast<FunctionDecl>(ND))
+ GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
+ else
+ GD = GlobalDecl(ND);
std::string DeviceSideName;
if (DeviceMC->shouldMangleDeclName(ND)) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
- DeviceMC->mangleName(ND, Out);
- DeviceSideName = Out.str();
+ DeviceMC->mangleName(GD, Out);
+ DeviceSideName = std::string(Out.str());
} else
- DeviceSideName = ND->getIdentifier()->getName();
+ DeviceSideName = std::string(ND->getIdentifier()->getName());
return DeviceSideName;
}
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
- // Ensure either we have different ABIs between host and device compilations,
- // says host compilation following MSVC ABI but device compilation follows
- // Itanium C++ ABI or, if they follow the same ABI, kernel names after
- // mangling should be the same after name stubbing. The later checking is
- // very important as the device kernel name being mangled in host-compilation
- // is used to resolve the device binaries to be executed. Inconsistent naming
- // result in undefined behavior. Even though we cannot check that naming
- // directly between host- and device-compilations, the host- and
- // device-mangling in host compilation could help catching certain ones.
- assert((CGF.CGM.getContext().getAuxTargetInfo() &&
- (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() !=
- CGF.CGM.getContext().getTargetInfo().getCXXABI())) ||
- getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) ==
- CGF.CurFn->getName());
-
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
- CGF.getLangOpts().HIPUseNewLaunchAPI)
+ (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -418,7 +423,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// each emitted kernel.
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
for (auto &&I : EmittedKernels) {
- llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D));
+ llvm::Constant *KernelName =
+ makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
@@ -434,30 +440,70 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
Builder.CreateCall(RegisterFunc, Args);
}
+ llvm::Type *VarSizeTy = IntTy;
+ // For HIP or CUDA 9.0+, device variable size is type of `size_t`.
+ if (CGM.getLangOpts().HIP ||
+ ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
+ VarSizeTy = SizeTy;
+
// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
- CharPtrTy, IntTy, IntTy,
+ CharPtrTy, IntTy, VarSizeTy,
IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, RegisterVarParams, false),
+ llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));
+ // void __cudaRegisterSurface(void **, const struct surfaceReference *,
+ // const void **, const char *, int, int);
+ llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(
+ VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
+ false),
+ addUnderscoredPrefixToName("RegisterSurface"));
+ // void __cudaRegisterTexture(void **, const struct textureReference *,
+ // const void **, const char *, int, int, int)
+ llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(
+ VoidTy,
+ {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
+ false),
+ addUnderscoredPrefixToName("RegisterTexture"));
for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;
- unsigned Flags = Info.Flag;
llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
- uint64_t VarSize =
- CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
- llvm::Value *Args[] = {
- &GpuBinaryHandlePtr,
- Builder.CreateBitCast(Var, VoidPtrTy),
- VarName,
- VarName,
- llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
- llvm::ConstantInt::get(IntTy, VarSize),
- llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
- llvm::ConstantInt::get(IntTy, 0)};
- Builder.CreateCall(RegisterVar, Args);
+ switch (Info.Flags.getKind()) {
+ case DeviceVarFlags::Variable: {
+ uint64_t VarSize =
+ CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
+ llvm::Value *Args[] = {
+ &GpuBinaryHandlePtr,
+ Builder.CreateBitCast(Var, VoidPtrTy),
+ VarName,
+ VarName,
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
+ llvm::ConstantInt::get(VarSizeTy, VarSize),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
+ llvm::ConstantInt::get(IntTy, 0)};
+ Builder.CreateCall(RegisterVar, Args);
+ break;
+ }
+ case DeviceVarFlags::Surface:
+ Builder.CreateCall(
+ RegisterSurf,
+ {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+ VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
+ break;
+ case DeviceVarFlags::Texture:
+ Builder.CreateCall(
+ RegisterTex,
+ {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
+ VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
+ llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
+ break;
+ }
}
Builder.CreateRetVoid();
@@ -551,8 +597,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
if (CudaGpuBinary) {
// If fatbin is available from early finalization, create a string
// literal containing the fat binary loaded from the given file.
- FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
- FatbinConstantName, 8);
+ FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()),
+ "", FatbinConstantName, 8);
} else {
// If fatbin is not available, create an external symbol
// __hip_fatbin in section .hip_fatbin. The external symbol is supposed
@@ -586,7 +632,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// For CUDA, create a string literal containing the fat binary loaded from
// the given file.
- FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
FatbinConstantName, 8);
FatMagic = CudaFatMagic;
}
@@ -691,8 +737,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
- llvm::Constant *ModuleIDConstant =
- makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
+ llvm::Constant *ModuleIDConstant = makeConstantString(
+ std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
// Create an alias for the FatbinWrapper that nvcc will look for.
llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
@@ -797,12 +843,6 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
return ModuleDtorFunc;
}
-std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const {
- if (!CGM.getLangOpts().HIP)
- return Name;
- return (Name + ".stub").str();
-}
-
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}