aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp398
1 files changed, 334 insertions, 64 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 4ae8ce7e5ccf..31afbc6b4262 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,6 +19,7 @@
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
+#include "CGOpenMPRuntimeAMDGCN.h"
#include "CGOpenMPRuntimeNVPTX.h"
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
@@ -75,11 +76,11 @@ static const char AnnotationSection[] = "llvm.metadata";
static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
switch (CGM.getTarget().getCXXABI().getKind()) {
+ case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
- case TargetCXXABI::iOS64:
case TargetCXXABI::WatchOS:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::GenericItanium:
@@ -122,6 +123,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity();
IntAlignInBytes =
C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity();
+ CharTy =
+ llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth());
IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
IntPtrTy = llvm::IntegerType::get(LLVMContext,
C.getTargetInfo().getMaxPointerWidth());
@@ -215,6 +218,11 @@ void CodeGenModule::createOpenMPRuntime() {
"OpenMP NVPTX is only prepared to deal with device code.");
OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
break;
+ case llvm::Triple::amdgcn:
+ assert(getLangOpts().OpenMPIsDevice &&
+ "OpenMP AMDGCN is only prepared to deal with device code.");
+ OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+ break;
default:
if (LangOpts.OpenMPSimd)
OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
@@ -366,7 +374,7 @@ void CodeGenModule::checkAliases() {
for (const GlobalDecl &GD : Aliases) {
StringRef MangledName = getMangledName(GD);
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
- auto *Alias = dyn_cast<llvm::GlobalIndirectSymbol>(Entry);
+ auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry);
Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType()));
Alias->eraseFromParent();
}
@@ -395,6 +403,48 @@ void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags,
}
}
+static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO,
+ llvm::Module &M) {
+ if (!LO.VisibilityFromDLLStorageClass)
+ return;
+
+ llvm::GlobalValue::VisibilityTypes DLLExportVisibility =
+ CodeGenModule::GetLLVMVisibility(LO.getDLLExportVisibility());
+ llvm::GlobalValue::VisibilityTypes NoDLLStorageClassVisibility =
+ CodeGenModule::GetLLVMVisibility(LO.getNoDLLStorageClassVisibility());
+ llvm::GlobalValue::VisibilityTypes ExternDeclDLLImportVisibility =
+ CodeGenModule::GetLLVMVisibility(LO.getExternDeclDLLImportVisibility());
+ llvm::GlobalValue::VisibilityTypes ExternDeclNoDLLStorageClassVisibility =
+ CodeGenModule::GetLLVMVisibility(
+ LO.getExternDeclNoDLLStorageClassVisibility());
+
+ for (llvm::GlobalValue &GV : M.global_values()) {
+ if (GV.hasAppendingLinkage() || GV.hasLocalLinkage())
+ continue;
+
+ // Reset DSO locality before setting the visibility. This removes
+ // any effects that visibility options and annotations may have
+ // had on the DSO locality. Setting the visibility will implicitly set
+ // appropriate globals to DSO Local; however, this will be pessimistic
+ // w.r.t. to the normal compiler IRGen.
+ GV.setDSOLocal(false);
+
+ if (GV.isDeclarationForLinker()) {
+ GV.setVisibility(GV.getDLLStorageClass() ==
+ llvm::GlobalValue::DLLImportStorageClass
+ ? ExternDeclDLLImportVisibility
+ : ExternDeclNoDLLStorageClassVisibility);
+ } else {
+ GV.setVisibility(GV.getDLLStorageClass() ==
+ llvm::GlobalValue::DLLExportStorageClass
+ ? DLLExportVisibility
+ : NoDLLStorageClassVisibility);
+ }
+
+ GV.setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+ }
+}
+
void CodeGenModule::Release() {
EmitDeferred();
EmitVTablesOpportunistically();
@@ -486,9 +536,6 @@ void CodeGenModule::Release() {
if (Context.getLangOpts().SemanticInterposition)
// Require various optimization to respect semantic interposition.
getModule().setSemanticInterposition(1);
- else if (Context.getLangOpts().ExplicitNoSemanticInterposition)
- // Allow dso_local on applicable targets.
- getModule().setSemanticInterposition(0);
if (CodeGenOpts.EmitCodeView) {
// Indicate that we want CodeView in the metadata.
@@ -584,6 +631,30 @@ void CodeGenModule::Release() {
1);
}
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 ||
+ Arch == llvm::Triple::aarch64_be) {
+ getModule().addModuleFlag(llvm::Module::Error,
+ "branch-target-enforcement",
+ LangOpts.BranchTargetEnforcement);
+
+ getModule().addModuleFlag(llvm::Module::Error, "sign-return-address",
+ LangOpts.hasSignReturnAddress());
+
+ getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all",
+ LangOpts.isSignReturnAddressScopeAll());
+
+ getModule().addModuleFlag(llvm::Module::Error,
+ "sign-return-address-with-bkey",
+ !LangOpts.isSignReturnAddressWithAKey());
+ }
+
+ if (!CodeGenOpts.MemoryProfileOutput.empty()) {
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ getModule().addModuleFlag(
+ llvm::Module::Error, "MemProfProfileFilename",
+ llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput));
+ }
+
if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
// Indicate whether __nvvm_reflect should be configured to flush denormal
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
@@ -658,6 +729,12 @@ void CodeGenModule::Release() {
getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
EmitBackendOptionsMetadata(getCodeGenOpts());
+
+ // Set visibility from DLL storage class
+ // We do this at the end of LLVM IR generation; after any operation
+ // that might affect the DLL storage class or the visibility, and
+ // before anything that might act on these.
+ setVisibilityFromDLLStorageClass(LangOpts, getModule());
}
void CodeGenModule::EmitOpenCLMetadata() {
@@ -868,17 +945,32 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
return true;
- // Only handle COFF and ELF for now.
- if (!TT.isOSBinFormatELF())
- return false;
-
- // If this is not an executable, don't assume anything is local.
const auto &CGOpts = CGM.getCodeGenOpts();
llvm::Reloc::Model RM = CGOpts.RelocationModel;
const auto &LOpts = CGM.getLangOpts();
- if (RM != llvm::Reloc::Static && !LOpts.PIE)
+
+ if (TT.isOSBinFormatMachO()) {
+ if (RM == llvm::Reloc::Static)
+ return true;
+ return GV->isStrongDefinitionForLinker();
+ }
+
+ // Only handle COFF and ELF for now.
+ if (!TT.isOSBinFormatELF())
return false;
+ if (RM != llvm::Reloc::Static && !LOpts.PIE) {
+ // On ELF, if -fno-semantic-interposition is specified and the target
+ // supports local aliases, there will be neither CC1
+ // -fsemantic-interposition nor -fhalf-no-semantic-interposition. Set
+ // dso_local if using a local alias is preferable (can avoid GOT
+ // indirection).
+ if (!GV->canBenefitFromLocalAlias())
+ return false;
+ return !(CGM.getLangOpts().SemanticInterposition ||
+ CGM.getLangOpts().HalfNoSemanticInterposition);
+ }
+
// A definition cannot be preempted from an executable.
if (!GV->isDeclarationForLinker())
return true;
@@ -889,23 +981,31 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage())
return false;
- // PPC has no copy relocations and cannot use a plt entry as a symbol address.
- llvm::Triple::ArchType Arch = TT.getArch();
- if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 ||
- Arch == llvm::Triple::ppc64le)
+ // PowerPC64 prefers TOC indirection to avoid copy relocations.
+ if (TT.isPPC64())
return false;
- // If we can use copy relocations we can assume it is local.
- if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV))
- if (!Var->isThreadLocal() &&
- (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations))
+ if (CGOpts.DirectAccessExternalData) {
+ // If -fdirect-access-external-data (default for -fno-pic), set dso_local
+ // for non-thread-local variables. If the symbol is not defined in the
+ // executable, a copy relocation will be needed at link time. dso_local is
+ // excluded for thread-local variables because they generally don't support
+ // copy relocations.
+ if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV))
+ if (!Var->isThreadLocal())
+ return true;
+
+ // -fno-pic sets dso_local on a function declaration to allow direct
+ // accesses when taking its address (similar to a data symbol). If the
+ // function is not defined in the executable, a canonical PLT entry will be
+ // needed at link time. -fno-direct-access-external-data can avoid the
+ // canonical PLT entry. We don't generalize this condition to -fpie/-fpic as
+ // it could just cause trouble without providing perceptible benefits.
+ if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
return true;
+ }
- // If we can use a plt entry as the symbol address we can assume it
- // is local.
- // FIXME: This should work for PIE, but the gold linker doesn't support it.
- if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static)
- return true;
+ // If we can use copy relocations we can assume it is local.
// Otherwise don't assume it is local.
return false;
@@ -1207,8 +1307,10 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority,
/// AddGlobalDtor - Add a function to the list that will be called
/// when the module is unloaded.
-void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) {
- if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) {
+void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority,
+ bool IsDtorAttrFunc) {
+ if (CodeGenOpts.RegisterGlobalDtorsWithAtExit &&
+ (!getContext().getTargetInfo().getTriple().isOSAIX() || IsDtorAttrFunc)) {
DtorsUsingAtExit[Priority].push_back(Dtor);
return;
}
@@ -1321,10 +1423,18 @@ static void removeImageAccessQualifier(std::string& TyName) {
// (basically all single AS CPUs).
static unsigned ArgInfoAddressSpace(LangAS AS) {
switch (AS) {
- case LangAS::opencl_global: return 1;
- case LangAS::opencl_constant: return 2;
- case LangAS::opencl_local: return 3;
- case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
+ case LangAS::opencl_global:
+ return 1;
+ case LangAS::opencl_constant:
+ return 2;
+ case LangAS::opencl_local:
+ return 3;
+ case LangAS::opencl_generic:
+ return 4; // Not in SPIR 2.0 specs.
+ case LangAS::opencl_global_device:
+ return 5;
+ case LangAS::opencl_global_host:
+ return 6;
default:
return 0; // Assume private.
}
@@ -1658,7 +1768,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::OptimizeForSize);
B.addAttribute(llvm::Attribute::Cold);
}
-
+ if (D->hasAttr<HotAttr>())
+ B.addAttribute(llvm::Attribute::Hot);
if (D->hasAttr<MinSizeAttr>())
B.addAttribute(llvm::Attribute::MinSize);
}
@@ -1708,6 +1819,15 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
}
}
+void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D,
+ llvm::Function *F) {
+ if (D->hasAttr<StrictFPAttr>()) {
+ llvm::AttrBuilder FuncAttrs;
+ FuncAttrs.addAttribute("strictfp");
+ F->addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+ }
+}
+
void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
const Decl *D = GD.getDecl();
if (dyn_cast_or_null<NamedDecl>(D))
@@ -1732,6 +1852,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// we have a decl for the function and it has a target attribute then
// parse that and add it to the feature set.
StringRef TargetCPU = getTarget().getTargetOpts().CPU;
+ StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU;
std::vector<std::string> Features;
const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
@@ -1752,9 +1873,14 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// the function.
if (TD) {
ParsedTargetAttr ParsedAttr = TD->parse();
- if (ParsedAttr.Architecture != "" &&
- getTarget().isValidCPUName(ParsedAttr.Architecture))
+ if (!ParsedAttr.Architecture.empty() &&
+ getTarget().isValidCPUName(ParsedAttr.Architecture)) {
TargetCPU = ParsedAttr.Architecture;
+ TuneCPU = ""; // Clear the tune CPU.
+ }
+ if (!ParsedAttr.Tune.empty() &&
+ getTarget().isValidCPUName(ParsedAttr.Tune))
+ TuneCPU = ParsedAttr.Tune;
}
} else {
// Otherwise just add the existing target cpu and target features to the
@@ -1762,10 +1888,14 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
Features = getTarget().getTargetOpts().Features;
}
- if (TargetCPU != "") {
+ if (!TargetCPU.empty()) {
Attrs.addAttribute("target-cpu", TargetCPU);
AddedAttr = true;
}
+ if (!TuneCPU.empty()) {
+ Attrs.addAttribute("tune-cpu", TuneCPU);
+ AddedAttr = true;
+ }
if (!Features.empty()) {
llvm::sort(Features);
Attrs.addAttribute("target-features", llvm::join(Features, ","));
@@ -1802,8 +1932,11 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
// We know that GetCPUAndFeaturesAttributes will always have the
// newest set, since it has the newest possible FunctionDecl, so the
// new ones should replace the old.
- F->removeFnAttr("target-cpu");
- F->removeFnAttr("target-features");
+ llvm::AttrBuilder RemoveAttrs;
+ RemoveAttrs.addAttribute("target-cpu");
+ RemoveAttrs.addAttribute("target-features");
+ RemoveAttrs.addAttribute("tune-cpu");
+ F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs);
F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs);
}
}
@@ -1959,7 +2092,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
}
void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
- assert(!GV->isDeclaration() &&
+ assert((isa<llvm::Function>(GV) || !GV->isDeclaration()) &&
"Only globals with definition can force usage.");
LLVMUsed.emplace_back(GV);
}
@@ -2165,6 +2298,11 @@ void CodeGenModule::EmitDeferred() {
assert(DeferredVTables.empty());
}
+ // Emit CUDA/HIP static device variables referenced by host code only.
+ if (getLangOpts().CUDA)
+ for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost)
+ DeferredDeclsToEmit.push_back(V);
+
// Stop if we're out of both deferred vtables and deferred declarations.
if (DeferredDeclsToEmit.empty())
return;
@@ -2280,13 +2418,47 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) {
return llvm::ConstantInt::get(Int32Ty, LineNo);
}
+llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) {
+ ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()};
+ if (Exprs.empty())
+ return llvm::ConstantPointerNull::get(Int8PtrTy);
+
+ llvm::FoldingSetNodeID ID;
+ for (Expr *E : Exprs) {
+ ID.Add(cast<clang::ConstantExpr>(E)->getAPValueResult());
+ }
+ llvm::Constant *&Lookup = AnnotationArgs[ID.ComputeHash()];
+ if (Lookup)
+ return Lookup;
+
+ llvm::SmallVector<llvm::Constant *, 4> LLVMArgs;
+ LLVMArgs.reserve(Exprs.size());
+ ConstantEmitter ConstEmiter(*this);
+ llvm::transform(Exprs, std::back_inserter(LLVMArgs), [&](const Expr *E) {
+ const auto *CE = cast<clang::ConstantExpr>(E);
+ return ConstEmiter.emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(),
+ CE->getType());
+ });
+ auto *Struct = llvm::ConstantStruct::getAnon(LLVMArgs);
+ auto *GV = new llvm::GlobalVariable(getModule(), Struct->getType(), true,
+ llvm::GlobalValue::PrivateLinkage, Struct,
+ ".args");
+ GV->setSection(AnnotationSection);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, Int8PtrTy);
+
+ Lookup = Bitcasted;
+ return Bitcasted;
+}
+
llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
const AnnotateAttr *AA,
SourceLocation L) {
// Get the globals for file name, annotation, and the line number.
llvm::Constant *AnnoGV = EmitAnnotationString(AA->getAnnotation()),
*UnitGV = EmitAnnotationUnit(L),
- *LineNoCst = EmitAnnotationLineNo(L);
+ *LineNoCst = EmitAnnotationLineNo(L),
+ *Args = EmitAnnotationArgs(AA);
llvm::Constant *ASZeroGV = GV;
if (GV->getAddressSpace() != 0) {
@@ -2295,11 +2467,12 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
}
// Create the ConstantStruct for the global annotation.
- llvm::Constant *Fields[4] = {
- llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy),
- LineNoCst
+ llvm::Constant *Fields[] = {
+ llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy),
+ llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy),
+ llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy),
+ LineNoCst,
+ Args,
};
return llvm::ConstantStruct::getAnon(Fields);
}
@@ -2390,6 +2563,34 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
return true;
}
+bool CodeGenModule::isProfileInstrExcluded(llvm::Function *Fn,
+ SourceLocation Loc) const {
+ const auto &ProfileList = getContext().getProfileList();
+ // If the profile list is empty, then instrument everything.
+ if (ProfileList.isEmpty())
+ return false;
+ CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr();
+ // First, check the function name.
+ Optional<bool> V = ProfileList.isFunctionExcluded(Fn->getName(), Kind);
+ if (V.hasValue())
+ return *V;
+ // Next, check the source location.
+ if (Loc.isValid()) {
+ Optional<bool> V = ProfileList.isLocationExcluded(Loc, Kind);
+ if (V.hasValue())
+ return *V;
+ }
+ // If location is unknown, this may be a compiler-generated function. Assume
+ // it's located in the main file.
+ auto &SM = Context.getSourceManager();
+ if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
+ Optional<bool> V = ProfileList.isFileExcluded(MainFile->getName(), Kind);
+ if (V.hasValue())
+ return *V;
+ }
+ return ProfileList.getDefault();
+}
+
bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
// Never defer when EmitAllDecls is specified.
if (LangOpts.EmitAllDecls)
@@ -2490,6 +2691,33 @@ ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) {
return ConstantAddress(Addr, Alignment);
}
+ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
+ const TemplateParamObjectDecl *TPO) {
+ StringRef Name = getMangledName(TPO);
+ CharUnits Alignment = getNaturalTypeAlignment(TPO->getType());
+
+ if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name))
+ return ConstantAddress(GV, Alignment);
+
+ ConstantEmitter Emitter(*this);
+ llvm::Constant *Init = Emitter.emitForInitializer(
+ TPO->getValue(), TPO->getType().getAddressSpace(), TPO->getType());
+
+ if (!Init) {
+ ErrorUnsupported(TPO, "template parameter object");
+ return ConstantAddress::invalid();
+ }
+
+ auto *GV = new llvm::GlobalVariable(
+ getModule(), Init->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name);
+ if (supportsCOMDAT())
+ GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
+ Emitter.finalize(GV);
+
+ return ConstantAddress(GV, Alignment);
+}
+
ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
const AliasAttr *AA = VD->getAttr<AliasAttr>();
assert(AA && "No alias?");
@@ -3789,6 +4017,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
if (LangOpts.OpenCL) {
AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
assert(AddrSpace == LangAS::opencl_global ||
+ AddrSpace == LangAS::opencl_global_device ||
+ AddrSpace == LangAS::opencl_global_host ||
AddrSpace == LangAS::opencl_constant ||
AddrSpace == LangAS::opencl_local ||
AddrSpace >= LangAS::FirstTargetAddressSpace);
@@ -3952,13 +4182,14 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// Shadows of initialized device-side global variables are also left
// undefined.
bool IsCUDAShadowVar =
- !getLangOpts().CUDAIsDevice &&
+ !getLangOpts().CUDAIsDevice && !D->hasAttr<HIPManagedAttr>() &&
(D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
D->hasAttr<CUDASharedAttr>());
bool IsCUDADeviceShadowVar =
getLangOpts().CUDAIsDevice &&
(D->getType()->isCUDADeviceBuiltinSurfaceType() ||
- D->getType()->isCUDADeviceBuiltinTextureType());
+ D->getType()->isCUDADeviceBuiltinTextureType() ||
+ D->hasAttr<HIPManagedAttr>());
// HIP pinned shadow of initialized host-side global variables are also
// left undefined.
if (getLangOpts().CUDA &&
@@ -4075,7 +4306,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// Shadow variables and their properties must be registered with CUDA
// runtime. Skip Extern global variables, which will be registered in
// the TU where they are defined.
- if (!D->hasExternalStorage())
+ //
+ // Don't register a C++17 inline variable. The local symbol can be
+ // discarded and referencing a discarded local symbol from outside the
+ // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+ // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+ if (!D->hasExternalStorage() && !D->isInline())
getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
D->hasAttr<CUDAConstantAttr>());
} else if (D->hasAttr<CUDASharedAttr>()) {
@@ -4325,13 +4561,16 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// and must all be equivalent. However, we are not allowed to
// throw away these explicit instantiations.
//
- // We don't currently support CUDA device code spread out across multiple TUs,
+ // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU,
// so say that CUDA templates are either external (for kernels) or internal.
- // This lets llvm perform aggressive inter-procedural optimizations.
+ // This lets llvm perform aggressive inter-procedural optimizations. For
+ // -fgpu-rdc case, device function calls across multiple TU's are allowed,
+ // therefore we need to follow the normal linkage paradigm.
if (Linkage == GVA_StrongODR) {
- if (Context.getLangOpts().AppleKext)
+ if (getLangOpts().AppleKext)
return llvm::Function::ExternalLinkage;
- if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ !getLangOpts().GPURelocatableDeviceCode)
return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
: llvm::Function::InternalLinkage;
return llvm::Function::WeakODRLinkage;
@@ -4522,9 +4761,11 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
MaybeHandleStaticInExternC(D, Fn);
-
maybeSetTrivialComdat(*D, *Fn);
+ // Set CodeGen attributes that represent floating point environment.
+ setLLVMFunctionFEnvAttributes(D, Fn);
+
CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
setNonAliasAttributes(GD, Fn);
@@ -4533,7 +4774,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
AddGlobalCtor(Fn, CA->getPriority());
if (const DestructorAttr *DA = D->getAttr<DestructorAttr>())
- AddGlobalDtor(Fn, DA->getPriority());
+ AddGlobalDtor(Fn, DA->getPriority(), true);
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
}
@@ -4572,8 +4813,10 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(),
llvm::PointerType::getUnqual(DeclTy),
/*D=*/nullptr);
- LT = getLLVMLinkageVarDefinition(cast<VarDecl>(GD.getDecl()),
- D->getType().isConstQualified());
+ if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl()))
+ LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified());
+ else
+ LT = getFunctionLinkage(GD);
}
// Create the new alias itself, but don't set a name yet.
@@ -4896,6 +5139,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
switch (Triple.getObjectFormat()) {
case llvm::Triple::UnknownObjectFormat:
llvm_unreachable("unknown file format");
+ case llvm::Triple::GOFF:
+ llvm_unreachable("GOFF is not yet implemented");
case llvm::Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
case llvm::Triple::COFF:
@@ -5373,16 +5618,21 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
Spec->hasDefinition())
DI->completeTemplateDefinition(*Spec);
} LLVM_FALLTHROUGH;
- case Decl::CXXRecord:
- if (CGDebugInfo *DI = getModuleDebugInfo())
+ case Decl::CXXRecord: {
+ CXXRecordDecl *CRD = cast<CXXRecordDecl>(D);
+ if (CGDebugInfo *DI = getModuleDebugInfo()) {
+ if (CRD->hasDefinition())
+ DI->EmitAndRetainType(getContext().getRecordType(cast<RecordDecl>(D)));
if (auto *ES = D->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
- DI->completeUnusedClass(cast<CXXRecordDecl>(*D));
+ DI->completeUnusedClass(*CRD);
+ }
// Emit any static data members, they may be definitions.
- for (auto *I : cast<CXXRecordDecl>(D)->decls())
+ for (auto *I : CRD->decls())
if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
EmitTopLevelDecl(I);
break;
+ }
// No code generation needed.
case Decl::UsingShadow:
case Decl::ClassTemplate:
@@ -5568,6 +5818,25 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D));
break;
+ case Decl::Typedef:
+ case Decl::TypeAlias: // using foo = bar; [C++11]
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ DI->EmitAndRetainType(
+ getContext().getTypedefType(cast<TypedefNameDecl>(D)));
+ break;
+
+ case Decl::Record:
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ if (cast<RecordDecl>(D)->getDefinition())
+ DI->EmitAndRetainType(getContext().getRecordType(cast<RecordDecl>(D)));
+ break;
+
+ case Decl::Enum:
+ if (CGDebugInfo *DI = getModuleDebugInfo())
+ if (cast<EnumDecl>(D)->getDefinition())
+ DI->EmitAndRetainType(getContext().getEnumType(cast<EnumDecl>(D)));
+ break;
+
default:
// Make sure we handled everything we should, every other kind is a
// non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind
@@ -6006,16 +6275,17 @@ CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T,
*BaseInfo = LValueBaseInfo(AlignmentSource::Type);
CharUnits Alignment;
- // For C++ class pointees, we don't know whether we're pointing at a
- // base or a complete object, so we generally need to use the
- // non-virtual alignment.
const CXXRecordDecl *RD;
- if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) {
+ if (T.getQualifiers().hasUnaligned()) {
+ Alignment = CharUnits::One();
+ } else if (forPointeeType && !AlignForArray &&
+ (RD = T->getAsCXXRecordDecl())) {
+ // For C++ class pointees, we don't know whether we're pointing at a
+ // base or a complete object, so we generally need to use the
+ // non-virtual alignment.
Alignment = getClassPointerAlignment(RD);
} else {
Alignment = getContext().getTypeAlignInChars(T);
- if (T.getQualifiers().hasUnaligned())
- Alignment = CharUnits::One();
}
// Cap to the global maximum type alignment unless the alignment