diff options
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 398 |
1 files changed, 334 insertions, 64 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4ae8ce7e5ccf..31afbc6b4262 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,6 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeAMDGCN.h" #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" @@ -75,11 +76,11 @@ static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getTarget().getCXXABI().getKind()) { + case TargetCXXABI::AppleARM64: case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: - case TargetCXXABI::iOS64: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: @@ -122,6 +123,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); + CharTy = + llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth()); IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); @@ -215,6 +218,11 @@ void CodeGenModule::createOpenMPRuntime() { "OpenMP NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); break; + case llvm::Triple::amdgcn: + assert(getLangOpts().OpenMPIsDevice && + "OpenMP AMDGCN is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); @@ -366,7 +374,7 @@ void CodeGenModule::checkAliases() { for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = dyn_cast<llvm::GlobalIndirectSymbol>(Entry); + auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } @@ -395,6 +403,48 @@ void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, } } +static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, + llvm::Module &M) { + if (!LO.VisibilityFromDLLStorageClass) + return; + + llvm::GlobalValue::VisibilityTypes DLLExportVisibility = + CodeGenModule::GetLLVMVisibility(LO.getDLLExportVisibility()); + llvm::GlobalValue::VisibilityTypes NoDLLStorageClassVisibility = + CodeGenModule::GetLLVMVisibility(LO.getNoDLLStorageClassVisibility()); + llvm::GlobalValue::VisibilityTypes ExternDeclDLLImportVisibility = + CodeGenModule::GetLLVMVisibility(LO.getExternDeclDLLImportVisibility()); + llvm::GlobalValue::VisibilityTypes ExternDeclNoDLLStorageClassVisibility = + CodeGenModule::GetLLVMVisibility( + LO.getExternDeclNoDLLStorageClassVisibility()); + + for (llvm::GlobalValue &GV : M.global_values()) { + if (GV.hasAppendingLinkage() || GV.hasLocalLinkage()) + continue; + + // Reset DSO locality before setting the visibility. This removes + // any effects that visibility options and annotations may have + // had on the DSO locality. Setting the visibility will implicitly set + // appropriate globals to DSO Local; however, this will be pessimistic + // w.r.t. to the normal compiler IRGen. + GV.setDSOLocal(false); + + if (GV.isDeclarationForLinker()) { + GV.setVisibility(GV.getDLLStorageClass() == + llvm::GlobalValue::DLLImportStorageClass + ? ExternDeclDLLImportVisibility + : ExternDeclNoDLLStorageClassVisibility); + } else { + GV.setVisibility(GV.getDLLStorageClass() == + llvm::GlobalValue::DLLExportStorageClass + ? DLLExportVisibility + : NoDLLStorageClassVisibility); + } + + GV.setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); + } +} + void CodeGenModule::Release() { EmitDeferred(); EmitVTablesOpportunistically(); @@ -486,9 +536,6 @@ void CodeGenModule::Release() { if (Context.getLangOpts().SemanticInterposition) // Require various optimization to respect semantic interposition. getModule().setSemanticInterposition(1); - else if (Context.getLangOpts().ExplicitNoSemanticInterposition) - // Allow dso_local on applicable targets. - getModule().setSemanticInterposition(0); if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. @@ -584,6 +631,30 @@ void CodeGenModule::Release() { 1); } + if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 || + Arch == llvm::Triple::aarch64_be) { + getModule().addModuleFlag(llvm::Module::Error, + "branch-target-enforcement", + LangOpts.BranchTargetEnforcement); + + getModule().addModuleFlag(llvm::Module::Error, "sign-return-address", + LangOpts.hasSignReturnAddress()); + + getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all", + LangOpts.isSignReturnAddressScopeAll()); + + getModule().addModuleFlag(llvm::Module::Error, + "sign-return-address-with-bkey", + !LangOpts.isSignReturnAddressWithAKey()); + } + + if (!CodeGenOpts.MemoryProfileOutput.empty()) { + llvm::LLVMContext &Ctx = TheModule.getContext(); + getModule().addModuleFlag( + llvm::Module::Error, "MemProfProfileFilename", + llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput)); + } + if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) { // Indicate whether __nvvm_reflect should be configured to flush denormal // floating point values to 0. (This corresponds to its "__CUDA_FTZ" @@ -658,6 +729,12 @@ void CodeGenModule::Release() { getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); + + // Set visibility from DLL storage class + // We do this at the end of LLVM IR generation; after any operation + // that might affect the DLL storage class or the visibility, and + // before anything that might act on these. + setVisibilityFromDLLStorageClass(LangOpts, getModule()); } void CodeGenModule::EmitOpenCLMetadata() { @@ -868,17 +945,32 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) return true; - // Only handle COFF and ELF for now. - if (!TT.isOSBinFormatELF()) - return false; - - // If this is not an executable, don't assume anything is local. const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); - if (RM != llvm::Reloc::Static && !LOpts.PIE) + + if (TT.isOSBinFormatMachO()) { + if (RM == llvm::Reloc::Static) + return true; + return GV->isStrongDefinitionForLinker(); + } + + // Only handle COFF and ELF for now. + if (!TT.isOSBinFormatELF()) return false; + if (RM != llvm::Reloc::Static && !LOpts.PIE) { + // On ELF, if -fno-semantic-interposition is specified and the target + // supports local aliases, there will be neither CC1 + // -fsemantic-interposition nor -fhalf-no-semantic-interposition. Set + // dso_local if using a local alias is preferable (can avoid GOT + // indirection). + if (!GV->canBenefitFromLocalAlias()) + return false; + return !(CGM.getLangOpts().SemanticInterposition || + CGM.getLangOpts().HalfNoSemanticInterposition); + } + // A definition cannot be preempted from an executable. if (!GV->isDeclarationForLinker()) return true; @@ -889,23 +981,31 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage()) return false; - // PPC has no copy relocations and cannot use a plt entry as a symbol address. - llvm::Triple::ArchType Arch = TT.getArch(); - if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || - Arch == llvm::Triple::ppc64le) + // PowerPC64 prefers TOC indirection to avoid copy relocations. + if (TT.isPPC64()) return false; - // If we can use copy relocations we can assume it is local. - if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV)) - if (!Var->isThreadLocal() && - (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations)) + if (CGOpts.DirectAccessExternalData) { + // If -fdirect-access-external-data (default for -fno-pic), set dso_local + // for non-thread-local variables. If the symbol is not defined in the + // executable, a copy relocation will be needed at link time. dso_local is + // excluded for thread-local variables because they generally don't support + // copy relocations. + if (auto *Var = dyn_cast<llvm::GlobalVariable>(GV)) + if (!Var->isThreadLocal()) + return true; + + // -fno-pic sets dso_local on a function declaration to allow direct + // accesses when taking its address (similar to a data symbol). If the + // function is not defined in the executable, a canonical PLT entry will be + // needed at link time. -fno-direct-access-external-data can avoid the + // canonical PLT entry. We don't generalize this condition to -fpie/-fpic as + // it could just cause trouble without providing perceptible benefits. + if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) return true; + } - // If we can use a plt entry as the symbol address we can assume it - // is local. - // FIXME: This should work for PIE, but the gold linker doesn't support it. - if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) - return true; + // If we can use copy relocations we can assume it is local. // Otherwise don't assume it is local. return false; @@ -1207,8 +1307,10 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. -void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) { - if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) { +void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority, + bool IsDtorAttrFunc) { + if (CodeGenOpts.RegisterGlobalDtorsWithAtExit && + (!getContext().getTargetInfo().getTriple().isOSAIX() || IsDtorAttrFunc)) { DtorsUsingAtExit[Priority].push_back(Dtor); return; } @@ -1321,10 +1423,18 @@ static void removeImageAccessQualifier(std::string& TyName) { // (basically all single AS CPUs). static unsigned ArgInfoAddressSpace(LangAS AS) { switch (AS) { - case LangAS::opencl_global: return 1; - case LangAS::opencl_constant: return 2; - case LangAS::opencl_local: return 3; - case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. + case LangAS::opencl_global: + return 1; + case LangAS::opencl_constant: + return 2; + case LangAS::opencl_local: + return 3; + case LangAS::opencl_generic: + return 4; // Not in SPIR 2.0 specs. + case LangAS::opencl_global_device: + return 5; + case LangAS::opencl_global_host: + return 6; default: return 0; // Assume private. } @@ -1658,7 +1768,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } - + if (D->hasAttr<HotAttr>()) + B.addAttribute(llvm::Attribute::Hot); if (D->hasAttr<MinSizeAttr>()) B.addAttribute(llvm::Attribute::MinSize); } @@ -1708,6 +1819,15 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, } } +void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D, + llvm::Function *F) { + if (D->hasAttr<StrictFPAttr>()) { + llvm::AttrBuilder FuncAttrs; + FuncAttrs.addAttribute("strictfp"); + F->addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + } +} + void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); if (dyn_cast_or_null<NamedDecl>(D)) @@ -1732,6 +1852,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; + StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU; std::vector<std::string> Features; const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; @@ -1752,9 +1873,14 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, // the function. if (TD) { ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.Architecture != "" && - getTarget().isValidCPUName(ParsedAttr.Architecture)) + if (!ParsedAttr.Architecture.empty() && + getTarget().isValidCPUName(ParsedAttr.Architecture)) { TargetCPU = ParsedAttr.Architecture; + TuneCPU = ""; // Clear the tune CPU. + } + if (!ParsedAttr.Tune.empty() && + getTarget().isValidCPUName(ParsedAttr.Tune)) + TuneCPU = ParsedAttr.Tune; } } else { // Otherwise just add the existing target cpu and target features to the @@ -1762,10 +1888,14 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, Features = getTarget().getTargetOpts().Features; } - if (TargetCPU != "") { + if (!TargetCPU.empty()) { Attrs.addAttribute("target-cpu", TargetCPU); AddedAttr = true; } + if (!TuneCPU.empty()) { + Attrs.addAttribute("tune-cpu", TuneCPU); + AddedAttr = true; + } if (!Features.empty()) { llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); @@ -1802,8 +1932,11 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. - F->removeFnAttr("target-cpu"); - F->removeFnAttr("target-features"); + llvm::AttrBuilder RemoveAttrs; + RemoveAttrs.addAttribute("target-cpu"); + RemoveAttrs.addAttribute("target-features"); + RemoveAttrs.addAttribute("tune-cpu"); + F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs); F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); } } @@ -1959,7 +2092,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { - assert(!GV->isDeclaration() && + assert((isa<llvm::Function>(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } @@ -2165,6 +2298,11 @@ void CodeGenModule::EmitDeferred() { assert(DeferredVTables.empty()); } + // Emit CUDA/HIP static device variables referenced by host code only. + if (getLangOpts().CUDA) + for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) + DeferredDeclsToEmit.push_back(V); + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; @@ -2280,13 +2418,47 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { return llvm::ConstantInt::get(Int32Ty, LineNo); } +llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { + ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()}; + if (Exprs.empty()) + return llvm::ConstantPointerNull::get(Int8PtrTy); + + llvm::FoldingSetNodeID ID; + for (Expr *E : Exprs) { + ID.Add(cast<clang::ConstantExpr>(E)->getAPValueResult()); + } + llvm::Constant *&Lookup = AnnotationArgs[ID.ComputeHash()]; + if (Lookup) + return Lookup; + + llvm::SmallVector<llvm::Constant *, 4> LLVMArgs; + LLVMArgs.reserve(Exprs.size()); + ConstantEmitter ConstEmiter(*this); + llvm::transform(Exprs, std::back_inserter(LLVMArgs), [&](const Expr *E) { + const auto *CE = cast<clang::ConstantExpr>(E); + return ConstEmiter.emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), + CE->getType()); + }); + auto *Struct = llvm::ConstantStruct::getAnon(LLVMArgs); + auto *GV = new llvm::GlobalVariable(getModule(), Struct->getType(), true, + llvm::GlobalValue::PrivateLinkage, Struct, + ".args"); + GV->setSection(AnnotationSection); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, Int8PtrTy); + + Lookup = Bitcasted; + return Bitcasted; +} + llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, const AnnotateAttr *AA, SourceLocation L) { // Get the globals for file name, annotation, and the line number. llvm::Constant *AnnoGV = EmitAnnotationString(AA->getAnnotation()), *UnitGV = EmitAnnotationUnit(L), - *LineNoCst = EmitAnnotationLineNo(L); + *LineNoCst = EmitAnnotationLineNo(L), + *Args = EmitAnnotationArgs(AA); llvm::Constant *ASZeroGV = GV; if (GV->getAddressSpace() != 0) { @@ -2295,11 +2467,12 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, } // Create the ConstantStruct for the global annotation. - llvm::Constant *Fields[4] = { - llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), - LineNoCst + llvm::Constant *Fields[] = { + llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy), + llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), + llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), + LineNoCst, + Args, }; return llvm::ConstantStruct::getAnon(Fields); } @@ -2390,6 +2563,34 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, return true; } +bool CodeGenModule::isProfileInstrExcluded(llvm::Function *Fn, + SourceLocation Loc) const { + const auto &ProfileList = getContext().getProfileList(); + // If the profile list is empty, then instrument everything. + if (ProfileList.isEmpty()) + return false; + CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr(); + // First, check the function name. + Optional<bool> V = ProfileList.isFunctionExcluded(Fn->getName(), Kind); + if (V.hasValue()) + return *V; + // Next, check the source location. + if (Loc.isValid()) { + Optional<bool> V = ProfileList.isLocationExcluded(Loc, Kind); + if (V.hasValue()) + return *V; + } + // If location is unknown, this may be a compiler-generated function. Assume + // it's located in the main file. + auto &SM = Context.getSourceManager(); + if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { + Optional<bool> V = ProfileList.isFileExcluded(MainFile->getName(), Kind); + if (V.hasValue()) + return *V; + } + return ProfileList.getDefault(); +} + bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) @@ -2490,6 +2691,33 @@ ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { return ConstantAddress(Addr, Alignment); } +ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( + const TemplateParamObjectDecl *TPO) { + StringRef Name = getMangledName(TPO); + CharUnits Alignment = getNaturalTypeAlignment(TPO->getType()); + + if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) + return ConstantAddress(GV, Alignment); + + ConstantEmitter Emitter(*this); + llvm::Constant *Init = Emitter.emitForInitializer( + TPO->getValue(), TPO->getType().getAddressSpace(), TPO->getType()); + + if (!Init) { + ErrorUnsupported(TPO, "template parameter object"); + return ConstantAddress::invalid(); + } + + auto *GV = new llvm::GlobalVariable( + getModule(), Init->getType(), + /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); + if (supportsCOMDAT()) + GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); + Emitter.finalize(GV); + + return ConstantAddress(GV, Alignment); +} + ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { const AliasAttr *AA = VD->getAttr<AliasAttr>(); assert(AA && "No alias?"); @@ -3789,6 +4017,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { if (LangOpts.OpenCL) { AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global; assert(AddrSpace == LangAS::opencl_global || + AddrSpace == LangAS::opencl_global_device || + AddrSpace == LangAS::opencl_global_host || AddrSpace == LangAS::opencl_constant || AddrSpace == LangAS::opencl_local || AddrSpace >= LangAS::FirstTargetAddressSpace); @@ -3952,13 +4182,14 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Shadows of initialized device-side global variables are also left // undefined. bool IsCUDAShadowVar = - !getLangOpts().CUDAIsDevice && + !getLangOpts().CUDAIsDevice && !D->hasAttr<HIPManagedAttr>() && (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDASharedAttr>()); bool IsCUDADeviceShadowVar = getLangOpts().CUDAIsDevice && (D->getType()->isCUDADeviceBuiltinSurfaceType() || - D->getType()->isCUDADeviceBuiltinTextureType()); + D->getType()->isCUDADeviceBuiltinTextureType() || + D->hasAttr<HIPManagedAttr>()); // HIP pinned shadow of initialized host-side global variables are also // left undefined. if (getLangOpts().CUDA && @@ -4075,7 +4306,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Shadow variables and their properties must be registered with CUDA // runtime. Skip Extern global variables, which will be registered in // the TU where they are defined. - if (!D->hasExternalStorage()) + // + // Don't register a C++17 inline variable. The local symbol can be + // discarded and referencing a discarded local symbol from outside the + // comdat (__cuda_register_globals) is disallowed by the ELF spec. + // TODO: Reject __device__ constexpr and __device__ inline in Sema. + if (!D->hasExternalStorage() && !D->isInline()) getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), D->hasAttr<CUDAConstantAttr>()); } else if (D->hasAttr<CUDASharedAttr>()) { @@ -4325,13 +4561,16 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. // - // We don't currently support CUDA device code spread out across multiple TUs, + // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU, // so say that CUDA templates are either external (for kernels) or internal. - // This lets llvm perform aggressive inter-procedural optimizations. + // This lets llvm perform aggressive inter-procedural optimizations. For + // -fgpu-rdc case, device function calls across multiple TU's are allowed, + // therefore we need to follow the normal linkage paradigm. if (Linkage == GVA_StrongODR) { - if (Context.getLangOpts().AppleKext) + if (getLangOpts().AppleKext) return llvm::Function::ExternalLinkage; - if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + !getLangOpts().GPURelocatableDeviceCode) return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage : llvm::Function::InternalLinkage; return llvm::Function::WeakODRLinkage; @@ -4522,9 +4761,11 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, MaybeHandleStaticInExternC(D, Fn); - maybeSetTrivialComdat(*D, *Fn); + // Set CodeGen attributes that represent floating point environment. + setLLVMFunctionFEnvAttributes(D, Fn); + CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); @@ -4533,7 +4774,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>()) AddGlobalCtor(Fn, CA->getPriority()); if (const DestructorAttr *DA = D->getAttr<DestructorAttr>()) - AddGlobalDtor(Fn, DA->getPriority()); + AddGlobalDtor(Fn, DA->getPriority(), true); if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, Fn); } @@ -4572,8 +4813,10 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), llvm::PointerType::getUnqual(DeclTy), /*D=*/nullptr); - LT = getLLVMLinkageVarDefinition(cast<VarDecl>(GD.getDecl()), - D->getType().isConstQualified()); + if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl())) + LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified()); + else + LT = getFunctionLinkage(GD); } // Create the new alias itself, but don't set a name yet. @@ -4896,6 +5139,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); + case llvm::Triple::GOFF: + llvm_unreachable("GOFF is not yet implemented"); case llvm::Triple::XCOFF: llvm_unreachable("XCOFF is not yet implemented"); case llvm::Triple::COFF: @@ -5373,16 +5618,21 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { Spec->hasDefinition()) DI->completeTemplateDefinition(*Spec); } LLVM_FALLTHROUGH; - case Decl::CXXRecord: - if (CGDebugInfo *DI = getModuleDebugInfo()) + case Decl::CXXRecord: { + CXXRecordDecl *CRD = cast<CXXRecordDecl>(D); + if (CGDebugInfo *DI = getModuleDebugInfo()) { + if (CRD->hasDefinition()) + DI->EmitAndRetainType(getContext().getRecordType(cast<RecordDecl>(D))); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) - DI->completeUnusedClass(cast<CXXRecordDecl>(*D)); + DI->completeUnusedClass(*CRD); + } // Emit any static data members, they may be definitions. - for (auto *I : cast<CXXRecordDecl>(D)->decls()) + for (auto *I : CRD->decls()) if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) EmitTopLevelDecl(I); break; + } // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: @@ -5568,6 +5818,25 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D)); break; + case Decl::Typedef: + case Decl::TypeAlias: // using foo = bar; [C++11] + if (CGDebugInfo *DI = getModuleDebugInfo()) + DI->EmitAndRetainType( + getContext().getTypedefType(cast<TypedefNameDecl>(D))); + break; + + case Decl::Record: + if (CGDebugInfo *DI = getModuleDebugInfo()) + if (cast<RecordDecl>(D)->getDefinition()) + DI->EmitAndRetainType(getContext().getRecordType(cast<RecordDecl>(D))); + break; + + case Decl::Enum: + if (CGDebugInfo *DI = getModuleDebugInfo()) + if (cast<EnumDecl>(D)->getDefinition()) + DI->EmitAndRetainType(getContext().getEnumType(cast<EnumDecl>(D))); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind @@ -6006,16 +6275,17 @@ CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T, *BaseInfo = LValueBaseInfo(AlignmentSource::Type); CharUnits Alignment; - // For C++ class pointees, we don't know whether we're pointing at a - // base or a complete object, so we generally need to use the - // non-virtual alignment. const CXXRecordDecl *RD; - if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) { + if (T.getQualifiers().hasUnaligned()) { + Alignment = CharUnits::One(); + } else if (forPointeeType && !AlignForArray && + (RD = T->getAsCXXRecordDecl())) { + // For C++ class pointees, we don't know whether we're pointing at a + // base or a complete object, so we generally need to use the + // non-virtual alignment. Alignment = getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); - if (T.getQualifiers().hasUnaligned()) - Alignment = CharUnits::One(); } // Cap to the global maximum type alignment unless the alignment |