diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-09 13:28:42 +0000 |
| commit | b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (patch) | |
| tree | 7d6e51c294ab6719475d660217aa0c0ad0526292 /clang/lib/CodeGen | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'clang/lib/CodeGen')
77 files changed, 5188 insertions, 4054 deletions
diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index 7c30cecfdb9b..2b20d5a13346 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -246,7 +246,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction &CGF, Address Addr1, } bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD, - bool AllowArrays) { + bool AllowArrays, bool AsIfNoUniqueAddr) { if (FD->isUnnamedBitfield()) return true; @@ -280,13 +280,14 @@ bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD, // not arrays of records, so we must also check whether we stripped off an // array type above. if (isa<CXXRecordDecl>(RT->getDecl()) && - (WasArray || !FD->hasAttr<NoUniqueAddressAttr>())) + (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr<NoUniqueAddressAttr>()))) return false; - return isEmptyRecord(Context, FT, AllowArrays); + return isEmptyRecord(Context, FT, AllowArrays, AsIfNoUniqueAddr); } -bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { +bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr) { const RecordType *RT = T->getAs<RecordType>(); if (!RT) return false; @@ -297,11 +298,11 @@ bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { // If this is a C++ record, check the bases first. 
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) for (const auto &I : CXXRD->bases()) - if (!isEmptyRecord(Context, I.getType(), true)) + if (!isEmptyRecord(Context, I.getType(), true, AsIfNoUniqueAddr)) return false; for (const auto *I : RD->fields()) - if (!isEmptyField(Context, I, AllowArrays)) + if (!isEmptyField(Context, I, AllowArrays, AsIfNoUniqueAddr)) return false; return true; } diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h index 5f0cc289af68..afde08ba100c 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.h +++ b/clang/lib/CodeGen/ABIInfoImpl.h @@ -122,13 +122,19 @@ Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block2, const llvm::Twine &Name = ""); /// isEmptyField - Return true iff a the field is "empty", that is it -/// is an unnamed bit-field or an (array of) empty record(s). -bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays); +/// is an unnamed bit-field or an (array of) empty record(s). If +/// AsIfNoUniqueAddr is true, then C++ record fields are considered empty if +/// the [[no_unique_address]] attribute would have made them empty. +bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not -/// considered empty. -bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); +/// considered empty. If AsIfNoUniqueAddr is true, then C++ record fields are +/// considered empty if the [[no_unique_address]] attribute would have made +/// them empty. +bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. 
it has exactly one non-empty field or diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h new file mode 100644 index 000000000000..72a814cd43d7 --- /dev/null +++ b/clang/lib/CodeGen/BackendConsumer.h @@ -0,0 +1,166 @@ +//===--- BackendConsumer.h - LLVM BackendConsumer Header File -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_BACKENDCONSUMER_H +#define LLVM_CLANG_LIB_CODEGEN_BACKENDCONSUMER_H + +#include "clang/CodeGen/BackendUtil.h" +#include "clang/CodeGen/CodeGenAction.h" + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Support/Timer.h" + +namespace llvm { + class DiagnosticInfoDontCall; +} + +namespace clang { +class ASTContext; +class CodeGenAction; +class CoverageSourceInfo; + +class BackendConsumer : public ASTConsumer { + using LinkModule = CodeGenAction::LinkModule; + + virtual void anchor(); + DiagnosticsEngine &Diags; + BackendAction Action; + const HeaderSearchOptions &HeaderSearchOpts; + const CodeGenOptions &CodeGenOpts; + const TargetOptions &TargetOpts; + const LangOptions &LangOpts; + std::unique_ptr<raw_pwrite_stream> AsmOutStream; + ASTContext *Context; + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; + + llvm::Timer LLVMIRGeneration; + unsigned LLVMIRGenerationRefCount; + + /// True if we've finished generating IR. This prevents us from generating + /// additional LLVM IR after emitting output in HandleTranslationUnit. This + /// can happen when Clang plugins trigger additional AST deserialization. 
+ bool IRGenFinished = false; + + bool TimerIsEnabled = false; + + std::unique_ptr<CodeGenerator> Gen; + + SmallVector<LinkModule, 4> LinkModules; + + // A map from mangled names to their function's source location, used for + // backend diagnostics as the Clang AST may be unavailable. We actually use + // the mangled name's hash as the key because mangled names can be very + // long and take up lots of space. Using a hash can cause name collision, + // but that is rare and the consequences are pointing to a wrong source + // location which is not severe. This is a vector instead of an actual map + // because we optimize for time building this map rather than time + // retrieving an entry, as backend diagnostics are uncommon. + std::vector<std::pair<llvm::hash_code, FullSourceLoc>> + ManglingFullSourceLocs; + + + // This is here so that the diagnostic printer knows the module a diagnostic + // refers to. + llvm::Module *CurLinkModule = nullptr; + +public: + BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, const std::string &InFile, + SmallVector<LinkModule, 4> LinkModules, + std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr); + + // This constructor is used in installing an empty BackendConsumer + // to use the clang diagnostic handler for IR input files. It avoids + // initializing the OS field. 
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, llvm::Module *Module, + SmallVector<LinkModule, 4> LinkModules, llvm::LLVMContext &C, + CoverageSourceInfo *CoverageInfo = nullptr); + + llvm::Module *getModule() const; + std::unique_ptr<llvm::Module> takeModule(); + + CodeGenerator *getCodeGenerator(); + + void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override; + void Initialize(ASTContext &Ctx) override; + bool HandleTopLevelDecl(DeclGroupRef D) override; + void HandleInlineFunctionDefinition(FunctionDecl *D) override; + void HandleInterestingDecl(DeclGroupRef D) override; + void HandleTranslationUnit(ASTContext &C) override; + void HandleTagDeclDefinition(TagDecl *D) override; + void HandleTagDeclRequiredDefinition(const TagDecl *D) override; + void CompleteTentativeDefinition(VarDecl *D) override; + void CompleteExternalDeclaration(VarDecl *D) override; + void AssignInheritanceModel(CXXRecordDecl *RD) override; + void HandleVTable(CXXRecordDecl *RD) override; + + + // Links each entry in LinkModules into our module. Returns true on error. + bool LinkInModules(llvm::Module *M, bool ShouldLinkFiles = true); + + /// Get the best possible source location to represent a diagnostic that + /// may have associated debug info. + const FullSourceLoc getBestLocationFromDebugLoc( + const llvm::DiagnosticInfoWithLocationBase &D, + bool &BadDebugInfo, StringRef &Filename, + unsigned &Line, unsigned &Column) const; + + std::optional<FullSourceLoc> getFunctionSourceLocation( + const llvm::Function &F) const; + + void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); + /// Specialized handler for InlineAsm diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. 
+ bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); + /// Specialized handler for diagnostics reported using SMDiagnostic. + void SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &D); + /// Specialized handler for StackSize diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. + bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); + /// Specialized handler for ResourceLimit diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. + bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D); + + /// Specialized handler for unsupported backend feature diagnostic. + void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); + /// Specialized handlers for optimization remarks. + /// Note that these handlers only accept remarks and they always handle + /// them. + void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, + unsigned DiagID); + void + OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D); + void OptimizationRemarkHandler( + const llvm::OptimizationRemarkAnalysisFPCommute &D); + void OptimizationRemarkHandler( + const llvm::OptimizationRemarkAnalysisAliasing &D); + void OptimizationFailureHandler( + const llvm::DiagnosticInfoOptimizationFailure &D); + void DontCallDiagHandler(const llvm::DiagnosticInfoDontCall &D); + /// Specialized handler for misexpect warnings. 
+ /// Note that misexpect remarks are emitted through ORE + void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D); +}; + +} // namespace clang +#endif diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cda03d69522d..8c666e2cb463 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/BackendUtil.h" +#include "BackendConsumer.h" +#include "LinkInModulesPass.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" @@ -27,6 +29,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/LegacyPassManager.h" @@ -55,6 +58,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/EmbedBitcodePass.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" @@ -70,6 +74,7 @@ #include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" @@ -90,19 +95,24 @@ using namespace llvm; #include "llvm/Support/Extension.def" namespace llvm { -extern cl::opt<bool> DebugInfoCorrelate; +extern cl::opt<bool> PrintPipelinePasses; // Experiment to move sanitizers earlier. 
static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP( "sanitizer-early-opt-ep", cl::Optional, cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false)); + +// Re-link builtin bitcodes after optimization +cl::opt<bool> ClRelinkBuiltinBitcodePostop( + "relink-builtin-bitcode-postop", cl::Optional, + cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false)); } namespace { // Default filename used for profile generation. std::string getDefaultProfileGenName() { - return DebugInfoCorrelate ? "default_%p.proflite" : "default_%m.profraw"; + return DebugInfoCorrelate ? "default_%m.proflite" : "default_%m.profraw"; } class EmitAssemblyHelper { @@ -111,7 +121,7 @@ class EmitAssemblyHelper { const CodeGenOptions &CodeGenOpts; const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; - Module *TheModule; + llvm::Module *TheModule; IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS; Timer CodeGenerationTime; @@ -154,10 +164,9 @@ class EmitAssemblyHelper { return F; } - void - RunOptimizationPipeline(BackendAction Action, - std::unique_ptr<raw_pwrite_stream> &OS, - std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS); + void RunOptimizationPipeline( + BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, + std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS, BackendConsumer *BC); void RunCodegenPipeline(BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, std::unique_ptr<llvm::ToolOutputFile> &DwoOS); @@ -177,7 +186,7 @@ public: const HeaderSearchOptions &HeaderSearchOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M, + const LangOptions &LOpts, llvm::Module *M, IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), VFS(std::move(VFS)), @@ -192,8 +201,8 @@ public: std::unique_ptr<TargetMachine> TM; // Emit output using the new pass manager for the optimization pipeline. 
- void EmitAssembly(BackendAction Action, - std::unique_ptr<raw_pwrite_stream> OS); + void EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS, + BackendConsumer *BC); }; } @@ -256,45 +265,6 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { return false; } -static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, - const CodeGenOptions &CodeGenOpts) { - TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); - - switch (CodeGenOpts.getVecLib()) { - case CodeGenOptions::Accelerate: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate, - TargetTriple); - break; - case CodeGenOptions::LIBMVEC: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86, - TargetTriple); - break; - case CodeGenOptions::MASSV: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV, - TargetTriple); - break; - case CodeGenOptions::SVML: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML, - TargetTriple); - break; - case CodeGenOptions::SLEEF: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI, - TargetTriple); - break; - case CodeGenOptions::Darwin_libsystem_m: - TLII->addVectorizableFunctionsFromVecLib( - TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple); - break; - case CodeGenOptions::ArmPL: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL, - TargetTriple); - break; - default: - break; - } - return TLII; -} - static std::optional<llvm::CodeModel::Model> getCodeModel(const CodeGenOptions &CodeGenOpts) { unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) @@ -313,12 +283,12 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) { static CodeGenFileType getCodeGenFileType(BackendAction Action) { if (Action == Backend_EmitObj) - return CGFT_ObjectFile; + return CodeGenFileType::ObjectFile; else if (Action == Backend_EmitMCNull) - return CGFT_Null; + return 
CodeGenFileType::Null; else { assert(Action == Backend_EmitAssembly && "Invalid action!"); - return CGFT_AssemblyFile; + return CodeGenFileType::AssemblyFile; } } @@ -486,6 +456,8 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.MCOptions.Argv0 = CodeGenOpts.Argv0; Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; Options.MCOptions.AsSecureLogFile = CodeGenOpts.AsSecureLogFile; + Options.MCOptions.PPCUseFullRegisterNames = + CodeGenOpts.PPCUseFullRegisterNames; Options.MisExpect = CodeGenOpts.MisExpect; return true; @@ -560,10 +532,10 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); llvm::Reloc::Model RM = CodeGenOpts.RelocationModel; - std::optional<CodeGenOpt::Level> OptLevelOrNone = + std::optional<CodeGenOptLevel> OptLevelOrNone = CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel); assert(OptLevelOrNone && "Invalid optimization level!"); - CodeGenOpt::Level OptLevel = *OptLevelOrNone; + CodeGenOptLevel OptLevel = *OptLevelOrNone; llvm::TargetOptions Options; if (!initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts, @@ -571,6 +543,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { return; TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); + TM->setLargeDataThreshold(CodeGenOpts.LargeDataThreshold); } bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, @@ -579,7 +552,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, raw_pwrite_stream *DwoOS) { // Add LibraryInfo. std::unique_ptr<TargetLibraryInfoImpl> TLII( - createTLII(TargetTriple, CodeGenOpts)); + llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. 
Note, @@ -688,7 +661,7 @@ static void addSanitizers(const Triple &TargetTriple, // the logic of the original code, but operates on "shadow" values. It // can benefit from re-running some general purpose optimization // passes. - MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); + MPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>()); FunctionPassManager FPM; FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); FPM.addPass(InstCombinePass()); @@ -747,7 +720,7 @@ static void addSanitizers(const Triple &TargetTriple, SanitizersCallback(NewMPM, Level); if (!NewMPM.isEmpty()) { // Sanitizers can abandon<GlobalsAA>. - NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); + NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>()); MPM.addPass(std::move(NewMPM)); } }); @@ -759,7 +732,7 @@ static void addSanitizers(const Triple &TargetTriple, void EmitAssemblyHelper::RunOptimizationPipeline( BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS, - std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) { + std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS, BackendConsumer *BC) { std::optional<PGOOptions> PGOOpt; if (CodeGenOpts.hasProfileIRInstr()) @@ -768,7 +741,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName() : CodeGenOpts.InstrProfileOutput, "", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr, - PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling, + /*PseudoProbeForProfiling=*/false, CodeGenOpts.AtomicProfileUpdate); else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? 
PGOOptions::CSIRUse @@ -902,6 +876,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( << PluginFN << toString(PassPlugin.takeError()); } } + for (auto PassCallback : CodeGenOpts.PassBuilderCallbacks) + PassCallback(PB); #define HANDLE_EXTENSION(Ext) \ get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); #include "llvm/Support/Extension.def" @@ -909,7 +885,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Register the target library analysis directly and give it a customized // preset TLI. std::unique_ptr<TargetLibraryInfoImpl> TLII( - createTLII(TargetTriple, CodeGenOpts)); + llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); // Register all the basic analyses with the managers. @@ -920,14 +896,17 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); ModulePassManager MPM; + // Add a verifier pass, before any other passes, to catch CodeGen issues. + if (CodeGenOpts.VerifyModule) + MPM.addPass(VerifierPass()); if (!CodeGenOpts.DisableLLVMPasses) { // Map our optimization levels into one of the distinct levels used to // configure the pipeline. 
OptimizationLevel Level = mapToLevel(CodeGenOpts); - bool IsThinLTO = CodeGenOpts.PrepareForThinLTO; - bool IsLTO = CodeGenOpts.PrepareForLTO; + const bool PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; + const bool PrepareForLTO = CodeGenOpts.PrepareForLTO; if (LangOpts.ObjCAutoRefCount) { PB.registerPipelineStartEPCallback( @@ -1016,30 +995,37 @@ void EmitAssemblyHelper::RunOptimizationPipeline( }); } - bool IsThinOrUnifiedLTO = IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO); if (CodeGenOpts.FatLTO) { - MPM = PB.buildFatLTODefaultPipeline(Level, IsThinOrUnifiedLTO, - IsThinOrUnifiedLTO || - shouldEmitRegularLTOSummary()); - } else if (IsThinOrUnifiedLTO) { - MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); - } else if (IsLTO) { - MPM = PB.buildLTOPreLinkDefaultPipeline(Level); + assert(CodeGenOpts.UnifiedLTO && "FatLTO requires UnifiedLTO"); + MPM.addPass(PB.buildFatLTODefaultPipeline(Level)); + } else if (PrepareForThinLTO) { + MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level)); + } else if (PrepareForLTO) { + MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(Level)); } else { - MPM = PB.buildPerModuleDefaultPipeline(Level); + MPM.addPass(PB.buildPerModuleDefaultPipeline(Level)); } } + // Re-link against any bitcodes supplied via the -mlink-builtin-bitcode option + // Some optimizations may generate new function calls that would not have + // been linked pre-optimization (i.e. fused sincos calls generated by + // AMDGPULibCalls::fold_sincos.) + if (ClRelinkBuiltinBitcodePostop) + MPM.addPass(LinkInModulesPass(BC, false)); + // Add a verifier pass if requested. We don't have to do this if the action // requires code generation because there will already be a verifier pass in // the code-generation pipeline. + // Since we already added a verifier pass above, this + // might even not run the analysis, if previous passes caused no changes. 
if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) MPM.addPass(VerifierPass()); if (Action == Backend_EmitBC || Action == Backend_EmitLL) { if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", CodeGenOpts.EnableSplitLTOUnit); if (Action == Backend_EmitBC) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { @@ -1048,26 +1034,25 @@ void EmitAssemblyHelper::RunOptimizationPipeline( return; } if (CodeGenOpts.UnifiedLTO) - TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); + TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); MPM.addPass(ThinLTOBitcodeWriterPass( *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); } else { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, /*EmitLTOSummary=*/true)); } - } else { // Emit a module summary by default for Regular LTO except for ld64 // targets bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO) - TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + TheModule->addModuleFlag(llvm::Module::Error, "ThinLTO", uint32_t(0)); if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", uint32_t(1)); if (CodeGenOpts.UnifiedLTO) - TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); + TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); } if (Action == Backend_EmitBC) MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, @@ -1080,19 +1065,32 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (CodeGenOpts.FatLTO) { // Set module flags, like EnableSplitLTOUnit and UnifiedLTO, since FatLTO // 
uses a different action than Backend_EmitBC or Backend_EmitLL. - bool IsThinOrUnifiedLTO = - CodeGenOpts.PrepareForThinLTO || - (CodeGenOpts.PrepareForLTO && CodeGenOpts.UnifiedLTO); if (!TheModule->getModuleFlag("ThinLTO")) - TheModule->addModuleFlag(Module::Error, "ThinLTO", - uint32_t(IsThinOrUnifiedLTO)); + TheModule->addModuleFlag(llvm::Module::Error, "ThinLTO", + uint32_t(CodeGenOpts.PrepareForThinLTO)); if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", uint32_t(CodeGenOpts.EnableSplitLTOUnit)); - if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO")) - TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); + // FatLTO always means UnifiedLTO + if (!TheModule->getModuleFlag("UnifiedLTO")) + TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); } + // Print a textual, '-passes=' compatible, representation of pipeline if + // requested. + if (PrintPipelinePasses) { + MPM.printPipeline(outs(), [&PIC](StringRef ClassName) { + auto PassName = PIC.getPassNameForClassName(ClassName); + return PassName.empty() ? ClassName : PassName; + }); + outs() << "\n"; + return; + } + + if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice && + LangOpts.HIPStdParInterposeAlloc) + MPM.addPass(HipStdParAllocationInterpositionPass()); + // Now that we have all of the passes ready, run them. { PrettyStackTraceString CrashInfo("Optimizer"); @@ -1130,6 +1128,13 @@ void EmitAssemblyHelper::RunCodegenPipeline( return; } + // If -print-pipeline-passes is requested, don't run the legacy pass manager. + // FIXME: when codegen is switched to use the new pass manager, it should also + // emit pass names here. 
+ if (PrintPipelinePasses) { + return; + } + { PrettyStackTraceString CrashInfo("Code generation"); llvm::TimeTraceScope TimeScope("CodeGenPasses"); @@ -1138,7 +1143,8 @@ void EmitAssemblyHelper::RunCodegenPipeline( } void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - std::unique_ptr<raw_pwrite_stream> OS) { + std::unique_ptr<raw_pwrite_stream> OS, + BackendConsumer *BC) { TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); @@ -1154,7 +1160,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, cl::PrintOptionValues(); std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS; - RunOptimizationPipeline(Action, OS, ThinLinkOS); + RunOptimizationPipeline(Action, OS, ThinLinkOS, BC); RunCodegenPipeline(Action, OS, DwoOS); if (ThinLinkOS) @@ -1164,12 +1170,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, } static void runThinLTOBackend( - DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex, Module *M, - const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, - const clang::TargetOptions &TOpts, const LangOptions &LOpts, - std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile, - std::string ProfileRemapping, BackendAction Action) { - StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> + DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex, + llvm::Module *M, const HeaderSearchOptions &HeaderOpts, + const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, + const LangOptions &LOpts, std::unique_ptr<raw_pwrite_stream> OS, + std::string SampleProfile, std::string ProfileRemapping, + BackendAction Action) { + DenseMap<StringRef, DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); @@ -1200,7 +1207,7 @@ static void runThinLTOBackend( Conf.CodeModel = getCodeModel(CGOpts); Conf.MAttrs = TOpts.Features; Conf.RelocModel = 
CGOpts.RelocationModel; - std::optional<CodeGenOpt::Level> OptLevelOrNone = + std::optional<CodeGenOptLevel> OptLevelOrNone = CodeGenOpt::getLevel(CGOpts.OptimizationLevel); assert(OptLevelOrNone && "Invalid optimization level!"); Conf.CGOptLevel = *OptLevelOrNone; @@ -1237,18 +1244,18 @@ static void runThinLTOBackend( Conf.SplitDwarfOutput = CGOpts.SplitDwarfOutput; switch (Action) { case Backend_EmitNothing: - Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { + Conf.PreCodeGenModuleHook = [](size_t Task, const llvm::Module &Mod) { return false; }; break; case Backend_EmitLL: - Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + Conf.PreCodeGenModuleHook = [&](size_t Task, const llvm::Module &Mod) { M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists); return false; }; break; case Backend_EmitBC: - Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + Conf.PreCodeGenModuleHook = [&](size_t Task, const llvm::Module &Mod) { WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists); return false; }; @@ -1267,14 +1274,12 @@ static void runThinLTOBackend( } } -void clang::EmitBackendOutput(DiagnosticsEngine &Diags, - const HeaderSearchOptions &HeaderOpts, - const CodeGenOptions &CGOpts, - const clang::TargetOptions &TOpts, - const LangOptions &LOpts, StringRef TDesc, - Module *M, BackendAction Action, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - std::unique_ptr<raw_pwrite_stream> OS) { +void clang::EmitBackendOutput( + DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderOpts, + const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, + const LangOptions &LOpts, StringRef TDesc, llvm::Module *M, + BackendAction Action, IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, + std::unique_ptr<raw_pwrite_stream> OS, BackendConsumer *BC) { llvm::TimeTraceScope TimeScope("Backend"); @@ -1317,7 +1322,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, } EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, 
LOpts, M, VFS); - AsmHelper.EmitAssembly(Action, std::move(OS)); + AsmHelper.EmitAssembly(Action, std::move(OS), BC); // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's // DataLayout. @@ -1352,7 +1357,7 @@ void clang::EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts, for (StringRef OffloadObject : CGOpts.OffloadObjects) { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr = llvm::MemoryBuffer::getFileOrSTDIN(OffloadObject); - if (std::error_code EC = ObjectOrErr.getError()) { + if (ObjectOrErr.getError()) { auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "could not open '%0' for embedding"); Diags.Report(DiagID) << OffloadObject; diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 222b0a192c85..52e6ddb7d6af 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -87,8 +87,7 @@ namespace { llvm::Value *StoragePtr = CGF.Builder.CreateConstGEP1_64( CGF.Int8Ty, BitFieldPtr, OffsetInChars.getQuantity()); StoragePtr = CGF.Builder.CreateAddrSpaceCast( - StoragePtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), - "atomic_bitfield_base"); + StoragePtr, CGF.UnqualPtrTy, "atomic_bitfield_base"); BFI = OrigBFI; BFI.Offset = Offset; BFI.StorageSize = AtomicSizeInBits; @@ -102,9 +101,9 @@ namespace { llvm::APInt Size( /*numBits=*/32, C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity()); - AtomicTy = - C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); + AtomicTy = C.getConstantArrayType(C.CharTy, Size, nullptr, + ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0); } AtomicAlign = ValueAlign = lvalue.getAlignment(); } else if (lvalue.isVectorElt()) { @@ -384,8 +383,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, llvm::Value *Desired = CGF.Builder.CreateLoad(Val2); llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg( - Ptr.getPointer(), Expected, Desired, SuccessOrder, 
FailureOrder, - Scope); + Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope); Pair->setVolatile(E->isVolatile()); Pair->setWeak(IsWeak); @@ -509,9 +507,11 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder, default: llvm_unreachable("Unexpected min/max operation"); case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT; break; case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT; break; } @@ -546,7 +546,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__atomic_compare_exchange: - case AtomicExpr::AO__atomic_compare_exchange_n: { + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: { if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) { emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr, Val1, Val2, FailureOrder, Size, Order, Scope); @@ -579,7 +581,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: - case AtomicExpr::AO__atomic_load: { + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__scoped_atomic_load_n: + case AtomicExpr::AO__scoped_atomic_load: { llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); Load->setAtomic(Order, Scope); Load->setVolatile(E->isVolatile()); @@ -591,7 +595,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: - case AtomicExpr::AO__atomic_store_n: { + case AtomicExpr::AO__atomic_store_n: + 
case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_store_n: { llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr); Store->setAtomic(Order, Scope); @@ -604,10 +610,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange_n: + case AtomicExpr::AO__scoped_atomic_exchange: Op = llvm::AtomicRMWInst::Xchg; break; case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_add_fetch: PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd : llvm::Instruction::Add; [[fallthrough]]; @@ -615,11 +624,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_add: Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::Add; break; case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub : llvm::Instruction::Sub; [[fallthrough]]; @@ -627,17 +638,20 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__hip_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_sub: Op = E->getValueType()->isFloatingType() ? 
llvm::AtomicRMWInst::FSub : llvm::AtomicRMWInst::Sub; break; case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: PostOpMinMax = true; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_min: Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FMin : (E->getValueType()->isSignedIntegerType() @@ -646,12 +660,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, break; case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: PostOpMinMax = true; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_max: Op = E->getValueType()->isFloatingType() ? 
llvm::AtomicRMWInst::FMax : (E->getValueType()->isSignedIntegerType() @@ -660,47 +676,55 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, break; case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: PostOp = llvm::Instruction::And; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_and: Op = llvm::AtomicRMWInst::And; break; case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: PostOp = llvm::Instruction::Or; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_or: Op = llvm::AtomicRMWInst::Or; break; case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: PostOp = llvm::Instruction::Xor; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_xor: Op = llvm::AtomicRMWInst::Xor; break; case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_nand: Op = llvm::AtomicRMWInst::Nand; break; } llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::AtomicRMWInst *RMWI = - CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope); + CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope); RMWI->setVolatile(E->isVolatile()); // For __atomic_*_fetch operations, perform 
the operation again to @@ -713,7 +737,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, else if (PostOp) Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI, LoadVal1); - if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) + if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch || + E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch) Result = CGF.Builder.CreateNot(Result); CGF.Builder.CreateStore(Result, Dest); } @@ -862,41 +887,50 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__scoped_atomic_load_n: case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__hip_atomic_load: - case AtomicExpr::AO__atomic_load_n: break; case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__scoped_atomic_load: Dest = EmitPointerWithAlignment(E->getVal1()); break; case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__scoped_atomic_store: Val1 = EmitPointerWithAlignment(E->getVal1()); break; case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange: Val1 = EmitPointerWithAlignment(E->getVal1()); Dest = EmitPointerWithAlignment(E->getVal2()); break; - case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: - case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: - case AtomicExpr::AO__hip_atomic_compare_exchange_weak: - case AtomicExpr::AO__atomic_compare_exchange_n: - case 
AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: Val1 = EmitPointerWithAlignment(E->getVal1()); - if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange) + if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange || + E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange) Val2 = EmitPointerWithAlignment(E->getVal2()); else Val2 = EmitValToTemp(*this, E->getVal2()); OrderFail = EmitScalarExpr(E->getOrderFail()); if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n || - E->getOp() == AtomicExpr::AO__atomic_compare_exchange) + E->getOp() == AtomicExpr::AO__atomic_compare_exchange || + E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange) IsWeak = EmitScalarExpr(E->getWeak()); break; @@ -936,35 +970,53 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: ShouldCastToIntPtrTy = !MemTy->isFloatingType(); [[fallthrough]]; - case AtomicExpr::AO__c11_atomic_store: - case AtomicExpr::AO__c11_atomic_exchange: - case AtomicExpr::AO__opencl_atomic_store: - case AtomicExpr::AO__hip_atomic_store: - case AtomicExpr::AO__opencl_atomic_exchange: - case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__atomic_fetch_or: + case 
AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_store_n: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: - case AtomicExpr::AO__c11_atomic_fetch_nand: - case AtomicExpr::AO__opencl_atomic_fetch_and: - case AtomicExpr::AO__opencl_atomic_fetch_or: - case AtomicExpr::AO__opencl_atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__hip_atomic_fetch_and: - case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__hip_atomic_fetch_or: - case AtomicExpr::AO__atomic_fetch_xor: case AtomicExpr::AO__hip_atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_nand: - case AtomicExpr::AO__atomic_and_fetch: - case AtomicExpr::AO__atomic_or_fetch: - case AtomicExpr::AO__atomic_xor_fetch: - case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_store_n: + case AtomicExpr::AO__scoped_atomic_exchange_n: Val1 = 
EmitValToTemp(*this, E->getVal1()); break; } @@ -1003,44 +1055,60 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); - case AtomicExpr::AO__c11_atomic_fetch_add: - case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: - case AtomicExpr::AO__hip_atomic_fetch_add: - case AtomicExpr::AO__c11_atomic_fetch_and: - case AtomicExpr::AO__opencl_atomic_fetch_and: - case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: - case AtomicExpr::AO__c11_atomic_fetch_or: - case AtomicExpr::AO__opencl_atomic_fetch_or: - case AtomicExpr::AO__hip_atomic_fetch_or: - case AtomicExpr::AO__atomic_fetch_or: - case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_nand: - case AtomicExpr::AO__c11_atomic_fetch_sub: - case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_sub: - case AtomicExpr::AO__hip_atomic_fetch_sub: - case AtomicExpr::AO__c11_atomic_fetch_xor: - case AtomicExpr::AO__opencl_atomic_fetch_xor: - case AtomicExpr::AO__opencl_atomic_fetch_min: - case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: - case AtomicExpr::AO__hip_atomic_fetch_xor: - case AtomicExpr::AO__c11_atomic_fetch_max: - case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_nand_fetch: case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__atomic_xor_fetch: - case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_max: 
+ case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_max: - case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_min: - case AtomicExpr::AO__atomic_max_fetch: - case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: // For these, only library calls for certain sizes exist. 
UseOptimizedLibcall = true; break; @@ -1049,30 +1117,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_exchange: case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_load: + case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange: // Use the generic version if we don't know that the operand will be // suitably aligned for the optimized version. if (Misaligned) break; [[fallthrough]]; + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_load: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_load: - case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__opencl_atomic_store: - case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: - case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: - case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: - case AtomicExpr::AO__atomic_load_n: - case AtomicExpr::AO__atomic_store_n: - case AtomicExpr::AO__atomic_exchange_n: - case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__scoped_atomic_load_n: + case AtomicExpr::AO__scoped_atomic_store_n: + case AtomicExpr::AO__scoped_atomic_exchange_n: + case 
AtomicExpr::AO__scoped_atomic_compare_exchange_n: // Only use optimized library calls for sizes for which they exist. // FIXME: Size == 16 optimized library functions exist too. if (Size == 1 || Size == 2 || Size == 4 || Size == 8) @@ -1125,14 +1201,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void *desired, int success, int failure) // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired, // int success, int failure) + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: - case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__hip_atomic_compare_exchange_weak: - case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__hip_atomic_compare_exchange_strong: - case AtomicExpr::AO__atomic_compare_exchange: - case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: LibCallName = "__atomic_compare_exchange"; RetTy = getContext().BoolTy; HaveRetTy = true; @@ -1147,22 +1225,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void __atomic_exchange(size_t size, void *mem, void *val, void *return, // int order) // T __atomic_exchange_N(T *mem, T val, int order) - case AtomicExpr::AO__c11_atomic_exchange: - case AtomicExpr::AO__opencl_atomic_exchange: - case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange_n: LibCallName = 
"__atomic_exchange"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); break; // void __atomic_store(size_t size, void *mem, void *val, int order) // void __atomic_store_N(T *mem, T val, int order) - case AtomicExpr::AO__c11_atomic_store: - case AtomicExpr::AO__opencl_atomic_store: - case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_store_n: LibCallName = "__atomic_store"; RetTy = getContext().VoidTy; HaveRetTy = true; @@ -1171,22 +1253,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { break; // void __atomic_load(size_t size, void *mem, void *return, int order) // T __atomic_load_N(T *mem, int order) - case AtomicExpr::AO__c11_atomic_load: - case AtomicExpr::AO__opencl_atomic_load: - case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__hip_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__scoped_atomic_load: + case AtomicExpr::AO__scoped_atomic_load_n: LibCallName = "__atomic_load"; break; // T __atomic_add_fetch_N(T *mem, T val, int order) // T __atomic_fetch_add_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_add_fetch: PostOp = llvm::Instruction::Add; [[fallthrough]]; - case AtomicExpr::AO__c11_atomic_fetch_add: - case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_add: LibCallName = "__atomic_fetch_add"; 
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), TInfo.Width); @@ -1194,12 +1280,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_and_fetch_N(T *mem, T val, int order) // T __atomic_fetch_and_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: PostOp = llvm::Instruction::And; [[fallthrough]]; + case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_and: - case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: - case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_and: LibCallName = "__atomic_fetch_and"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); @@ -1207,12 +1295,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_or_fetch_N(T *mem, T val, int order) // T __atomic_fetch_or_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: PostOp = llvm::Instruction::Or; [[fallthrough]]; + case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_or: - case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__hip_atomic_fetch_or: - case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_or: LibCallName = "__atomic_fetch_or"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); @@ -1220,12 +1310,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_sub_fetch_N(T *mem, T val, int order) // T __atomic_fetch_sub_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: PostOp = llvm::Instruction::Sub; [[fallthrough]]; + case 
AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__c11_atomic_fetch_sub: - case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__hip_atomic_fetch_sub: - case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_sub: LibCallName = "__atomic_fetch_sub"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), TInfo.Width); @@ -1233,21 +1325,25 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_xor_fetch_N(T *mem, T val, int order) // T __atomic_fetch_xor_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: PostOp = llvm::Instruction::Xor; [[fallthrough]]; + case AtomicExpr::AO__atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_xor: - case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__hip_atomic_fetch_xor: - case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_xor: LibCallName = "__atomic_fetch_xor"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); break; case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: PostOpMinMax = true; [[fallthrough]]; - case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_min: case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() @@ -1257,12 +1353,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LoweredMemTy, E->getExprLoc(), TInfo.Width); break; case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: PostOpMinMax = true; [[fallthrough]]; - case AtomicExpr::AO__c11_atomic_fetch_max: case 
AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_max" : "__atomic_fetch_umax"; @@ -1272,10 +1370,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_nand_fetch_N(T *mem, T val, int order) // T __atomic_fetch_nand_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: PostOp = llvm::Instruction::And; // the NOT is special cased below [[fallthrough]]; - case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_nand: LibCallName = "__atomic_fetch_nand"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); @@ -1332,7 +1432,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal(); ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1); } - if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) + if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch || + E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch) ResVal = Builder.CreateNot(ResVal); Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType())); @@ -1349,12 +1450,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { E->getOp() == AtomicExpr::AO__opencl_atomic_store || E->getOp() == AtomicExpr::AO__hip_atomic_store || E->getOp() == AtomicExpr::AO__atomic_store || - E->getOp() == AtomicExpr::AO__atomic_store_n; + E->getOp() == AtomicExpr::AO__atomic_store_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_store || + E->getOp() == AtomicExpr::AO__scoped_atomic_store_n; bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || E->getOp() == 
AtomicExpr::AO__opencl_atomic_load || E->getOp() == AtomicExpr::AO__hip_atomic_load || E->getOp() == AtomicExpr::AO__atomic_load || - E->getOp() == AtomicExpr::AO__atomic_load_n; + E->getOp() == AtomicExpr::AO__atomic_load_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_load || + E->getOp() == AtomicExpr::AO__scoped_atomic_load_n; if (isa<llvm::ConstantInt>(Order)) { auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); @@ -1741,8 +1846,7 @@ std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp( llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) { // Do the atomic store. Address Addr = getAtomicAddressAsAtomicIntPointer(); - auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr.getPointer(), - ExpectedVal, DesiredVal, + auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal, Success, Failure); // Other decoration. Inst->setVolatile(LVal.isVolatileQualified()); diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index cfbe3272196e..0cbace7b7f7b 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -66,28 +66,6 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, namespace { -/// Represents a captured entity that requires extra operations in order for -/// this entity to be copied or destroyed correctly. 
-struct BlockCaptureManagedEntity { - BlockCaptureEntityKind CopyKind, DisposeKind; - BlockFieldFlags CopyFlags, DisposeFlags; - const BlockDecl::Capture *CI; - const CGBlockInfo::Capture *Capture; - - BlockCaptureManagedEntity(BlockCaptureEntityKind CopyType, - BlockCaptureEntityKind DisposeType, - BlockFieldFlags CopyFlags, - BlockFieldFlags DisposeFlags, - const BlockDecl::Capture &CI, - const CGBlockInfo::Capture &Capture) - : CopyKind(CopyType), DisposeKind(DisposeType), CopyFlags(CopyFlags), - DisposeFlags(DisposeFlags), CI(&CI), Capture(&Capture) {} - - bool operator<(const BlockCaptureManagedEntity &Other) const { - return Capture->getOffset() < Other.Capture->getOffset(); - } -}; - enum class CaptureStrKind { // String for the copy helper. CopyHelper, @@ -174,9 +152,8 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, cast<llvm::IntegerType>(CGM.getTypes().ConvertType(C.UnsignedLongTy)); llvm::PointerType *i8p = nullptr; if (CGM.getLangOpts().OpenCL) - i8p = - llvm::Type::getInt8PtrTy( - CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant)); + i8p = llvm::PointerType::get( + CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant)); else i8p = CGM.VoidPtrTy; @@ -187,8 +164,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, CGM.getLangOpts().getGC() == LangOptions::NonGC) { descName = getBlockDescriptorName(blockInfo, CGM); if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName)) - return llvm::ConstantExpr::getBitCast(desc, - CGM.getBlockDescriptorType()); + return desc; } // If there isn't an equivalent block descriptor global variable, create a new @@ -226,8 +202,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, // Signature. Mandatory ObjC-style method descriptor @encode sequence. 
std::string typeAtEncoding = CGM.getContext().getObjCEncodingForBlock(blockInfo.getBlockExpr()); - elements.add(llvm::ConstantExpr::getBitCast( - CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p)); + elements.add(CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer()); // GC layout. if (C.getLangOpts().ObjC) { @@ -266,7 +241,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } - return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType()); + return global; } /* @@ -832,7 +807,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { llvm::Constant *blockISA = blockInfo.NoEscape ? CGM.getNSConcreteGlobalBlock() : CGM.getNSConcreteStackBlock(); - isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy); + isa = blockISA; // Build the block descriptor. descriptor = buildBlockDescriptor(CGM, blockInfo); @@ -964,7 +939,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { if (CI.isNested()) byrefPointer = Builder.CreateLoad(src, "byref.capture"); else - byrefPointer = Builder.CreateBitCast(src.getPointer(), VoidPtrTy); + byrefPointer = src.getPointer(); // Write that void* into the capture field. Builder.CreateStore(byrefPointer, blockField); @@ -1017,7 +992,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Fake up a new variable so that EmitScalarInit doesn't think // we're referring to the variable in its own initializer. ImplicitParamDecl BlockFieldPseudoVar(getContext(), type, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // We use one of these or the other depending on whether the // reference is nested. @@ -1212,8 +1187,8 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, } } else { // Bitcast the block literal to a generic block literal. 
- BlockPtr = Builder.CreatePointerCast( - BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); + BlockPtr = + Builder.CreatePointerCast(BlockPtr, UnqualPtrTy, "block.literal"); // Get pointer to the block invoke function llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); @@ -1231,12 +1206,6 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBlockFunctionCall(Args, FuncTy); - // Cast the function pointer to the right type. - llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo); - - llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy); - Func = Builder.CreatePointerCast(Func, BlockFTyPtr); - // Prepare the callee. CGCallee Callee(CGCalleeInfo(), Func); @@ -1481,7 +1450,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction( ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl), SourceLocation(), II, selfTy, - ImplicitParamDecl::ObjCSelf); + ImplicitParamKind::ObjCSelf); args.push_back(&SelfDecl); // Now add the rest of the parameters. 
@@ -1689,7 +1658,6 @@ struct CallBlockRelease final : EHScopeStack::Cleanup { llvm::Value *BlockVarAddr; if (LoadBlockVarAddr) { BlockVarAddr = CGF.Builder.CreateLoad(Addr); - BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy); } else { BlockVarAddr = Addr.getPointer(); } @@ -1740,7 +1708,7 @@ static std::string getBlockCaptureStr(const CGBlockInfo::Capture &Cap, Str += "c"; SmallString<256> TyStr; llvm::raw_svector_ostream Out(TyStr); - CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out); + CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(CaptureTy, Out); Str += llvm::to_string(TyStr.size()) + TyStr.c_str(); break; } @@ -1899,16 +1867,16 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { CaptureStrKind::CopyHelper, CGM); if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) - return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + return Func; ASTContext &C = getContext(); QualType ReturnTy = C.VoidTy; FunctionArgList args; - ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamKind::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamKind::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = @@ -1997,9 +1965,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { } case BlockCaptureEntityKind::BlockObject: { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); - srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); - llvm::Value *dstAddr = - Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); + llvm::Value *dstAddr = dstField.getPointer(); llvm::Value *args[] = { dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; @@ -2022,7 +1988,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { 
FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } static BlockFieldFlags @@ -2088,14 +2054,14 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { CaptureStrKind::DisposeHelper, CGM); if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) - return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + return Func; ASTContext &C = getContext(); QualType ReturnTy = C.VoidTy; FunctionArgList args; - ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamKind::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = @@ -2145,7 +2111,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } namespace { @@ -2337,10 +2303,10 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, QualType ReturnTy = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamKind::Other); args.push_back(&Dst); - ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamKind::Other); args.push_back(&Src); const CGFunctionInfo &FI = @@ -2384,7 +2350,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, CGF.FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy); + return Fn; } /// Build the copy helper for a __block variable. 
@@ -2405,7 +2371,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, FunctionArgList args; ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); args.push_back(&Src); const CGFunctionInfo &FI = @@ -2440,7 +2406,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, CGF.FinishFunction(); - return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy); + return Fn; } /// Build the dispose helper for a __block variable. @@ -2615,11 +2581,11 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { SmallVector<llvm::Type *, 8> types; // void *__isa; - types.push_back(Int8PtrTy); + types.push_back(VoidPtrTy); size += getPointerSize(); // void *__forwarding; - types.push_back(llvm::PointerType::getUnqual(byrefType)); + types.push_back(VoidPtrTy); size += getPointerSize(); // int32_t __flags; @@ -2634,11 +2600,11 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { bool hasCopyAndDispose = getContext().BlockRequiresCopying(Ty, D); if (hasCopyAndDispose) { /// void *__copy_helper; - types.push_back(Int8PtrTy); + types.push_back(VoidPtrTy); size += getPointerSize(); /// void *__destroy_helper; - types.push_back(Int8PtrTy); + types.push_back(VoidPtrTy); size += getPointerSize(); } @@ -2647,7 +2613,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) && HasByrefExtendedLayout) { /// void *__byref_variable_layout; - types.push_back(Int8PtrTy); + types.push_back(VoidPtrTy); size += CharUnits::fromQuantity(PointerSizeInBytes); } @@ -2796,10 +2762,8 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags, bool CanThrow) { llvm::FunctionCallee F = CGM.getBlockObjectDispose(); - llvm::Value *args[] = { - Builder.CreateBitCast(V, Int8PtrTy), - llvm::ConstantInt::get(Int32Ty, 
flags.getBitMask()) - }; + llvm::Value *args[] = {V, + llvm::ConstantInt::get(Int32Ty, flags.getBitMask())}; if (CanThrow) EmitRuntimeCallOrInvoke(F, args); diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 68535920088c..bf5ab171d720 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -126,25 +126,22 @@ public: return CreateAlignedStore(getInt1(Value), Addr, CharUnits::One()); } - // Temporarily use old signature; clang will be updated to an Address overload - // in a subsequent patch. llvm::AtomicCmpXchgInst * - CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New, + CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID = llvm::SyncScope::System) { return CGBuilderBaseTy::CreateAtomicCmpXchg( - Ptr, Cmp, New, llvm::MaybeAlign(), SuccessOrdering, FailureOrdering, - SSID); + Addr.getPointer(), Cmp, New, Addr.getAlignment().getAsAlign(), + SuccessOrdering, FailureOrdering, SSID); } - // Temporarily use old signature; clang will be updated to an Address overload - // in a subsequent patch. 
llvm::AtomicRMWInst * - CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr, - llvm::Value *Val, llvm::AtomicOrdering Ordering, + CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, + llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID = llvm::SyncScope::System) { - return CGBuilderBaseTy::CreateAtomicRMW(Op, Ptr, Val, llvm::MaybeAlign(), + return CGBuilderBaseTy::CreateAtomicRMW(Op, Addr.getPointer(), Val, + Addr.getAlignment().getAsAlign(), Ordering, SSID); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 30f5f4e7061c..83d0a72aac54 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -25,8 +25,10 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/OSLog.h" +#include "clang/AST/OperationKinds.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/APFloat.h" @@ -43,7 +45,6 @@ #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/IntrinsicsHexagon.h" -#include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/IntrinsicsR600.h" @@ -55,6 +56,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/X86TargetParser.h" @@ -145,13 +147,12 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, // PPC, after backend supports IEEE 128-bit style libcalls. 
if (getTriple().isPPC64() && &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() && - F128Builtins.find(BuiltinID) != F128Builtins.end()) + F128Builtins.contains(BuiltinID)) Name = F128Builtins[BuiltinID]; else if (getTriple().isOSAIX() && &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEdouble() && - AIXLongDouble64Builtins.find(BuiltinID) != - AIXLongDouble64Builtins.end()) + AIXLongDouble64Builtins.contains(BuiltinID)) Name = AIXLongDouble64Builtins[BuiltinID]; else Name = Context.BuiltinInfo.getName(BuiltinID).substr(10); @@ -187,8 +188,7 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, return V; } -static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF, - const CallExpr *E) { +static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) { ASTContext &Ctx = CGF.getContext(); Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0)); unsigned Bytes = Ptr.getElementType()->isPointerTy() @@ -198,8 +198,10 @@ static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF, if (Align % Bytes != 0) { DiagnosticsEngine &Diags = CGF.CGM.getDiags(); Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); + // Force address to be at least naturally-aligned. 
+ return Ptr.withAlignment(CharUnits::fromQuantity(Bytes)); } - return Ptr.getPointer(); + return Ptr; } /// Utility to insert an atomic instruction based on Intrinsic::ID @@ -214,23 +216,17 @@ static Value *MakeBinaryAtomicValue( E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); - llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); - unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); + Address DestAddr = CheckAtomicAlignment(CGF, E); - llvm::IntegerType *IntType = - llvm::IntegerType::get(CGF.getLLVMContext(), - CGF.getContext().getTypeSize(T)); - llvm::Type *IntPtrType = - llvm::PointerType::get(CGF.getLLVMContext(), AddrSpace); + llvm::IntegerType *IntType = llvm::IntegerType::get( + CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); - llvm::Value *Args[2]; - Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); - Args[1] = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Type *ValueType = Args[1]->getType(); - Args[1] = EmitToInt(CGF, Args[1], T, IntType); + llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Type *ValueType = Val->getType(); + Val = EmitToInt(CGF, Val, T, IntType); - llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], Ordering); + llvm::Value *Result = + CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering); return EmitFromInt(CGF, Result, T, ValueType); } @@ -238,12 +234,8 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { Value *Val = CGF.EmitScalarExpr(E->getArg(0)); Value *Address = CGF.EmitScalarExpr(E->getArg(1)); - // Convert the type of the pointer to a pointer to the stored type. 
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); - unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace(); - Value *BC = CGF.Builder.CreateBitCast( - Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); + LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType()); LV.setNontemporal(true); CGF.EmitStoreOfScalar(Val, LV, false); return nullptr; @@ -277,20 +269,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); - llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); + Address DestAddr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); - llvm::Value *Args[2]; - Args[1] = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Type *ValueType = Args[1]->getType(); - Args[1] = EmitToInt(CGF, Args[1], T, IntType); - Args[0] = DestPtr; + llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Type *ValueType = Val->getType(); + Val = EmitToInt(CGF, Val, T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); - Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); + Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent); + Result = CGF.Builder.CreateBinOp(Op, Result, Val); if (Invert) Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, @@ -316,20 +306,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? 
E->getArg(1)->getType() : E->getType(); - llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); + Address DestAddr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); - Value *Args[3]; - Args[0] = DestPtr; - Args[1] = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Type *ValueType = Args[1]->getType(); - Args[1] = EmitToInt(CGF, Args[1], T, IntType); - Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); + Value *Cmp = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Type *ValueType = Cmp->getType(); + Cmp = EmitToInt(CGF, Cmp, T, IntType); + Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); Value *Pair = CGF.Builder.CreateAtomicCmpXchg( - Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, + DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent); if (ReturnBool) // Extract boolean success flag and zext it to int. @@ -365,7 +353,8 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), E->getArg(2)->getType())); - auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); + Address DestAddr = CheckAtomicAlignment(CGF, E); + auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); @@ -379,8 +368,7 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, // _Interlocked* operations in the future, we will have to remove the volatile // marker. 
auto *Result = CGF.Builder.CreateAtomicCmpXchg( - Destination, Comparand, Exchange, - SuccessOrdering, FailureOrdering); + DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering); Result->setVolatile(true); return CGF.Builder.CreateExtractValue(Result, 0); } @@ -393,29 +381,34 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, // __int64 _ExchangeHigh, // __int64 _ExchangeLow, // __int64 * _ComparandResult); +// +// Note that Destination is assumed to be at least 16-byte aligned, despite +// being typed int64. + static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering) { assert(E->getNumArgs() == 4); - llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2)); - llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3)); + Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3)); - assert(Destination->getType()->isPointerTy()); + assert(DestPtr->getType()->isPointerTy()); assert(!ExchangeHigh->getType()->isPointerTy()); assert(!ExchangeLow->getType()->isPointerTy()); - assert(ComparandPtr->getType()->isPointerTy()); // For Release ordering, the failure ordering should be Monotonic. auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? AtomicOrdering::Monotonic : SuccessOrdering; - // Convert to i128 pointers and values. + // Convert to i128 pointers and values. Alignment is also overridden for + // destination pointer. 
llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); - Address ComparandResult(ComparandPtr, Int128Ty, - CGF.getContext().toCharUnitsFromBits(128)); + Address DestAddr(DestPtr, Int128Ty, + CGF.getContext().toCharUnitsFromBits(128)); + ComparandAddr = ComparandAddr.withElementType(Int128Ty); // (((i128)hi) << 64) | ((i128)lo) ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); @@ -425,9 +418,9 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow); // Load the comparand for the instruction. - llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult); + llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr); - auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering); // The atomic instruction is marked volatile for consistency with MSVC. This @@ -438,7 +431,7 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, // Store the result as an outparameter. CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0), - ComparandResult); + ComparandAddr); // Get the success boolean and zero extend it to i8. 
Value *Success = CGF.Builder.CreateExtractValue(CXI, 1); @@ -450,24 +443,21 @@ static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, assert(E->getArg(0)->getType()->isPointerType()); auto *IntTy = CGF.ConvertType(E->getType()); + Address DestAddr = CheckAtomicAlignment(CGF, E); auto *Result = CGF.Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - CGF.EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - Ordering); + AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering); return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); } -static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, +static Value *EmitAtomicDecrementValue( + CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { assert(E->getArg(0)->getType()->isPointerType()); auto *IntTy = CGF.ConvertType(E->getType()); + Address DestAddr = CheckAtomicAlignment(CGF, E); auto *Result = CGF.Builder.CreateAtomicRMW( - AtomicRMWInst::Sub, - CGF.EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - Ordering); + AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering); return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } @@ -503,8 +493,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned ConstrainedIntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); } else { @@ -800,11 +790,6 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { } Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { - llvm::Type *DestType = Int8PtrTy; - if (ArgValue->getType() != DestType) - ArgValue = - 
Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); - Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); } @@ -834,6 +819,165 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } +llvm::Value * +CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType) { + // The code generated here calculates the size of a struct with a flexible + // array member that uses the counted_by attribute. There are two instances + // we handle: + // + // struct s { + // unsigned long flags; + // int count; + // int array[] __attribute__((counted_by(count))); + // } + // + // 1) bdos of the flexible array itself: + // + // __builtin_dynamic_object_size(p->array, 1) == + // p->count * sizeof(*p->array) + // + // 2) bdos of a pointer into the flexible array: + // + // __builtin_dynamic_object_size(&p->array[42], 1) == + // (p->count - 42) * sizeof(*p->array) + // + // 2) bdos of the whole struct, including the flexible array: + // + // __builtin_dynamic_object_size(p, 1) == + // max(sizeof(struct s), + // offsetof(struct s, array) + p->count * sizeof(*p->array)) + // + ASTContext &Ctx = getContext(); + const Expr *Base = E->IgnoreParenImpCasts(); + const Expr *Idx = nullptr; + + if (const auto *UO = dyn_cast<UnaryOperator>(Base); + UO && UO->getOpcode() == UO_AddrOf) { + Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) { + Base = ASE->getBase()->IgnoreParenImpCasts(); + Idx = ASE->getIdx()->IgnoreParenImpCasts(); + + if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) { + int64_t Val = IL->getValue().getSExtValue(); + if (Val < 0) + // __bdos returns 0 for negative indexes into an array in a struct. 
+ return getDefaultBuiltinObjectSizeResult(Type, ResType); + + if (Val == 0) + // The index is 0, so we don't need to take it into account. + Idx = nullptr; + } + } else { + // Potential pointer to another element in the struct. + Base = SubExpr; + } + } + + // Get the flexible array member Decl. + const ValueDecl *FAMDecl = nullptr; + if (const auto *ME = dyn_cast<MemberExpr>(Base)) { + // Check if \p Base is referencing the FAM itself. + if (const ValueDecl *MD = ME->getMemberDecl()) { + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + if (!Decl::isFlexibleArrayMemberLike( + Ctx, MD, MD->getType(), StrictFlexArraysLevel, + /*IgnoreTemplateOrMacroSubstitution=*/true)) + return nullptr; + + FAMDecl = MD; + } + } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { + // Check if we're pointing to the whole struct. + QualType Ty = DRE->getDecl()->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + + if (const auto *RD = Ty->getAsRecordDecl()) + // Don't use the outer lexical record because the FAM might be in a + // different RecordDecl. + FAMDecl = FindFlexibleArrayMemberField(Ctx, RD); + } + + if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>()) + // No flexible array member found or it doesn't have the "counted_by" + // attribute. + return nullptr; + + const ValueDecl *CountedByFD = FindCountedByField(Base); + if (!CountedByFD) + // Can't find the field referenced by the "counted_by" attribute. + return nullptr; + + // Build a load of the counted_by field. + bool IsSigned = CountedByFD->getType()->isSignedIntegerType(); + const Expr *CountedByExpr = BuildCountedByFieldExpr(Base, CountedByFD); + Value *CountedByInst = EmitAnyExprToTemp(CountedByExpr).getScalarVal(); + llvm::Type *CountedByTy = CountedByInst->getType(); + + // Build a load of the index and subtract it from the count. 
+ Value *IdxInst = nullptr; + if (Idx) { + bool IdxSigned = Idx->getType()->isSignedIntegerType(); + IdxInst = EmitAnyExprToTemp(Idx).getScalarVal(); + IdxInst = IdxSigned ? Builder.CreateSExtOrTrunc(IdxInst, CountedByTy) + : Builder.CreateZExtOrTrunc(IdxInst, CountedByTy); + + // We go ahead with the calculation here. If the index turns out to be + // negative, we'll catch it at the end. + CountedByInst = + Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned); + } + + // Calculate how large the flexible array member is in bytes. + const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType()); + CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType()); + llvm::Constant *ElemSize = + llvm::ConstantInt::get(CountedByTy, Size.getQuantity(), IsSigned); + Value *FAMSize = + Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned); + FAMSize = IsSigned ? Builder.CreateSExtOrTrunc(FAMSize, ResType) + : Builder.CreateZExtOrTrunc(FAMSize, ResType); + Value *Res = FAMSize; + + if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { + // The whole struct is specificed in the __bdos. + const RecordDecl *OuterRD = + CountedByFD->getDeclContext()->getOuterLexicalRecordContext(); + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD); + + // Get the offset of the FAM. + CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl)); + llvm::Constant *FAMOffset = + ConstantInt::get(ResType, Offset.getQuantity(), IsSigned); + Value *OffsetAndFAMSize = + Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned); + + // Get the full size of the struct. + llvm::Constant *SizeofStruct = + ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned); + + // max(sizeof(struct s), + // offsetof(struct s, array) + p->count * sizeof(*p->array)) + Res = IsSigned + ? 
Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax, + OffsetAndFAMSize, SizeofStruct) + : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax, + OffsetAndFAMSize, SizeofStruct); + } + + // A negative \p IdxInst or \p CountedByInst means that the index lands + // outside of the flexible array member. If that's the case, we want to + // return 0. + Value *Cmp = Builder.CreateIsNotNeg(CountedByInst); + if (IdxInst) + Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp); + + return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned)); +} + /// Returns a Value corresponding to the size of the given expression. /// This Value may be either of the following: /// - A llvm::Argument (if E is a param with the pass_object_size attribute on @@ -866,6 +1010,13 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } } + if (IsDynamic) { + // Emit special code for a flexible array member with the "counted_by" + // attribute. + if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType)) + return V; + } + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. 
@@ -991,9 +1142,8 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(E->getArg(1)->getType())); - llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext()); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8Ty, {PtrType, IntType}, false); + llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); @@ -1062,8 +1212,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, Mask = CGF.Builder.CreateNot(Mask); RMWOp = llvm::AtomicRMWInst::And; } - OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask, - Ordering); + OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering); } else { // Emit a plain load for the non-interlocked intrinsics. OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte"); @@ -1132,7 +1281,7 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, Constraints += MachineClobbers; } - llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext()); + llvm::Type *PtrType = CGF.UnqualPtrTy; llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); llvm::InlineAsm *IA = @@ -1782,6 +1931,45 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, return ArgValue; } +static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { + return CGF.Builder.CreateBinaryIntrinsic( + Intrinsic::abs, ArgValue, + ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW)); +} + +static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, + bool SanitizeOverflow) { + Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0)); + + // Try to eliminate overflow check. 
+ if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) { + if (!VCI->isMinSignedValue()) + return EmitAbs(CGF, ArgValue, true); + } + + CodeGenFunction::SanitizerScope SanScope(&CGF); + + Constant *Zero = Constant::getNullValue(ArgValue->getType()); + Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic( + Intrinsic::ssub_with_overflow, Zero, ArgValue); + Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0); + Value *NotOverflow = CGF.Builder.CreateNot( + CGF.Builder.CreateExtractValue(ResultAndOverflow, 1)); + + // TODO: support -ftrapv-handler. + if (SanitizeOverflow) { + CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}}, + SanitizerHandler::NegateOverflow, + {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()), + CGF.EmitCheckTypeDescriptor(E->getType())}, + {ArgValue}); + } else + CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow); + + Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); + return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs"); +} + /// Get the argument type for arguments to os_log_helper. 
static CanQualType getOSLogArgType(ASTContext &C, int Size) { QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); @@ -1812,7 +2000,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( FunctionArgList Args; Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { @@ -1824,7 +2012,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); ArgTys.emplace_back(ArgTy); } @@ -2251,6 +2439,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, return nullptr; } +static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, + const FunctionDecl *FD) { + auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; + auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + SmallVector<Value *, 16> Args; + for (auto &&FormalTy : FnTy->params()) + Args.push_back(llvm::PoisonValue::get(FormalTy)); + + return RValue::get(CGF->Builder.CreateCall(UBF, Args)); +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -2283,6 +2484,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const unsigned BuiltinIDIfNoAsmLabel = FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID; + std::optional<bool> ErrnoOverriden; + // ErrnoOverriden is true if math-errno is overriden via the + // '#pragma float_control(precise, on)'. This pragma disables fast-math, + // which implies math-errno. 
+  if (E->hasStoredFPFeatures()) {
+    FPOptionsOverride OP = E->getFPFeatures();
+    if (OP.hasMathErrnoOverride())
+      ErrnoOverriden = OP.getMathErrnoOverride();
+  }
+  // True if '__attribute__((optnone))' is used. This attribute overrides
+  // fast-math which implies math-errno.
+  bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
+
+  // True if we are compiling at -O2 and errno has been disabled
+  // using the '#pragma float_control(precise, off)', and
+  // attribute opt-none hasn't been seen.
+  bool ErrnoOverridenToFalseWithOpt =
+      ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
+      CGM.getCodeGenOpts().OptimizationLevel != 0;
+
   // There are LLVM math intrinsics/instructions corresponding to math library
   // functions except the LLVM op will never set errno while the math library
   // might. Also, math builtins have the same semantics as their math library
@@ -2290,13 +2511,69 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   // LLVM counterparts if the call is marked 'const' (known to never set errno).
   // In case FP exceptions are enabled, the experimental versions of the
   // intrinsics model those.
+  bool ConstAlways =
+      getContext().BuiltinInfo.isConst(BuiltinID);
+
+  // There's a special case with the fma builtins where they are always const
+  // if the target environment is GNU or the target OS is Windows and we're
+  // targeting the MSVCRT.dll environment.
+  // FIXME: This list can become outdated. Need to find a way to get it some
+  // other way.
+  switch (BuiltinID) {
+  case Builtin::BI__builtin_fma:
+  case Builtin::BI__builtin_fmaf:
+  case Builtin::BI__builtin_fmal:
+  case Builtin::BIfma:
+  case Builtin::BIfmaf:
+  case Builtin::BIfmal: {
+    auto &Trip = CGM.getTriple();
+    if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
+      ConstAlways = true;
+    break;
+  }
+  default:
+    break;
+  }
+
   bool ConstWithoutErrnoAndExceptions =
       getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
   bool ConstWithoutExceptions =
       getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
-  if (FD->hasAttr<ConstAttr>() ||
-      ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
-       (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
+
+  // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
+  // disabled.
+  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
+  // or attributes that affect math-errno should prevent or allow math
+  // intrinsics to be generated. Intrinsics are generated:
+  // 1- In fast math mode, unless math-errno is overridden
+  //    via '#pragma float_control(precise, on)', or via an
+  //    '__attribute__((optnone))'.
+  // 2- If math-errno was enabled on the command line but overridden
+  //    to false via '#pragma float_control(precise, off)', and
+  //    '__attribute__((optnone))' hasn't been used.
+  // 3- If we are compiling with optimization and errno has been disabled
+  //    via '#pragma float_control(precise, off)', and
+  //    '__attribute__((optnone))' hasn't been used.
+ + bool ConstWithoutErrnoOrExceptions = + ConstWithoutErrnoAndExceptions || ConstWithoutExceptions; + bool GenerateIntrinsics = + (ConstAlways && !OptNone) || + (!getLangOpts().MathErrno && + !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); + if (!GenerateIntrinsics) { + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions; + if (!GenerateIntrinsics) + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && + (!getLangOpts().MathErrno && + !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); + if (!GenerateIntrinsics) + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; + } + if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIceil: case Builtin::BIceilf: @@ -2355,7 +2632,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); - + case Builtin::BI__builtin_exp10: + case Builtin::BI__builtin_exp10f: + case Builtin::BI__builtin_exp10f16: + case Builtin::BI__builtin_exp10l: + case Builtin::BI__builtin_exp10f128: { + // TODO: strictfp support + if (Builder.getIsFPConstrained()) + break; + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10)); + } case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: @@ -2544,7 +2830,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - case Builtin::BI__builtin_sqrtf128: { + case Builtin::BI__builtin_sqrtf128: + case Builtin::BI__builtin_elementwise_sqrt: { llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt); SetSqrtFPAccuracy(Call); @@ -2619,6 +2906,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned 
BuiltinID, } } + // Check NonnullAttribute/NullabilityArg and Alignment. + auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg, + unsigned ParmNum) { + Value *Val = A.getPointer(); + EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD, + ParmNum); + + if (SanOpts.has(SanitizerKind::Alignment)) { + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::All); + SkippedChecks.clear(SanitizerKind::Alignment); + SourceLocation Loc = Arg->getExprLoc(); + // Strip an implicit cast. + if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg)) + if (CE->getCastKind() == CK_BitCast) + Arg = CE->getSubExpr(); + EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(), + SkippedChecks); + } + }; + switch (BuiltinIDIfNoAsmLabel) { default: break; case Builtin::BI__builtin___CFStringMakeConstantString: @@ -2636,24 +2944,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_va_copy: { Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); - - llvm::Type *Type = Int8PtrTy; - - DstPtr = Builder.CreateBitCast(DstPtr, Type); - SrcPtr = Builder.CreateBitCast(SrcPtr, Type); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); return RValue::get(nullptr); } + case Builtin::BIabs: + case Builtin::BIlabs: + case Builtin::BIllabs: case Builtin::BI__builtin_abs: case Builtin::BI__builtin_labs: case Builtin::BI__builtin_llabs: { - // X < 0 ? -X : X - // The negation has 'nsw' because abs of INT_MIN is undefined. 
- Value *ArgValue = EmitScalarExpr(E->getArg(0)); - Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg"); - Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType()); - Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); - Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs"); + bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow); + + Value *Result; + switch (getLangOpts().getSignedOverflowBehavior()) { + case LangOptions::SOB_Defined: + Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false); + break; + case LangOptions::SOB_Undefined: + if (!SanitizeOverflow) { + Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true); + break; + } + [[fallthrough]]; + case LangOptions::SOB_Trapping: + // TODO: Somehow handle the corner case when the address of abs is taken. + Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow); + break; + } return RValue::get(Result); } case Builtin::BI__builtin_complex: { @@ -3146,6 +3463,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConvertType(E->getType()))); } + case Builtin::BI__builtin_issignaling: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan), + ConvertType(E->getType()))); + } + case Builtin::BI__builtin_isinf: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); @@ -3180,6 +3505,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConvertType(E->getType()))); } + case Builtin::BI__builtin_issubnormal: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal), + ConvertType(E->getType()))); + } + + case Builtin::BI__builtin_iszero: { + 
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero), + ConvertType(E->getType()))); + } + case Builtin::BI__builtin_isfpclass: { Expr::EvalResult Result; if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext())) @@ -3237,6 +3578,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_pow: { return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow)); } + case Builtin::BI__builtin_elementwise_bitreverse: + return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse, + "elt.bitreverse")); case Builtin::BI__builtin_elementwise_cos: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); @@ -3514,6 +3858,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + // An alloca will always return a pointer to the alloca (stack) address + // space. This address space need not be the same as the AST / Language + // default (e.g. in C / C++ auto vars are in the generic address space). At + // the AST level this is handled within CreateTempAlloca et al., but for the + // builtin / dynamic alloca we have to handle it here. We use an explicit cast + // instead of passing an AS to CreateAlloca so as to not inhibit optimisation. 
case Builtin::BIalloca: case Builtin::BI_alloca: case Builtin::BI__builtin_alloca_uninitialized: @@ -3529,6 +3879,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, AI->setAlignment(SuitableAlignmentInBytes); if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized) initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); + LangAS AAS = getASTAllocaAddressSpace(); + LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); + if (AAS != EAS) { + llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); + return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, + EAS, Ty)); + } return RValue::get(AI); } @@ -3544,6 +3901,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, AI->setAlignment(AlignmentInBytes); if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized) initializeAlloca(*this, AI, Size, AlignmentInBytes); + LangAS AAS = getASTAllocaAddressSpace(); + LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); + if (AAS != EAS) { + llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); + return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, + EAS, Ty)); + } return RValue::get(AI); } @@ -3556,6 +3920,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); return RValue::get(nullptr); } + + case Builtin::BIbcopy: + case Builtin::BI__builtin_bcopy: { + Address Src = EmitPointerWithAlignment(E->getArg(0)); + Address Dest = EmitPointerWithAlignment(E->getArg(1)); + Value *SizeVal = EmitScalarExpr(E->getArg(2)); + EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(), + E->getArg(1)->getExprLoc(), FD, 0); + Builder.CreateMemMove(Dest, Src, SizeVal, false); + return RValue::get(Dest.getPointer()); + } 
+ case Builtin::BImemcpy: case Builtin::BI__builtin_memcpy: case Builtin::BImempcpy: @@ -3563,10 +3941,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), - E->getArg(1)->getExprLoc(), FD, 1); + EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); + EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemCpy(Dest, Src, SizeVal, false); if (BuiltinID == Builtin::BImempcpy || BuiltinID == Builtin::BI__builtin_mempcpy) @@ -3581,10 +3957,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Src = EmitPointerWithAlignment(E->getArg(1)); uint64_t Size = E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), - E->getArg(1)->getExprLoc(), FD, 1); + EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); + EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemCpyInline(Dest, Src, Size); return RValue::get(nullptr); } @@ -3641,10 +4015,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), - E->getArg(1)->getExprLoc(), FD, 1); + EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); + 
EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(Dest.getPointer()); } @@ -3906,8 +4278,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateStore(FrameAddr, Buf); // Store the stack pointer to the setjmp buffer. - Value *StackAddr = - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); + Value *StackAddr = Builder.CreateStackSave(); + assert(Buf.getPointer()->getType() == StackAddr->getType()); + Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); @@ -3917,7 +4290,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_longjmp: { Value *Buf = EmitScalarExpr(E->getArg(0)); - Buf = Builder.CreateBitCast(Buf, Int8PtrTy); // Call LLVM's EH longjmp, which is lightweight. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); @@ -4080,14 +4452,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__sync_lock_release_4: case Builtin::BI__sync_lock_release_8: case Builtin::BI__sync_lock_release_16: { - Value *Ptr = CheckAtomicAlignment(*this, E); + Address Ptr = CheckAtomicAlignment(*this, E); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = - llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8); + + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + getContext().getTypeSize(ElTy)); llvm::StoreInst *Store = - Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, - StoreSize); + Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); Store->setAtomic(llvm::AtomicOrdering::Release); return RValue::get(nullptr); } @@ -4138,7 +4509,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, bool Volatile = 
PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); - Value *Ptr = EmitScalarExpr(E->getArg(0)); + Address Ptr = + EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty); + Value *NewVal = Builder.getInt8(1); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { @@ -4659,7 +5032,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::IntegerType *IntType = IntegerType::get( getLLVMContext(), getContext().getTypeSize(E->getType())); - llvm::Value *Destination = EmitScalarExpr(E->getArg(0)); + Address DestAddr = CheckAtomicAlignment(*this, E); llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); RTy = Exchange->getType(); @@ -4672,7 +5045,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; - auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, Ordering, Ordering); Result->setVolatile(true); @@ -4784,7 +5157,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) - return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); + return RValue::get(GV); break; } @@ -4834,8 +5207,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Type of the generic packet parameter. unsigned GenericAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); - llvm::Type *I8PTy = llvm::PointerType::get( - llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); + llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS); // Testing which overloaded version we should generate the call for. 
if (2U == E->getNumArgs()) { @@ -4980,11 +5352,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIto_local: case Builtin::BIto_private: { auto Arg0 = EmitScalarExpr(E->getArg(0)); - auto NewArgT = llvm::PointerType::get(Int8Ty, - CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); - auto NewRetT = llvm::PointerType::get(Int8Ty, - CGM.getContext().getTargetAddressSpace( - E->getType()->getPointeeType().getAddressSpace())); + auto NewArgT = llvm::PointerType::get( + getLLVMContext(), + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + auto NewRetT = llvm::PointerType::get( + getLLVMContext(), + CGM.getContext().getTargetAddressSpace( + E->getType()->getPointeeType().getAddressSpace())); auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); llvm::Value *NewArg; if (Arg0->getType()->getPointerAddressSpace() != @@ -5006,7 +5380,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, unsigned NumArgs = E->getNumArgs(); llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); - llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); @@ -5050,7 +5424,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { llvm::APInt ArraySize(32, NumArgs - First); QualType SizeArrayTy = getContext().getConstantArrayType( - getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal, + getContext().getSizeType(), ArraySize, nullptr, + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); @@ -5184,7 +5559,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // OpenCL v2.0 s6.13.17.6 - Kernel 
query functions need bitcast of block // parameter. case Builtin::BIget_kernel_work_group_size: { - llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); @@ -5199,7 +5574,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {Kernel, Arg})); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { - llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); @@ -5215,7 +5590,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: case Builtin::BIget_kernel_sub_group_count_for_ndrange: { - llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); @@ -5367,12 +5742,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address DestAddr = EmitMSVAListRef(E->getArg(0)); Address SrcAddr = EmitMSVAListRef(E->getArg(1)); - llvm::Type *BPP = Int8PtrPtrTy; - - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), - Int8PtrTy, DestAddr.getAlignment()); - SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), - Int8PtrTy, SrcAddr.getAlignment()); + DestAddr = DestAddr.withElementType(Int8PtrTy); + SrcAddr = SrcAddr.withElementType(Int8PtrTy); Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); @@ -5441,18 +5812,7 @@ RValue 
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = F->getFunctionType(); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { - Value *ArgValue; - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - ArgValue = EmitScalarExpr(E->getArg(i)); - } else { - // If this is required to be a constant, constant fold it so that we - // know that the generated intrinsic gets a ConstantInt. - ArgValue = llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext())); - } - + Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E); // If the intrinsic arg type is different from the builtin arg type // we need to do a bit cast. llvm::Type *PTy = FTy->getParamType(i); @@ -5541,6 +5901,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } + if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) + return EmitHipStdParUnsupportedBuiltin(this, FD); + ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. @@ -5551,6 +5914,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + // Returning nullptr here leads to the builtin being handled in + // EmitStdParUnsupportedBuiltin. 
+ if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && + Arch != CGF->getTarget().getTriple().getArch()) + return nullptr; + switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: @@ -5588,9 +5961,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); - case llvm::Triple::loongarch32: - case llvm::Triple::loongarch64: - return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -6313,13 +6683,21 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP0(vrndi_v), NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), @@ -7231,13 +7609,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 
case NEON::BI__builtin_neon_vld1q_x3_v: case NEON::BI__builtin_neon_vld1_x4_v: case NEON::BI__builtin_neon_vld1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_v: @@ -7256,8 +7630,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Value *Align = getAlignmentValue32(PtrOp1); Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld1_dup_v: @@ -7281,8 +7653,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops[I] = Builder.CreateBitCast(Ops[I], Ty); Ops.push_back(getAlignmentValue32(PtrOp1)); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vmovl_v: { @@ -7461,16 +7831,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vst1q_x3_v: case NEON::BI__builtin_neon_vst1_x4_v: case NEON::BI__builtin_neon_vst1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType()); // TODO: Currently in AArch32 mode the pointer operand comes first, whereas // in AArch64 it comes last. We may want to stick to one or another. 
if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || Arch == llvm::Triple::aarch64_32) { - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } - llvm::Type *Tys[2] = { PTy, VTy }; + llvm::Type *Tys[2] = {UnqualPtrTy, VTy}; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } case NEON::BI__builtin_neon_vsubhn_v: { @@ -7492,7 +7861,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vtrn_v: case NEON::BI__builtin_neon_vtrnq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; @@ -7520,7 +7888,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vuzp_v: case NEON::BI__builtin_neon_vuzpq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; @@ -7543,7 +7910,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vzip_v: case NEON::BI__builtin_neon_vzipq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; @@ -7747,6 +8113,26 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. 
+static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, + llvm::Type *RegisterType, + llvm::Type *ValueType, bool isExecHi) { + CodeGen::CGBuilderTy &Builder = CGF.Builder; + CodeGen::CodeGenModule &CGM = CGF.CGM; + + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); + llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { + Value *Rt2 = Builder.CreateLShr(Call, 32); + Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); + return Rt2; + } + + return Call; +} + // Generates the IR for the read/write special register builtin, // ValueType is the type of the value that is to be written or read, // RegisterType is the type of the register being written to or read from. @@ -8031,8 +8417,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } Value *LdPtr = EmitScalarExpr(E->getArg(0)); - Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), - "ldrexd"); + Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd"); Value *Val0 = Builder.CreateExtractValue(Val, 1); Value *Val1 = Builder.CreateExtractValue(Val, 0); @@ -8053,12 +8438,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); - llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex, - PtrTy); + UnqualPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); @@ -8091,7 +8475,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Arg0 = Builder.CreateExtractValue(Val, 0); Value *Arg1 = Builder.CreateExtractValue(Val, 1); - Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); + Value *StPtr = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); } @@ -8307,15 +8691,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } else { - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); - } + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } switch (BuiltinID) { @@ -9081,6 +9457,11 @@ static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { // the elements of the specified datatype. Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, llvm::ScalableVectorType *VTy) { + + if (isa<TargetExtType>(Pred->getType()) && + cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount") + return Pred; + auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); if (Pred->getType() == RTy) return Pred; @@ -9116,13 +9497,6 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, auto *OverloadedTy = llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); - // At the ACLE level there's only one predicate type, svbool_t, which is - // mapped to <n x 16 x i1>. However, this might be incompatible with the - // actual type being loaded. 
For example, when loading doubles (i64) the - // predicated should be <n x 2 x i1> instead. At the IR level the type of - // the predicate and the data being loaded must match. Cast accordingly. - Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); - Function *F = nullptr; if (Ops[1]->getType()->isVectorTy()) // This is the "vector base, scalar offset" case. In order to uniquely @@ -9136,6 +9510,16 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, // intrinsic. F = CGM.getIntrinsic(IntID, OverloadedTy); + // At the ACLE level there's only one predicate type, svbool_t, which is + // mapped to <n x 16 x i1>. However, this might be incompatible with the + // actual type being loaded. For example, when loading doubles (i64) the + // predicate should be <n x 2 x i1> instead. At the IR level the type of + // the predicate and the data being loaded must match. Cast to the type + // expected by the intrinsic. The intrinsic itself should be defined in + // a way than enforces relations between parameter types. + Ops[0] = EmitSVEPredicateCast( + Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType())); + // Pass 0 when the offset is missing. This can only be applied when using // the "vector base" addressing mode for which ACLE allows no offset. The // corresponding LLVM IR always requires an offset. @@ -9200,8 +9584,11 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, // mapped to <n x 16 x i1>. However, this might be incompatible with the // actual type being stored. For example, when storing doubles (i64) the // predicated should be <n x 2 x i1> instead. At the IR level the type of - // the predicate and the data being stored must match. Cast accordingly. - Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy); + // the predicate and the data being stored must match. Cast to the type + // expected by the intrinsic. 
The intrinsic itself should be defined in + // a way that enforces relations between parameter types. + Ops[1] = EmitSVEPredicateCast( + Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType())); // For "vector base, scalar index" scale the index so that it becomes a // scalar offset. @@ -9251,18 +9638,23 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value*> &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); - auto VecPtrTy = llvm::PointerType::getUnqual(VTy); - auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType()); unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_ld2_sret: + case Intrinsic::aarch64_sve_ld1_pn_x2: + case Intrinsic::aarch64_sve_ldnt1_pn_x2: + case Intrinsic::aarch64_sve_ld2q_sret: N = 2; break; case Intrinsic::aarch64_sve_ld3_sret: + case Intrinsic::aarch64_sve_ld3q_sret: N = 3; break; case Intrinsic::aarch64_sve_ld4_sret: + case Intrinsic::aarch64_sve_ld1_pn_x4: + case Intrinsic::aarch64_sve_ldnt1_pn_x4: + case Intrinsic::aarch64_sve_ld4q_sret: N = 4; break; default: @@ -9271,14 +9663,13 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, auto RetTy = llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N); - Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); - Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy); + Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); + Value *BasePtr = Ops[1]; // Does the load have an offset? 
if (Ops.size() > 2) BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); - BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); Function *F = CGM.getIntrinsic(IntID, {VTy}); Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); unsigned MinElts = VTy->getMinNumElements(); @@ -9295,18 +9686,23 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value*> &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); - auto VecPtrTy = llvm::PointerType::getUnqual(VTy); - auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType()); unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_st2: + case Intrinsic::aarch64_sve_st1_pn_x2: + case Intrinsic::aarch64_sve_stnt1_pn_x2: + case Intrinsic::aarch64_sve_st2q: N = 2; break; case Intrinsic::aarch64_sve_st3: + case Intrinsic::aarch64_sve_st3q: N = 3; break; case Intrinsic::aarch64_sve_st4: + case Intrinsic::aarch64_sve_st1_pn_x4: + case Intrinsic::aarch64_sve_stnt1_pn_x4: + case Intrinsic::aarch64_sve_st4q: N = 4; break; default: @@ -9314,26 +9710,20 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, } Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); - Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy); + Value *BasePtr = Ops[1]; // Does the store have an offset? - if (Ops.size() > 3) + if (Ops.size() > (2 + N)) BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); - BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); - Value *Val = Ops.back(); - // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we // need to break up the tuple vector. 
SmallVector<llvm::Value*, 5> Operands; - unsigned MinElts = VTy->getElementCount().getKnownMinValue(); - for (unsigned I = 0; I < N; ++I) { - Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); - Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx)); - } + for (unsigned I = Ops.size() - N; I < Ops.size(); ++I) + Operands.push_back(Ops[I]); Operands.append({Predicate, BasePtr}); - Function *F = CGM.getIntrinsic(IntID, { VTy }); + return Builder.CreateCall(F, Operands); } @@ -9388,7 +9778,7 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, llvm::Type *ReturnTy, SmallVectorImpl<Value *> &Ops, - unsigned BuiltinID, + unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); llvm::Type *MemEltTy = CGM.getTypes().ConvertType( @@ -9397,28 +9787,46 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, // The vector type that is returned may be different from the // eventual type loaded from memory. auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy); - auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); + llvm::ScalableVectorType *MemoryTy = nullptr; + llvm::ScalableVectorType *PredTy = nullptr; + bool IsQuadLoad = false; + switch (IntrinsicID) { + case Intrinsic::aarch64_sve_ld1uwq: + case Intrinsic::aarch64_sve_ld1udq: + MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); + PredTy = llvm::ScalableVectorType::get( + llvm::Type::getInt1Ty(getLLVMContext()), 1); + IsQuadLoad = true; + break; + default: + MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); + PredTy = MemoryTy; + break; + } - Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); + Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); Value *BasePtr = Ops[1]; // Does the load have an offset? 
if (Ops.size() > 2) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); - Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); + Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy); auto *Load = cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); + if (IsQuadLoad) + return Load; + return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy) - : Builder.CreateSExt(Load, VectorTy); + : Builder.CreateSExt(Load, VectorTy); } Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl<Value *> &Ops, - unsigned BuiltinID) { + unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); llvm::Type *MemEltTy = CGM.getTypes().ConvertType( LangPTy->castAs<PointerType>()->getPointeeType()); @@ -9428,17 +9836,34 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType()); auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); - Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); + auto PredTy = MemoryTy; + auto AddrMemoryTy = MemoryTy; + bool IsQuadStore = false; + + switch (IntrinsicID) { + case Intrinsic::aarch64_sve_st1uwq: + case Intrinsic::aarch64_sve_st1udq: + AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); + PredTy = + llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1); + IsQuadStore = true; + break; + default: + break; + } + Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); Value *BasePtr = Ops[1]; // Does the store have an offset? if (Ops.size() == 4) - BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); + BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]); // Last value is always the data - llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy); + Value *Val = + IsQuadStore ? 
Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy); - Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); + Function *F = + CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy); auto *Store = cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); @@ -9446,59 +9871,49 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } -Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { - llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false); - return Builder.CreateAdd(Base, CastOffset, "tileslice"); -} - -Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { - Ops[3] = EmitSVEPredicateCast( - Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); + Ops[2] = EmitSVEPredicateCast( + Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); SmallVector<Value *> NewOps; - NewOps.push_back(Ops[3]); + NewOps.push_back(Ops[2]); - llvm::Value *BasePtr = Ops[4]; + llvm::Value *BasePtr = Ops[3]; // If the intrinsic contains the vnum parameter, multiply it with the vector // size in bytes. - if (Ops.size() == 6) { + if (Ops.size() == 5) { Function *StreamingVectorLength = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); llvm::Value *StreamingVectorLengthCall = Builder.CreateCall(StreamingVectorLength); llvm::Value *Mulvl = - Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl"); + Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); // The type of the ptr parameter is void *, so use Int8Ty here. 
- BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl); + BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); } NewOps.push_back(BasePtr); NewOps.push_back(Ops[0]); - NewOps.push_back(EmitTileslice(Ops[2], Ops[1])); + NewOps.push_back(Ops[1]); Function *F = CGM.getIntrinsic(IntID); return Builder.CreateCall(F, NewOps); } -Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { auto *VecTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(IntID, VecTy); - if (TypeFlags.isReadZA()) { + if (TypeFlags.isReadZA()) Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); - Ops[3] = EmitTileslice(Ops[4], Ops[3]); - Ops.erase(&Ops[4]); - } else if (TypeFlags.isWriteZA()) { - Ops[1] = EmitTileslice(Ops[2], Ops[1]); - Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy); - Ops.erase(&Ops[3]); - } + else if (TypeFlags.isWriteZA()) + Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy); return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { // svzero_za() intrinsic zeros the entire za tile and has no paramters. 
@@ -9508,18 +9923,13 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned IntID) { - Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); - llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - llvm::Value *MulVL = Builder.CreateMul( - CntsbCall, - Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), - "mulvl"); - Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL); - Ops[0] = EmitTileslice(Ops[1], Ops[0]); - Ops.erase(&Ops[1]); + if (Ops.size() == 2) + Ops.push_back(Builder.getInt32(0)); + else + Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true); Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } @@ -9612,26 +10022,59 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, return Call; } -Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { + // Multi-vector results should be broken up into a single (wide) result + // vector. + auto *StructTy = dyn_cast<StructType>(Call->getType()); + if (!StructTy) + return Call; + + auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U)); + if (!VTy) + return Call; + unsigned N = StructTy->getNumElements(); + + // We may need to emit a cast to a svbool_t + bool IsPredTy = VTy->getElementType()->isIntegerTy(1); + unsigned MinElts = IsPredTy ? 
16 : VTy->getMinNumElements(); + + ScalableVectorType *WideVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts * N); + Value *Ret = llvm::PoisonValue::get(WideVTy); + for (unsigned I = 0; I < N; ++I) { + Value *SRet = Builder.CreateExtractValue(Call, I); + assert(SRet->getType() == VTy && "Unexpected type for result value"); + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + + if (IsPredTy) + SRet = EmitSVEPredicateCast( + SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); + + Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); + } + Call = Ret; + + return Call; +} + +void CodeGenFunction::GetAArch64SVEProcessedOperands( + unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops, + SVETypeFlags TypeFlags) { // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); - llvm::Type *Ty = ConvertType(E->getType()); - if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && - BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { - Value *Val = EmitScalarExpr(E->getArg(0)); - return EmitSVEReinterpret(Val, Ty); - } + // Tuple set/get only requires one insert/extract vector, which is + // created by EmitSVETupleSetOrGet. + bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); - llvm::SmallVector<Value *, 4> Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - if ((ICEArguments & (1 << i)) == 0) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - else { + bool IsICE = ICEArguments & (1 << i); + Value *Arg = EmitScalarExpr(E->getArg(i)); + + if (IsICE) { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. 
std::optional<llvm::APSInt> Result = @@ -9643,12 +10086,49 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, // immediate requires more than a handful of bits. *Result = Result->extOrTrunc(32); Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); + continue; + } + + if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) { + Ops.push_back(Arg); + continue; } + + auto *VTy = cast<ScalableVectorType>(Arg->getType()); + unsigned MinElts = VTy->getMinNumElements(); + bool IsPred = VTy->getElementType()->isIntegerTy(1); + unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128); + + if (N == 1) { + Ops.push_back(Arg); + continue; + } + + for (unsigned I = 0; I < N; ++I) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); + auto *NewVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts / N); + Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx)); + } + } +} + +Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + llvm::Type *Ty = ConvertType(E->getType()); + if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && + BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { + Value *Val = EmitScalarExpr(E->getArg(0)); + return EmitSVEReinterpret(Val, Ty); } auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted); + + llvm::SmallVector<Value *, 4> Ops; SVETypeFlags TypeFlags(Builtin->TypeModifier); + GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); + if (TypeFlags.isLoad()) return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, TypeFlags.isZExtReturn()); @@ -9662,14 +10142,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherPrefetch()) return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); - else if (TypeFlags.isStructLoad()) - 
return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); - else if (TypeFlags.isStructStore()) - return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructLoad()) + return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructStore()) + return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) - return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); + return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); else if (TypeFlags.isTupleCreate()) - return EmitSVETupleCreate(TypeFlags, Ty, Ops); + return EmitSVETupleCreate(TypeFlags, Ty, Ops); else if (TypeFlags.isUndef()) return UndefValue::get(Ty); else if (Builtin->LLVMIntrinsic != 0) { @@ -9725,13 +10205,55 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (PredTy->getScalarType()->isIntegerTy(1)) Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); - return Call; + return FormSVEBuiltinResult(Call); } switch (BuiltinID) { default: return nullptr; + case SVE::BI__builtin_sve_svreinterpret_b: { + auto SVCountTy = + llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); + Function *CastFromSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); + return Builder.CreateCall(CastFromSVCountF, Ops[0]); + } + case SVE::BI__builtin_sve_svreinterpret_c: { + auto SVCountTy = + llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); + Function *CastToSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); + return Builder.CreateCall(CastToSVCountF, Ops[0]); + } + + case SVE::BI__builtin_sve_svpsel_lane_b8: + case SVE::BI__builtin_sve_svpsel_lane_b16: + case SVE::BI__builtin_sve_svpsel_lane_b32: + case SVE::BI__builtin_sve_svpsel_lane_b64: + case SVE::BI__builtin_sve_svpsel_lane_c8: + case SVE::BI__builtin_sve_svpsel_lane_c16: + case SVE::BI__builtin_sve_svpsel_lane_c32: + case 
SVE::BI__builtin_sve_svpsel_lane_c64: { + bool IsSVCount = isa<TargetExtType>(Ops[0]->getType()); + assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() == + "aarch64.svcount")) && + "Unexpected TargetExtType"); + auto SVCountTy = + llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); + Function *CastFromSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); + Function *CastToSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); + + auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier)); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy); + llvm::Value *Ops0 = + IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0]; + llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy); + llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]}); + return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel; + } case SVE::BI__builtin_sve_svmov_b_z: { // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) SVETypeFlags TypeFlags(Builtin->TypeModifier); @@ -9853,6 +10375,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svpfalse_b: return ConstantInt::getFalse(Ty); + case SVE::BI__builtin_sve_svpfalse_c: { + auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16); + Function *CastToSVCountF = + CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty); + return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy)); + } + case SVE::BI__builtin_sve_svlen_bf16: case SVE::BI__builtin_sve_svlen_f16: case SVE::BI__builtin_sve_svlen_f32: @@ -9888,13 +10417,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF)); - Value *V0 = Builder.CreateExtractVector(VTy, Ops[0], - ConstantInt::get(CGM.Int64Ty, 0)); 
- unsigned MinElts = VTy->getMinNumElements(); - Value *V1 = Builder.CreateExtractVector( - VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); - return Builder.CreateCall(F, {V0, V1, Ops[1]}); + return Builder.CreateCall(F, Ops); } case SVE::BI__builtin_sve_svset_neonq_s8: @@ -9952,35 +10476,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - // Find out if any arguments are required to be integer constant expressions. - unsigned ICEArguments = 0; - ASTContext::GetBuiltinTypeError Error; - getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); - assert(Error == ASTContext::GE_None && "Should not codegen an error"); - - llvm::Type *Ty = ConvertType(E->getType()); - llvm::SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - if ((ICEArguments & (1 << i)) == 0) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - else { - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. - std::optional<llvm::APSInt> Result = - E->getArg(i)->getIntegerConstantExpr(getContext()); - assert(Result && "Expected argument to be a constant"); - - // Immediates for SVE llvm intrinsics are always 32bit. We can safely - // truncate because the immediate has been range checked and no valid - // immediate requires more than a handful of bits. 
- *Result = Result->extOrTrunc(32); - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); - } - } - auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, AArch64SMEIntrinsicsProvenSorted); + + llvm::SmallVector<Value *, 4> Ops; SVETypeFlags TypeFlags(Builtin->TypeModifier); + GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); + if (TypeFlags.isLoad() || TypeFlags.isStore()) return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()) @@ -9989,23 +10491,28 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, BuiltinID == SME::BI__builtin_sme_svzero_za) return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za || - BuiltinID == SME::BI__builtin_sme_svstr_vnum_za) + BuiltinID == SME::BI__builtin_sme_svstr_vnum_za || + BuiltinID == SME::BI__builtin_sme_svldr_za || + BuiltinID == SME::BI__builtin_sme_svstr_za) return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); - else if (Builtin->LLVMIntrinsic != 0) { - // Predicates must match the main datatype. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) - if (PredTy->getElementType()->isIntegerTy(1)) - Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); - Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, - getSVEOverloadTypes(TypeFlags, Ty, Ops)); - Value *Call = Builder.CreateCall(F, Ops); - return Call; - } + // Should not happen! + if (Builtin->LLVMIntrinsic == 0) + return nullptr; - /// Should not happen - return nullptr; + // Predicates must match the main datatype. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) + if (PredTy->getElementType()->isIntegerTy(1)) + Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); + + Function *F = + TypeFlags.isOverloadNone() + ? 
CGM.getIntrinsic(Builtin->LLVMIntrinsic) + : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); + Value *Call = Builder.CreateCall(F, Ops); + + return FormSVEBuiltinResult(Call); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, @@ -10210,8 +10717,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_ldxp); Value *LdPtr = EmitScalarExpr(E->getArg(0)); - Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), - "ldxp"); + Value *Val = Builder.CreateCall(F, LdPtr, "ldxp"); Value *Val0 = Builder.CreateExtractValue(Val, 1); Value *Val1 = Builder.CreateExtractValue(Val, 0); @@ -10231,13 +10737,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); - llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex ? 
Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr, - PtrTy); + UnqualPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); @@ -10268,8 +10773,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Arg0 = Builder.CreateExtractValue(Val, 0); Value *Arg1 = Builder.CreateExtractValue(Val, 1); - Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), - Int8PtrTy); + Value *StPtr = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); } @@ -10578,8 +11082,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID == AArch64::BI__writex18word || BuiltinID == AArch64::BI__writex18dword || BuiltinID == AArch64::BI__writex18qword) { - llvm::Type *IntTy = ConvertType(E->getArg(1)->getType()); - // Read x18 as i8* LLVMContext &Context = CGM.getLLVMContext(); llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; @@ -10588,12 +11090,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); llvm::Value *X18 = Builder.CreateCall(F, Metadata); - X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0)); + X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); // Store val at x18 + offset Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); - Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); Value *Val = EmitScalarExpr(E->getArg(1)); StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); return Store; @@ -10613,16 +11114,79 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); llvm::Value *X18 = Builder.CreateCall(F, Metadata); - X18 = Builder.CreateIntToPtr(X18, 
llvm::PointerType::get(Int8Ty, 0)); + X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); // Load x18 + offset Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); - Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); return Load; } + if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || + BuiltinID == AArch64::BI_CopyFloatFromInt32 || + BuiltinID == AArch64::BI_CopyInt32FromFloat || + BuiltinID == AArch64::BI_CopyInt64FromDouble) { + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *RetTy = ConvertType(E->getType()); + return Builder.CreateBitCast(Arg, RetTy); + } + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64 || + BuiltinID == AArch64::BI_CountLeadingZeros || + BuiltinID == AArch64::BI_CountLeadingZeros64) { + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Arg->getType(); + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64) + Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType)); + + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); + + if (BuiltinID == AArch64::BI_CountLeadingOnes64 || + BuiltinID == AArch64::BI_CountLeadingZeros64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + + if (BuiltinID == AArch64::BI_CountLeadingSigns || + BuiltinID == AArch64::BI_CountLeadingSigns64) { + Value *Arg = EmitScalarExpr(E->getArg(0)); + + Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns) + ? 
CGM.getIntrinsic(Intrinsic::aarch64_cls) + : CGM.getIntrinsic(Intrinsic::aarch64_cls64); + + Value *Result = Builder.CreateCall(F, Arg, "cls"); + if (BuiltinID == AArch64::BI_CountLeadingSigns64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + + if (BuiltinID == AArch64::BI_CountOneBits || + BuiltinID == AArch64::BI_CountOneBits64) { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = ArgValue->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + + Value *Result = Builder.CreateCall(F, ArgValue); + if (BuiltinID == AArch64::BI_CountOneBits64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + + if (BuiltinID == AArch64::BI__prefetch) { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *RW = llvm::ConstantInt::get(Int32Ty, 0); + Value *Locality = ConstantInt::get(Int32Ty, 3); + Value *Data = llvm::ConstantInt::get(Int32Ty, 1); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); + return Builder.CreateCall(F, {Address, RW, Locality, Data}); + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional<MSVCIntrin> MsvcIntId = @@ -10669,15 +11233,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, continue; } } - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } else { - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. 
- Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); - } + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); @@ -10718,14 +11274,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vldrq_p128: { llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); - llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); - Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); + Value *Ptr = EmitScalarExpr(E->getArg(0)); return Builder.CreateAlignedLoad(Int128Ty, Ptr, CharUnits::fromQuantity(16)); } case NEON::BI__builtin_neon_vstrq_p128: { - llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); - Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); + Value *Ptr = Ops[0]; return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); } case NEON::BI__builtin_neon_vcvts_f32_u32: @@ -11360,12 +11914,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case clang::AArch64::BI_InterlockedAdd: { - Value *Arg0 = EmitScalarExpr(E->getArg(0)); - Value *Arg1 = EmitScalarExpr(E->getArg(1)); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, Arg0, Arg1, - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateAdd(RMWI, Arg1); + Address DestAddr = CheckAtomicAlignment(*this, E); + Value *Val = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = + Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, Val); } } @@ -11679,25 +12233,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); } case NEON::BI__builtin_neon_vrnd32x_f32: - case NEON::BI__builtin_neon_vrnd32xq_f32: { + case NEON::BI__builtin_neon_vrnd32xq_f32: 
+ case NEON::BI__builtin_neon_vrnd32x_f64: + case NEON::BI__builtin_neon_vrnd32xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); } case NEON::BI__builtin_neon_vrnd32z_f32: - case NEON::BI__builtin_neon_vrnd32zq_f32: { + case NEON::BI__builtin_neon_vrnd32zq_f32: + case NEON::BI__builtin_neon_vrnd32z_f64: + case NEON::BI__builtin_neon_vrnd32zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); } case NEON::BI__builtin_neon_vrnd64x_f32: - case NEON::BI__builtin_neon_vrnd64xq_f32: { + case NEON::BI__builtin_neon_vrnd64xq_f32: + case NEON::BI__builtin_neon_vrnd64x_f64: + case NEON::BI__builtin_neon_vrnd64xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); } case NEON::BI__builtin_neon_vrnd64z_f32: - case NEON::BI__builtin_neon_vrnd64zq_f32: { + case NEON::BI__builtin_neon_vrnd64zq_f32: + case NEON::BI__builtin_neon_vrnd64z_f64: + case NEON::BI__builtin_neon_vrnd64zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); @@ -12243,19 +12805,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment()); } case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vld1_lane_v: case 
NEON::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); @@ -12263,8 +12821,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vldap1_lane_s64: case NEON::BI__builtin_neon_vldap1q_lane_s64: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); llvm::LoadInst *LI = Builder.CreateAlignedLoad( VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); LI->setAtomic(llvm::AtomicOrdering::Acquire); @@ -12274,8 +12830,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = PoisonValue::get(Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); @@ -12286,86 +12840,56 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst1q_lane_v: Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty), - PtrOp0.getAlignment()); + return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vstl1_lane_s64: case NEON::BI__builtin_neon_vstl1q_lane_s64: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = 
llvm::PointerType::getUnqual(Ops[1]->getType()); - llvm::StoreInst *SI = Builder.CreateAlignedStore( - Ops[1], Builder.CreateBitCast(Ops[0], Ty), PtrOp0.getAlignment()); + llvm::StoreInst *SI = + Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); SI->setAtomic(llvm::AtomicOrdering::Release); return SI; } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: { - llvm::Type *PTy = - 
llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: { - llvm::Type *PTy = - llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: { - llvm::Type *PTy = - llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[1] = Builder.CreateBitCast(Ops[1], PTy); - llvm::Type *Tys[2] = { VTy, PTy }; + llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_lane_v: @@ -12377,8 +12901,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], 
Ops[0]); } case NEON::BI__builtin_neon_vld3_lane_v: @@ -12391,8 +12913,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_lane_v: @@ -12406,8 +12926,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[4] = Builder.CreateBitCast(Ops[4], Ty); Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vst2_v: @@ -12457,7 +12975,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vtrn_v: case NEON::BI__builtin_neon_vtrnq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; @@ -12476,7 +12993,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vuzp_v: case NEON::BI__builtin_neon_vuzpq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; @@ -12494,7 +13010,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vzip_v: case NEON::BI__builtin_neon_vzipq_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value 
*SV = nullptr; @@ -12713,9 +13228,7 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, Align Alignment) { - // Cast the pointer to right type. - Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(Ops[1]->getType())); + Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], @@ -12726,10 +13239,8 @@ static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, Align Alignment) { - // Cast the pointer to right type. llvm::Type *Ty = Ops[1]->getType(); - Value *Ptr = - CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); + Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements()); @@ -12740,11 +13251,7 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); - llvm::Type *PtrTy = ResultTy->getElementType(); - - // Cast the pointer to element type. - Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(PtrTy)); + Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements()); @@ -12770,11 +13277,7 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF, static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType()); - llvm::Type *PtrTy = ResultTy->getElementType(); - - // Cast the pointer to element type. 
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], - llvm::PointerType::getUnqual(PtrTy)); + Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); @@ -13316,13 +13819,10 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs)); } -llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { - uint32_t Features1 = Lo_32(FeaturesMask); - uint32_t Features2 = Hi_32(FeaturesMask); - +llvm::Value * +CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) { Value *Result = Builder.getTrue(); - - if (Features1 != 0) { + if (FeatureMask[0] != 0) { // Matching the struct layout from the compiler-rt/libgcc structure that is // filled in: // unsigned int __cpu_vendor; @@ -13345,22 +13845,26 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. 
- Value *Mask = Builder.getInt32(Features1); + Value *Mask = Builder.getInt32(FeatureMask[0]); Value *Bitset = Builder.CreateAnd(Features, Mask); Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); Result = Builder.CreateAnd(Result, Cmp); } - if (Features2 != 0) { - llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty, - "__cpu_features2"); - cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); - - Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2, - CharUnits::fromQuantity(4)); - + llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3); + llvm::Constant *CpuFeatures2 = + CGM.CreateRuntimeVariable(ATy, "__cpu_features2"); + cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); + for (int i = 1; i != 4; ++i) { + const uint32_t M = FeatureMask[i]; + if (!M) + continue; + Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; + Value *Features = Builder.CreateAlignedLoad( + Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs), + CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. 
- Value *Mask = Builder.getInt32(Features2); + Value *Mask = Builder.getInt32(M); Value *Bitset = Builder.CreateAnd(Features, Mask); Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); Result = Builder.CreateAnd(Result, Cmp); @@ -13372,7 +13876,7 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { Value *CodeGenFunction::EmitAArch64CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::FunctionCallee Func = - CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver"); + CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver"); cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); cast<llvm::GlobalValue>(Func.getCallee()) ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); @@ -13441,16 +13945,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, assert(Error == ASTContext::GE_None && "Should not codegen an error"); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - continue; - } - - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. 
- Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } // These exist so that the builtin that takes an immediate can be bounds @@ -13585,13 +14080,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Builder.CreateStore(Ops[0], Tmp); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), - Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); + Tmp.getPointer()); } case X86::BI_mm_getcsr: case X86::BI__builtin_ia32_stmxcsr: { Address Tmp = CreateMemTemp(E->getType()); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), - Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); + Tmp.getPointer()); return Builder.CreateLoad(Tmp, "stmxcsr"); } case X86::BI__builtin_ia32_xsave: @@ -14629,12 +15124,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, BuiltinID == X86::BI__builtin_ia32_movntss) Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); - // Convert the type of the pointer to a pointer to the stored type. - Value *BC = Builder.CreateBitCast( - Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); - // Unaligned nontemporal store of the scalar value. 
- StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); + StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr); SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); SI->setAlignment(llvm::Align(1)); return SI; @@ -15443,6 +15934,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmppd256: IID = Intrinsic::x86_avx_cmp_pd_256; break; + case X86::BI__builtin_ia32_cmpph128_mask: + IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128; + break; + case X86::BI__builtin_ia32_cmpph256_mask: + IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256; + break; + case X86::BI__builtin_ia32_cmpph512_mask: + IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512; + break; case X86::BI__builtin_ia32_cmpps512_mask: IID = Intrinsic::x86_avx512_mask_cmp_ps_512; break; @@ -15696,8 +16196,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__readfsdword: case X86::BI__readfsqword: { llvm::Type *IntTy = ConvertType(E->getType()); - Value *Ptr = - Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257)); + Value *Ptr = Builder.CreateIntToPtr( + Ops[0], llvm::PointerType::get(getLLVMContext(), 257)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); @@ -15708,8 +16208,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__readgsdword: case X86::BI__readgsqword: { llvm::Type *IntTy = ConvertType(E->getType()); - Value *Ptr = - Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256)); + Value *Ptr = Builder.CreateIntToPtr( + Ops[0], llvm::PointerType::get(getLLVMContext(), 256)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); @@ -15723,8 +16223,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (int i = 0; i < 3; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = 
Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); - Ptr = Builder.CreateBitCast( - Ptr, llvm::PointerType::getUnqual(Extract->getType())); Builder.CreateAlignedStore(Extract, Ptr, Align(1)); } @@ -15739,8 +16237,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (int i = 0; i < 4; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); - Ptr = Builder.CreateBitCast( - Ptr, llvm::PointerType::getUnqual(Extract->getType())); Builder.CreateAlignedStore(Extract, Ptr, Align(1)); } @@ -15941,11 +16437,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, SmallVector<Value *, 2> Ops; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops.push_back(EmitScalarExpr(E->getArg(1))); - if(BuiltinID == PPC::BI__builtin_vsx_lxvl || - BuiltinID == PPC::BI__builtin_vsx_lxvll){ - Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); - }else { - Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); + if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || + BuiltinID == PPC::BI__builtin_vsx_lxvll)) { Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); Ops.pop_back(); } @@ -16013,11 +16506,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops.push_back(EmitScalarExpr(E->getArg(2))); - if(BuiltinID == PPC::BI__builtin_vsx_stxvl || - BuiltinID == PPC::BI__builtin_vsx_stxvll ){ - Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); - }else { - Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); + if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || + BuiltinID == PPC::BI__builtin_vsx_stxvll)) { Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); Ops.pop_back(); } @@ -16553,7 +17043,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } case PPC::BI__builtin_ppc_load2r: { Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); - Value *Op0 = 
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); } @@ -16778,7 +17268,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_sthcx: { llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); - Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); return Builder.CreateCall(F, {Op0, Op1}); } @@ -16852,10 +17342,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, BuiltinID == PPC::BI__builtin_mma_stxvp) { if (BuiltinID == PPC::BI__builtin_vsx_lxvp || BuiltinID == PPC::BI__builtin_mma_lxvp) { - Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); } else { - Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); } Ops.pop_back(); @@ -17062,6 +17550,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateFDiv(Op0, Op1, "swdiv"); } + case PPC::BI__builtin_ppc_set_fpscr_rn: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), + {EmitScalarExpr(E->getArg(0))}); + case PPC::BI__builtin_ppc_mffs: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); } } @@ -17094,24 +17587,66 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { } // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. +/// Emit code based on Code Object ABI version. +/// COV_4 : Emit code to use dispatch ptr +/// COV_5 : Emit code to use implicitarg ptr +/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" +/// and use its value for COV_4 or COV_5 approach. 
It is used for +/// compiling device libraries in an ABI-agnostic way. +/// +/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by +/// clang during compilation of user code. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { - bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion == - clang::TargetOptions::COV_5; - Constant *Offset; - Value *DP; - if (IsCOV_5) { + llvm::LoadInst *LD; + + auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion; + + if (Cov == CodeObjectVersionKind::COV_None) { + StringRef Name = "__oclc_ABI_version"; + auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name); + if (!ABIVersionC) + ABIVersionC = new llvm::GlobalVariable( + CGF.CGM.getModule(), CGF.Int32Ty, false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); + + // This load will be eliminated by the IPSCCP because it is constant + // weak_odr without externally_initialized. Either changing it to weak or + // adding externally_initialized will keep the load. + Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC, + CGF.CGM.getIntAlign()); + + Value *IsCOV5 = CGF.Builder.CreateICmpSGE( + ABIVersion, + llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5)); + // Indexing the implicit kernarg segment. - Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2); - DP = EmitAMDGPUImplicitArgPtr(CGF); - } else { + Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32( + CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); + // Indexing the HSA kernel_dispatch_packet struct. 
- Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2); - DP = EmitAMDGPUDispatchPtr(CGF); + Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32( + CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); + + auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP); + LD = CGF.Builder.CreateLoad( + Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); + } else { + Value *GEP = nullptr; + if (Cov == CodeObjectVersionKind::COV_5) { + // Indexing the implicit kernarg segment. + GEP = CGF.Builder.CreateConstGEP1_32( + CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); + } else { + // Indexing the HSA kernel_dispatch_packet struct. + GEP = CGF.Builder.CreateConstGEP1_32( + CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); + } + LD = CGF.Builder.CreateLoad( + Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); } - auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); - auto *LD = CGF.Builder.CreateLoad( - Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); @@ -17175,6 +17710,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = getLLVMContext().getOrInsertSyncScopeID(scp); } +llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, + unsigned Idx, + const CallExpr *E) { + llvm::Value *Arg = nullptr; + if ((ICEArguments & (1 << Idx)) == 0) { + Arg = EmitScalarExpr(E->getArg(Idx)); + } else { + // If this is required to be a constant, constant fold it so that we + // know that the generated intrinsic gets a ConstantInt. 
+ std::optional<llvm::APSInt> Result = + E->getArg(Idx)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + Arg = llvm::ConstantInt::get(getLLVMContext(), *Result); + } + return Arg; +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; @@ -17225,8 +17777,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; - for (unsigned I = 0; I != E->getNumArgs(); ++I) - Args.push_back(EmitScalarExpr(E->getArg(I))); + // Find out if any arguments are required to be integer constant + // expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + for (unsigned I = 0; I != E->getNumArgs(); ++I) { + Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E)); + } assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); @@ -17271,14 +17830,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: - case AMDGPU::BI__builtin_amdgcn_ldexpf: - case AMDGPU::BI__builtin_amdgcn_ldexph: { + case AMDGPU::BI__builtin_amdgcn_ldexpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); return Builder.CreateCall(F, {Src0, Src1}); } + case AMDGPU::BI__builtin_amdgcn_ldexph: { + // The raw instruction has a different behavior for out of bounds exponent + // 
values (implicit truncation instead of saturate to short_min/short_max). + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty}); + return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)}); + } case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: @@ -17479,21 +18046,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy}); return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } - case AMDGPU::BI__builtin_amdgcn_read_exec: { - CallInst *CI = cast<CallInst>( - EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec")); - CI->setConvergent(); - return CI; - } + case AMDGPU::BI__builtin_amdgcn_read_exec: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { - StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? 
- "exec_lo" : "exec_hi"; - CallInst *CI = cast<CallInst>( - EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName)); - CI->setConvergent(); - return CI; - } + return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: @@ -17536,9 +18094,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: @@ -17576,6 +18138,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgForMatchingRetType = 2; BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied; + break; case 
AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: ArgForMatchingRetType = 4; @@ -17660,7 +18232,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, break; } - Value *Ptr = EmitScalarExpr(E->getArg(0)); + Address Ptr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), @@ -17778,6 +18350,32 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Undef}); } + case SystemZ::BI__builtin_s390_verllb: + case SystemZ::BI__builtin_s390_verllh: + case SystemZ::BI__builtin_s390_verllf: + case SystemZ::BI__builtin_s390_verllg: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); + // Splat scalar rotate amount to vector type. + unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements(); + Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false); + Amt = Builder.CreateVectorSplat(NumElts, Amt); + Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); + return Builder.CreateCall(F, { Src, Src, Amt }); + } + + case SystemZ::BI__builtin_s390_verllvb: + case SystemZ::BI__builtin_s390_verllvh: + case SystemZ::BI__builtin_s390_verllvf: + case SystemZ::BI__builtin_s390_verllvg: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); + Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); + return Builder.CreateCall(F, { Src, Src, Amt }); + } + case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); @@ -18523,9 +19121,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_add_gen_f: case NVPTX::BI__nvvm_atom_add_gen_d: { 
- Value *Ptr = EmitScalarExpr(E->getArg(0)); + Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val, + + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val, AtomicOrdering::SequentiallyConsistent); } @@ -19256,44 +19855,36 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the /// llvm.ptrmask intrinsic (with a GEP before in the align_up case). -/// TODO: actually use ptrmask once most optimization passes know about it. RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { BuiltinAlignArgs Args(E, *this); - llvm::Value *SrcAddr = Args.Src; - if (Args.Src->getType()->isPointerTy()) - SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr"); - llvm::Value *SrcForMask = SrcAddr; + llvm::Value *SrcForMask = Args.Src; if (AlignUp) { // When aligning up we have to first add the mask to ensure we go over the // next alignment value and then align down to the next valid multiple. // By adding the mask, we ensure that align_up on an already aligned // value will not change the value. - SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary"); + if (Args.Src->getType()->isPointerTy()) { + if (getLangOpts().isSignedOverflowDefined()) + SrcForMask = + Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary"); + else + SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask, + /*SignedIndices=*/true, + /*isSubtraction=*/false, + E->getExprLoc(), "over_boundary"); + } else { + SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary"); + } } // Invert the mask to only clear the lower bits. 
llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask"); - llvm::Value *Result = - Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); + llvm::Value *Result = nullptr; if (Args.Src->getType()->isPointerTy()) { - /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well. - // Result = Builder.CreateIntrinsic( - // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType}, - // {SrcForMask, NegatedMask}, nullptr, "aligned_result"); - Result->setName("aligned_intptr"); - llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff"); - // The result must point to the same underlying allocation. This means we - // can use an inbounds GEP to enable better optimization. - if (getLangOpts().isSignedOverflowDefined()) - Result = - Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result"); - else - Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference, - /*SignedIndices=*/true, - /*isSubtraction=*/!AlignUp, - E->getExprLoc(), "aligned_result"); - // Emit an alignment assumption to ensure that the new alignment is - // propagated to loads/stores, etc. - emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); + Result = Builder.CreateIntrinsic( + Intrinsic::ptrmask, {Args.SrcType, Args.IntType}, + {SrcForMask, InvertedMask}, nullptr, "aligned_result"); + } else { + Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); } assert(Result->getType() == Args.SrcType); return RValue::get(Result); @@ -19997,8 +20588,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { // The base pointer is passed by address, so it needs to be loaded. 
Address A = EmitPointerWithAlignment(E->getArg(0)); - Address BP = Address(Builder.CreateBitCast( - A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment()); + Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment()); llvm::Value *Base = Builder.CreateLoad(BP); // The treatment of both loads and stores is the same: the arguments for // the builtin are the same as the arguments for the intrinsic. @@ -20033,15 +20623,13 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, // The intrinsic generates one result, which is the new value for the base // pointer. It needs to be returned. The result of the load instruction is // passed to intrinsic by address, so the value needs to be stored. - llvm::Value *BaseAddress = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0)); // Expressions like &(*pt++) will be incremented per evaluation. // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression // per call. Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy), - Int8Ty, DestAddr.getAlignment()); + DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment()); llvm::Value *DestAddress = DestAddr.getPointer(); // Operands are Base, Dest, Modifier. @@ -20214,17 +20802,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, Ops.push_back(AggValue); continue; } - - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - continue; - } - - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. 
- Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -20362,11 +20940,13 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, // Zihintntl case RISCV::BI__builtin_riscv_ntl_load: { llvm::Type *ResTy = ConvertType(E->getType()); - ConstantInt *Mode = cast<ConstantInt>(Ops[1]); + unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL + if (Ops.size() == 2) + DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue(); llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( getLLVMContext(), - llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue()))); + llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); llvm::MDNode *NontemporalNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); @@ -20388,18 +20968,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, return Load; } case RISCV::BI__builtin_riscv_ntl_store: { - ConstantInt *Mode = cast<ConstantInt>(Ops[2]); + unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL + if (Ops.size() == 3) + DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue(); llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( getLLVMContext(), - llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue()))); + llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); llvm::MDNode *NontemporalNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); - Value *BC = Builder.CreateBitCast( - Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast"); - - StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], BC); + StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), 
RISCVDomainNode); @@ -20418,129 +20997,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } - -Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - SmallVector<Value *, 4> Ops; - - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - - Intrinsic::ID ID = Intrinsic::not_intrinsic; - - switch (BuiltinID) { - default: - llvm_unreachable("unexpected builtin ID."); - case LoongArch::BI__builtin_loongarch_cacop_d: - ID = Intrinsic::loongarch_cacop_d; - break; - case LoongArch::BI__builtin_loongarch_cacop_w: - ID = Intrinsic::loongarch_cacop_w; - break; - case LoongArch::BI__builtin_loongarch_dbar: - ID = Intrinsic::loongarch_dbar; - break; - case LoongArch::BI__builtin_loongarch_break: - ID = Intrinsic::loongarch_break; - break; - case LoongArch::BI__builtin_loongarch_ibar: - ID = Intrinsic::loongarch_ibar; - break; - case LoongArch::BI__builtin_loongarch_movfcsr2gr: - ID = Intrinsic::loongarch_movfcsr2gr; - break; - case LoongArch::BI__builtin_loongarch_movgr2fcsr: - ID = Intrinsic::loongarch_movgr2fcsr; - break; - case LoongArch::BI__builtin_loongarch_syscall: - ID = Intrinsic::loongarch_syscall; - break; - case LoongArch::BI__builtin_loongarch_crc_w_b_w: - ID = Intrinsic::loongarch_crc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_h_w: - ID = Intrinsic::loongarch_crc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_w_w: - ID = Intrinsic::loongarch_crc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_d_w: - ID = Intrinsic::loongarch_crc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_b_w: - ID = Intrinsic::loongarch_crcc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_h_w: - ID = Intrinsic::loongarch_crcc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_w_w: - ID = 
Intrinsic::loongarch_crcc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_d_w: - ID = Intrinsic::loongarch_crcc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_w: - ID = Intrinsic::loongarch_csrrd_w; - break; - case LoongArch::BI__builtin_loongarch_csrwr_w: - ID = Intrinsic::loongarch_csrwr_w; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_w: - ID = Intrinsic::loongarch_csrxchg_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_d: - ID = Intrinsic::loongarch_csrrd_d; - break; - case LoongArch::BI__builtin_loongarch_csrwr_d: - ID = Intrinsic::loongarch_csrwr_d; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_d: - ID = Intrinsic::loongarch_csrxchg_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_b: - ID = Intrinsic::loongarch_iocsrrd_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_h: - ID = Intrinsic::loongarch_iocsrrd_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_w: - ID = Intrinsic::loongarch_iocsrrd_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_d: - ID = Intrinsic::loongarch_iocsrrd_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_b: - ID = Intrinsic::loongarch_iocsrwr_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_h: - ID = Intrinsic::loongarch_iocsrwr_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_w: - ID = Intrinsic::loongarch_iocsrwr_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_d: - ID = Intrinsic::loongarch_iocsrwr_d; - break; - case LoongArch::BI__builtin_loongarch_cpucfg: - ID = Intrinsic::loongarch_cpucfg; - break; - case LoongArch::BI__builtin_loongarch_asrtle_d: - ID = Intrinsic::loongarch_asrtle_d; - break; - case LoongArch::BI__builtin_loongarch_asrtgt_d: - ID = Intrinsic::loongarch_asrtgt_d; - break; - case LoongArch::BI__builtin_loongarch_lddir_d: - ID = Intrinsic::loongarch_lddir_d; - break; - case LoongArch::BI__builtin_loongarch_ldpte_d: - ID = Intrinsic::loongarch_ldpte_d; - break; - // 
TODO: Support more Intrinsics. - } - - assert(ID != Intrinsic::not_intrinsic); - - llvm::Function *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Ops); -} diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 08769c98dc29..520b0c4f1176 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/Cuda.h" #include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" +#include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -226,18 +227,15 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) TheModule(CGM.getModule()), RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), DeviceMC(InitDeviceMC(CGM)) { - CodeGen::CodeGenTypes &Types = CGM.getTypes(); - ASTContext &Ctx = CGM.getContext(); - IntTy = CGM.IntTy; SizeTy = CGM.SizeTy; VoidTy = CGM.VoidTy; Zeros[0] = llvm::ConstantInt::get(SizeTy, 0); Zeros[1] = Zeros[0]; - CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); - VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); - VoidPtrPtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); + CharPtrTy = CGM.UnqualPtrTy; + VoidPtrTy = CGM.UnqualPtrTy; + VoidPtrPtrTy = CGM.UnqualPtrTy; } llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { @@ -558,7 +556,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy), + KernelHandles[I.Kernel->getName()], KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), @@ -633,8 +631,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { replaceManagedVar(Var, ManagedVar); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(ManagedVar, 
VoidPtrTy), - Builder.CreateBitCast(Var, VoidPtrTy), + ManagedVar, + Var, VarName, llvm::ConstantInt::get(VarSizeTy, VarSize), llvm::ConstantInt::get(IntTy, Var->getAlignment())}; @@ -643,7 +641,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { } else { llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(Var, VoidPtrTy), + Var, VarName, VarName, llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()), @@ -657,15 +655,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { case DeviceVarFlags::Surface: Builder.CreateCall( RegisterSurf, - {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, - VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), + {&GpuBinaryHandlePtr, Var, VarName, VarName, + llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())}); break; case DeviceVarFlags::Texture: Builder.CreateCall( RegisterTex, - {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, - VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), + {&GpuBinaryHandlePtr, Var, VarName, VarName, + llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()), llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())}); break; @@ -862,9 +860,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { { CtorBuilder.SetInsertPoint(IfBlock); // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper); - llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( - RegisterFatbinFunc, - CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy)); + llvm::CallInst *RegisterFatbinCall = + CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper); CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr); CtorBuilder.CreateBr(ExitBlock); } @@ -880,9 +877,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Register binary with CUDA runtime. 
This is substantially different in // default mode vs. separate compilation! // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); - llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( - RegisterFatbinFunc, - CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy)); + llvm::CallInst *RegisterFatbinCall = + CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper); GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle"); @@ -923,9 +919,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName); assert(RegisterGlobalsFunc && "Expecting at least dummy function!"); - llvm::Value *Args[] = {RegisterGlobalsFunc, - CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy), - ModuleIDConstant, + llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant, makeDummyFunction(getCallbackFnTy())}; CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args); } @@ -1132,33 +1126,45 @@ void CGNVCUDARuntime::transformManagedVars() { // registered. The linker will provide a pointer to this section so we can // register the symbols with the linked device image. void CGNVCUDARuntime::createOffloadingEntries() { - llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); - OMPBuilder.initialize(); - StringRef Section = CGM.getLangOpts().HIP ? 
"hip_offloading_entries" : "cuda_offloading_entries"; + llvm::Module &M = CGM.getModule(); for (KernelInfo &I : EmittedKernels) - OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()], - getDeviceSideName(cast<NamedDecl>(I.D)), 0, - DeviceVarFlags::OffloadGlobalEntry, Section); + llvm::offloading::emitOffloadingEntry( + M, KernelHandles[I.Kernel->getName()], + getDeviceSideName(cast<NamedDecl>(I.D)), /*Flags=*/0, /*Data=*/0, + llvm::offloading::OffloadGlobalEntry, Section); for (VarInfo &I : DeviceVars) { uint64_t VarSize = CGM.getDataLayout().getTypeAllocSize(I.Var->getValueType()); + int32_t Flags = + (I.Flags.isExtern() + ? static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern) + : 0) | + (I.Flags.isConstant() + ? static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant) + : 0) | + (I.Flags.isNormalized() + ? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized) + : 0); if (I.Flags.getKind() == DeviceVarFlags::Variable) { - OMPBuilder.emitOffloadingEntry( - I.Var, getDeviceSideName(I.D), VarSize, - I.Flags.isManaged() ? DeviceVarFlags::OffloadGlobalManagedEntry - : DeviceVarFlags::OffloadGlobalEntry, - Section); + llvm::offloading::emitOffloadingEntry( + M, I.Var, getDeviceSideName(I.D), VarSize, + (I.Flags.isManaged() ? 
llvm::offloading::OffloadGlobalManagedEntry + : llvm::offloading::OffloadGlobalEntry) | + Flags, + /*Data=*/0, Section); } else if (I.Flags.getKind() == DeviceVarFlags::Surface) { - OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize, - DeviceVarFlags::OffloadGlobalSurfaceEntry, - Section); + llvm::offloading::emitOffloadingEntry( + M, I.Var, getDeviceSideName(I.D), VarSize, + llvm::offloading::OffloadGlobalSurfaceEntry | Flags, + I.Flags.getSurfTexType(), Section); } else if (I.Flags.getKind() == DeviceVarFlags::Texture) { - OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize, - DeviceVarFlags::OffloadGlobalTextureEntry, - Section); + llvm::offloading::emitOffloadingEntry( + M, I.Var, getDeviceSideName(I.D), VarSize, + llvm::offloading::OffloadGlobalTextureEntry | Flags, + I.Flags.getSurfTexType(), Section); } } } @@ -1234,7 +1240,10 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setAlignment(CGM.getPointerAlign().getAsAlign()); Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); - CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); + auto *FD = cast<FunctionDecl>(GD.getDecl()); + auto *FT = FD->getPrimaryTemplate(); + if (!FT || FT->isThisDeclarationADefinition()) + CGM.maybeSetTrivialComdat(*FD, *Var); KernelHandles[F->getName()] = Var; KernelStubs[Var] = F; return Var; diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h index 9a9c6d26cc63..c7af8f1cf0fe 100644 --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -17,6 +17,7 @@ #include "clang/AST/GlobalDecl.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/GlobalValue.h" namespace llvm { @@ -52,19 +53,6 @@ public: Texture, // Builtin texture }; - /// The kind flag for an offloading entry. - enum OffloadEntryKindFlag : uint32_t { - /// Mark the entry as a global entry. 
This indicates the presense of a - /// kernel if the size field is zero and a variable otherwise. - OffloadGlobalEntry = 0x0, - /// Mark the entry as a managed global variable. - OffloadGlobalManagedEntry = 0x1, - /// Mark the entry as a surface variable. - OffloadGlobalSurfaceEntry = 0x2, - /// Mark the entry as a texture variable. - OffloadGlobalTextureEntry = 0x3, - }; - private: unsigned Kind : 2; unsigned Extern : 1; diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 7b77dd7875bc..a8bf57a277e9 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -120,10 +120,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList ¶ms) { // FIXME: I'm not entirely sure I like using a fake decl just for code // generation. Maybe we can come up with a better way? - auto *ThisDecl = ImplicitParamDecl::Create( - CGM.getContext(), nullptr, MD->getLocation(), - &CGM.getContext().Idents.get("this"), MD->getThisType(), - ImplicitParamDecl::CXXThis); + auto *ThisDecl = + ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(), + &CGM.getContext().Idents.get("this"), + MD->getThisType(), ImplicitParamKind::CXXThis); params.push_back(ThisDecl); CGF.CXXABIThisDecl = ThisDecl; @@ -312,8 +312,7 @@ void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { // Delegate back to CGM by default. 
- return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*IsConstantVariable=*/false); + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage); } bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bd272e016e92..a24aeea7ae32 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -13,6 +13,7 @@ #include "CGCall.h" #include "ABIInfo.h" +#include "ABIInfoImpl.h" #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" @@ -71,6 +72,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_PreserveAll: return llvm::CallingConv::PreserveAll; case CC_Swift: return llvm::CallingConv::Swift; case CC_SwiftAsync: return llvm::CallingConv::SwiftTail; + case CC_M68kRTD: return llvm::CallingConv::M68k_RTD; } } @@ -112,8 +114,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { // When translating an unprototyped function type, always use a // variadic type. return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), - /*instanceMethod=*/false, - /*chainCall=*/false, std::nullopt, + FnInfoOpts::None, std::nullopt, FTNP->getExtInfo(), {}, RequiredArgs(0)); } @@ -189,10 +190,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); - return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, - /*chainCall=*/false, prefix, - FTP->getExtInfo(), paramInfos, - Required); + FnInfoOpts opts = + instanceMethod ? 
FnInfoOpts::IsInstanceMethod : FnInfoOpts::None; + return CGT.arrangeLLVMFunctionInfo(resultType, opts, prefix, + FTP->getExtInfo(), paramInfos, Required); } /// Arrange the argument and result information for a value of the @@ -252,6 +253,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr<PreserveAllAttr>()) return CC_PreserveAll; + if (D->hasAttr<M68kRTDAttr>()) + return CC_M68kRTD; + return CC_C; } @@ -271,7 +275,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, argTypes.push_back(DeriveThisType(RD, MD)); return ::arrangeLLVMFunctionInfo( - *this, true, argTypes, + *this, /*instanceMethod=*/true, argTypes, FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>()); } @@ -298,7 +302,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { setCUDAKernelCallingConvention(FT, CGM, MD); auto prototype = FT.getAs<FunctionProtoType>(); - if (MD->isInstance()) { + if (MD->isImplicitObjectMemberFunction()) { // The abstract case is perfectly fine. const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD); return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD); @@ -363,9 +367,8 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { : TheCXXABI.hasMostDerivedReturn(GD) ? 
CGM.getContext().VoidPtrTy : Context.VoidTy; - return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, extInfo, - paramInfos, required); + return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod, + argTypes, extInfo, paramInfos, required); } static SmallVector<CanQualType, 16> @@ -439,9 +442,9 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs, ArgTypes.size()); } - return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, - /*chainCall=*/false, ArgTypes, Info, - ParamInfos, Required); + + return arrangeLLVMFunctionInfo(ResultType, FnInfoOpts::IsInstanceMethod, + ArgTypes, Info, ParamInfos, Required); } /// Arrange the argument and result information for the declaration or @@ -449,7 +452,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, const CGFunctionInfo & CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) - if (MD->isInstance()) + if (MD->isImplicitObjectMemberFunction()) return arrangeCXXMethodDeclaration(MD); CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified(); @@ -460,10 +463,9 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { // When declaring a function without a prototype, always use a // non-variadic type. 
if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) { - return arrangeLLVMFunctionInfo( - noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {}, - RequiredArgs::All); + return arrangeLLVMFunctionInfo(noProto->getReturnType(), FnInfoOpts::None, + std::nullopt, noProto->getExtInfo(), {}, + RequiredArgs::All); } return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>()); @@ -512,9 +514,9 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, RequiredArgs required = (MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All); - return arrangeLLVMFunctionInfo( - GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, extParamInfos, required); + return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), + FnInfoOpts::None, argTys, einfo, extParamInfos, + required); } const CGFunctionInfo & @@ -523,9 +525,8 @@ CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, auto argTypes = getArgTypesForCall(Context, args); FunctionType::ExtInfo einfo; - return arrangeLLVMFunctionInfo( - GetReturnType(returnType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(returnType), FnInfoOpts::None, + argTypes, einfo, {}, RequiredArgs::All); } const CGFunctionInfo & @@ -550,8 +551,7 @@ CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)}; - return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, - /*chainCall=*/false, ArgTys, + return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::None, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); } @@ -570,9 +570,8 @@ CodeGenTypes::arrangeMSCtorClosure(const 
CXXConstructorDecl *CD, ArgTys.push_back(Context.IntTy); CallingConv CC = Context.getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); - return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true, - /*chainCall=*/false, ArgTys, - FunctionType::ExtInfo(CC), {}, + return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::IsInstanceMethod, + ArgTys, FunctionType::ExtInfo(CC), {}, RequiredArgs::All); } @@ -616,10 +615,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, SmallVector<CanQualType, 16> argTypes; for (const auto &arg : args) argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); + FnInfoOpts opts = chainCall ? FnInfoOpts::IsChainCall : FnInfoOpts::None; return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()), - /*instanceMethod=*/false, chainCall, - argTypes, fnType->getExtInfo(), paramInfos, - required); + opts, argTypes, fnType->getExtInfo(), + paramInfos, required); } /// Figure out the rules for calling a function with the given formal @@ -650,8 +649,8 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, auto argTypes = getArgTypesForDeclaration(Context, params); return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), - /*instanceMethod*/ false, /*chainCall*/ false, - argTypes, proto->getExtInfo(), paramInfos, + FnInfoOpts::None, argTypes, + proto->getExtInfo(), paramInfos, RequiredArgs::forPrototypePlus(proto, 1)); } @@ -662,10 +661,9 @@ CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType, SmallVector<CanQualType, 16> argTypes; for (const auto &Arg : args) argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); - return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, FunctionType::ExtInfo(), - /*paramInfos=*/ {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, + argTypes, FunctionType::ExtInfo(), + /*paramInfos=*/{}, 
RequiredArgs::All); } const CGFunctionInfo & @@ -673,17 +671,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args) { auto argTypes = getArgTypesForDeclaration(Context, args); - return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false, - argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, + argTypes, FunctionType::ExtInfo(), {}, + RequiredArgs::All); } const CGFunctionInfo & CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, ArrayRef<CanQualType> argTypes) { - return arrangeLLVMFunctionInfo( - resultType, /*instanceMethod=*/false, /*chainCall=*/false, - argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::None, argTypes, + FunctionType::ExtInfo(), {}, + RequiredArgs::All); } /// Arrange a call to a C++ method, passing the given arguments. 
@@ -706,15 +704,15 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, auto argTypes = getArgTypesForCall(Context, args); FunctionType::ExtInfo info = proto->getExtInfo(); - return arrangeLLVMFunctionInfo( - GetReturnType(proto->getReturnType()), /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, info, paramInfos, required); + return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), + FnInfoOpts::IsInstanceMethod, argTypes, info, + paramInfos, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { - return arrangeLLVMFunctionInfo( - getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(getContext().VoidTy, FnInfoOpts::None, + std::nullopt, FunctionType::ExtInfo(), {}, + RequiredArgs::All); } const CGFunctionInfo & @@ -734,12 +732,15 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, auto argTypes = getArgTypesForCall(Context, args); assert(signature.getRequiredArgs().allowsOptionalArgs()); - return arrangeLLVMFunctionInfo(signature.getReturnType(), - signature.isInstanceMethod(), - signature.isChainCall(), - argTypes, - signature.getExtInfo(), - paramInfos, + FnInfoOpts opts = FnInfoOpts::None; + if (signature.isInstanceMethod()) + opts |= FnInfoOpts::IsInstanceMethod; + if (signature.isChainCall()) + opts |= FnInfoOpts::IsChainCall; + if (signature.isDelegateCall()) + opts |= FnInfoOpts::IsDelegateCall; + return arrangeLLVMFunctionInfo(signature.getReturnType(), opts, argTypes, + signature.getExtInfo(), paramInfos, signature.getRequiredArgs()); } @@ -752,21 +753,24 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); /// Arrange the argument and result information for an abstract value /// of a given function type. This is the method which all of the /// above functions ultimately defer to. 
-const CGFunctionInfo & -CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, - bool instanceMethod, - bool chainCall, - ArrayRef<CanQualType> argTypes, - FunctionType::ExtInfo info, - ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, - RequiredArgs required) { +const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo( + CanQualType resultType, FnInfoOpts opts, ArrayRef<CanQualType> argTypes, + FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, + RequiredArgs required) { assert(llvm::all_of(argTypes, [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, - required, resultType, argTypes); + bool isInstanceMethod = + (opts & FnInfoOpts::IsInstanceMethod) == FnInfoOpts::IsInstanceMethod; + bool isChainCall = + (opts & FnInfoOpts::IsChainCall) == FnInfoOpts::IsChainCall; + bool isDelegateCall = + (opts & FnInfoOpts::IsDelegateCall) == FnInfoOpts::IsDelegateCall; + CGFunctionInfo::Profile(ID, isInstanceMethod, isChainCall, isDelegateCall, + info, paramInfos, required, resultType, argTypes); void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); @@ -776,8 +780,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); // Construct the function info. We co-allocate the ArgInfos. 
- FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info, - paramInfos, resultType, argTypes, required); + FI = CGFunctionInfo::create(CC, isInstanceMethod, isChainCall, isDelegateCall, + info, paramInfos, resultType, argTypes, required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI).second; @@ -812,9 +816,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, return *FI; } -CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, - bool instanceMethod, - bool chainCall, +CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod, + bool chainCall, bool delegateCall, const FunctionType::ExtInfo &info, ArrayRef<ExtParameterInfo> paramInfos, CanQualType resultType, @@ -834,6 +837,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ASTCallingConvention = info.getCC(); FI->InstanceMethod = instanceMethod; FI->ChainCall = chainCall; + FI->DelegateCall = delegateCall; FI->CmseNSCall = info.getCmseNSCall(); FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); @@ -1376,7 +1380,7 @@ static void CreateCoercedStore(llvm::Value *Src, llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy); if (SrcPtrTy && DstPtrTy && SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) { - Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy); + Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy); CGF.Builder.CreateStore(Src, Dst, DstIsVolatile); return; } @@ -1762,6 +1766,15 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) && FPT->isNothrow()) FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + FuncAttrs.addAttribute("aarch64_pstate_sm_enabled"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + 
FuncAttrs.addAttribute("aarch64_pstate_sm_compatible"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + FuncAttrs.addAttribute("aarch64_pstate_za_shared"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZAPreservedMask) + FuncAttrs.addAttribute("aarch64_pstate_za_preserved"); } static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs, @@ -1992,11 +2005,45 @@ static void getTrivialDefaultFunctionAttributes( } } -/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as -/// though we had emitted it ourselves. We remove any attributes on F that -/// conflict with the attributes we add here. -static void mergeDefaultFunctionDefinitionAttributes( - llvm::Function &F, const CodeGenOptions CodeGenOpts, +/// Merges `target-features` from \TargetOpts and \F, and sets the result in +/// \FuncAttr +/// * features from \F are always kept +/// * a feature from \TargetOpts is kept if itself and its opposite are absent +/// from \F +static void +overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr, + const llvm::Function &F, + const TargetOptions &TargetOpts) { + auto FFeatures = F.getFnAttribute("target-features"); + + llvm::StringSet<> MergedNames; + SmallVector<StringRef> MergedFeatures; + MergedFeatures.reserve(TargetOpts.Features.size()); + + auto AddUnmergedFeatures = [&](auto &&FeatureRange) { + for (StringRef Feature : FeatureRange) { + if (Feature.empty()) + continue; + assert(Feature[0] == '+' || Feature[0] == '-'); + StringRef Name = Feature.drop_front(1); + bool Merged = !MergedNames.insert(Name).second; + if (!Merged) + MergedFeatures.push_back(Feature); + } + }; + + if (FFeatures.isValid()) + AddUnmergedFeatures(llvm::split(FFeatures.getValueAsString(), ',')); + AddUnmergedFeatures(TargetOpts.Features); + + if (!MergedFeatures.empty()) { + llvm::sort(MergedFeatures); + FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ",")); + } +} + +void 
CodeGen::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, const TargetOptions &TargetOpts, bool WillInternalize) { @@ -2052,16 +2099,10 @@ static void mergeDefaultFunctionDefinitionAttributes( F.removeFnAttrs(AttrsToRemove); addDenormalModeAttrs(Merged, MergedF32, FuncAttrs); - F.addFnAttrs(FuncAttrs); -} -void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes( - llvm::Function &F, const CodeGenOptions CodeGenOpts, - const LangOptions &LangOpts, const TargetOptions &TargetOpts, - bool WillInternalize) { + overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts); - ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts, - TargetOpts, WillInternalize); + F.addFnAttrs(FuncAttrs); } void CodeGenModule::getTrivialDefaultFunctionAttributes( @@ -2084,23 +2125,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs); } -void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), - /* AttrOnCallSite = */ false, FuncAttrs); - // TODO: call GetCPUAndFeaturesAttributes? - F.addFnAttrs(FuncAttrs); -} - -/// Apply default attributes to \p F, accounting for merge semantics of -/// attributes that should not overwrite existing attributes. 
-void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( - llvm::Function &F, bool WillInternalize) { - ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(), - getTarget().getTargetOpts(), - WillInternalize); -} - void CodeGenModule::addDefaultFunctionDefinitionAttributes( llvm::AttrBuilder &attrs) { getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false, @@ -2148,7 +2172,8 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types, const llvm::DataLayout &DL, const ABIArgInfo &AI, bool CheckCoerce = true) { llvm::Type *Ty = Types.ConvertTypeForMem(QTy); - if (AI.getKind() == ABIArgInfo::Indirect) + if (AI.getKind() == ABIArgInfo::Indirect || + AI.getKind() == ABIArgInfo::IndirectAliased) return true; if (AI.getKind() == ABIArgInfo::Extend) return true; @@ -2247,6 +2272,17 @@ static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) { return Mask; } +void CodeGenModule::AdjustMemoryAttribute(StringRef Name, + CGCalleeInfo CalleeInfo, + llvm::AttributeList &Attrs) { + if (Attrs.getMemoryEffects().getModRef() == llvm::ModRefInfo::NoModRef) { + Attrs = Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Memory); + llvm::Attribute MemoryAttr = llvm::Attribute::getWithMemoryEffects( + getLLVMContext(), llvm::MemoryEffects::writeOnly()); + Attrs = Attrs.addFnAttribute(getLLVMContext(), MemoryAttr); + } +} + /// Construct the IR attribute list of a function or call. /// /// When adding an attribute, please consider where it should be handled: @@ -2364,7 +2400,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // gcc specifies that 'pure' functions cannot have infinite loops. 
FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr<NoAliasAttr>()) { - FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly()); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::inaccessibleOrArgMemOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } if (TargetDecl->hasAttr<RestrictAttr>()) @@ -2398,10 +2434,21 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // to the compiler that the global work-size be a multiple of // the work-group size specified to clEnqueueNDRangeKernel // (i.e. work groups are uniform). - FuncAttrs.addAttribute("uniform-work-group-size", - llvm::toStringRef(CodeGenOpts.UniformWGSize)); + FuncAttrs.addAttribute( + "uniform-work-group-size", + llvm::toStringRef(getLangOpts().OffloadUniformBlock)); } } + + if (TargetDecl->hasAttr<CUDAGlobalAttr>() && + getLangOpts().OffloadUniformBlock) + FuncAttrs.addAttribute("uniform-work-group-size", "true"); + + if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) + FuncAttrs.addAttribute("aarch64_pstate_sm_body"); + + if (TargetDecl->hasAttr<ArmNewZAAttr>()) + FuncAttrs.addAttribute("aarch64_pstate_za_new"); } // Attach "no-builtins" attributes to: @@ -2593,7 +2640,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, llvm::AttrBuilder Attrs(getLLVMContext()); QualType ThisTy = - FI.arg_begin()->type.castAs<PointerType>()->getPointeeType(); + FI.arg_begin()->type.getTypePtr()->getPointeeType(); if (!CodeGenOpts.NullPointerIsValid && getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) { @@ -2672,7 +2719,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, auto *Decl = ParamType->getAsRecordDecl(); if (CodeGenOpts.PassByValueIsNoAlias && Decl && - Decl->getArgPassingRestrictions() == RecordDecl::APK_CanPassInRegs) + Decl->getArgPassingRestrictions() == + RecordArgPassingKind::CanPassInRegs) // When calling the function, the pointer passed in will be the only // reference to the underlying object. Mark it accordingly. 
Attrs.addAttribute(llvm::Attribute::NoAlias); @@ -3015,7 +3063,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // indicates dereferenceability, and if the size is constant we can // use the dereferenceable attribute (which requires the size in // bytes). - if (ArrTy->getSizeModifier() == ArrayType::Static) { + if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) { QualType ETy = ArrTy->getElementType(); llvm::Align Alignment = CGM.getNaturalTypeAlignment(ETy).getAsAlign(); @@ -3039,7 +3087,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // For C99 VLAs with the static keyword, we don't know the size so // we can't use the dereferenceable attribute, but in addrspace(0) // we know that it must be nonnull. - if (ArrTy->getSizeModifier() == VariableArrayType::Static) { + if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) { QualType ETy = ArrTy->getElementType(); llvm::Align Alignment = CGM.getNaturalTypeAlignment(ETy).getAsAlign(); @@ -3400,9 +3448,9 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, const VarDecl *self = method->getSelfDecl(); if (!self->getType().isConstQualified()) return nullptr; - // Look for a retain call. - llvm::CallInst *retainCall = - dyn_cast<llvm::CallInst>(result->stripPointerCasts()); + // Look for a retain call. Note: stripPointerCasts looks through returned arg + // functions, which would cause us to miss the retain. + llvm::CallInst *retainCall = dyn_cast<llvm::CallInst>(result); if (!retainCall || retainCall->getCalledOperand() != CGF.CGM.getObjCEntrypoints().objc_retain) return nullptr; @@ -3459,7 +3507,9 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { return nullptr; // These aren't actually possible for non-coerced returns, and we // only care about non-coerced returns on this code path. - assert(!SI->isAtomic() && !SI->isVolatile()); + // All memory instructions inside __try block are volatile. 
+ assert(!SI->isAtomic() && + (!SI->isVolatile() || CGF.currentFunctionUsesSEHTry())); return SI; }; // If there are multiple uses of the return-value slot, just check @@ -3989,10 +4039,6 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - if (isInAllocaArgument(CGM.getCXXABI(), type)) { - CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter"); - } - // GetAddrOfLocalVar returns a pointer-to-pointer for references, // but the argument needs to be the original pointer. if (type->isReferenceType()) { @@ -4262,15 +4308,13 @@ void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) { assert(!StackBase); // Save the stack. - llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave); - StackBase = CGF.Builder.CreateCall(F, {}, "inalloca.save"); + StackBase = CGF.Builder.CreateStackSave("inalloca.save"); } void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { if (StackBase) { // Restore the stack after the call. 
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); - CGF.Builder.CreateCall(F, StackBase); + CGF.Builder.CreateStackRestore(StackBase); } } @@ -5105,7 +5149,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, "indirect argument must be in alloca address space"); bool NeedCopy = false; - if (Addr.getAlignment() < Align && llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) < Align.getAsAlign()) { @@ -5114,12 +5157,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); - if (!ArgInfo.getIndirectByVal() || + bool isByValOrRef = + ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal(); + + if (!isByValOrRef || (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) { NeedCopy = true; } if (!getLangOpts().OpenCL) { - if ((ArgInfo.getIndirectByVal() && + if ((isByValOrRef && (AS != LangAS::Default && AS != CGM.getASTAllocaAddressSpace()))) { NeedCopy = true; @@ -5127,7 +5173,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } // For OpenCL even if RV is located in default or alloca address space // we don't want to perform address space cast for it. 
- else if ((ArgInfo.getIndirectByVal() && + else if ((isByValOrRef && Addr.getType()->getAddressSpace() != IRFuncTy-> getParamType(FirstIRArg)->getPointerAddressSpace())) { NeedCopy = true; @@ -5244,30 +5290,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType()); if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { llvm::Type *SrcTy = Src.getElementType(); - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy); + llvm::TypeSize SrcTypeSize = + CGM.getDataLayout().getTypeAllocSize(SrcTy); + llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy); + if (SrcTypeSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(SrcTypeSize == DstTypeSize && + "Only allow non-fractional movement of structure with " + "homogeneous scalable vector type"); + assert(NumIRArgs == STy->getNumElements()); - // If the source type is smaller than the destination type of the - // coerce-to logic, copy the source value into a temp alloca the size - // of the destination type to allow loading all of it. The bits past - // the source value are left undef. 
- if (SrcSize < DstSize) { - Address TempAlloca - = CreateTempAlloca(STy, Src.getAlignment(), - Src.getName() + ".coerce"); - Builder.CreateMemCpy(TempAlloca, Src, SrcSize); - Src = TempAlloca; + llvm::Value *StoredStructValue = + Builder.CreateLoad(Src, Src.getName() + ".tuple"); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + llvm::Value *Extract = Builder.CreateExtractValue( + StoredStructValue, i, Src.getName() + ".extract" + Twine(i)); + IRCallArgs[FirstIRArg + i] = Extract; + } } else { - Src = Src.withElementType(STy); - } + uint64_t SrcSize = SrcTypeSize.getFixedValue(); + uint64_t DstSize = DstTypeSize.getFixedValue(); - assert(NumIRArgs == STy->getNumElements()); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Address EltPtr = Builder.CreateStructGEP(Src, i); - llvm::Value *LI = Builder.CreateLoad(EltPtr); - if (ArgHasMaybeUndefAttr) - LI = Builder.CreateFreeze(LI); - IRCallArgs[FirstIRArg + i] = LI; + // If the source type is smaller than the destination type of the + // coerce-to logic, copy the source value into a temp alloca the size + // of the destination type to allow loading all of it. The bits past + // the source value are left undef. + if (SrcSize < DstSize) { + Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(), + Src.getName() + ".coerce"); + Builder.CreateMemCpy(TempAlloca, Src, SrcSize); + Src = TempAlloca; + } else { + Src = Src.withElementType(STy); + } + + assert(NumIRArgs == STy->getNumElements()); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Address EltPtr = Builder.CreateStructGEP(Src, i); + llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMaybeUndefAttr) + LI = Builder.CreateFreeze(LI); + IRCallArgs[FirstIRArg + i] = LI; + } } } else { // In the simple case, just pass the coerced loaded value. 
@@ -5442,11 +5508,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, /*AttrOnCallSite=*/true, /*IsThunk=*/false); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) { if (FD->hasAttr<StrictFPAttr>()) // All calls within a strictfp function are marked strictfp Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP); + // If -ffast-math is enabled and the function is guarded by an + // '__attribute__((optnone)) adjust the memory attribute so the BE emits the + // library call instead of the intrinsic. + if (FD->hasAttr<OptimizeNoneAttr>() && getLangOpts().FastMath) + CGM.AdjustMemoryAttribute(CalleePtr->getName(), Callee.getAbstractInfo(), + Attrs); + } // Add call-site nomerge attribute if exists. if (InNoMergeAttributedStmt) Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge); @@ -5535,6 +5608,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, BundleList); EmitBlock(Cont); } + if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() && + CI->getCalledFunction()->getName().startswith("_Z4sqrt")) { + SetSqrtFPAccuracy(CI); + } if (callOrInvoke) *callOrInvoke = CI; @@ -5765,9 +5842,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, DestIsVolatile = false; } - // If the value is offset in memory, apply the offset now. - Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI); - CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + // An empty record can overlap other data (if declared with + // no_unique_address); omit the store for such types - as there is no + // actual data to store. + if (!isEmptyRecord(getContext(), RetTy, true)) { + // If the value is offset in memory, apply the offset now. 
+ Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI); + CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + } return convertTempToRValue(DestPtr, RetTy, SourceLocation()); } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index eaaf10c4eec6..aee86a3242fd 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -256,7 +256,7 @@ public: /// arguments in a call. class CallArgList : public SmallVector<CallArg, 8> { public: - CallArgList() : StackBase(nullptr) {} + CallArgList() = default; struct Writeback { /// The original argument. Note that the argument l-value @@ -342,7 +342,7 @@ private: SmallVector<CallArgCleanup, 1> CleanupsToDeactivate; /// The stacksave call. It dominates all of the argument evaluation. - llvm::CallInst *StackBase; + llvm::CallInst *StackBase = nullptr; }; /// FunctionArgList - Type for representing both the decl and type @@ -375,14 +375,58 @@ public: bool isExternallyDestructed() const { return IsExternallyDestructed; } }; -/// Helper to add attributes to \p F according to the CodeGenOptions and -/// LangOptions without requiring a CodeGenModule to be constructed. +/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as +/// though we had emitted it ourselves. We remove any attributes on F that +/// conflict with the attributes we add here. +/// +/// This is useful for adding attrs to bitcode modules that you want to link +/// with but don't control, such as CUDA's libdevice. When linking with such +/// a bitcode library, you might want to set e.g. its functions' +/// "unsafe-fp-math" attribute to match the attr of the functions you're +/// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of +/// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM +/// will propagate unsafe-fp-math=false up to every transitive caller of a +/// function in the bitcode library! 
+/// +/// With the exception of fast-math attrs, this will only make the attributes +/// on the function more conservative. But it's unsafe to call this on a +/// function which relies on particular fast-math attributes for correctness. +/// It's up to you to ensure that this is safe. void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, - const CodeGenOptions CodeGenOpts, + const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, const TargetOptions &TargetOpts, bool WillInternalize); +enum class FnInfoOpts { + None = 0, + IsInstanceMethod = 1 << 0, + IsChainCall = 1 << 1, + IsDelegateCall = 1 << 2, +}; + +inline FnInfoOpts operator|(FnInfoOpts A, FnInfoOpts B) { + return static_cast<FnInfoOpts>( + static_cast<std::underlying_type_t<FnInfoOpts>>(A) | + static_cast<std::underlying_type_t<FnInfoOpts>>(B)); +} + +inline FnInfoOpts operator&(FnInfoOpts A, FnInfoOpts B) { + return static_cast<FnInfoOpts>( + static_cast<std::underlying_type_t<FnInfoOpts>>(A) & + static_cast<std::underlying_type_t<FnInfoOpts>>(B)); +} + +inline FnInfoOpts operator|=(FnInfoOpts A, FnInfoOpts B) { + A = A | B; + return A; +} + +inline FnInfoOpts operator&=(FnInfoOpts A, FnInfoOpts B) { + A = A & B; + return A; +} + } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 93e7b54fca04..d18f186ce5b4 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -28,6 +28,7 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Transforms/Utils/SanitizerStats.h" #include <optional> @@ -138,7 +139,7 @@ Address CodeGenFunction::LoadCXXThisAddress() { CXXThisAlignment = CGM.getClassPointerAlignment(MD->getParent()); } - llvm::Type *Ty = ConvertType(MD->getThisType()->getPointeeType()); + llvm::Type *Ty = ConvertType(MD->getFunctionObjectParameterType()); return 
Address(LoadCXXThis(), Ty, CXXThisAlignment, KnownNonNull); } @@ -403,11 +404,8 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, assert(PathBegin != PathEnd && "Base path should not be empty!"); QualType DerivedTy = - getContext().getCanonicalType(getContext().getTagDeclType(Derived)); - unsigned AddrSpace = BaseAddr.getAddressSpace(); + getContext().getCanonicalType(getContext().getTagDeclType(Derived)); llvm::Type *DerivedValueTy = ConvertType(DerivedTy); - llvm::Type *DerivedPtrTy = - llvm::PointerType::get(getLLVMContext(), AddrSpace); llvm::Value *NonVirtualOffset = CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd); @@ -432,13 +430,10 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, } // Apply the offset. - llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy); + llvm::Value *Value = BaseAddr.getPointer(); Value = Builder.CreateInBoundsGEP( Int8Ty, Value, Builder.CreateNeg(NonVirtualOffset), "sub.ptr"); - // Just cast. - Value = Builder.CreateBitCast(Value, DerivedPtrTy); - // Produce a PHI if we had a null-check. if (NullCheckValue) { Builder.CreateBr(CastEnd); @@ -516,7 +511,7 @@ namespace { const CXXDestructorDecl *D = BaseClass->getDestructor(); // We are already inside a destructor, so presumably the object being // destroyed should have the expected type. 
- QualType ThisTy = D->getThisObjectType(); + QualType ThisTy = D->getFunctionObjectParameterType(); Address Addr = CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(), DerivedClass, BaseClass, @@ -1297,10 +1292,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, assert(BaseCtorContinueBB); } - llvm::Value *const OldThis = CXXThisValue; for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { if (!ConstructVBases) continue; + SaveAndRestore ThisRAII(CXXThisValue); if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) @@ -1317,7 +1312,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, // Then, non-virtual base initializers. for (; B != E && (*B)->isBaseInitializer(); B++) { assert(!(*B)->isBaseVirtual()); - + SaveAndRestore ThisRAII(CXXThisValue); if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) @@ -1325,8 +1320,6 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, EmitBaseInitializer(*this, ClassDecl, *B); } - CXXThisValue = OldThis; - InitializeVTablePointers(ClassDecl); // And finally, initialize class members. 
@@ -1462,7 +1455,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); if (HaveInsertPoint()) { - QualType ThisTy = Dtor->getThisObjectType(); + QualType ThisTy = Dtor->getFunctionObjectParameterType(); EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); } @@ -1496,7 +1489,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { EnterDtorCleanups(Dtor, Dtor_Complete); if (!isTryBody) { - QualType ThisTy = Dtor->getThisObjectType(); + QualType ThisTy = Dtor->getFunctionObjectParameterType(); EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); break; @@ -1676,8 +1669,7 @@ namespace { CodeGenFunction::SanitizerScope SanScope(&CGF); // Pass in void pointer and size of region as arguments to runtime // function - SmallVector<llvm::Value *, 2> Args = { - CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy)}; + SmallVector<llvm::Value *, 2> Args = {Ptr}; SmallVector<llvm::Type *, 2> ArgTypes = {CGF.VoidPtrTy}; if (PoisonSize.has_value()) { @@ -1756,10 +1748,8 @@ namespace { llvm::ConstantInt *OffsetSizePtr = llvm::ConstantInt::get(CGF.SizeTy, PoisonStart.getQuantity()); - llvm::Value *OffsetPtr = CGF.Builder.CreateGEP( - CGF.Int8Ty, - CGF.Builder.CreateBitCast(CGF.LoadCXXThis(), CGF.Int8PtrTy), - OffsetSizePtr); + llvm::Value *OffsetPtr = + CGF.Builder.CreateGEP(CGF.Int8Ty, CGF.LoadCXXThis(), OffsetSizePtr); CharUnits PoisonEnd; if (EndIndex >= Layout.getFieldCount()) { @@ -2123,8 +2113,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CallArgList Args; Address This = ThisAVS.getAddress(); LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace(); - QualType ThisType = D->getThisType(); - LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); + LangAS ThisAS = 
D->getFunctionObjectParameterType().getAddressSpace(); llvm::Value *ThisPtr = This.getPointer(); if (SlotAS != ThisAS) { @@ -2463,7 +2452,7 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { // We are calling the destructor from within the constructor. // Therefore, "this" should have the expected type. - QualType ThisTy = Dtor->getThisObjectType(); + QualType ThisTy = Dtor->getFunctionObjectParameterType(); CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, /*Delegating=*/true, Addr, ThisTy); } @@ -2736,7 +2725,6 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); - llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); // If we already know that the call has hidden LTO visibility, emit // @llvm.type.test(). Otherwise emit @llvm.public.type.test(), which WPD // will convert to @llvm.type.test() if we assert at link time that we have @@ -2745,7 +2733,7 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, ? 
llvm::Intrinsic::type_test : llvm::Intrinsic::public_type_test; llvm::Value *TypeTest = - Builder.CreateCall(CGM.getIntrinsic(IID), {CastedVTable, TypeId}); + Builder.CreateCall(CGM.getIntrinsic(IID), {VTable, TypeId}); Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); } } @@ -2849,9 +2837,8 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); - llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); llvm::Value *TypeTest = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, TypeId}); llvm::Constant *StaticData[] = { llvm::ConstantInt::get(Int8Ty, TCK), @@ -2861,7 +2848,7 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { - EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData); + EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, VTable, StaticData); return; } @@ -2874,9 +2861,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, CGM.getLLVMContext(), llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); llvm::Value *ValidVtable = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables}); + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); EmitCheck(std::make_pair(TypeTest, M), SanitizerHandler::CFICheckFail, - StaticData, {CastedVTable, ValidVtable}); + StaticData, {VTable, ValidVtable}); } bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { @@ -2907,11 +2894,9 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = 
llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); - llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); llvm::Value *CheckedLoad = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), - {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), - TypeId}); + {VTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), TypeId}); llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); std::string TypeName = RD->getQualifiedNameAsString(); @@ -2927,14 +2912,16 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( } void CodeGenFunction::EmitForwardingCallToLambda( - const CXXMethodDecl *callOperator, - CallArgList &callArgs) { + const CXXMethodDecl *callOperator, CallArgList &callArgs, + const CGFunctionInfo *calleeFnInfo, llvm::Constant *calleePtr) { // Get the address of the call operator. - const CGFunctionInfo &calleeFnInfo = - CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); - llvm::Constant *calleePtr = - CGM.GetAddrOfFunction(GlobalDecl(callOperator), - CGM.getTypes().GetFunctionType(calleeFnInfo)); + if (!calleeFnInfo) + calleeFnInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); + + if (!calleePtr) + calleePtr = + CGM.GetAddrOfFunction(GlobalDecl(callOperator), + CGM.getTypes().GetFunctionType(*calleeFnInfo)); // Prepare the return slot. 
const FunctionProtoType *FPT = @@ -2942,8 +2929,8 @@ void CodeGenFunction::EmitForwardingCallToLambda( QualType resultType = FPT->getReturnType(); ReturnValueSlot returnSlot; if (!resultType->isVoidType() && - calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(calleeFnInfo.getReturnType())) + calleeFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && + !hasScalarEvaluationKind(calleeFnInfo->getReturnType())) returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified(), /*IsUnused=*/false, /*IsExternallyDestructed=*/true); @@ -2954,7 +2941,7 @@ void CodeGenFunction::EmitForwardingCallToLambda( // Now emit our call. auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator)); - RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs); + RValue RV = EmitCall(*calleeFnInfo, callee, returnSlot, callArgs); // If necessary, copy the returned value into the slot. if (!resultType->isVoidType() && returnSlot.isNull()) { @@ -2996,7 +2983,15 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { + if (MD->isVariadic()) { + // FIXME: Making this work correctly is nasty because it requires either + // cloning the body of the call operator or making the call operator + // forward. + CGM.ErrorUnsupported(MD, "lambda conversion to variadic function"); + return; + } + const CXXRecordDecl *Lambda = MD->getParent(); // Start building arguments for forwarding call @@ -3007,10 +3002,16 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { Address ThisPtr = CreateMemTemp(LambdaType, "unused.capture"); CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); - // Add the rest of the parameters. 
+ EmitLambdaDelegatingInvokeBody(MD, CallArgs); +} + +void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD, + CallArgList &CallArgs) { + // Add the rest of the forwarded parameters. for (auto *Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc()); + const CXXRecordDecl *Lambda = MD->getParent(); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); // For a generic lambda, find the corresponding call operator specialization // to which the call to the static-invoker shall be forwarded. @@ -3024,10 +3025,21 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { assert(CorrespondingCallOpSpecialization); CallOp = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization); } + + // Special lambda forwarding when there are inalloca parameters. + if (hasInAllocaArg(MD)) { + const CGFunctionInfo *ImplFnInfo = nullptr; + llvm::Function *ImplFn = nullptr; + EmitLambdaInAllocaImplFn(CallOp, &ImplFnInfo, &ImplFn); + + EmitForwardingCallToLambda(CallOp, CallArgs, ImplFnInfo, ImplFn); + return; + } + EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD) { if (MD->isVariadic()) { // FIXME: Making this work correctly is nasty because it requires either // cloning the body of the call operator or making the call operator forward. @@ -3035,5 +3047,56 @@ void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { return; } - EmitLambdaDelegatingInvokeBody(MD); + // Forward %this argument. 
+ CallArgList CallArgs; + QualType LambdaType = getContext().getRecordType(MD->getParent()); + QualType ThisType = getContext().getPointerType(LambdaType); + llvm::Value *ThisArg = CurFn->getArg(0); + CallArgs.add(RValue::get(ThisArg), ThisType); + + EmitLambdaDelegatingInvokeBody(MD, CallArgs); +} + +void CodeGenFunction::EmitLambdaInAllocaImplFn( + const CXXMethodDecl *CallOp, const CGFunctionInfo **ImplFnInfo, + llvm::Function **ImplFn) { + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeCXXMethodDeclaration(CallOp); + llvm::Function *CallOpFn = + cast<llvm::Function>(CGM.GetAddrOfFunction(GlobalDecl(CallOp))); + + // Emit function containing the original call op body. __invoke will delegate + // to this function. + SmallVector<CanQualType, 4> ArgTypes; + for (auto I = FnInfo.arg_begin(); I != FnInfo.arg_end(); ++I) + ArgTypes.push_back(I->type); + *ImplFnInfo = &CGM.getTypes().arrangeLLVMFunctionInfo( + FnInfo.getReturnType(), FnInfoOpts::IsDelegateCall, ArgTypes, + FnInfo.getExtInfo(), {}, FnInfo.getRequiredArgs()); + + // Create mangled name as if this was a method named __impl. If for some + // reason the name doesn't look as expected then just tack __impl to the + // front. + // TODO: Use the name mangler to produce the right name instead of using + // string replacement. 
+ StringRef CallOpName = CallOpFn->getName(); + std::string ImplName; + if (size_t Pos = CallOpName.find_first_of("<lambda")) + ImplName = ("?__impl@" + CallOpName.drop_front(Pos)).str(); + else + ImplName = ("__impl" + CallOpName).str(); + + llvm::Function *Fn = CallOpFn->getParent()->getFunction(ImplName); + if (!Fn) { + Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(**ImplFnInfo), + llvm::GlobalValue::InternalLinkage, ImplName, + CGM.getModule()); + CGM.SetInternalFunctionAttributes(CallOp, Fn, **ImplFnInfo); + + const GlobalDecl &GD = GlobalDecl(CallOp); + const auto *D = cast<FunctionDecl>(GD.getDecl()); + CodeGenFunction(CGM).GenerateCode(GD, Fn, **ImplFnInfo); + CGM.SetLLVMFunctionAttributesForDefinition(D, Fn); + } + *ImplFn = Fn; } diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 0bbab283603d..f87caf050eea 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -207,8 +207,13 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { Scope->setLifetimeMarker(); // With Windows -EHa, Invoke llvm.seh.scope.begin() for EHCleanup + // If exceptions are disabled/ignored and SEH is not in use, then there is no + // invoke destination. SEH "works" even if exceptions are off. In practice, + // this means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior, except in the presence of -EHa. + // Check getInvokeDest() to generate llvm.seh.scope.begin() as needed. if (CGF->getLangOpts().EHAsynch && IsEHCleanup && !IsLifetimeMarker && - CGF->getTarget().getCXXABI().isMicrosoft()) + CGF->getTarget().getCXXABI().isMicrosoft() && CGF->getInvokeDest()) CGF->EmitSehCppScopeBegin(); return Scope->getCleanupBuffer(); @@ -868,8 +873,13 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // If there's exactly one branch-after and no other threads, // we can route it without a switch. 
+ // Skip for SEH, since ExitSwitch is used to generate code to indicate + // abnormal termination. (SEH: Except _leave and fall-through at + // the end, all other exits in a _try (return/goto/continue/break) + // are considered as abnormal terminations, using NormalCleanupDestSlot + // to indicate abnormal termination) if (!Scope.hasBranchThroughs() && !HasFixups && !HasFallthrough && - Scope.getNumBranchAfters() == 1) { + !currentFunctionUsesSEHTry() && Scope.getNumBranchAfters() == 1) { assert(!BranchThroughDest || !IsActive); // Clean up the possibly dead store to the cleanup dest slot. diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 8437cda79beb..888d30bfb3e1 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -129,14 +129,48 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) { return Prefix; } -static bool memberCallExpressionCanThrow(const Expr *E) { - if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E)) - if (const auto *Proto = - CE->getMethodDecl()->getType()->getAs<FunctionProtoType>()) - if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) && - Proto->canThrow() == CT_Cannot) - return false; - return true; +// Check if function can throw based on prototype noexcept, also works for +// destructors which are implicitly noexcept but can be marked noexcept(false). +static bool FunctionCanThrow(const FunctionDecl *D) { + const auto *Proto = D->getType()->getAs<FunctionProtoType>(); + if (!Proto) { + // Function proto is not found, we conservatively assume throwing. + return true; + } + return !isNoexceptExceptionSpec(Proto->getExceptionSpecType()) || + Proto->canThrow() != CT_Cannot; +} + +static bool ResumeStmtCanThrow(const Stmt *S) { + if (const auto *CE = dyn_cast<CallExpr>(S)) { + const auto *Callee = CE->getDirectCallee(); + if (!Callee) + // We don't have direct callee. Conservatively assume throwing. 
+ return true; + + if (FunctionCanThrow(Callee)) + return true; + + // Fall through to visit the children. + } + + if (const auto *TE = dyn_cast<CXXBindTemporaryExpr>(S)) { + // Special handling of CXXBindTemporaryExpr here as calling of Dtor of the + // temporary is not part of `children()` as covered in the fall through. + // We need to mark entire statement as throwing if the destructor of the + // temporary throws. + const auto *Dtor = TE->getTemporary()->getDestructor(); + if (FunctionCanThrow(Dtor)) + return true; + + // Fall through to visit the children. + } + + for (const auto *child : S->children()) + if (ResumeStmtCanThrow(child)) + return true; + + return false; } // Emit suspend expression which roughly looks like: @@ -201,6 +235,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co CGF.CurCoro.InSuspendBlock = true; auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); CGF.CurCoro.InSuspendBlock = false; + if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. BasicBlock *RealSuspendBlock = @@ -232,7 +267,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co // is marked as 'noexcept', we avoid generating this additional IR. CXXTryStmt *TryStmt = nullptr; if (Coro.ExceptionHandler && Kind == AwaitKind::Init && - memberCallExpressionCanThrow(S.getResumeExpr())) { + ResumeStmtCanThrow(S.getResumeExpr())) { Coro.ResumeEHVar = CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh")); Builder.CreateFlagStore(true, Coro.ResumeEHVar); @@ -244,6 +279,15 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co FPOptionsOverride(), Loc, Loc); TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch); CGF.EnterCXXTryStmt(*TryStmt); + CGF.EmitStmt(TryBody); + // We don't use EmitCXXTryStmt here. We need to store to ResumeEHVar that + // doesn't exist in the body. 
+ Builder.CreateFlagStore(false, Coro.ResumeEHVar); + CGF.ExitCXXTryStmt(*TryStmt); + LValueOrRValue Res; + // We are not supposed to obtain the value from init suspend await_resume(). + Res.RV = RValue::getIgnored(); + return Res; } LValueOrRValue Res; @@ -252,11 +296,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co else Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); - if (TryStmt) { - Builder.CreateFlagStore(false, Coro.ResumeEHVar); - CGF.ExitCXXTryStmt(*TryStmt); - } - return Res; } @@ -402,8 +441,11 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup { llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end); // See if we have a funclet bundle to associate coro.end with. (WinEH) auto Bundles = getBundlesForCoroEnd(CGF); - auto *CoroEnd = CGF.Builder.CreateCall( - CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles); + auto *CoroEnd = + CGF.Builder.CreateCall(CoroEndFn, + {NullPtr, CGF.Builder.getTrue(), + llvm::ConstantTokenNone::get(CoroEndFn->getContext())}, + Bundles); if (Bundles.empty()) { // Otherwise, (landingpad model), create a conditional branch that leads // either to a cleanup block or a block with EH resume instruction. @@ -531,6 +573,11 @@ struct GetReturnObjectManager { Builder.CreateStore(Builder.getFalse(), GroActiveFlag); GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl); + auto *GroAlloca = dyn_cast_or_null<llvm::AllocaInst>( + GroEmission.getOriginalAllocatedAddress().getPointer()); + assert(GroAlloca && "expected alloca to be emitted"); + GroAlloca->setMetadata(llvm::LLVMContext::MD_coro_outside_frame, + llvm::MDNode::get(CGF.CGM.getLLVMContext(), {})); // Remember the top of EHStack before emitting the cleanup. 
auto old_top = CGF.EHStack.stable_begin(); @@ -594,7 +641,7 @@ static void emitBodyAndFallthrough(CodeGenFunction &CGF, } void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { - auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getPtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); @@ -754,7 +801,9 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // Emit coro.end before getReturnStmt (and parameter destructors), since // resume and destroy parts of the coroutine should not include them. llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); - Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); + Builder.CreateCall(CoroEnd, + {NullPtr, Builder.getFalse(), + llvm::ConstantTokenNone::get(CoroEnd->getContext())}); if (Stmt *Ret = S.getReturnStmt()) { // Since we already emitted the return value above, so we shouldn't @@ -766,6 +815,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // LLVM require the frontend to mark the coroutine. CurFn->setPresplitCoroutine(); + + if (CXXRecordDecl *RD = FnRetTy->getAsCXXRecordDecl(); + RD && RD->hasAttr<CoroOnlyDestroyWhenCompleteAttr>()) + CurFn->setCoroDestroyOnlyWhenComplete(); } // Emit coroutine intrinsic and patch up arguments of the token type. 
@@ -783,7 +836,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, } CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin " "has been used earlier in this function"); - auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getPtrTy()); return RValue::get(NullPtr); } case llvm::Intrinsic::coro_size: { @@ -823,6 +876,10 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, } for (const Expr *Arg : E->arguments()) Args.push_back(EmitScalarExpr(Arg)); + // @llvm.coro.end takes a token parameter. Add token 'none' as the last + // argument. + if (IID == llvm::Intrinsic::coro_end) + Args.push_back(llvm::ConstantTokenNone::get(getLLVMContext())); llvm::Function *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index f049a682cfed..7cf661994a29 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -391,12 +391,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { SourceManager &SM = CGM.getContext().getSourceManager(); StringRef FileName; FileID FID; + std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (Loc.isInvalid()) { // The DIFile used by the CU is distinct from the main source file. Call // createFile() below for canonicalization if the source file was specified // with an absolute path. FileName = TheCU->getFile()->getFilename(); + CSInfo = TheCU->getFile()->getChecksum(); } else { PresumedLoc PLoc = SM.getPresumedLoc(Loc); FileName = PLoc.getFilename(); @@ -417,13 +419,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { return cast<llvm::DIFile>(V); } + // Put Checksum at a scope where it will persist past the createFile call. 
SmallString<64> Checksum; - - std::optional<llvm::DIFile::ChecksumKind> CSKind = + if (!CSInfo) { + std::optional<llvm::DIFile::ChecksumKind> CSKind = computeChecksum(FID, Checksum); - std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; - if (CSKind) - CSInfo.emplace(*CSKind, Checksum); + if (CSKind) + CSInfo.emplace(*CSKind, Checksum); + } return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); } @@ -1442,6 +1445,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_PreserveAll; case CC_X86RegCall: return llvm::dwarf::DW_CC_LLVM_X86RegCall; + case CC_M68kRTD: + return llvm::dwarf::DW_CC_LLVM_M68kRTD; } return 0; } @@ -1494,6 +1499,8 @@ CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, llvm::DIScope *RecordTy, const RecordDecl *RD) { StringRef Name = BitFieldDecl->getName(); QualType Ty = BitFieldDecl->getType(); + if (BitFieldDecl->hasAttr<PreferredTypeAttr>()) + Ty = BitFieldDecl->getAttr<PreferredTypeAttr>()->getType(); SourceLocation Loc = BitFieldDecl->getLocation(); llvm::DIFile *VUnit = getOrCreateFile(Loc); llvm::DIType *DebugType = getOrCreateType(Ty, VUnit); @@ -1650,8 +1657,10 @@ void CGDebugInfo::CollectRecordLambdaFields( FieldDecl *f = *Field; llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); QualType type = f->getType(); + StringRef ThisName = + CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; llvm::DIType *fieldType = createFieldType( - "this", type, f->getLocation(), f->getAccess(), + ThisName, type, f->getLocation(), f->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); elements.push_back(fieldType); @@ -1670,6 +1679,9 @@ CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy, unsigned LineNumber = getLineNumber(Var->getLocation()); StringRef VName = Var->getName(); + + // FIXME: to avoid complications with type merging we should + // emit the constant on the definition instead of the declaration. 
llvm::Constant *C = nullptr; if (Var->getInit()) { const APValue *Value = Var->evaluateValue(); @@ -1682,9 +1694,12 @@ CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy, } llvm::DINode::DIFlags Flags = getAccessFlag(Var->getAccess(), RD); + auto Tag = CGM.getCodeGenOpts().DwarfVersion >= 5 + ? llvm::dwarf::DW_TAG_variable + : llvm::dwarf::DW_TAG_member; auto Align = getDeclAlignIfRequired(Var, CGM.getContext()); llvm::DIDerivedType *GV = DBuilder.createStaticMemberType( - RecordTy, VName, VUnit, LineNumber, VTy, Flags, C, Align); + RecordTy, VName, VUnit, LineNumber, VTy, Flags, C, Tag, Align); StaticDataMemberCache[Var->getCanonicalDecl()].reset(GV); return GV; } @@ -2127,14 +2142,14 @@ CGDebugInfo::CollectTemplateParams(std::optional<TemplateArgs> OArgs, // attribute, i.e. that value is not available at the host side. if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice || !D->hasAttr<CUDADeviceAttr>()) { - const CXXMethodDecl *MD; // Variable pointer template parameters have a value that is the address // of the variable. if (const auto *VD = dyn_cast<VarDecl>(D)) V = CGM.GetAddrOfGlobalVar(VD); // Member function pointers have special support for building them, // though this is currently unsupported in LLVM CodeGen. - else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) + else if (const auto *MD = dyn_cast<CXXMethodDecl>(D); + MD && MD->isImplicitObjectMemberFunction()) V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); else if (const auto *FD = dyn_cast<FunctionDecl>(D)) V = CGM.GetAddrOfFunction(FD); @@ -3114,8 +3129,8 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, uint64_t NumVectorBytes = Size / Ctx.getCharWidth(); // Construct the vector of 'char' type. 
- QualType CharVecTy = Ctx.getVectorType(Ctx.CharTy, NumVectorBytes, - VectorType::GenericVector); + QualType CharVecTy = + Ctx.getVectorType(Ctx.CharTy, NumVectorBytes, VectorKind::Generic); return CreateType(CharVecTy->getAs<VectorType>(), Unit); } @@ -3378,9 +3393,9 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { unsigned Line = getLineNumber(ED->getLocation()); llvm::DIScope *EnumContext = getDeclContextDescriptor(ED); llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit); - return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit, - Line, Size, Align, EltArray, ClassTy, - Identifier, ED->isScoped()); + return DBuilder.createEnumerationType( + EnumContext, ED->getName(), DefUnit, Line, Size, Align, EltArray, ClassTy, + /*RunTimeLang=*/0, Identifier, ED->isScoped()); } llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, @@ -3869,7 +3884,7 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); T = CGM.getContext().getConstantArrayType(ET, ConstVal, nullptr, - ArrayType::Normal, 0); + ArraySizeModifier::Normal, 0); } Name = VD->getName(); @@ -4541,7 +4556,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, if (NumPaddingBytes.isPositive()) { llvm::APInt pad(32, NumPaddingBytes.getQuantity()); FType = CGM.getContext().getConstantArrayType( - CGM.getContext().CharTy, pad, nullptr, ArrayType::Normal, 0); + CGM.getContext().CharTy, pad, nullptr, ArraySizeModifier::Normal, 0); EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset)); } } @@ -4612,8 +4627,8 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an // object pointer flag. 
if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) { - if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis || - IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + if (IPD->getParameterKind() == ImplicitParamKind::CXXThis || + IPD->getParameterKind() == ImplicitParamKind::ObjCSelf) Flags |= llvm::DINode::FlagObjectPointer; } @@ -4744,6 +4759,40 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, return D; } +llvm::DIType *CGDebugInfo::CreateBindingDeclType(const BindingDecl *BD) { + llvm::DIFile *Unit = getOrCreateFile(BD->getLocation()); + + // If the declaration is bound to a bitfield struct field, its type may have a + // size that is different from its deduced declaration type's. + if (const MemberExpr *ME = dyn_cast<MemberExpr>(BD->getBinding())) { + if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { + if (FD->isBitField()) { + ASTContext &Context = CGM.getContext(); + const CGRecordLayout &RL = + CGM.getTypes().getCGRecordLayout(FD->getParent()); + const CGBitFieldInfo &Info = RL.getBitFieldInfo(FD); + + // Find an integer type with the same bitwidth as the bitfield size. If + // no suitable type is present in the target, give up on producing debug + // information as it would be wrong. It is certainly possible to produce + // correct debug info, but the logic isn't currently implemented. 
+ uint64_t BitfieldSizeInBits = Info.Size; + QualType IntTy = + Context.getIntTypeForBitwidth(BitfieldSizeInBits, Info.IsSigned); + if (IntTy.isNull()) + return nullptr; + Qualifiers Quals = BD->getType().getQualifiers(); + QualType FinalTy = Context.getQualifiedType(IntTy, Quals); + llvm::DIType *Ty = getOrCreateType(FinalTy, Unit); + assert(Ty); + return Ty; + } + } + } + + return getOrCreateType(BD->getType(), Unit); +} + llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, llvm::Value *Storage, std::optional<unsigned> ArgNo, @@ -4758,8 +4807,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, if (isa<DeclRefExpr>(BD->getBinding())) return nullptr; - llvm::DIFile *Unit = getOrCreateFile(BD->getLocation()); - llvm::DIType *Ty = getOrCreateType(BD->getType(), Unit); + llvm::DIType *Ty = CreateBindingDeclType(BD); // If there is no debug info for this type then do not emit debug info // for this variable. @@ -4785,6 +4833,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, unsigned Column = getColumnNumber(BD->getLocation()); StringRef Name = BD->getName(); auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); + llvm::DIFile *Unit = getOrCreateFile(BD->getLocation()); // Create the descriptor for the variable. llvm::DILocalVariable *D = DBuilder.createAutoVariable( Scope, Name, Unit, Line, Ty, CGM.getLangOpts().Optimize, @@ -4800,6 +4849,11 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD, const uint64_t fieldOffset = layout.getFieldOffset(fieldIndex); if (fieldOffset != 0) { + // Currently if the field offset is not a multiple of byte, the produced + // location would not be accurate. Therefore give up. 
+ if (fieldOffset % CGM.getContext().getCharWidth() != 0) + return nullptr; + Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); Expr.push_back( CGM.getContext().toCharUnitsFromBits(fieldOffset).getQuantity()); @@ -4835,11 +4889,15 @@ CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, const bool UsePointerValue) { assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); - if (auto *DD = dyn_cast<DecompositionDecl>(VD)) + if (auto *DD = dyn_cast<DecompositionDecl>(VD)) { for (auto *B : DD->bindings()) { EmitDeclare(B, Storage, std::nullopt, Builder, VD->getType()->isReferenceType()); } + // Don't emit an llvm.dbg.declare for the composite storage as it doesn't + // correspond to a user variable. + return nullptr; + } return EmitDeclare(VD, Storage, std::nullopt, Builder, UsePointerValue); } @@ -4903,7 +4961,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( // Self is passed along as an implicit non-arg variable in a // block. Mark it as the object pointer. if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) - if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + if (IPD->getParameterKind() == ImplicitParamKind::ObjCSelf) Ty = CreateSelfType(VD->getType(), Ty); // Get location information. @@ -5530,25 +5588,8 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { auto &GV = DeclCache[VD]; if (GV) return; - llvm::DIExpression *InitExpr = nullptr; - if (CGM.getContext().getTypeSize(VD->getType()) <= 64) { - // FIXME: Add a representation for integer constants wider than 64 bits. - if (Init.isInt()) { - const llvm::APSInt &InitInt = Init.getInt(); - std::optional<uint64_t> InitIntOpt; - if (InitInt.isUnsigned()) - InitIntOpt = InitInt.tryZExtValue(); - else if (auto tmp = InitInt.trySExtValue(); tmp.has_value()) - // Transform a signed optional to unsigned optional. 
When cpp 23 comes, - // use std::optional::transform - InitIntOpt = (uint64_t)tmp.value(); - if (InitIntOpt) - InitExpr = DBuilder.createConstantValueExpression(InitIntOpt.value()); - } else if (Init.isFloat()) - InitExpr = DBuilder.createConstantValueExpression( - Init.getFloat().bitcastToAPInt().getZExtValue()); - } + llvm::DIExpression *InitExpr = createConstantValueExpression(VD, Init); llvm::MDTuple *TemplateParameters = nullptr; if (isa<VarTemplateSpecializationDecl>(VD)) @@ -5885,3 +5926,32 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { return llvm::DINode::FlagAllCallsDescribed; } + +llvm::DIExpression * +CGDebugInfo::createConstantValueExpression(const clang::ValueDecl *VD, + const APValue &Val) { + // FIXME: Add a representation for integer constants wider than 64 bits. + if (CGM.getContext().getTypeSize(VD->getType()) > 64) + return nullptr; + + if (Val.isFloat()) + return DBuilder.createConstantValueExpression( + Val.getFloat().bitcastToAPInt().getZExtValue()); + + if (!Val.isInt()) + return nullptr; + + llvm::APSInt const &ValInt = Val.getInt(); + std::optional<uint64_t> ValIntOpt; + if (ValInt.isUnsigned()) + ValIntOpt = ValInt.tryZExtValue(); + else if (auto tmp = ValInt.trySExtValue()) + // Transform a signed optional to unsigned optional. 
When cpp 23 comes, + // use std::optional::transform + ValIntOpt = static_cast<uint64_t>(*tmp); + + if (ValIntOpt) + return DBuilder.createConstantValueExpression(ValIntOpt.value()); + + return nullptr; +} diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 1fd08626358b..7b60e94555d0 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -148,7 +148,7 @@ class CGDebugInfo { llvm::BumpPtrAllocator DebugInfoNames; StringRef CWDName; - llvm::StringMap<llvm::TrackingMDRef> DIFileCache; + llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache; llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache; /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations and global alias variables) that aren't covered @@ -337,6 +337,9 @@ class CGDebugInfo { llvm::DIScope *RecordTy, const RecordDecl *RD); + /// Create type for binding declarations. + llvm::DIType *CreateBindingDeclType(const BindingDecl *BD); + /// Create an anonnymous zero-size separator for bit-field-decl if needed on /// the target. llvm::DIDerivedType *createBitFieldSeparatorIfNeeded( @@ -797,6 +800,11 @@ private: llvm::MDTuple *&TemplateParameters, llvm::DIScope *&VDContext); + /// Create a DIExpression representing the constant corresponding + /// to the specified 'Val'. Returns nullptr on failure. + llvm::DIExpression *createConstantValueExpression(const clang::ValueDecl *VD, + const APValue &Val); + /// Allocate a copy of \p A using the DebugInfoNames allocator /// and return a reference to it. If multiple arguments are given the strings /// are concatenated. @@ -832,8 +840,10 @@ public: // Define copy assignment operator. 
ApplyDebugLocation &operator=(ApplyDebugLocation &&Other) { - CGF = Other.CGF; - Other.CGF = nullptr; + if (this != &Other) { + CGF = Other.CGF; + Other.CGF = nullptr; + } return *this; } diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index b0d6eb05acc2..a5da0aa2965a 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -96,7 +96,6 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::FriendTemplate: case Decl::Block: case Decl::Captured: - case Decl::ClassScopeFunctionSpecialization: case Decl::UsingShadow: case Decl::ConstructorUsingShadow: case Decl::ObjCTypeParam: @@ -202,7 +201,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return; llvm::GlobalValue::LinkageTypes Linkage = - CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false); + CGM.getLLVMLinkageVarDefinition(&D); // FIXME: We need to force the emission/use of a guard variable for // some variables even if we can constant-evaluate them because @@ -387,9 +386,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, GV->takeName(OldGV); // Replace all uses of the old global with the new global - llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); - OldGV->replaceAllUsesWith(NewPtrForOldDecl); + OldGV->replaceAllUsesWith(GV); // Erase the old global, since it is no longer used. 
OldGV->eraseFromParent(); @@ -398,7 +395,8 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, bool NeedsDtor = D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; - GV->setConstant(CGM.isTypeConstant(D.getType(), true, !NeedsDtor)); + GV->setConstant( + D.getType().isConstantStorage(getContext(), true, !NeedsDtor)); GV->setInitializer(Init); emitter.finalize(GV); @@ -579,8 +577,7 @@ namespace { bool isRedundantBeforeReturn() override { return true; } void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *V = CGF.Builder.CreateLoad(Stack); - llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); - CGF.Builder.CreateCall(F, V); + CGF.Builder.CreateStackRestore(V); } }; @@ -1247,29 +1244,24 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, // If the initializer is small, use a handful of stores. if (shouldSplitConstantStore(CGM, ConstantSize)) { if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { - // FIXME: handle the case when STy != Loc.getElementType(). - if (STy == Loc.getElementType()) { - for (unsigned i = 0; i != constant->getNumOperands(); i++) { - Address EltPtr = Builder.CreateStructGEP(Loc, i); - emitStoresForConstant( - CGM, D, EltPtr, isVolatile, Builder, - cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), - IsAutoInit); - } - return; + const llvm::StructLayout *Layout = + CGM.getDataLayout().getStructLayout(STy); + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i)); + Address EltPtr = Builder.CreateConstInBoundsByteGEP( + Loc.withElementType(CGM.Int8Ty), CurOff); + emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder, + constant->getAggregateElement(i), IsAutoInit); } + return; } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { - // FIXME: handle the case when ATy != Loc.getElementType(). 
- if (ATy == Loc.getElementType()) { - for (unsigned i = 0; i != ATy->getNumElements(); i++) { - Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); - emitStoresForConstant( - CGM, D, EltPtr, isVolatile, Builder, - cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), - IsAutoInit); - } - return; + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstGEP( + Loc.withElementType(ATy->getElementType()), i); + emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder, + constant->getAggregateElement(i), IsAutoInit); } + return; } } @@ -1359,7 +1351,6 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(llvm::TypeSize Size, "Pointer should be in alloca address space"); llvm::Value *SizeV = llvm::ConstantInt::get( Int64Ty, Size.isScalable() ? -1 : Size.getFixedValue()); - Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr}); C->setDoesNotThrow(); @@ -1370,7 +1361,6 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { assert(Addr->getType()->getPointerAddressSpace() == CGM.getDataLayout().getAllocaAddrSpace() && "Pointer should be in alloca address space"); - Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); C->setDoesNotThrow(); @@ -1499,7 +1489,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if ((!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) && (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && - !isEscapingByRef && CGM.isTypeConstant(Ty, true, !NeedsDtor))) { + !isEscapingByRef && + Ty.isConstantStorage(getContext(), true, !NeedsDtor))) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); // Signal this condition to later callbacks. @@ -1533,8 +1524,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // applied. 
llvm::Value *Zero = Builder.getFalse(); Address NRVOFlag = - CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo", - /*ArraySize=*/nullptr, &AllocaAddr); + CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo"); EnsureInsertPoint(); Builder.CreateStore(Zero, NRVOFlag); @@ -1629,10 +1619,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (!DidCallStackSave) { // Save the stack. Address Stack = - CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); + CreateDefaultAlignTempAlloca(AllocaInt8PtrTy, "saved_stack"); - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); - llvm::Value *V = Builder.CreateCall(F); + llvm::Value *V = Builder.CreateStackSave(); + assert(V->getType() == AllocaInt8PtrTy); Builder.CreateStore(V, Stack); DidCallStackSave = true; @@ -2523,7 +2513,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Suppressing debug info for ThreadPrivateVar parameters, else it hides // debug info of TLS variables. NoDebugInfo = - (IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar); + (IPD->getParameterKind() == ImplicitParamKind::ThreadPrivateVar); } Address DeclPtr = Address::invalid(); diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index be8fb6c274db..e08a1e5f42df 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -126,7 +126,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(DestAS)); auto SrcAS = D.getType().getQualifiers().getAddressSpace(); if (DestAS == SrcAS) - Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy); + Argument = Addr.getPointer(); else // FIXME: On addr space mismatch we are passing NULL. The generation // of the global destructor function should be adjusted accordingly. 
@@ -167,8 +167,7 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); - llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)}; + llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), Addr}; Builder.CreateCall(InvariantStart, Args); } @@ -217,7 +216,7 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; if (PerformInit) EmitDeclInit(*this, D, DeclAddr); - if (CGM.isTypeConstant(D.getType(), true, !NeedsDtor)) + if (D.getType().isConstantStorage(getContext(), true, !NeedsDtor)) EmitDeclInvariant(*this, D, DeclPtr); else EmitDeclDestroy(*this, D, DeclAddr); @@ -279,8 +278,8 @@ llvm::Function *CodeGenFunction::createTLSAtExitStub( } const CGFunctionInfo &FI = CGM.getTypes().arrangeLLVMFunctionInfo( - getContext().IntTy, /*instanceMethod=*/false, /*chainCall=*/false, - {getContext().IntTy}, FunctionType::ExtInfo(), {}, RequiredArgs::All); + getContext().IntTy, FnInfoOpts::None, {getContext().IntTy}, + FunctionType::ExtInfo(), {}, RequiredArgs::All); // Get the stub function type, int(*)(int,...). llvm::FunctionType *StubTy = @@ -293,7 +292,7 @@ llvm::Function *CodeGenFunction::createTLSAtExitStub( FunctionArgList Args; ImplicitParamDecl IPD(CGM.getContext(), CGM.getContext().IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&IPD); QualType ResTy = CGM.getContext().IntTy; @@ -328,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, registerGlobalDtorWithAtExit(dtorStub); } +/// Register a global destructor using the LLVM 'llvm.global_dtors' global. +void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD, + llvm::FunctionCallee Dtor, + llvm::Constant *Addr) { + // Create a function which calls the destructor. 
+ llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr); + CGM.AddGlobalDtor(dtorStub); +} + void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { // extern "C" int atexit(void (*f)(void)); assert(dtorStub->getType() == @@ -520,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, D->hasAttr<CUDASharedAttr>())) return; - if (getLangOpts().OpenMP && - getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)) - return; - // Check if we've already initialized this decl. auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) @@ -655,6 +659,10 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() { */ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { + assert(Primary->isInterfaceOrPartition() && + "The function should only be called for C++20 named module interface" + " or partition."); + while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) CXXGlobalInits.pop_back(); @@ -662,19 +670,35 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { // Module initializers for imported modules are emitted first. // Collect all the modules that we import - SmallVector<Module *> AllImports; + llvm::SmallSetVector<Module *, 8> AllImports; // Ones that we export for (auto I : Primary->Exports) - AllImports.push_back(I.getPointer()); + AllImports.insert(I.getPointer()); // Ones that we only import. for (Module *M : Primary->Imports) - AllImports.push_back(M); + AllImports.insert(M); + // Ones that we import in the global module fragment or the private module + // fragment. 
+ for (Module *SubM : Primary->submodules()) { + assert((SubM->isGlobalModule() || SubM->isPrivateModule()) && + "The sub modules of C++20 module unit should only be global module " + "fragments or private module framents."); + assert(SubM->Exports.empty() && + "The global mdoule fragments and the private module fragments are " + "not allowed to export import modules."); + for (Module *M : SubM->Imports) + AllImports.insert(M); + } SmallVector<llvm::Function *, 8> ModuleInits; for (Module *M : AllImports) { // No Itanium initializer in header like modules. if (M->isHeaderLikeModule()) continue; // TODO: warn of mixed use of module map modules and C++20? + // We're allowed to skip the initialization if we are sure it doesn't + // do anything. + if (!M->isNamedModuleInterfaceHasInit()) + continue; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); SmallString<256> FnName; { @@ -731,8 +755,7 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) { // If we have a completely empty initializer then we do not want to create // the guard variable. ConstantAddress GuardAddr = ConstantAddress::invalid(); - if (!AllImports.empty() || !PrioritizedCXXGlobalInits.empty() || - !CXXGlobalInits.empty()) { + if (!ModuleInits.empty()) { // Create the guard var. 
llvm::GlobalVariable *Guard = new llvm::GlobalVariable( getModule(), Int8Ty, /*isConstant=*/false, @@ -1120,7 +1143,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( bool useEHCleanupForArray, const VarDecl *VD) { FunctionArgList args; ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); args.push_back(&Dst); const CGFunctionInfo &FI = diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 9cb7d4c7731d..bae8babb8efe 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -263,12 +263,7 @@ static llvm::FunctionCallee getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { llvm::FunctionCallee Fn = getPersonalityFn(CGM, Personality); - llvm::PointerType* Int8PtrTy = llvm::PointerType::get( - llvm::Type::getInt8Ty(CGM.getLLVMContext()), - CGM.getDataLayout().getProgramAddressSpace()); - - return llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(Fn.getCallee()), - Int8PtrTy); + return cast<llvm::Constant>(Fn.getCallee()); } /// Check whether a landingpad instruction only uses C++ features. @@ -440,6 +435,15 @@ llvm::Value *CodeGenFunction::getSelectorFromSlot() { void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint) { + // If the exception is being emitted in an OpenMP target region, + // and the target is a GPU, we do not support exception handling. + // Therefore, we emit a trap which will abort the program, and + // prompt a warning indicating that a trap will be emitted. 
+ const llvm::Triple &T = Target.getTriple(); + if (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())) { + EmitTrapCall(llvm::Intrinsic::trap); + return; + } if (const Expr *SubExpr = E->getSubExpr()) { QualType ThrowType = SubExpr->getType(); if (ThrowType->isObjCObjectPointerType()) { @@ -609,9 +613,16 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { } void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) { - EnterCXXTryStmt(S); + const llvm::Triple &T = Target.getTriple(); + // If we encounter a try statement in an OpenMP target region offloaded to + // a GPU, we treat it as a basic block. + const bool IsTargetDevice = + (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())); + if (!IsTargetDevice) + EnterCXXTryStmt(S); EmitStmt(S.getTryBlock()); - ExitCXXTryStmt(S); + if (!IsTargetDevice) + ExitCXXTryStmt(S); } void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { @@ -1120,6 +1131,8 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF, // Select the right handler. llvm::Function *llvm_eh_typeid_for = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); + llvm::Type *argTy = llvm_eh_typeid_for->getArg(0)->getType(); + LangAS globAS = CGF.CGM.GetGlobalVarAddressSpace(nullptr); // Load the selector value. llvm::Value *selector = CGF.getSelectorFromSlot(); @@ -1133,7 +1146,11 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF, assert(handler.Type.Flags == 0 && "landingpads do not support catch handler flags"); assert(typeValue && "fell into catch-all case!"); - typeValue = CGF.Builder.CreateBitCast(typeValue, CGF.Int8PtrTy); + // With opaque ptrs, only the address space can be a mismatch. + if (typeValue->getType() != argTy) + typeValue = + CGF.getTargetHooks().performAddrSpaceCast(CGF, typeValue, globAS, + LangAS::Default, argTy); // Figure out the next block. 
bool nextIsEnd; @@ -1816,13 +1833,11 @@ Address CodeGenFunction::recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF, auto InsertPair = ParentCGF.EscapedLocals.insert( std::make_pair(ParentAlloca, ParentCGF.EscapedLocals.size())); int FrameEscapeIdx = InsertPair.first->second; - // call i8* @llvm.localrecover(i8* bitcast(@parentFn), i8* %fp, i32 N) + // call ptr @llvm.localrecover(ptr @parentFn, ptr %fp, i32 N) llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::localrecover); - llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); RecoverCall = Builder.CreateCall( - FrameRecoverFn, {ParentI8Fn, ParentFP, + FrameRecoverFn, {ParentCGF.CurFn, ParentFP, llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); } else { @@ -1885,9 +1900,7 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, // since finally funclets recover the parent FP for us. llvm::Function *RecoverFPIntrin = CGM.getIntrinsic(llvm::Intrinsic::eh_recoverfp); - llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); - ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP}); + ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentCGF.CurFn, EntryFP}); // if the parent is a _finally, the passed-in ParentFP is the FP // of parent _finally, not Establisher's FP (FP of outermost function). @@ -1915,19 +1928,15 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, int FrameEscapeIdx = InsertPair.first->second; // an example of a filter's prolog:: - // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..) - // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..) - // %2 = bitcast i8* %1 to i8** - // %3 = load i8*, i8* *%2, align 8 - // ==> %3 is the frame-pointer of outermost host function + // %0 = call ptr @llvm.eh.recoverfp(@"?fin$0@0@main@@",..) + // %1 = call ptr @llvm.localrecover(@"?fin$0@0@main@@",..) 
+ // %2 = load ptr, ptr %1, align 8 + // ==> %2 is the frame-pointer of outermost host function llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::localrecover); - llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); ParentFP = Builder.CreateCall( - FrameRecoverFn, {ParentI8Fn, ParentFP, + FrameRecoverFn, {ParentCGF.CurFn, ParentFP, llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); - ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy); ParentFP = Builder.CreateLoad( Address(ParentFP, CGM.VoidPtrTy, getPointerAlign())); } @@ -2019,17 +2028,17 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, Args.push_back(ImplicitParamDecl::Create( getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("exception_pointers"), - getContext().VoidPtrTy, ImplicitParamDecl::Other)); + getContext().VoidPtrTy, ImplicitParamKind::Other)); } else { Args.push_back(ImplicitParamDecl::Create( getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("abnormal_termination"), - getContext().UnsignedCharTy, ImplicitParamDecl::Other)); + getContext().UnsignedCharTy, ImplicitParamKind::Other)); } Args.push_back(ImplicitParamDecl::Create( getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy, - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); } QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy; @@ -2184,9 +2193,7 @@ void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { // in place of the RTTI typeinfo global that C++ EH uses. 
llvm::Function *FilterFunc = HelperCGF.GenerateSEHFilterFunction(*this, *Except); - llvm::Constant *OpaqueFunc = - llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy); - CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except.ret")); + CatchScope->setHandler(0, FilterFunc, createBasicBlock("__except.ret")); } void CodeGenFunction::ExitSEHTryStmt(const SEHTryStmt &S) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ed6095f7cfeb..69cf7f76be9a 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -30,6 +30,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -51,6 +52,12 @@ using namespace clang; using namespace CodeGen; +// Experiment to make sanitizers easier to debug +static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization( + "ubsan-unique-traps", llvm::cl::Optional, + llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"), + llvm::cl::init(false)); + //===--------------------------------------------------------------------===// // Miscellaneous Helper Methods //===--------------------------------------------------------------------===// @@ -140,9 +147,8 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(), ArrayTy->getNumElements()); - Result = Address( - Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()), - VectorTy, Result.getAlignment(), KnownNonNull); + Result = Address(Result.getPointer(), VectorTy, Result.getAlignment(), + KnownNonNull); } return Result; } @@ -392,7 +398,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, QualType Ty = Inner->getType(); if (CGF.CGM.getCodeGenOpts().MergeAllConstants && (Ty->isArrayType() || Ty->isRecordType()) && - 
CGF.CGM.isTypeConstant(Ty, true, false)) + Ty.isConstantStorage(CGF.getContext(), true, false)) if (auto Init = ConstantEmitter(CGF).tryEmitAbstract(Inner, Ty)) { auto AS = CGF.CGM.GetGlobalConstantAddressSpace(); auto *GV = new llvm::GlobalVariable( @@ -444,9 +450,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { Address Object = createReferenceTemporary(*this, M, E); if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) { llvm::Type *Ty = ConvertTypeForMem(E->getType()); - Object = Address(llvm::ConstantExpr::getBitCast( - Var, Ty->getPointerTo(Object.getAddressSpace())), - Ty, Object.getAlignment()); + Object = Object.withElementType(Ty); // createReferenceTemporary will promote the temporary to a global with a // constant initializer if it can. It can only do this to a value of @@ -502,11 +506,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { if (auto *Var = dyn_cast<llvm::GlobalVariable>( Object.getPointer()->stripPointerCasts())) { llvm::Type *TemporaryType = ConvertTypeForMem(E->getType()); - Object = Address(llvm::ConstantExpr::getBitCast( - cast<llvm::Constant>(Object.getPointer()), - TemporaryType->getPointerTo()), - TemporaryType, - Object.getAlignment()); + Object = Object.withElementType(TemporaryType); // If the temporary is a global and has a constant initializer or is a // constant temporary that we promoted to a global, we may have already // initialized it. 
@@ -746,9 +746,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Min = Builder.getFalse(); llvm::Value *NullIsUnknown = Builder.getFalse(); llvm::Value *Dynamic = Builder.getFalse(); - llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); llvm::Value *LargeEnough = Builder.CreateICmpUGE( - Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown, Dynamic}), Size); + Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}), Size); Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); } } @@ -825,9 +824,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash); - llvm::Type *VPtrTy = llvm::PointerType::get(IntPtrTy, 0); - Address VPtrAddr(Builder.CreateBitCast(Ptr, VPtrTy), IntPtrTy, - getPointerAlign()); + Address VPtrAddr(Ptr, IntPtrTy, getPointerAlign()); llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr); llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty); @@ -929,16 +926,27 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, if (CE->getCastKind() == CK_ArrayToPointerDecay && !CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(), StrictFlexArraysLevel)) { + CodeGenFunction::SanitizerScope SanScope(&CGF); + IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) return CGF.Builder.getInt(CAT->getSize()); - else if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) + + if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) return CGF.getVLASize(VAT).NumElts; // Ignore pass_object_size here. It's not applicable on decayed pointers. 
} + + if (const ValueDecl *VD = CGF.FindCountedByField(Base)) { + IndexedType = Base->getType(); + const Expr *E = CGF.BuildCountedByFieldExpr(Base, VD); + return CGF.EmitAnyExprToTemp(E).getScalarVal(); + } } + CodeGenFunction::SanitizerScope SanScope(&CGF); + QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0}; if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) { IndexedType = Base->getType(); @@ -948,13 +956,122 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, return nullptr; } +const Expr * +CodeGenFunction::BuildCountedByFieldExpr(const Expr *Base, + const ValueDecl *CountedByVD) { + // Find the outer struct expr (i.e. p in p->a.b.c.d). + Expr *CountedByExpr = const_cast<Expr *>(Base)->IgnoreParenImpCasts(); + + // Work our way up the expression until we reach the DeclRefExpr. + while (!isa<DeclRefExpr>(CountedByExpr)) + if (const auto *ME = dyn_cast<MemberExpr>(CountedByExpr)) + CountedByExpr = ME->getBase()->IgnoreParenImpCasts(); + + // Add back an implicit cast to create the required pr-value. + CountedByExpr = ImplicitCastExpr::Create( + getContext(), CountedByExpr->getType(), CK_LValueToRValue, CountedByExpr, + nullptr, VK_PRValue, FPOptionsOverride()); + + if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountedByVD)) { + // The counted_by field is inside an anonymous struct / union. The + // IndirectFieldDecl has the correct order of FieldDecls to build this + // easily. (Yay!) 
+ for (NamedDecl *ND : IFD->chain()) { + auto *VD = cast<ValueDecl>(ND); + CountedByExpr = + MemberExpr::CreateImplicit(getContext(), CountedByExpr, + CountedByExpr->getType()->isPointerType(), + VD, VD->getType(), VK_LValue, OK_Ordinary); + } + } else { + CountedByExpr = MemberExpr::CreateImplicit( + getContext(), const_cast<Expr *>(CountedByExpr), + CountedByExpr->getType()->isPointerType(), + const_cast<ValueDecl *>(CountedByVD), CountedByVD->getType(), VK_LValue, + OK_Ordinary); + } + + return CountedByExpr; +} + +const ValueDecl * +CodeGenFunction::FindFlexibleArrayMemberField(ASTContext &Ctx, + const RecordDecl *RD) { + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + + for (const Decl *D : RD->decls()) { + if (const auto *VD = dyn_cast<ValueDecl>(D); + VD && Decl::isFlexibleArrayMemberLike( + Ctx, VD, VD->getType(), StrictFlexArraysLevel, + /*IgnoreTemplateOrMacroSubstitution=*/true)) + return VD; + + if (const auto *Record = dyn_cast<RecordDecl>(D)) + if (const ValueDecl *VD = FindFlexibleArrayMemberField(Ctx, Record)) + return VD; + } + + return nullptr; +} + +const ValueDecl *CodeGenFunction::FindCountedByField(const Expr *Base) { + ASTContext &Ctx = getContext(); + const RecordDecl *OuterRD = nullptr; + const FieldDecl *FD = nullptr; + + Base = Base->IgnoreParenImpCasts(); + + // Get the outer-most lexical RecordDecl. 
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { + QualType Ty = DRE->getDecl()->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + + if (const auto *RD = Ty->getAsRecordDecl()) + OuterRD = RD->getOuterLexicalRecordContext(); + } else if (const auto *ME = dyn_cast<MemberExpr>(Base)) { + if (const ValueDecl *MD = ME->getMemberDecl()) { + OuterRD = MD->getDeclContext()->getOuterLexicalRecordContext(); + + const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + getLangOpts().getStrictFlexArraysLevel(); + if (Decl::isFlexibleArrayMemberLike( + Ctx, MD, MD->getType(), StrictFlexArraysLevel, + /*IgnoreTemplateOrMacroSubstitution=*/true)) + // Base is referencing the FAM itself. + FD = dyn_cast<FieldDecl>(MD); + } + } + + if (!OuterRD) + return nullptr; + + if (!FD) { + const ValueDecl *VD = FindFlexibleArrayMemberField(Ctx, OuterRD); + FD = dyn_cast_if_present<FieldDecl>(VD); + if (!FD) + return nullptr; + } + + const auto *CBA = FD->getAttr<CountedByAttr>(); + if (!CBA) + return nullptr; + + DeclarationName DName(CBA->getCountedByField()); + DeclContext::lookup_result Lookup = OuterRD->lookup(DName); + + if (Lookup.empty()) + return nullptr; + + return dyn_cast<ValueDecl>(Lookup.front()); +} + void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed) { assert(SanOpts.has(SanitizerKind::ArrayBounds) && "should not be called unless adding bounds checks"); - SanitizerScope SanScope(this); - const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = getLangOpts().getStrictFlexArraysLevel(); @@ -964,6 +1081,8 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, if (!Bound) return; + SanitizerScope SanScope(this); + bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false); @@ -1216,7 +1335,7 
@@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, const char *Name) { ErrorUnsupported(E, Name); llvm::Type *ElTy = ConvertType(E->getType()); - llvm::Type *Ty = llvm::PointerType::getUnqual(ElTy); + llvm::Type *Ty = UnqualPtrTy; return MakeAddrLValue( Address(llvm::UndefValue::get(Ty), ElTy, CharUnits::One()), E->getType()); } @@ -2039,6 +2158,14 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(), LV.isVolatileQualified()); + // HLSL allows treating scalars as one-element vectors. Converting the scalar + // IR value to a vector here allows the rest of codegen to behave as normal. + if (getLangOpts().HLSL && !Vec->getType()->isVectorTy()) { + llvm::Type *DstTy = llvm::FixedVectorType::get(Vec->getType(), 1); + llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); + Vec = Builder.CreateInsertElement(DstTy, Vec, Zero, "cast.splat"); + } + const llvm::Constant *Elts = LV.getExtVectorElts(); // If the result of the expression is a non-vector type, we must be extracting @@ -2308,10 +2435,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst) { + // HLSL allows storing to scalar values through ExtVector component LValues. + // To support this we need to handle the case where the destination address is + // a scalar. + Address DstAddr = Dst.getExtVectorAddress(); + if (!DstAddr.getElementType()->isVectorTy()) { + assert(!Dst.getType()->isVectorType() && + "this should only occur for non-vector l-values"); + Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified()); + return; + } + // This access turns into a read/modify/write of the vector. Load the input // value now. 
- llvm::Value *Vec = Builder.CreateLoad(Dst.getExtVectorAddress(), - Dst.isVolatileQualified()); + llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified()); const llvm::Constant *Elts = Dst.getExtVectorElts(); llvm::Value *SrcVal = Src.getScalarVal(); @@ -2359,7 +2496,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, llvm_unreachable("unexpected shorten vector length"); } } else { - // If the Src is a scalar (not a vector) it must be updating one element. + // If the Src is a scalar (not a vector), and the target is a vector it must + // be updating one element. unsigned InIdx = getAccessedFieldNo(0, Elts); llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt); @@ -2492,14 +2630,6 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, } } -static llvm::Value * -EmitBitCastOfLValueToProperType(CodeGenFunction &CGF, - llvm::Value *V, llvm::Type *IRType, - StringRef Name = StringRef()) { - unsigned AS = cast<llvm::PointerType>(V->getType())->getAddressSpace(); - return CGF.Builder.CreateBitCast(V, IRType->getPointerTo(AS), Name); -} - static LValue EmitThreadPrivateVarDeclLValue( CodeGenFunction &CGF, const VarDecl *VD, QualType T, Address Addr, llvm::Type *RealVarTy, SourceLocation Loc) { @@ -2600,7 +2730,6 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, V = CGF.Builder.CreateThreadLocalAddress(V); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); - V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Address Addr(V, RealVarTy, Alignment); // Emit reference to the private copy of the variable if it is an OpenMP @@ -2627,19 +2756,6 @@ static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM, } llvm::Constant *V = CGM.GetAddrOfFunction(GD); - if (!FD->hasPrototype()) { - if (const FunctionProtoType *Proto = - 
FD->getType()->getAs<FunctionProtoType>()) { - // Ugly case: for a K&R-style definition, the type of the definition - // isn't the same as the type of a use. Correct for this with a - // bitcast. - QualType NoProtoType = - CGM.getContext().getFunctionNoProtoType(Proto->getReturnType()); - NoProtoType = CGM.getContext().getPointerType(NoProtoType); - V = llvm::ConstantExpr::getBitCast(V, - CGM.getTypes().ConvertType(NoProtoType)); - } - } return V; } @@ -2654,9 +2770,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E, static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD, llvm::Value *ThisValue) { - QualType TagType = CGF.getContext().getTagDeclType(FD->getParent()); - LValue LV = CGF.MakeNaturalAlignAddrLValue(ThisValue, TagType); - return CGF.EmitLValueForField(LV, FD); + + return CGF.EmitLValueForLambdaField(FD, ThisValue); } /// Named Registers are named metadata pointing to the register name @@ -2692,8 +2807,7 @@ static LValue EmitGlobalNamedRegister(const VarDecl *VD, CodeGenModule &CGM) { /// this context. static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, const DeclRefExpr *E, - const VarDecl *VD, - bool IsConstant) { + const VarDecl *VD) { // For a variable declared in an enclosing scope, do not emit a spurious // reference even if we have a capture, as that will emit an unwarranted // reference to our capture state, and will likely generate worse code than @@ -2726,7 +2840,7 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, // We can emit a spurious reference only if the linkage implies that we'll // be emitting a non-interposable symbol that will be retained until link // time. 
- switch (CGF.CGM.getLLVMLinkageVarDefinition(VD, IsConstant)) { + switch (CGF.CGM.getLLVMLinkageVarDefinition(VD)) { case llvm::GlobalValue::ExternalLinkage: case llvm::GlobalValue::LinkOnceODRLinkage: case llvm::GlobalValue::WeakODRLinkage: @@ -2757,7 +2871,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // constant value directly instead. if (E->isNonOdrUse() == NOUR_Constant && (VD->getType()->isReferenceType() || - !canEmitSpuriousReferenceToVariable(*this, E, VD, true))) { + !canEmitSpuriousReferenceToVariable(*this, E, VD))) { VD->getAnyInitializer(VD); llvm::Constant *Val = ConstantEmitter(*this).emitAbstract( E->getLocation(), *VD->evaluateValue(), VD->getType()); @@ -2859,7 +2973,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // some reason; most likely, because it's in an outer function. } else if (VD->isStaticLocal()) { llvm::Constant *var = CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD)); addr = Address( var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); @@ -2943,9 +3057,20 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { return MakeAddrLValue(CGM.GetAddrOfMSGuidDecl(GD), T, AlignmentSource::Decl); - if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(ND)) - return MakeAddrLValue(CGM.GetAddrOfTemplateParamObject(TPO), T, - AlignmentSource::Decl); + if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(ND)) { + auto ATPO = CGM.GetAddrOfTemplateParamObject(TPO); + auto AS = getLangASFromTargetAS(ATPO.getAddressSpace()); + + if (AS != T.getAddressSpace()) { + auto TargetAS = getContext().getTargetAddressSpace(T.getAddressSpace()); + auto PtrTy = ATPO.getElementType()->getPointerTo(TargetAS); + auto ASC = getTargetHooks().performAddrSpaceCast( + CGM, ATPO.getPointer(), AS, T.getAddressSpace(), PtrTy); + ATPO = ConstantAddress(ASC, ATPO.getElementType(), 
ATPO.getAlignment()); + } + + return MakeAddrLValue(ATPO, T, AlignmentSource::Decl); + } llvm_unreachable("Unhandled DeclRefExpr"); } @@ -3421,8 +3546,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( "__cfi_slowpath_diag", llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false)); - CheckCall = Builder.CreateCall( - SlowPathFn, {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); + CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr, InfoPtr}); } else { SlowPathFn = CGM.getModule().getOrInsertFunction( "__cfi_slowpath", @@ -3445,14 +3569,12 @@ void CodeGenFunction::EmitCfiCheckStub() { llvm::Function *F = llvm::Function::Create( llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false), llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M); + F->setAlignment(llvm::Align(4096)); CGM.setDSOLocal(F); llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F); - // FIXME: consider emitting an intrinsic call like - // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2) - // which can be lowered in CrossDSOCFI pass to the actual contents of - // __cfi_check. This would allow inlining of __cfi_check calls. - llvm::CallInst::Create( - llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB); + // CrossDSOCFI pass is not executed if there is no executable code. 
+ SmallVector<llvm::Value*> Args{F->getArg(2), F->getArg(1)}; + llvm::CallInst::Create(M->getFunction("__cfi_check_fail"), Args, "", BB); llvm::ReturnInst::Create(Ctx, nullptr, BB); } @@ -3467,9 +3589,9 @@ void CodeGenFunction::EmitCfiCheckFail() { SanitizerScope SanScope(this); FunctionArgList Args; ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&ArgData); Args.push_back(&ArgAddr); @@ -3570,17 +3692,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, // check-type per function to save on code size. if (TrapBBs.size() <= CheckHandlerID) TrapBBs.resize(CheckHandlerID + 1); + llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; - if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB || - (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) { + if (!ClSanitizeDebugDeoptimization && + CGM.getCodeGenOpts().OptimizationLevel && TrapBB && + (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) { + auto Call = TrapBB->begin(); + assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB"); + + Call->applyMergedLocation(Call->getDebugLoc(), + Builder.getCurrentDebugLocation()); + Builder.CreateCondBr(Checked, Cont, TrapBB); + } else { TrapBB = createBasicBlock("trap"); Builder.CreateCondBr(Checked, Cont, TrapBB); EmitBlock(TrapBB); - llvm::CallInst *TrapCall = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap), - llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID)); + llvm::CallInst *TrapCall = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::ubsantrap), + llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization + ? 
TrapBB->getParent()->size() + : CheckHandlerID)); if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", @@ -3590,13 +3723,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapCall->setDoesNotReturn(); TrapCall->setDoesNotThrow(); Builder.CreateUnreachable(); - } else { - auto Call = TrapBB->begin(); - assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB"); - - Call->applyMergedLocation(Call->getDebugLoc(), - Builder.getCurrentDebugLocation()); - Builder.CreateCondBr(Checked, Cont, TrapBB); } EmitBlock(Cont); @@ -3707,6 +3833,33 @@ static QualType getFixedSizeElementType(const ASTContext &ctx, return eltType; } +static bool hasBPFPreserveStaticOffset(const RecordDecl *D) { + return D && D->hasAttr<BPFPreserveStaticOffsetAttr>(); +} + +static bool hasBPFPreserveStaticOffset(const Expr *E) { + if (!E) + return false; + QualType PointeeType = E->getType()->getPointeeType(); + if (PointeeType.isNull()) + return false; + if (const auto *BaseDecl = PointeeType->getAsRecordDecl()) + return hasBPFPreserveStaticOffset(BaseDecl); + return false; +} + +// Wraps Addr with a call to llvm.preserve.static.offset intrinsic. +static Address wrapWithBPFPreserveStaticOffset(CodeGenFunction &CGF, + Address &Addr) { + if (!CGF.getTarget().getTriple().isBPF()) + return Addr; + + llvm::Function *Fn = + CGF.CGM.getIntrinsic(llvm::Intrinsic::preserve_static_offset); + llvm::CallInst *Call = CGF.Builder.CreateCall(Fn, {Addr.getPointer()}); + return Address(Call, Addr.getElementType(), Addr.getAlignment()); +} + /// Given an array base, check whether its member access belongs to a record /// with preserve_access_index attribute or not. 
static bool IsPreserveAIArrayBase(CodeGenFunction &CGF, const Expr *ArrayBase) { @@ -3768,6 +3921,9 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); + if (hasBPFPreserveStaticOffset(Base)) + addr = wrapWithBPFPreserveStaticOffset(CGF, addr); + llvm::Value *eltPtr; auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back()); if (!LastIndex || @@ -4269,17 +4425,38 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { /// Given that we are currently emitting a lambda, emit an l-value for /// one of its members. -LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { - if (CurCodeDecl) { - assert(cast<CXXMethodDecl>(CurCodeDecl)->getParent()->isLambda()); - assert(cast<CXXMethodDecl>(CurCodeDecl)->getParent() == Field->getParent()); +/// +LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field, + llvm::Value *ThisValue) { + bool HasExplicitObjectParameter = false; + if (const auto *MD = dyn_cast_if_present<CXXMethodDecl>(CurCodeDecl)) { + HasExplicitObjectParameter = MD->isExplicitObjectMemberFunction(); + assert(MD->getParent()->isLambda()); + assert(MD->getParent() == Field->getParent()); + } + LValue LambdaLV; + if (HasExplicitObjectParameter) { + const VarDecl *D = cast<CXXMethodDecl>(CurCodeDecl)->getParamDecl(0); + auto It = LocalDeclMap.find(D); + assert(It != LocalDeclMap.end() && "explicit parameter not loaded?"); + Address AddrOfExplicitObject = It->getSecond(); + if (D->getType()->isReferenceType()) + LambdaLV = EmitLoadOfReferenceLValue(AddrOfExplicitObject, D->getType(), + AlignmentSource::Decl); + else + LambdaLV = MakeNaturalAlignAddrLValue(AddrOfExplicitObject.getPointer(), + D->getType().getNonReferenceType()); + } else { + QualType LambdaTagType = getContext().getTagDeclType(Field->getParent()); + LambdaLV = MakeNaturalAlignAddrLValue(ThisValue, LambdaTagType); } - QualType LambdaTagType = 
- getContext().getTagDeclType(Field->getParent()); - LValue LambdaLV = MakeNaturalAlignAddrLValue(CXXABIThisValue, LambdaTagType); return EmitLValueForField(LambdaLV, Field); } +LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { + return EmitLValueForLambdaField(Field, CXXABIThisValue); +} + /// Get the field index in the debug info. The debug info structure/union /// will ignore the unnamed bitfields. unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, @@ -4375,6 +4552,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); + if (hasBPFPreserveStaticOffset(rec)) + Addr = wrapWithBPFPreserveStaticOffset(*this, Addr); if (!UseVolatile) { if (!IsInPreservedAIRegion && (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) { @@ -4447,6 +4626,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, } Address addr = base.getAddress(*this); + if (hasBPFPreserveStaticOffset(rec)) + addr = wrapWithBPFPreserveStaticOffset(*this, addr); if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) { if (CGM.getCodeGenOpts().StrictVTablePointers && ClassDef->isDynamicClass()) { @@ -4616,7 +4797,7 @@ std::optional<LValue> HandleConditionalOperatorLValueSimpleCase( if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Live->IgnoreParens())) { CGF.EmitCXXThrowExpr(ThrowExpr); llvm::Type *ElemTy = CGF.ConvertType(Dead->getType()); - llvm::Type *Ty = llvm::PointerType::getUnqual(ElemTy); + llvm::Type *Ty = CGF.UnqualPtrTy; return CGF.MakeAddrLValue( Address(llvm::UndefValue::get(Ty), ElemTy, CharUnits::One()), Dead->getType()); @@ -4751,7 +4932,6 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntegralToPointer: case CK_PointerToIntegral: case CK_PointerToBoolean: - case CK_VectorSplat: case CK_IntegralCast: case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: @@ -4819,6 +4999,9 @@ LValue 
CodeGenFunction::EmitCastLValue(const CastExpr *E) { // bound and change the IR type. // FIXME: Once pointee types are removed from IR, remove this. LValue LV = EmitLValue(E->getSubExpr()); + // Propagate the volatile qualifer to LValue, if exist in E. + if (E->changesVolatileQualification()) + LV.getQuals() = E->getType().getQualifiers(); if (LV.isSimple()) { Address V = LV.getAddress(*this); if (V.isValid()) { @@ -4913,6 +5096,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { } case CK_ZeroToOCLOpaqueType: llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid"); + + case CK_VectorSplat: { + // LValue results of vector splats are only supported in HLSL. + if (!getLangOpts().HLSL) + return EmitUnsupportedLValue(E, "unexpected cast lvalue"); + return EmitLValue(E->getSubExpr()); + } } llvm_unreachable("Unhandled lvalue cast kind?"); @@ -4991,9 +5181,12 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E)) return EmitCUDAKernelCallExpr(CE, ReturnValue); + // A CXXOperatorCallExpr is created even for explicit object methods, but + // these should be treated like static function call. 
if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E)) - if (const CXXMethodDecl *MD = - dyn_cast_or_null<CXXMethodDecl>(CE->getCalleeDecl())) + if (const auto *MD = + dyn_cast_if_present<CXXMethodDecl>(CE->getCalleeDecl()); + MD && MD->isImplicitObjectMemberFunction()) return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue); CGCallee callee = EmitCallee(E->getCallee()); @@ -5365,8 +5558,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee AlignedCalleePtr = CalleePtr; } - llvm::Value *CalleePrefixStruct = Builder.CreateBitCast( - AlignedCalleePtr, llvm::PointerType::getUnqual(PrefixStructTy)); + llvm::Value *CalleePrefixStruct = AlignedCalleePtr; llvm::Value *CalleeSigPtr = Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 0); llvm::Value *CalleeSig = @@ -5413,9 +5605,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CalleePtr = Callee.getFunctionPointer(); - llvm::Value *CastedCallee = Builder.CreateBitCast(CalleePtr, Int8PtrTy); llvm::Value *TypeTest = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId}); + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CalleePtr, TypeId}); auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); llvm::Constant *StaticData[] = { @@ -5425,18 +5616,17 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee }; if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId, - CastedCallee, StaticData); + CalleePtr, StaticData); } else { EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall), SanitizerHandler::CFICheckFail, StaticData, - {CastedCallee, llvm::UndefValue::get(IntPtrTy)}); + {CalleePtr, llvm::UndefValue::get(IntPtrTy)}); } } CallArgList Args; if (Chain) - Args.add(RValue::get(Builder.CreateBitCast(Chain, 
CGM.VoidPtrTy)), - CGM.getContext().VoidPtrTy); + Args.add(RValue::get(Chain), CGM.getContext().VoidPtrTy); // C++17 requires that we evaluate arguments to a call using assignment syntax // right-to-left, and that we evaluate arguments to certain other operators @@ -5507,10 +5697,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee isa<CUDAKernelCallExpr>(E) && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { llvm::Value *Handle = Callee.getFunctionPointer(); - auto *Cast = - Builder.CreateBitCast(Handle, Handle->getType()->getPointerTo()); auto *Stub = Builder.CreateLoad( - Address(Cast, Handle->getType(), CGM.getPointerAlign())); + Address(Handle, Handle->getType(), CGM.getPointerAlign())); Callee.setFunctionPointer(Stub); } llvm::CallBase *CallOrInvoke = nullptr; diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 4d3f3e9603d9..98ae56e2df88 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -41,7 +41,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD, assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE)); - assert(MD->isInstance() && + assert(MD->isImplicitObjectMemberFunction() && "Trying to emit a member or operator call expr on a static method!"); // Push the this ptr. @@ -66,7 +66,12 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD, Args.addFrom(*RtlArgs); } else if (CE) { // Special case: skip first argument of CXXOperatorCall (it is "this"). - unsigned ArgsToSkip = isa<CXXOperatorCallExpr>(CE) ? 
1 : 0; + unsigned ArgsToSkip = 0; + if (const auto *Op = dyn_cast<CXXOperatorCallExpr>(CE)) { + if (const auto *M = dyn_cast<CXXMethodDecl>(Op->getCalleeDecl())) + ArgsToSkip = + static_cast<unsigned>(!M->isExplicitObjectMemberFunction()); + } CGF.EmitCallArgs(Args, FPT, drop_begin(CE->arguments(), ArgsToSkip), CE->getDirectCallee()); } else { @@ -484,7 +489,7 @@ RValue CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue) { - assert(MD->isInstance() && + assert(MD->isImplicitObjectMemberFunction() && "Trying to emit a member call expr on a static method!"); return EmitCXXMemberOrOperatorMemberCallExpr( E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr, @@ -595,12 +600,12 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, // already zeroed. if (E->requiresZeroInitialization() && !Dest.isZeroed()) { switch (E->getConstructionKind()) { - case CXXConstructExpr::CK_Delegating: - case CXXConstructExpr::CK_Complete: + case CXXConstructionKind::Delegating: + case CXXConstructionKind::Complete: EmitNullInitialization(Dest.getAddress(), E->getType()); break; - case CXXConstructExpr::CK_VirtualBase: - case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructionKind::VirtualBase: + case CXXConstructionKind::NonVirtualBase: EmitNullBaseClassInitialization(*this, Dest.getAddress(), CD->getParent()); break; @@ -636,21 +641,21 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, bool Delegating = false; switch (E->getConstructionKind()) { - case CXXConstructExpr::CK_Delegating: + case CXXConstructionKind::Delegating: // We should be emitting a constructor; GlobalDecl will assert this Type = CurGD.getCtorType(); Delegating = true; break; - case CXXConstructExpr::CK_Complete: + case CXXConstructionKind::Complete: Type = Ctor_Complete; break; - case CXXConstructExpr::CK_VirtualBase: + case CXXConstructionKind::VirtualBase: ForVirtualBase = true; 
[[fallthrough]]; - case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructionKind::NonVirtualBase: Type = Ctor_Base; } @@ -1101,9 +1106,7 @@ void CodeGenFunction::EmitNewArrayInitializer( // element. TODO: some of these stores can be trivially // observed to be unnecessary. if (EndOfInit.isValid()) { - auto FinishedPtr = - Builder.CreateBitCast(CurPtr.getPointer(), BeginPtr.getType()); - Builder.CreateStore(FinishedPtr, EndOfInit); + Builder.CreateStore(CurPtr.getPointer(), EndOfInit); } // FIXME: If the last initializer is an incomplete initializer list for // an array, and we have an array filler, we can fold together the two @@ -2195,11 +2198,19 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); + LangAS GlobAS = CGM.GetGlobalVarAddressSpace(nullptr); + + auto MaybeASCast = [=](auto &&TypeInfo) { + if (GlobAS == LangAS::Default) + return TypeInfo; + return getTargetHooks().performAddrSpaceCast(CGM,TypeInfo, GlobAS, + LangAS::Default, PtrTy); + }; if (E->isTypeOperand()) { llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(E->getTypeOperand(getContext())); - return TypeInfo; + return MaybeASCast(TypeInfo); } // C++ [expr.typeid]p2: @@ -2212,7 +2223,7 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { return EmitTypeidFromVTable(*this, E->getExprOperand(), PtrTy); QualType OperandTy = E->getExprOperand()->getType(); - return CGM.GetAddrOfRTTIDescriptor(OperandTy); + return MaybeASCast(CGM.GetAddrOfRTTIDescriptor(OperandTy)); } static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 2dd1a991ec97..f3cbd1d0451e 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -177,11 +177,15 @@ public: ComplexPairTy 
VisitImplicitCastExpr(ImplicitCastExpr *E) { // Unlike for scalars, we don't have to worry about function->ptr demotion // here. + if (E->changesVolatileQualification()) + return EmitLoadOfLValue(E); return EmitCast(E->getCastKind(), E->getSubExpr(), E->getType()); } ComplexPairTy VisitCastExpr(CastExpr *E) { if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E)) CGF.CGM.EmitExplicitCastExprType(ECE, &CGF); + if (E->changesVolatileQualification()) + return EmitLoadOfLValue(E); return EmitCast(E->getCastKind(), E->getSubExpr(), E->getType()); } ComplexPairTy VisitCallExpr(const CallExpr *E); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 353ee56839f3..604e3958161d 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -25,6 +25,7 @@ #include "clang/Basic/Builtins.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -934,7 +935,7 @@ tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter, auto GV = new llvm::GlobalVariable( CGM.getModule(), C->getType(), - CGM.isTypeConstant(E->getType(), true, false), + E->getType().isConstantStorage(CGM.getContext(), true, false), llvm::GlobalValue::InternalLinkage, C, ".compoundliteral", nullptr, llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(addressSpace)); @@ -1127,9 +1128,36 @@ public: case CK_ConstructorConversion: return Visit(subExpr, destType); + case CK_ArrayToPointerDecay: + if (const auto *S = dyn_cast<StringLiteral>(subExpr)) + return CGM.GetAddrOfConstantStringFromLiteral(S).getPointer(); + return nullptr; + case CK_NullToPointer: + if (Visit(subExpr, destType)) + return CGM.EmitNullConstant(destType); + return nullptr; + case CK_IntToOCLSampler: llvm_unreachable("global sampler variables are not generated"); + case CK_IntegralCast: { + QualType FromType = 
subExpr->getType(); + // See also HandleIntToIntCast in ExprConstant.cpp + if (FromType->isIntegerType()) + if (llvm::Constant *C = Visit(subExpr, FromType)) + if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) { + unsigned SrcWidth = CGM.getContext().getIntWidth(FromType); + unsigned DstWidth = CGM.getContext().getIntWidth(destType); + if (DstWidth == SrcWidth) + return CI; + llvm::APInt A = FromType->isSignedIntegerType() + ? CI->getValue().sextOrTrunc(DstWidth) + : CI->getValue().zextOrTrunc(DstWidth); + return llvm::ConstantInt::get(CGM.getLLVMContext(), A); + } + return nullptr; + } + case CK_Dependent: llvm_unreachable("saw dependent cast!"); case CK_BuiltinFnToFnPtr: @@ -1164,7 +1192,6 @@ public: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: - case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_BaseToDerived: case CK_DerivedToBase: @@ -1183,8 +1210,6 @@ public: case CK_IntegralComplexToFloatingComplex: case CK_PointerToIntegral: case CK_PointerToBoolean: - case CK_NullToPointer: - case CK_IntegralCast: case CK_BooleanToSignedIntegral: case CK_IntegralToPointer: case CK_IntegralToBoolean: @@ -1215,6 +1240,10 @@ public: return Visit(E->getSubExpr(), T); } + llvm::Constant *VisitIntegerLiteral(IntegerLiteral *I, QualType T) { + return llvm::ConstantInt::get(CGM.getLLVMContext(), I->getValue()); + } + llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType()); assert(CAT && "can't emit array init for non-constant-bound array"); @@ -1352,6 +1381,13 @@ public: return Visit(E->getSubExpr(), T); } + llvm::Constant *VisitUnaryMinus(UnaryOperator *U, QualType T) { + if (llvm::Constant *C = Visit(U->getSubExpr(), T)) + if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) + return llvm::ConstantInt::get(CGM.getLLVMContext(), -CI->getValue()); + return nullptr; + } + // Utility methods llvm::Type *ConvertType(QualType 
T) { return CGM.getTypes().ConvertType(T); @@ -1594,13 +1630,8 @@ namespace { IndexValues[i] = llvm::ConstantInt::get(CGM.Int32Ty, Indices[i]); } - // Form a GEP and then bitcast to the placeholder type so that the - // replacement will succeed. - llvm::Constant *location = - llvm::ConstantExpr::getInBoundsGetElementPtr(BaseValueTy, - Base, IndexValues); - location = llvm::ConstantExpr::getBitCast(location, - placeholder->getType()); + llvm::Constant *location = llvm::ConstantExpr::getInBoundsGetElementPtr( + BaseValueTy, Base, IndexValues); Locations.insert({placeholder, location}); } @@ -1726,7 +1757,10 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, // Zero-extend bool. if (C->getType()->isIntegerTy(1) && !destType->isBitIntType()) { llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType); - return llvm::ConstantExpr::getZExt(C, boolTy); + llvm::Constant *Res = llvm::ConstantFoldCastOperand( + llvm::Instruction::ZExt, C, boolTy, CGM.getDataLayout()); + assert(Res && "Constant folding must succeed"); + return Res; } return C; @@ -1736,9 +1770,10 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, QualType destType) { assert(!destType->isVoidType() && "can't emit a void constant"); - if (llvm::Constant *C = - ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType)) - return C; + if (!destType->isReferenceType()) + if (llvm::Constant *C = + ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType)) + return C; Expr::EvalResult Result; @@ -1826,10 +1861,7 @@ private: if (!hasNonZeroOffset()) return C; - llvm::Type *origPtrTy = C->getType(); - C = llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset()); - C = llvm::ConstantExpr::getPointerCast(C, origPtrTy); - return C; + return llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset()); } }; @@ -1890,8 +1922,9 @@ ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) { // FIXME: signedness depends on the original integer type. 
auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); llvm::Constant *C; - C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, - /*isSigned*/ false); + C = llvm::ConstantFoldIntegerCast(getOffset(), intptrTy, /*isSigned*/ false, + CGM.getDataLayout()); + assert(C && "Must have folded, as Offset is a ConstantInt"); C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); return C; } @@ -1918,7 +1951,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { if (VD->isLocalVarDecl()) { return CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD)); } } } @@ -1996,8 +2029,6 @@ ConstantLValue ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) { assert(Emitter.CGF && "Invalid address of label expression outside function"); llvm::Constant *Ptr = Emitter.CGF->GetAddrOfLabel(E->getLabel()); - Ptr = llvm::ConstantExpr::getBitCast(Ptr, - CGM.getTypes().ConvertType(E->getType())); return Ptr; } @@ -2112,6 +2143,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, Inits[I] = llvm::ConstantInt::get(CGM.getLLVMContext(), Elt.getInt()); else if (Elt.isFloat()) Inits[I] = llvm::ConstantFP::get(CGM.getLLVMContext(), Elt.getFloat()); + else if (Elt.isIndeterminate()) + Inits[I] = llvm::UndefValue::get(CGM.getTypes().ConvertType( + DestType->castAs<VectorType>()->getElementType())); else llvm_unreachable("unsupported vector element type"); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index fe1a59b21f38..41ad2ddac30d 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1798,7 +1798,7 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { // careful, because the base of a vector subscript is occasionally an rvalue, // so we can't get it as an lvalue. 
if (!E->getBase()->getType()->isVectorType() && - !E->getBase()->getType()->isVLSTBuiltinType()) + !E->getBase()->getType()->isSveVLSBuiltinType()) return EmitLoadOfLValue(E); // Handle the vector case. The base must be a vector, the index must be an @@ -2084,11 +2084,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Value *Src = Visit(const_cast<Expr*>(E)); llvm::Type *SrcTy = Src->getType(); llvm::Type *DstTy = ConvertType(DestTy); - if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() && - SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) { - llvm_unreachable("wrong cast for pointers in different address spaces" - "(must be an address space cast)!"); - } + assert( + (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() || + SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) && + "Address-space cast must be used to convert address spaces"); if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { if (auto *PT = DestTy->getAs<PointerType>()) { @@ -2225,16 +2224,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return Visit(const_cast<Expr*>(E)); case CK_NoOp: { - llvm::Value *V = Visit(const_cast<Expr *>(E)); - if (V) { - // CK_NoOp can model a pointer qualification conversion, which can remove - // an array bound and change the IR type. - // FIXME: Once pointee types are removed from IR, remove this. - llvm::Type *T = ConvertType(DestTy); - if (T != V->getType()) - V = Builder.CreateBitCast(V, T); - } - return V; + return CE->changesVolatileQualification() ? 
EmitLoadOfLValue(CE) + : Visit(const_cast<Expr *>(E)); } case CK_BaseToDerived: { @@ -2580,7 +2571,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement return Builder.CreateAtomicRMW( - llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True, + llvm::AtomicRMWInst::Xchg, LV.getAddress(CGF), True, llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit @@ -2598,7 +2589,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); llvm::Value *old = - Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt, + Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } @@ -2764,8 +2755,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, amt = llvm::ConstantFP::get(VMContext, llvm::APFloat(static_cast<double>(amount))); else { - // Remaining types are Half, LongDouble, __ibm128 or __float128. Convert - // from float. + // Remaining types are Half, Bfloat16, LongDouble, __ibm128 or __float128. + // Convert from float. 
llvm::APFloat F(static_cast<float>(amount)); bool ignored; const llvm::fltSemantics *FS; @@ -2775,6 +2766,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, FS = &CGF.getTarget().getFloat128Format(); else if (value->getType()->isHalfTy()) FS = &CGF.getTarget().getHalfFormat(); + else if (value->getType()->isBFloatTy()) + FS = &CGF.getTarget().getBFloat16Format(); else if (value->getType()->isPPC_FP128Ty()) FS = &CGF.getTarget().getIbm128Format(); else @@ -2928,7 +2921,7 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { // Perform vector logical not on comparison with zero vector. if (E->getType()->isVectorType() && E->getType()->castAs<VectorType>()->getVectorKind() == - VectorType::GenericVector) { + VectorKind::Generic) { Value *Oper = Visit(E->getSubExpr()); Value *Zero = llvm::Constant::getNullValue(Oper->getType()); Value *Result; @@ -3050,9 +3043,10 @@ Value * ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( const UnaryExprOrTypeTraitExpr *E) { QualType TypeToSize = E->getTypeOfArgument(); - if (E->getKind() == UETT_SizeOf) { + if (auto Kind = E->getKind(); + Kind == UETT_SizeOf || Kind == UETT_DataSizeOf) { if (const VariableArrayType *VAT = - CGF.getContext().getAsVariableArrayType(TypeToSize)) { + CGF.getContext().getAsVariableArrayType(TypeToSize)) { if (E->isArgumentType()) { // sizeof(type) - make sure to emit the VLA size. 
CGF.EmitVariablyModifiedType(TypeToSize); @@ -3079,6 +3073,9 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( E->getTypeOfArgument()->getPointeeType())) .getQuantity(); return llvm::ConstantInt::get(CGF.SizeTy, Alignment); + } else if (E->getKind() == UETT_VectorElements) { + auto *VecTy = cast<llvm::VectorType>(ConvertType(E->getTypeOfArgument())); + return Builder.CreateElementCount(CGF.SizeTy, VecTy->getElementCount()); } // If this isn't sizeof(vla), the result must be constant; use the constant @@ -3317,7 +3314,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( E->getExprLoc()), LHSTy); Value *OldVal = Builder.CreateAtomicRMW( - AtomicOp, LHSLV.getPointer(CGF), Amt, + AtomicOp, LHSLV.getAddress(CGF), Amt, llvm::AtomicOrdering::SequentiallyConsistent); // Since operation is atomic, the result type is guaranteed to be the @@ -3688,8 +3685,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, index = CGF.Builder.CreateMul(index, objectSize); - Value *result = CGF.Builder.CreateBitCast(pointer, CGF.VoidPtrTy); - result = CGF.Builder.CreateGEP(CGF.Int8Ty, result, index, "add.ptr"); + Value *result = + CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr"); return CGF.Builder.CreateBitCast(result, pointer->getType()); } @@ -3719,10 +3716,12 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // Explicitly handle GNU void* and function pointer arithmetic extensions. The // GNU void* casts amount to no-ops since our void* type is i8*, but this is // future proof. 
+ llvm::Type *elemTy; if (elementType->isVoidType() || elementType->isFunctionType()) - return CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr"); + elemTy = CGF.Int8Ty; + else + elemTy = CGF.ConvertTypeForMem(elementType); - llvm::Type *elemTy = CGF.ConvertTypeForMem(elementType); if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr"); @@ -3872,6 +3871,14 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { } } + // For vector and matrix adds, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. + if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -3885,10 +3892,6 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. - if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) - return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); } @@ -4022,6 +4025,14 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { } } + // For vector and matrix subs, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. + if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -4035,9 +4046,6 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. 
- if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) - return FMulAdd; return Builder.CreateFSub(op.LHS, op.RHS, "sub"); } @@ -4856,7 +4864,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { } if (condExpr->getType()->isVectorType() || - condExpr->getType()->isVLSTBuiltinType()) { + condExpr->getType()->isSveVLSBuiltinType()) { CGF.incrementProfileCounter(E); llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index 75fb06de9384..e465789a003e 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -23,8 +23,8 @@ using namespace CodeGen; namespace { llvm::Function *GetVprintfDeclaration(llvm::Module &M) { - llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), - llvm::Type::getInt8PtrTy(M.getContext())}; + llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), + llvm::PointerType::getUnqual(M.getContext())}; llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); @@ -45,8 +45,8 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) { llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { const char *Name = "__llvm_omp_vprintf"; llvm::Module &M = CGM.getModule(); - llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), - llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), + llvm::PointerType::getUnqual(M.getContext()), llvm::Type::getInt32Ty(M.getContext())}; llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); @@ -99,8 +99,9 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { // Construct and fill the args buffer that we'll pass to vprintf. 
if (Args.size() <= 1) { // If there are no args, pass a null pointer and size 0 - llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); - return {BufferPtr, llvm::TypeSize::Fixed(0)}; + llvm::Value *BufferPtr = + llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)); + return {BufferPtr, llvm::TypeSize::getFixed(0)}; } else { llvm::SmallVector<llvm::Type *, 8> ArgTypes; for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) @@ -120,7 +121,7 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); } llvm::Value *BufferPtr = - Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + Builder.CreatePointerCast(Alloca, llvm::PointerType::getUnqual(Ctx)); return {BufferPtr, DL.getTypeAllocSize(AllocaTy)}; } } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index e9fa273f21cc..c239bc17ef26 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -223,56 +223,6 @@ void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV, ResourceMD->addOperand(Res.getMetadata()); } -static llvm::hlsl::ResourceKind -castResourceShapeToResourceKind(HLSLResourceAttr::ResourceKind RK) { - switch (RK) { - case HLSLResourceAttr::ResourceKind::Texture1D: - return llvm::hlsl::ResourceKind::Texture1D; - case HLSLResourceAttr::ResourceKind::Texture2D: - return llvm::hlsl::ResourceKind::Texture2D; - case HLSLResourceAttr::ResourceKind::Texture2DMS: - return llvm::hlsl::ResourceKind::Texture2DMS; - case HLSLResourceAttr::ResourceKind::Texture3D: - return llvm::hlsl::ResourceKind::Texture3D; - case HLSLResourceAttr::ResourceKind::TextureCube: - return llvm::hlsl::ResourceKind::TextureCube; - case HLSLResourceAttr::ResourceKind::Texture1DArray: - return llvm::hlsl::ResourceKind::Texture1DArray; - case HLSLResourceAttr::ResourceKind::Texture2DArray: - 
return llvm::hlsl::ResourceKind::Texture2DArray; - case HLSLResourceAttr::ResourceKind::Texture2DMSArray: - return llvm::hlsl::ResourceKind::Texture2DMSArray; - case HLSLResourceAttr::ResourceKind::TextureCubeArray: - return llvm::hlsl::ResourceKind::TextureCubeArray; - case HLSLResourceAttr::ResourceKind::TypedBuffer: - return llvm::hlsl::ResourceKind::TypedBuffer; - case HLSLResourceAttr::ResourceKind::RawBuffer: - return llvm::hlsl::ResourceKind::RawBuffer; - case HLSLResourceAttr::ResourceKind::StructuredBuffer: - return llvm::hlsl::ResourceKind::StructuredBuffer; - case HLSLResourceAttr::ResourceKind::CBufferKind: - return llvm::hlsl::ResourceKind::CBuffer; - case HLSLResourceAttr::ResourceKind::SamplerKind: - return llvm::hlsl::ResourceKind::Sampler; - case HLSLResourceAttr::ResourceKind::TBuffer: - return llvm::hlsl::ResourceKind::TBuffer; - case HLSLResourceAttr::ResourceKind::RTAccelerationStructure: - return llvm::hlsl::ResourceKind::RTAccelerationStructure; - case HLSLResourceAttr::ResourceKind::FeedbackTexture2D: - return llvm::hlsl::ResourceKind::FeedbackTexture2D; - case HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray: - return llvm::hlsl::ResourceKind::FeedbackTexture2DArray; - } - // Make sure to update HLSLResourceAttr::ResourceKind when add new Kind to - // hlsl::ResourceKind. Assume FeedbackTexture2DArray is the last enum for - // HLSLResourceAttr::ResourceKind. 
- static_assert( - static_cast<uint32_t>( - HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray) == - (static_cast<uint32_t>(llvm::hlsl::ResourceKind::NumEntries) - 2)); - llvm_unreachable("all switch cases should be covered"); -} - void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { const Type *Ty = D->getType()->getPointeeOrArrayElementType(); if (!Ty) @@ -284,15 +234,12 @@ void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { if (!Attr) return; - HLSLResourceAttr::ResourceClass RC = Attr->getResourceType(); - llvm::hlsl::ResourceKind RK = - castResourceShapeToResourceKind(Attr->getResourceShape()); + llvm::hlsl::ResourceClass RC = Attr->getResourceClass(); + llvm::hlsl::ResourceKind RK = Attr->getResourceKind(); QualType QT(Ty, 0); BufferResBinding Binding(D->getAttr<HLSLResourceBindingAttr>()); - addBufferResourceAnnotation(GV, QT.getAsString(), - static_cast<llvm::hlsl::ResourceClass>(RC), RK, - Binding); + addBufferResourceAnnotation(GV, QT.getAsString(), RC, RK, Binding); } CGHLSLRuntime::BufferResBinding::BufferResBinding( diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index e5d9db273c2d..0d4800b90a2f 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -440,6 +440,14 @@ MDNode *LoopInfo::createMetadata( Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); } + // Setting clang::code_align attribute. 
+ if (Attrs.CodeAlign > 0) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.align"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.CodeAlign))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), AdditionalLoopProperties.end()); return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); @@ -453,7 +461,7 @@ LoopAttributes::LoopAttributes(bool IsParallel) VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), - PipelineInitiationInterval(0), MustProgress(false) {} + PipelineInitiationInterval(0), CodeAlign(0), MustProgress(false) {} void LoopAttributes::clear() { IsParallel = false; @@ -469,6 +477,7 @@ void LoopAttributes::clear() { DistributeEnable = LoopAttributes::Unspecified; PipelineDisabled = false; PipelineInitiationInterval = 0; + CodeAlign = 0; MustProgress = false; } @@ -493,8 +502,8 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc && !Attrs.MustProgress) + Attrs.DistributeEnable == LoopAttributes::Unspecified && + Attrs.CodeAlign == 0 && !StartLoc && !EndLoc && !Attrs.MustProgress) return; TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt); @@ -788,6 +797,15 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, } } + // Identify loop attribute 'code_align' from Attrs. + // For attribute code_align: + // n - 'llvm.loop.align i32 n' metadata will be emitted. 
+ if (const auto *CodeAlign = getSpecificAttr<const CodeAlignAttr>(Attrs)) { + const auto *CE = cast<ConstantExpr>(CodeAlign->getAlignment()); + llvm::APSInt ArgVal = CE->getResultAsAPSInt(); + setCodeAlign(ArgVal.getSExtValue()); + } + setMustProgress(MustProgress); if (CGOpts.OptimizationLevel > 0) diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 856e892f712e..a1c8c7e5307f 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -79,6 +79,9 @@ struct LoopAttributes { /// Value for llvm.loop.pipeline.iicount metadata. unsigned PipelineInitiationInterval; + /// Value for 'llvm.loop.align' metadata. + unsigned CodeAlign; + /// Value for whether the loop is required to make progress. bool MustProgress; }; @@ -282,6 +285,9 @@ public: StagedAttrs.PipelineInitiationInterval = C; } + /// Set value of code align for the next loop pushed. + void setCodeAlign(unsigned C) { StagedAttrs.CodeAlign = C; } + /// Set no progress for the next loop pushed. 
void setMustProgress(bool P) { StagedAttrs.MustProgress = P; } diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp index 3d2b1b8b2f78..75c1d7fbea84 100644 --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -313,7 +313,7 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, for (unsigned I = 0; I < N; ++I) Params.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy, - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); llvm::append_range(Args, Params); @@ -367,8 +367,6 @@ template <class Derived> struct GenFuncBase { CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts); llvm::Value *DstArrayEnd = CGF.Builder.CreateInBoundsGEP( CGF.Int8Ty, DstAddr.getPointer(), SizeInBytes); - DstArrayEnd = CGF.Builder.CreateBitCast( - DstArrayEnd, CGF.CGM.Int8PtrPtrTy, "dstarray.end"); llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock(); // Create the header block and insert the phi instructions. diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 46c37eaea82b..acc85165a470 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -52,8 +52,7 @@ llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E) { llvm::Constant *C = CGM.getObjCRuntime().GenerateConstantString(E->getString()).getPointer(); - // FIXME: This bitcast should just be made an invariant on the Runtime. 
- return llvm::ConstantExpr::getBitCast(C, ConvertType(E->getType())); + return C; } /// EmitObjCBoxedExpr - This routine generates code to call @@ -149,9 +148,9 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()), NumElements); QualType ElementType = Context.getObjCIdType().withConst(); - QualType ElementArrayType - = Context.getConstantArrayType(ElementType, APNumElements, nullptr, - ArrayType::Normal, /*IndexTypeQuals=*/0); + QualType ElementArrayType = Context.getConstantArrayType( + ElementType, APNumElements, nullptr, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0); // Allocate the temporary array(s). Address Objects = CreateMemTemp(ElementArrayType, "objects"); @@ -222,6 +221,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, QualType ResultType = E->getType(); const ObjCObjectPointerType *InterfacePointerType = ResultType->getAsObjCInterfacePointerType(); + assert(InterfacePointerType && "Unexpected InterfacePointerType - null"); ObjCInterfaceDecl *Class = InterfacePointerType->getObjectType()->getInterface(); CGObjCRuntime &Runtime = CGM.getObjCRuntime(); @@ -827,11 +827,8 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, // sizeof (Type of Ivar), isAtomic, false); CallArgList args; - llvm::Value *dest = - CGF.Builder.CreateBitCast(CGF.ReturnValue.getPointer(), CGF.VoidPtrTy); + llvm::Value *dest = CGF.ReturnValue.getPointer(); args.add(RValue::get(dest), Context.VoidPtrTy); - - src = CGF.Builder.CreateBitCast(src, CGF.VoidPtrTy); args.add(RValue::get(src), Context.VoidPtrTy); CharUnits size = CGF.getContext().getTypeSizeInChars(ivar->getType()); @@ -1098,7 +1095,6 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, llvm::Value *ivarAddr = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) .getPointer(CGF); - ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); 
args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); // Third argument is the helper function. @@ -1340,7 +1336,6 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); - argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); // The third argument is the sizeof the type. @@ -1377,7 +1372,6 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, llvm::Value *ivarAddr = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) .getPointer(CGF); - ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); // The second argument is the address of the parameter variable. @@ -1386,7 +1380,6 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); - argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); // Third argument is the helper function. @@ -1800,10 +1793,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ Selector FastEnumSel = CGM.getContext().Selectors.getSelector(std::size(II), &II[0]); - QualType ItemsTy = - getContext().getConstantArrayType(getContext().getObjCIdType(), - llvm::APInt(32, NumItems), nullptr, - ArrayType::Normal, 0); + QualType ItemsTy = getContext().getConstantArrayType( + getContext().getObjCIdType(), llvm::APInt(32, NumItems), nullptr, + ArraySizeModifier::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); // Emit the collection pointer. In ARC, we do a retain. 
@@ -3686,7 +3678,6 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { /* constraints */ "r", /* side effects */ true); - object = Builder.CreateBitCast(object, VoidPtrTy); EmitNounwindRuntimeCall(extender, object); } @@ -3710,7 +3701,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CharUnits Alignment = C.getTypeAlignInChars(Ty); llvm::Constant *Fn = getNonTrivialCStructMoveAssignmentOperator( CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } if (!getLangOpts().CPlusPlus || @@ -3790,7 +3781,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( EmitStmt(TheCall); FinishFunction(); - HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + HelperFn = Fn; CGM.setAtomicSetterHelperFnMap(Ty, HelperFn); return HelperFn; } @@ -3808,7 +3799,7 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( CharUnits Alignment = C.getTypeAlignInChars(Ty); llvm::Constant *Fn = getNonTrivialCStructCopyConstructor( CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty); - return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + return Fn; } if (!getLangOpts().CPlusPlus || @@ -3909,7 +3900,7 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); FinishFunction(); - HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); + HelperFn = Fn; CGM.setAtomicGetterHelperFnMap(Ty, HelperFn); return HelperFn; } @@ -3953,7 +3944,7 @@ static unsigned getBaseMachOPlatformID(const llvm::Triple &TT) { case llvm::Triple::DriverKit: return llvm::MachO::PLATFORM_DRIVERKIT; default: - return /*Unknown platform*/ 0; + return llvm::MachO::PLATFORM_UNKNOWN; } } diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 09b6c3ac6adf..4ca1a8cce64d 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1014,8 +1014,7 
@@ class CGObjCGNUstep2 : public CGObjCGNUstep { if (CGM.getTriple().isOSBinFormatCOFF()) { cast<llvm::GlobalValue>(isa)->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } - } else if (isa->getType() != PtrToIdTy) - isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); + } // struct // { @@ -1108,10 +1107,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { std::pair<llvm::GlobalVariable*, int> v{ObjCStrGV, 0}; EarlyInitList.emplace_back(Sym, v); } - llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy); - ObjCStrings[Str] = ObjCStr; - ConstantStrings.push_back(ObjCStr); - return ConstantAddress(ObjCStr, IdElemTy, Align); + ObjCStrings[Str] = ObjCStrGV; + ConstantStrings.push_back(ObjCStrGV); + return ConstantAddress(ObjCStrGV, IdElemTy, Align); } void PushProperty(ConstantArrayBuilder &PropertiesArray, @@ -1193,9 +1191,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ReferencedProtocols.end()); SmallVector<llvm::Constant *, 16> Protocols; for (const auto *PI : RuntimeProtocols) - Protocols.push_back( - llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), - ProtocolPtrTy)); + Protocols.push_back(GenerateProtocolRef(PI)); return GenerateProtocolList(Protocols); } @@ -1305,7 +1301,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::GlobalValue::ExternalLinkage, nullptr, Name); GV->setAlignment(CGM.getPointerAlign().getAsAlign()); } - return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy); + return GV; } /// Existing protocol references. @@ -1322,9 +1318,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { std::string RefName = SymbolForProtocolRef(Name); assert(!TheModule.getGlobalVariable(RefName)); // Emit a reference symbol. 
- auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, - false, llvm::GlobalValue::LinkOnceODRLinkage, - llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); + auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, false, + llvm::GlobalValue::LinkOnceODRLinkage, + Protocol, RefName); GV->setComdat(TheModule.getOrInsertComdat(RefName)); GV->setSection(sectionName<ProtocolReferenceSection>()); GV->setAlignment(CGM.getPointerAlign().getAsAlign()); @@ -1381,9 +1377,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto RuntimeProtocols = GetRuntimeProtocolList(PD->protocol_begin(), PD->protocol_end()); for (const auto *PI : RuntimeProtocols) - Protocols.push_back( - llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), - ProtocolPtrTy)); + Protocols.push_back(GenerateProtocolRef(PI)); llvm::Constant *ProtocolList = GenerateProtocolList(Protocols); // Collect information about methods @@ -1420,19 +1414,13 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { GV->setSection(sectionName<ProtocolSection>()); GV->setComdat(TheModule.getOrInsertComdat(SymName)); if (OldGV) { - OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV, - OldGV->getType())); + OldGV->replaceAllUsesWith(GV); OldGV->removeFromParent(); GV->setName(SymName); } Protocol = GV; return GV; } - llvm::Constant *EnforceType(llvm::Constant *Val, llvm::Type *Ty) { - if (Val->getType() == Ty) - return Val; - return llvm::ConstantExpr::getBitCast(Val, Ty); - } llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding) override { return GetConstantSelector(Sel, TypeEncoding); @@ -1469,7 +1457,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto SelVarName = (StringRef(".objc_selector_") + Sel.getAsString() + "_" + MangledTypes).str(); if (auto *GV = TheModule.getNamedGlobal(SelVarName)) - return EnforceType(GV, SelectorTy); + return GV; ConstantInitBuilder builder(CGM); auto SelBuilder = builder.beginStruct(); 
SelBuilder.add(ExportUniqueString(Sel.getAsString(), ".objc_sel_name_", @@ -1480,8 +1468,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { GV->setComdat(TheModule.getOrInsertComdat(SelVarName)); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); GV->setSection(sectionName<SelectorSection>()); - auto *SelVal = EnforceType(GV, SelectorTy); - return SelVal; + return GV; } llvm::StructType *emptyStruct = nullptr; @@ -1738,9 +1725,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { SmallVector<ObjCMethodDecl*, 16> ClassMethods; ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(), OID->classmeth_end()); - metaclassFields.addBitCast( - GenerateMethodList(className, "", ClassMethods, true), - PtrTy); + metaclassFields.add( + GenerateMethodList(className, "", ClassMethods, true)); } // void *dtable; metaclassFields.addNullPointer(PtrTy); @@ -1791,7 +1777,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } } if (!IsCOFF) - classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + classFields.add(SuperClass); else classFields.addNullPointer(PtrTy); } else @@ -1907,9 +1893,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { if (InstanceMethods.size() == 0) classFields.addNullPointer(PtrTy); else - classFields.addBitCast( - GenerateMethodList(className, "", InstanceMethods, false), - PtrTy); + classFields.add( + GenerateMethodList(className, "", InstanceMethods, false)); + // void *dtable; classFields.addNullPointer(PtrTy); // IMP cxx_construct; @@ -1925,9 +1911,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { classDecl->protocol_end()); SmallVector<llvm::Constant *, 16> Protocols; for (const auto *I : RuntimeProtocols) - Protocols.push_back( - llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I), - ProtocolPtrTy)); + Protocols.push_back(GenerateProtocolRef(I)); + if (Protocols.empty()) classFields.addNullPointer(PtrTy); else @@ -1945,7 +1930,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto *classRefSymbol = GetClassVar(className); 
classRefSymbol->setSection(sectionName<ClassReferenceSection>()); - classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + classRefSymbol->setInitializer(classStruct); if (IsCOFF) { // we can't import a class struct. @@ -1966,22 +1951,19 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Resolve the class aliases, if they exist. // FIXME: Class pointer aliases shouldn't exist! if (ClassPtrAlias) { - ClassPtrAlias->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + ClassPtrAlias->replaceAllUsesWith(classStruct); ClassPtrAlias->eraseFromParent(); ClassPtrAlias = nullptr; } if (auto Placeholder = TheModule.getNamedGlobal(SymbolForClass(className))) if (Placeholder != classStruct) { - Placeholder->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(classStruct, Placeholder->getType())); + Placeholder->replaceAllUsesWith(classStruct); Placeholder->eraseFromParent(); classStruct->setName(SymbolForClass(className)); } if (MetaClassPtrAlias) { - MetaClassPtrAlias->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(metaclass, IdTy)); + MetaClassPtrAlias->replaceAllUsesWith(metaclass); MetaClassPtrAlias->eraseFromParent(); MetaClassPtrAlias = nullptr; } @@ -2422,7 +2404,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { false, llvm::GlobalValue::ExternalLinkage, nullptr, "__objc_id_type_info"); - return llvm::ConstantExpr::getBitCast(IDEHType, PtrToInt8Ty); + return IDEHType; } const ObjCObjectPointerType *PT = @@ -2436,9 +2418,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { std::string typeinfoName = "__objc_eh_typeinfo_" + className; // Return the existing typeinfo if it exists - llvm::Constant *typeinfo = TheModule.getGlobalVariable(typeinfoName); - if (typeinfo) - return llvm::ConstantExpr::getBitCast(typeinfo, PtrToInt8Ty); + if (llvm::Constant *typeinfo = TheModule.getGlobalVariable(typeinfoName)) + return typeinfo; // Otherwise create it. 
@@ -2453,9 +2434,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { nullptr, vtableName); } llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2); - auto *BVtable = llvm::ConstantExpr::getBitCast( - llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two), - PtrToInt8Ty); + auto *BVtable = + llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two); llvm::Constant *typeName = ExportUniqueString(className, "__objc_eh_typename_"); @@ -2469,7 +2449,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { CGM.getPointerAlign(), /*constant*/ false, llvm::GlobalValue::LinkOnceODRLinkage); - return llvm::ConstantExpr::getBitCast(TI, PtrToInt8Ty); + return TI; } /// Generate an NSConstantString object. @@ -2493,19 +2473,16 @@ ConstantAddress CGObjCGNU::GenerateConstantString(const StringLiteral *SL) { llvm::Constant *isa = TheModule.getNamedGlobal(Sym); if (!isa) - isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, - llvm::GlobalValue::ExternalWeakLinkage, nullptr, Sym); - else if (isa->getType() != PtrToIdTy) - isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); + isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */ false, + llvm::GlobalValue::ExternalWeakLinkage, + nullptr, Sym); ConstantInitBuilder Builder(CGM); auto Fields = Builder.beginStruct(); Fields.add(isa); Fields.add(MakeConstantString(Str)); Fields.addInt(IntTy, Str.size()); - llvm::Constant *ObjCStr = - Fields.finishAndCreateGlobal(".objc_str", Align); - ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStr, PtrToInt8Ty); + llvm::Constant *ObjCStr = Fields.finishAndCreateGlobal(".objc_str", Align); ObjCStrings[Str] = ObjCStr; ConstantStrings.push_back(ObjCStr); return ConstantAddress(ObjCStr, Int8Ty, Align); @@ -2909,14 +2886,14 @@ GenerateMethodList(StringRef ClassName, assert(FnPtr && "Can't generate metadata for method that doesn't exist"); auto Method = MethodArray.beginStruct(ObjCMethodTy); if (isV2ABI) { - 
Method.addBitCast(FnPtr, IMPTy); + Method.add(FnPtr); Method.add(GetConstantSelector(OMD->getSelector(), Context.getObjCEncodingForMethodDecl(OMD))); Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD, true))); } else { Method.add(MakeConstantString(OMD->getSelector().getAsString())); Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD))); - Method.addBitCast(FnPtr, IMPTy); + Method.add(FnPtr); } Method.finishAndAddTo(MethodArray); } @@ -3015,7 +2992,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( // Fill in the structure // isa - Elements.addBitCast(MetaClass, PtrToInt8Ty); + Elements.add(MetaClass); // super_class Elements.add(SuperClass); // name @@ -3044,7 +3021,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( // sibling_class Elements.add(NULLPtr); // protocols - Elements.addBitCast(Protocols, PtrTy); + Elements.add(Protocols); // gc_object_type Elements.add(NULLPtr); // abi_version @@ -3068,8 +3045,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( Elements.finishAndCreateGlobal(ClassSym, CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); if (ClassRef) { - ClassRef->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(Class, - ClassRef->getType())); + ClassRef->replaceAllUsesWith(Class); ClassRef->removeFromParent(); Class->setName(ClassSym); } @@ -3117,7 +3093,7 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) { } else { protocol = value->getValue(); } - Elements.addBitCast(protocol, PtrToInt8Ty); + Elements.add(protocol); } Elements.finishAndAddTo(ProtocolList); return ProtocolList.finishAndCreateGlobal(".objc_protocol_list", @@ -3144,7 +3120,6 @@ llvm::Constant * CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) { llvm::Constant *ProtocolList = GenerateProtocolList({}); llvm::Constant *MethodList = GenerateProtocolMethodList({}); - MethodList = llvm::ConstantExpr::getBitCast(MethodList, PtrToInt8Ty); // Protocols are objects containing lists of the methods 
implemented and // protocols adopted. ConstantInitBuilder Builder(CGM); @@ -3235,9 +3210,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { Elements.add(PropertyList); Elements.add(OptionalPropertyList); ExistingProtocols[ProtocolName] = - llvm::ConstantExpr::getBitCast( - Elements.finishAndCreateGlobal(".objc_protocol", CGM.getPointerAlign()), - IdTy); + Elements.finishAndCreateGlobal(".objc_protocol", CGM.getPointerAlign()); } void CGObjCGNU::GenerateProtocolHolderCategory() { // Collect information about instance methods @@ -3250,11 +3223,9 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { Elements.add(MakeConstantString(CategoryName)); Elements.add(MakeConstantString(ClassName)); // Instance method list - Elements.addBitCast(GenerateMethodList( - ClassName, CategoryName, {}, false), PtrTy); + Elements.add(GenerateMethodList(ClassName, CategoryName, {}, false)); // Class method list - Elements.addBitCast(GenerateMethodList( - ClassName, CategoryName, {}, true), PtrTy); + Elements.add(GenerateMethodList(ClassName, CategoryName, {}, true)); // Protocol list ConstantInitBuilder ProtocolListBuilder(CGM); @@ -3264,16 +3235,13 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { auto ProtocolElements = ProtocolList.beginArray(PtrTy); for (auto iter = ExistingProtocols.begin(), endIter = ExistingProtocols.end(); iter != endIter ; iter++) { - ProtocolElements.addBitCast(iter->getValue(), PtrTy); + ProtocolElements.add(iter->getValue()); } ProtocolElements.finishAndAddTo(ProtocolList); - Elements.addBitCast( - ProtocolList.finishAndCreateGlobal(".objc_protocol_list", - CGM.getPointerAlign()), - PtrTy); - Categories.push_back(llvm::ConstantExpr::getBitCast( - Elements.finishAndCreateGlobal("", CGM.getPointerAlign()), - PtrTy)); + Elements.add(ProtocolList.finishAndCreateGlobal(".objc_protocol_list", + CGM.getPointerAlign())); + Categories.push_back( + Elements.finishAndCreateGlobal("", CGM.getPointerAlign())); } /// Libobjc2 uses a bitfield 
representation where small(ish) bitfields are @@ -3348,38 +3316,35 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { SmallVector<ObjCMethodDecl*, 16> InstanceMethods; InstanceMethods.insert(InstanceMethods.begin(), OCD->instmeth_begin(), OCD->instmeth_end()); - Elements.addBitCast( - GenerateMethodList(ClassName, CategoryName, InstanceMethods, false), - PtrTy); + Elements.add( + GenerateMethodList(ClassName, CategoryName, InstanceMethods, false)); + // Class method list SmallVector<ObjCMethodDecl*, 16> ClassMethods; ClassMethods.insert(ClassMethods.begin(), OCD->classmeth_begin(), OCD->classmeth_end()); - Elements.addBitCast( - GenerateMethodList(ClassName, CategoryName, ClassMethods, true), - PtrTy); + Elements.add(GenerateMethodList(ClassName, CategoryName, ClassMethods, true)); + // Protocol list - Elements.addBitCast(GenerateCategoryProtocolList(CatDecl), PtrTy); + Elements.add(GenerateCategoryProtocolList(CatDecl)); if (isRuntime(ObjCRuntime::GNUstep, 2)) { const ObjCCategoryDecl *Category = Class->FindCategoryDeclaration(OCD->getIdentifier()); if (Category) { // Instance properties - Elements.addBitCast(GeneratePropertyList(OCD, Category, false), PtrTy); + Elements.add(GeneratePropertyList(OCD, Category, false)); // Class properties - Elements.addBitCast(GeneratePropertyList(OCD, Category, true), PtrTy); + Elements.add(GeneratePropertyList(OCD, Category, true)); } else { Elements.addNullPointer(PtrTy); Elements.addNullPointer(PtrTy); } } - Categories.push_back(llvm::ConstantExpr::getBitCast( - Elements.finishAndCreateGlobal( - std::string(".objc_category_")+ClassName+CategoryName, - CGM.getPointerAlign()), - PtrTy)); + Categories.push_back(Elements.finishAndCreateGlobal( + std::string(".objc_category_") + ClassName + CategoryName, + CGM.getPointerAlign())); } llvm::Constant *CGObjCGNU::GeneratePropertyList(const Decl *Container, @@ -3682,20 +3647,17 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Resolve the class 
aliases, if they exist. if (ClassPtrAlias) { - ClassPtrAlias->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(ClassStruct, IdTy)); + ClassPtrAlias->replaceAllUsesWith(ClassStruct); ClassPtrAlias->eraseFromParent(); ClassPtrAlias = nullptr; } if (MetaClassPtrAlias) { - MetaClassPtrAlias->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(MetaClassStruct, IdTy)); + MetaClassPtrAlias->replaceAllUsesWith(MetaClassStruct); MetaClassPtrAlias->eraseFromParent(); MetaClassPtrAlias = nullptr; } // Add class structure to list to be added to the symtab later - ClassStruct = llvm::ConstantExpr::getBitCast(ClassStruct, PtrToInt8Ty); Classes.push_back(ClassStruct); } @@ -3709,11 +3671,9 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { GenerateProtocolHolderCategory(); llvm::StructType *selStructTy = dyn_cast<llvm::StructType>(SelectorElemTy); - llvm::Type *selStructPtrTy = SelectorTy; if (!selStructTy) { selStructTy = llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty }); - selStructPtrTy = llvm::PointerType::getUnqual(selStructTy); } // Generate statics list: @@ -3744,7 +3704,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { statics = allStaticsArray.finishAndCreateGlobal(".objc_statics_ptr", CGM.getPointerAlign()); - statics = llvm::ConstantExpr::getBitCast(statics, PtrTy); } // Array of classes, categories, and constant objects. @@ -3807,9 +3766,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { // FIXME: We're generating redundant loads and stores here! llvm::Constant *selPtr = llvm::ConstantExpr::getGetElementPtr( selectorList->getValueType(), selectorList, idxs); - // If selectors are defined as an opaque type, cast the pointer to this - // type. 
- selPtr = llvm::ConstantExpr::getBitCast(selPtr, SelectorTy); selectorAliases[i]->replaceAllUsesWith(selPtr); selectorAliases[i]->eraseFromParent(); } @@ -3821,7 +3777,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { // Number of static selectors symtab.addInt(LongTy, selectorCount); - symtab.addBitCast(selectorList, selStructPtrTy); + symtab.add(selectorList); // Number of classes defined. symtab.addInt(CGM.Int16Ty, Classes.size()); @@ -3930,7 +3886,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::Constant *TheClass = TheModule.getGlobalVariable("_OBJC_CLASS_" + iter->first, true); if (TheClass) { - TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy); Builder.CreateCall(RegisterAlias, {TheClass, MakeConstantString(iter->second)}); } @@ -4123,9 +4078,9 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( // when linked against code which isn't (most of the time). llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name); if (!IvarOffsetPointer) - IvarOffsetPointer = new llvm::GlobalVariable(TheModule, - llvm::Type::getInt32PtrTy(VMContext), false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name); + IvarOffsetPointer = new llvm::GlobalVariable( + TheModule, llvm::PointerType::getUnqual(VMContext), false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name); return IvarOffsetPointer; } @@ -4169,10 +4124,11 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) return CGF.Builder.CreateZExtOrBitCast( CGF.Builder.CreateAlignedLoad( - Int32Ty, CGF.Builder.CreateAlignedLoad( - llvm::Type::getInt32PtrTy(VMContext), - ObjCIvarOffsetVariable(Interface, Ivar), - CGF.getPointerAlign(), "ivar"), + Int32Ty, + CGF.Builder.CreateAlignedLoad( + llvm::PointerType::getUnqual(VMContext), + ObjCIvarOffsetVariable(Interface, Ivar), + CGF.getPointerAlign(), "ivar"), CharUnits::fromQuantity(4)), PtrDiffTy); std::string name = "__objc_ivar_offset_value_" + diff 
--git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 32f4f411347a..ba52b23be018 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1713,8 +1713,8 @@ public: /// A helper class for performing the null-initialization of a return /// value. struct NullReturnState { - llvm::BasicBlock *NullBB; - NullReturnState() : NullBB(nullptr) {} + llvm::BasicBlock *NullBB = nullptr; + NullReturnState() = default; /// Perform a null-check of the given receiver. void init(CodeGenFunction &CGF, llvm::Value *receiver) { @@ -1958,9 +1958,8 @@ llvm::Constant *CGObjCMac::getNSConstantStringClassRef() { llvm::Type *PTy = llvm::ArrayType::get(CGM.IntTy, 0); auto GV = CGM.CreateRuntimeVariable(PTy, str); - auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo()); - ConstantStringClassRef = V; - return V; + ConstantStringClassRef = GV; + return GV; } llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() { @@ -1972,12 +1971,8 @@ llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() { StringClass.empty() ? "OBJC_CLASS_$_NSConstantString" : "OBJC_CLASS_$_" + StringClass; llvm::Constant *GV = GetClassGlobal(str, NotForDefinition); - - // Make sure the result is of the correct type. - auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo()); - - ConstantStringClassRef = V; - return V; + ConstantStringClassRef = GV; + return GV; } ConstantAddress @@ -1996,11 +1991,8 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { // If we don't already have it, construct the type for a constant NSString. 
if (!NSConstantStringType) { NSConstantStringType = - llvm::StructType::create({ - CGM.Int32Ty->getPointerTo(), - CGM.Int8PtrTy, - CGM.IntTy - }, "struct.__builtin_NSString"); + llvm::StructType::create({CGM.UnqualPtrTy, CGM.Int8PtrTy, CGM.IntTy}, + "struct.__builtin_NSString"); } ConstantInitBuilder Builder(CGM); @@ -2022,7 +2014,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. GV->setAlignment(llvm::Align(1)); - Fields.addBitCast(GV, CGM.Int8PtrTy); + Fields.add(GV); // String length. Fields.addInt(CGM.IntTy, StringLength); @@ -2969,8 +2961,7 @@ llvm::Value *CGObjCMac::GenerateProtocolRef(CodeGenFunction &CGF, // resolved. Investigate. Its also wasteful to look this up over and over. LazySymbols.insert(&CGM.getContext().Idents.get("Protocol")); - return llvm::ConstantExpr::getBitCast(GetProtocolRef(PD), - ObjCTypes.getExternalProtocolPtrTy()); + return GetProtocolRef(PD); } void CGObjCCommonMac::GenerateProtocol(const ObjCProtocolDecl *PD) { @@ -3190,7 +3181,7 @@ CGObjCMac::EmitProtocolList(Twine name, llvm::GlobalVariable *GV = CreateMetadataVar(name, values, section, CGM.getPointerAlign(), false); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListPtrTy); + return GV; } static void @@ -3298,7 +3289,7 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, llvm::GlobalVariable *GV = CreateMetadataVar(Name, values, Section, CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.PropertyListPtrTy); + return GV; } llvm::Constant * @@ -3319,7 +3310,7 @@ CGObjCCommonMac::EmitProtocolMethodTypes(Twine Name, llvm::GlobalVariable *GV = CreateMetadataVar(Name, Init, Section, CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.Int8PtrPtrTy); + return GV; } /* @@ -3329,7 +3320,7 @@ CGObjCCommonMac::EmitProtocolMethodTypes(Twine Name, 
struct _objc_method_list *instance_methods; struct _objc_method_list *class_methods; struct _objc_protocol_list *protocols; - uint32_t size; // <rdar://4585769> + uint32_t size; // sizeof(struct _objc_category) struct _objc_property_list *instance_properties; struct _objc_property_list *class_properties; }; @@ -3566,8 +3557,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { // Record a reference to the super class. LazySymbols.insert(Super->getIdentifier()); - values.addBitCast(GetClassName(Super->getObjCRuntimeNameAsString()), - ObjCTypes.ClassPtrTy); + values.add(GetClassName(Super->getObjCRuntimeNameAsString())); } else { values.addNullPointer(ObjCTypes.ClassPtrTy); } @@ -3621,14 +3611,12 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, const ObjCInterfaceDecl *Root = ID->getClassInterface(); while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; - values.addBitCast(GetClassName(Root->getObjCRuntimeNameAsString()), - ObjCTypes.ClassPtrTy); + values.add(GetClassName(Root->getObjCRuntimeNameAsString())); // The super class for the metaclass is emitted as the name of the // super class. The runtime fixes this up to point to the // *metaclass* for the super class. if (ObjCInterfaceDecl *Super = ID->getClassInterface()->getSuperClass()) { - values.addBitCast(GetClassName(Super->getObjCRuntimeNameAsString()), - ObjCTypes.ClassPtrTy); + values.add(GetClassName(Super->getObjCRuntimeNameAsString())); } else { values.addNullPointer(ObjCTypes.ClassPtrTy); } @@ -3812,7 +3800,7 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID, GV = CreateMetadataVar("OBJC_INSTANCE_VARIABLES_" + ID->getName(), ivarList, "__OBJC,__instance_vars,regular,no_dead_strip", CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListPtrTy); + return GV; } /// Build a struct objc_method_description constant for the given method. 
@@ -3824,8 +3812,7 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID, void CGObjCMac::emitMethodDescriptionConstant(ConstantArrayBuilder &builder, const ObjCMethodDecl *MD) { auto description = builder.beginStruct(ObjCTypes.MethodDescriptionTy); - description.addBitCast(GetMethodVarName(MD->getSelector()), - ObjCTypes.SelectorPtrTy); + description.add(GetMethodVarName(MD->getSelector())); description.add(GetMethodVarType(MD)); description.finishAndAddTo(builder); } @@ -3843,10 +3830,9 @@ void CGObjCMac::emitMethodConstant(ConstantArrayBuilder &builder, assert(fn && "no definition registered for method"); auto method = builder.beginStruct(ObjCTypes.MethodTy); - method.addBitCast(GetMethodVarName(MD->getSelector()), - ObjCTypes.SelectorPtrTy); + method.add(GetMethodVarName(MD->getSelector())); method.add(GetMethodVarType(MD)); - method.addBitCast(fn, ObjCTypes.Int8PtrTy); + method.add(fn); method.finishAndAddTo(builder); } @@ -3931,8 +3917,7 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT, llvm::GlobalVariable *GV = CreateMetadataVar(prefix + name, values, section, CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, - ObjCTypes.MethodDescriptionListPtrTy); + return GV; } // Otherwise, it's an objc_method_list. 
@@ -3949,7 +3934,7 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT, llvm::GlobalVariable *GV = CreateMetadataVar(prefix + name, values, section, CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListPtrTy); + return GV; } llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD, @@ -4004,8 +3989,7 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, "", &CGM.getModule()); Fn->takeName(OldFn); - OldFn->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(Fn, OldFn->getType())); + OldFn->replaceAllUsesWith(Fn); OldFn->eraseFromParent(); // Replace the cached function in the map. @@ -4486,14 +4470,10 @@ llvm::FunctionType *FragileHazards::GetAsmFnType() { want to implement correct ObjC/C++ exception interactions for the fragile ABI. - Note that for this use of setjmp/longjmp to be correct, we may need - to mark some local variables volatile: if a non-volatile local - variable is modified between the setjmp and the longjmp, it has - indeterminate value. For the purposes of LLVM IR, it may be - sufficient to make loads and stores within the @try (to variables - declared outside the @try) volatile. This is necessary for - optimized correctness, but is not currently being done; this is - being tracked as rdar://problem/8160285 + Note that for this use of setjmp/longjmp to be correct in the presence of + optimization, we use inline assembly on the set of local variables to force + flushing locals to memory immediately before any protected calls and to + inhibit optimizing locals across the setjmp->catch edge. The basic framework for a @try-catch-finally is as follows: { @@ -5092,7 +5072,8 @@ enum ImageInfoFlags { eImageInfo_OptimizedByDyld = (1 << 3), // This flag is set by the dyld shared cache. // A flag indicating that the module has no instances of a @synthesize of a - // superclass variable. 
<rdar://problem/6803242> + // superclass variable. This flag used to be consumed by the runtime to work + // around miscompile by gcc. eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang. eImageInfo_ImageIsSimulated = (1 << 5), eImageInfo_ClassProperties = (1 << 6) @@ -5205,17 +5186,17 @@ llvm::Constant *CGObjCMac::EmitModuleSymbols() { if (ID->isWeakImported() && !IMP->isWeakImported()) DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage); - array.addBitCast(DefinedClasses[i], ObjCTypes.Int8PtrTy); + array.add(DefinedClasses[i]); } for (unsigned i=0; i<NumCategories; i++) - array.addBitCast(DefinedCategories[i], ObjCTypes.Int8PtrTy); + array.add(DefinedCategories[i]); array.finishAndAddTo(values); llvm::GlobalVariable *GV = CreateMetadataVar( "OBJC_SYMBOLS", values, "__OBJC,__symbols,regular,no_dead_strip", CGM.getPointerAlign(), true); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.SymtabPtrTy); + return GV; } llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, @@ -5225,13 +5206,10 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { - llvm::Constant *Casted = - llvm::ConstantExpr::getBitCast(GetClassName(II->getName()), - ObjCTypes.ClassPtrTy); - Entry = CreateMetadataVar( - "OBJC_CLASS_REFERENCES_", Casted, - "__OBJC,__cls_refs,literal_pointers,no_dead_strip", - CGM.getPointerAlign(), true); + Entry = + CreateMetadataVar("OBJC_CLASS_REFERENCES_", GetClassName(II->getName()), + "__OBJC,__cls_refs,literal_pointers,no_dead_strip", + CGM.getPointerAlign(), true); } return CGF.Builder.CreateAlignedLoad(Entry->getValueType(), Entry, @@ -5264,11 +5242,8 @@ Address CGObjCMac::EmitSelectorAddr(Selector Sel) { llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; if (!Entry) { - llvm::Constant *Casted = - llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel), - ObjCTypes.SelectorPtrTy); Entry = CreateMetadataVar( - 
"OBJC_SELECTOR_REFERENCES_", Casted, + "OBJC_SELECTOR_REFERENCES_", GetMethodVarName(Sel), "__OBJC,__message_refs,literal_pointers,no_dead_strip", Align, true); Entry->setExternallyInitialized(true); } @@ -5768,10 +5743,9 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // id self; // Class cls; // } - RecordDecl *RD = RecordDecl::Create(Ctx, TTK_Struct, - Ctx.getTranslationUnitDecl(), - SourceLocation(), SourceLocation(), - &Ctx.Idents.get("_objc_super")); + RecordDecl *RD = RecordDecl::Create( + Ctx, TagTypeKind::Struct, Ctx.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), &Ctx.Idents.get("_objc_super")); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), nullptr, Ctx.getObjCIdType(), nullptr, nullptr, false, ICIS_NoInit)); @@ -6121,10 +6095,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // }; // First the clang type for struct _message_ref_t - RecordDecl *RD = RecordDecl::Create(Ctx, TTK_Struct, - Ctx.getTranslationUnitDecl(), - SourceLocation(), SourceLocation(), - &Ctx.Idents.get("_message_ref_t")); + RecordDecl *RD = RecordDecl::Create( + Ctx, TagTypeKind::Struct, Ctx.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), &Ctx.Idents.get("_message_ref_t")); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), nullptr, Ctx.VoidPtrTy, nullptr, nullptr, false, ICIS_NoInit)); @@ -6179,8 +6152,8 @@ void CGObjCNonFragileABIMac::AddModuleClassList( SmallVector<llvm::Constant*, 8> Symbols(NumClasses); for (unsigned i=0; i<NumClasses; i++) - Symbols[i] = llvm::ConstantExpr::getBitCast(Container[i], - ObjCTypes.Int8PtrTy); + Symbols[i] = Container[i]; + llvm::Constant *Init = llvm::ConstantArray::get(llvm::ArrayType::get(ObjCTypes.Int8PtrTy, Symbols.size()), @@ -6616,9 +6589,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, // of protocol's meta-data (not a reference to it!) 
assert(!PD->isNonRuntimeProtocol() && "attempting to get a protocol ref to a static protocol."); - llvm::Constant *Init = - llvm::ConstantExpr::getBitCast(GetOrEmitProtocol(PD), - ObjCTypes.getExternalProtocolPtrTy()); + llvm::Constant *Init = GetOrEmitProtocol(PD); std::string ProtocolName("_OBJC_PROTOCOL_REFERENCE_$_"); ProtocolName += PD->getObjCRuntimeNameAsString(); @@ -6759,8 +6730,7 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder, const ObjCMethodDecl *MD, bool forProtocol) { auto method = builder.beginStruct(ObjCTypes.MethodTy); - method.addBitCast(GetMethodVarName(MD->getSelector()), - ObjCTypes.SelectorPtrTy); + method.add(GetMethodVarName(MD->getSelector())); method.add(GetMethodVarType(MD)); if (forProtocol) { @@ -6769,7 +6739,7 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder, } else { llvm::Function *fn = GetMethodDefinition(MD); assert(fn && "no definition for method?"); - method.addBitCast(fn, ObjCTypes.Int8PtrProgramASTy); + method.add(fn); } method.finishAndAddTo(builder); @@ -6843,7 +6813,7 @@ CGObjCNonFragileABIMac::emitMethodList(Twine name, MethodListType kind, llvm::GlobalVariable *GV = finishAndCreateGlobal(values, prefix + name, CGM); CGM.addCompilerUsedGlobal(GV); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListnfABIPtrTy); + return GV; } /// ObjCIvarOffsetVariable - Returns the ivar offset variable for @@ -6985,7 +6955,7 @@ llvm::Constant *CGObjCNonFragileABIMac::EmitIvarList( llvm::GlobalVariable *GV = finishAndCreateGlobal( ivarList, Prefix + OID->getObjCRuntimeNameAsString(), CGM); CGM.addCompilerUsedGlobal(GV); - return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListnfABIPtrTy); + return GV; } llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( @@ -7159,7 +7129,7 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(TmpName.str(), true); if (GV) - return 
llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListnfABIPtrTy); + return GV; ConstantInitBuilder builder(CGM); auto values = builder.beginStruct(); @@ -7177,8 +7147,7 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, GV = finishAndCreateGlobal(values, Name, CGM); CGM.addCompilerUsedGlobal(GV); - return llvm::ConstantExpr::getBitCast(GV, - ObjCTypes.ProtocolListnfABIPtrTy); + return GV; } /// EmitObjCValueForIvar - Code Gen for nonfragile ivar reference. @@ -7412,8 +7381,7 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); if (GV) { - GV->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(NewGV, GV->getType())); + GV->replaceAllUsesWith(NewGV); GV->eraseFromParent(); } GV = NewGV; @@ -7624,14 +7592,11 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(Selector Sel) { llvm::GlobalVariable *&Entry = SelectorReferences[Sel]; CharUnits Align = CGM.getPointerAlign(); if (!Entry) { - llvm::Constant *Casted = - llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel), - ObjCTypes.SelectorPtrTy); std::string SectionName = GetSectionName("__objc_selrefs", "literal_pointers,no_dead_strip"); Entry = new llvm::GlobalVariable( CGM.getModule(), ObjCTypes.SelectorPtrTy, false, - getLinkageTypeForObjCMetadata(CGM, SectionName), Casted, + getLinkageTypeForObjCMetadata(CGM, SectionName), GetMethodVarName(Sel), "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); Entry->setSection(SectionName); diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index 634a3d5a938d..424564f97599 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -63,12 +63,10 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy); QualType IvarTy = Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers); - llvm::Type *LTy = 
CGF.CGM.getTypes().ConvertTypeForMem(IvarTy); - llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy); + llvm::Value *V = BaseValue; V = CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, V, Offset, "add.ptr"); if (!Ivar->isBitField()) { - V = CGF.Builder.CreateBitCast(V, llvm::PointerType::getUnqual(LTy)); LValue LV = CGF.MakeNaturalAlignAddrLValue(V, IvarTy); return LV; } diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp index dc2330a29976..115b618056a4 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -37,44 +37,16 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { if (llvm::Type *TransTy = CGM.getTargetCodeGenInfo().getOpenCLType(CGM, T)) return TransTy; - switch (cast<BuiltinType>(T)->getKind()) { - default: - llvm_unreachable("Unexpected opencl builtin type!"); - return nullptr; -#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ - case BuiltinType::Id: \ - return getPointerType(T, "opencl." #ImgType "_" #Suffix "_t"); -#include "clang/Basic/OpenCLImageTypes.def" - case BuiltinType::OCLSampler: + if (T->isSamplerT()) return getSamplerType(T); - case BuiltinType::OCLEvent: - return getPointerType(T, "opencl.event_t"); - case BuiltinType::OCLClkEvent: - return getPointerType(T, "opencl.clk_event_t"); - case BuiltinType::OCLQueue: - return getPointerType(T, "opencl.queue_t"); - case BuiltinType::OCLReserveID: - return getPointerType(T, "opencl.reserve_id_t"); -#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ - case BuiltinType::Id: \ - return getPointerType(T, "opencl." 
#ExtType); -#include "clang/Basic/OpenCLExtensionTypes.def" - } -} -llvm::PointerType *CGOpenCLRuntime::getPointerType(const Type *T, - StringRef Name) { - auto I = CachedTys.find(Name); - if (I != CachedTys.end()) - return I->second; + return getPointerType(T); +} - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); +llvm::PointerType *CGOpenCLRuntime::getPointerType(const Type *T) { uint32_t AddrSpc = CGM.getContext().getTargetAddressSpace( CGM.getContext().getOpenCLTypeAddrSpace(T)); - auto *PTy = - llvm::PointerType::get(llvm::StructType::create(Ctx, Name), AddrSpc); - CachedTys[Name] = PTy; - return PTy; + return llvm::PointerType::get(CGM.getLLVMContext(), AddrSpc); } llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { @@ -90,10 +62,7 @@ llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name, llvm::Type *&PipeTy) { if (!PipeTy) - PipeTy = llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), Name), - CGM.getContext().getTargetAddressSpace( - CGM.getContext().getOpenCLTypeAddrSpace(T))); + PipeTy = getPointerType(T); return PipeTy; } @@ -105,10 +74,7 @@ llvm::Type *CGOpenCLRuntime::getSamplerType(const Type *T) { CGM, CGM.getContext().OCLSamplerTy.getTypePtr())) SamplerTy = TransTy; else - SamplerTy = llvm::PointerType::get( - llvm::StructType::create(CGM.getLLVMContext(), "opencl.sampler_t"), - CGM.getContext().getTargetAddressSpace( - CGM.getContext().getOpenCLTypeAddrSpace(T))); + SamplerTy = getPointerType(T); return SamplerTy; } @@ -134,7 +100,7 @@ llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) { llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { assert(CGM.getLangOpts().OpenCL); - return llvm::IntegerType::getInt8PtrTy( + return llvm::PointerType::get( CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h 
b/clang/lib/CodeGen/CGOpenCLRuntime.h index df8084d6008b..34613c3516f3 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.h +++ b/clang/lib/CodeGen/CGOpenCLRuntime.h @@ -39,7 +39,6 @@ protected: llvm::Type *PipeROTy; llvm::Type *PipeWOTy; llvm::Type *SamplerTy; - llvm::StringMap<llvm::PointerType *> CachedTys; /// Structure for enqueued block information. struct EnqueuedBlockInfo { @@ -53,7 +52,7 @@ protected: virtual llvm::Type *getPipeType(const PipeType *T, StringRef Name, llvm::Type *&PipeTy); - llvm::PointerType *getPointerType(const Type *T, StringRef Name); + llvm::PointerType *getPointerType(const Type *T); public: CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a52ec8909b12..7f7e6f530666 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <cstdint> #include <numeric> #include <optional> @@ -479,27 +480,6 @@ enum OpenMPLocationFlags : unsigned { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Values for bit flags for marking which requires clauses have been used. -enum OpenMPOffloadingRequiresDirFlags : int64_t { - /// flag undefined. - OMP_REQ_UNDEFINED = 0x000, - /// no requires clause present. - OMP_REQ_NONE = 0x001, - /// reverse_offload clause. - OMP_REQ_REVERSE_OFFLOAD = 0x002, - /// unified_address clause. - OMP_REQ_UNIFIED_ADDRESS = 0x004, - /// unified_shared_memory clause. - OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, - /// dynamic_allocators clause. - OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) -}; - -} // anonymous namespace - /// Describes ident structure that describes a source location. 
/// All descriptions are taken from /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h @@ -1054,12 +1034,15 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, - isGPU(), hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); - OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice - ? CGM.getLangOpts().OMPHostIRFile - : StringRef{}); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(), + CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); + OMPBuilder.initialize(); + OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice + ? CGM.getLangOpts().OMPHostIRFile + : StringRef{}); OMPBuilder.setConfig(Config); } @@ -1091,9 +1074,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), - /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); + /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other); ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), - /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); + /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); const CGFunctionInfo &FnInfo = @@ -1150,7 +1133,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( if (const Expr *Init = D->getInitializer()) { Initializer = emitCombinerOrInitializer( CGM, D->getType(), - D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init + D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init : nullptr, cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), @@ -1458,6 +1441,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); llvm::CallInst *Call = CGF.Builder.CreateCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_global_thread_num), @@ -1502,103 +1486,6 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } -llvm::FunctionCallee -CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, - bool IsGPUDistribute) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name; - if (IsGPUDistribute) - Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" - : "__kmpc_distribute_static_init_4u") - : (IVSigned ? "__kmpc_distribute_static_init_8" - : "__kmpc_distribute_static_init_8u"); - else - Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); - - llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; - auto *PtrTy = llvm::PointerType::getUnqual(ITy); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy, // p_stride - ITy, // incr - ITy // chunk - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") - : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); - llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - ITy, // lower - ITy, // upper - ITy, // stride - ITy // chunk - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") - : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") - : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); - llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto *PtrTy = llvm::PointerType::getUnqual(ITy); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy // p_stride - }; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD) { std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = @@ -1653,7 +1540,7 @@ static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc( PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc); llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false); } @@ -1667,7 +1554,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; auto LinkageForVariable = [&VD, this]() { - return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGM.getLLVMLinkageVarDefinition(VD); }; std::vector<llvm::GlobalVariable *> 
GeneratedRefs; @@ -1761,7 +1648,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, /*Id=*/nullptr, CGM.getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&Dst); const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1793,7 +1680,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, /*Id=*/nullptr, CGM.getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&Dst); const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1861,134 +1748,39 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, - llvm::GlobalVariable *Addr, - bool PerformInit) { - if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsTargetDevice) - return false; - std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory)) - return CGM.getLangOpts().OpenMPIsTargetDevice; - VD = VD->getDefinition(CGM.getContext()); - assert(VD && "Unknown VarDecl"); - - if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) - return CGM.getLangOpts().OpenMPIsTargetDevice; +void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, + llvm::GlobalValue *GV) { + std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr = + OMPDeclareTargetDeclAttr::getActiveAttr(FD); - QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); - - // Produce the unique prefix to identify the new 
target regions. We use - // the source location of the variable declaration which we know to not - // conflict with any target region. - llvm::TargetRegionEntryInfo EntryInfo = - getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName()); - SmallString<128> Buffer, Out; - OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); - - const Expr *Init = VD->getAnyInitializer(); - if (CGM.getLangOpts().CPlusPlus && PerformInit) { - llvm::Constant *Ctor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsTargetDevice) { - // Generate function that re-emits the declaration's initializer into - // the threadprivate copy of the variable VD - CodeGenFunction CtorCGF(CGM); + // We only need to handle active 'indirect' declare target functions. + if (!ActiveAttr || !(*ActiveAttr)->getIndirect()) + return; - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_ctor"), FI, Loc, false, - llvm::GlobalValue::WeakODRLinkage); - Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); - if (CGM.getTriple().isAMDGCN()) - Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); - CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); - llvm::Constant *AddrInAS0 = Addr; - if (Addr->getAddressSpace() != 0) - AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); - CtorCGF.EmitAnyExprToMem(Init, - Address(AddrInAS0, Addr->getValueType(), - CGM.getContext().getDeclAlign(VD)), - Init->getType().getQualifiers(), - /*IsInitializer=*/true); - CtorCGF.FinishFunction(); - Ctor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - } else { - Ctor = new llvm::GlobalVariable( - 
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); - ID = Ctor; - } + // Get a mangled name to store the new device global in. + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName()); + SmallString<128> Name; + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo); - // Register the information for the entry associated with the constructor. - Out.clear(); - auto CtorEntryInfo = EntryInfo; - CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); - OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( - CtorEntryInfo, Ctor, ID, - llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); + // We need to generate a new global to hold the address of the indirectly + // called device function. Doing this allows us to keep the visibility and + // linkage of the associated function unchanged while allowing the runtime to + // access its value. 
+ llvm::GlobalValue *Addr = GV; + if (CGM.getLangOpts().OpenMPIsTargetDevice) { + Addr = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name, + nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); + Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); } - if (VD->getType().isDestructedType() != QualType::DK_none) { - llvm::Constant *Dtor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsTargetDevice) { - // Generate function that emits destructor call for the threadprivate - // copy of the variable VD - CodeGenFunction DtorCGF(CGM); - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_dtor"), FI, Loc, false, - llvm::GlobalValue::WeakODRLinkage); - Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); - if (CGM.getTriple().isAMDGCN()) - Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); - DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - // Create a scope with an artificial location for the body of this - // function. 
- auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); - llvm::Constant *AddrInAS0 = Addr; - if (Addr->getAddressSpace() != 0) - AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); - DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), - CGM.getContext().getDeclAlign(VD)), - ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), - DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); - DtorCGF.FinishFunction(); - Dtor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - } else { - Dtor = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); - ID = Dtor; - } - // Register the information for the entry associated with the destructor. - Out.clear(); - auto DtorEntryInfo = EntryInfo; - DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); - OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( - DtorEntryInfo, Dtor, ID, - llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); - } - return CGM.getLangOpts().OpenMPIsTargetDevice; + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( + Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), + llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, + llvm::GlobalValue::WeakODRLinkage); } Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, @@ -2161,11 +1953,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getName({Prefix, "var"}); - llvm::GlobalVariable *G = OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); - llvm::Align PtrAlign = OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace()); - if (PtrAlign > 
llvm::Align(G->getAlignment())) - G->setAlignment(PtrAlign); - return G; + return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); } namespace { @@ -2353,9 +2141,9 @@ static llvm::Value *emitCopyprivateCopyFunction( // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); const auto &CGFI = @@ -2451,7 +2239,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); QualType CopyprivateArrayTy = C.getConstantArrayType( - C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. 
Address CopyprivateList = @@ -2753,7 +2541,8 @@ void CGOpenMPRuntime::emitForDispatchInit( CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; - CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); + CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned), + Args); } static void emitForStaticInitCall( @@ -2820,7 +2609,8 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); + OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned, + false); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -2839,7 +2629,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit( bool isGPUDistribute = CGM.getLangOpts().OpenMPIsTargetDevice && (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); - StaticInitFunction = createForStaticInitFunction( + StaticInitFunction = OMPBuilder.createForStaticInitFunction( Values.IVSize, Values.IVSigned, isGPUDistribute); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, @@ -2883,7 +2673,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); + CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned), + Args); } llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, @@ -2903,8 +2694,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, UB.getPointer(), // &Upper ST.getPointer() // &Stride }; - llvm::Value *Call = - 
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); + llvm::Value *Call = CGF.EmitRuntimeCall( + OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args); return CGF.EmitScalarConversion( Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), CGF.getContext().BoolTy, Loc); @@ -2993,8 +2784,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), E = CGM.getContext().getSourceManager().fileinfo_end(); I != E; ++I) { - if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && - I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { + if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID && + I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) { Loc = CGM.getContext().getSourceManager().translateFileLineCol( I->getFirst(), EntryInfo.Line, 1); break; @@ -3121,7 +2912,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_int32 liter; // void * reductions; // }; - RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union); UD->startDefinition(); addFieldToRecordDecl(C, UD, KmpInt32Ty); addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); @@ -3187,10 +2978,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy.withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); const auto &TaskEntryFnInfo = @@ -3289,10 +3080,10 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ASTContext &C = CGM.getContext(); FunctionArgList 
Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy.withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); const auto &DestructorFnInfo = @@ -3349,7 +3140,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(PrivatesQTy).withConst().withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&TaskPrivatesArg); llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; unsigned Counter = 1; @@ -3359,7 +3150,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3370,7 +3161,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3381,7 +3172,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3395,7 +3186,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, 
/*Id=*/nullptr, C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); PrivateVarsPos[VD] = Counter; ++Counter; } @@ -3599,12 +3390,12 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); @@ -4018,12 +3809,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, VK_PRValue); CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); - KmpTaskAffinityInfoArrayTy = - C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, - /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); + KmpTaskAffinityInfoArrayTy = C.getVariableArrayType( + KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // Properly emit variable-sized array. 
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); CGF.EmitVarDecl(*PD); AffinitiesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, @@ -4032,7 +3823,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( KmpTaskAffinityInfoTy, llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, - ArrayType::Normal, /*IndexTypeQuals=*/0); + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); AffinitiesArray = CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); @@ -4477,12 +4268,12 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); KmpDependInfoArrayTy = - C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, + C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); // Properly emit variable-sized array. 
auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); CGF.EmitVarDecl(*PD); DependenciesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, @@ -4490,7 +4281,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( } else { KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, - ArrayType::Normal, /*IndexTypeQuals=*/0); + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); @@ -4570,7 +4361,7 @@ Address CGOpenMPRuntime::emitDepobjDependClause( } else { QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), - nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); Size = CGM.getSize(Sz.alignTo(Align)); NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); @@ -5012,9 +4803,9 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); const auto &CGFI = @@ -5186,9 +4977,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ++Size; } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, - 
/*IndexTypeQuals=*/0); + QualType ReductionArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); const auto *IPriv = Privates.begin(); @@ -5451,9 +5242,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, VoidPtrTy.addRestrict(); FunctionArgList Args; ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&Param); Args.emplace_back(&ParamOrig); const auto &FnInfo = @@ -5522,9 +5313,9 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); FunctionArgList Args; ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&ParamInOut); Args.emplace_back(&ParamIn); const auto &FnInfo = @@ -5594,7 +5385,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&Param); const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -5657,8 +5448,9 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( QualType RDType = C.getRecordType(RD); unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); - QualType ArrayRDType = C.getConstantArrayType( - RDType, ArraySize, nullptr, 
ArrayType::Normal, /*IndexTypeQuals=*/0); + QualType ArrayRDType = + C.getConstantArrayType(RDType, ArraySize, nullptr, + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, @@ -6082,6 +5874,42 @@ void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, {ThreadId, AllocatorVal}); } +void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams( + const OMPExecutableDirective &D, CodeGenFunction &CGF, + int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, + int32_t &MaxTeamsVal) { + + getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal); + getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal, + /*UpperBoundOnly=*/true); + + for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { + for (auto *A : C->getAttrs()) { + int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1; + int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1; + if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) + CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal, + &AttrMinBlocksVal, &AttrMaxBlocksVal); + else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) + CGM.handleAMDGPUFlatWorkGroupSizeAttr( + nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal, + &AttrMaxThreadsVal); + else + continue; + + MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal); + if (AttrMaxThreadsVal > 0) + MaxThreadsVal = MaxThreadsVal > 0 + ? std::min(MaxThreadsVal, AttrMaxThreadsVal) + : AttrMaxThreadsVal; + MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal); + if (AttrMaxBlocksVal > 0) + MaxTeamsVal = MaxTeamsVal > 0 ? 
std::min(MaxTeamsVal, AttrMaxBlocksVal) + : AttrMaxBlocksVal; + } + } +} + void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -6100,18 +5928,20 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); }; - // Get NumTeams and ThreadLimit attributes - int32_t DefaultValTeams = -1; - int32_t DefaultValThreads = -1; - getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); - getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); - OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, - DefaultValTeams, DefaultValThreads, IsOffloadEntry, OutlinedFn, OutlinedFnID); - if (OutlinedFn != nullptr) - CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + if (!OutlinedFn) + return; + + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + + for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { + for (auto *A : C->getAttrs()) { + if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) + CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); + } + } } /// Checks if the expression is constant or does not have non-trivial function @@ -6167,8 +5997,8 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, } const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, + int32_t &MaxTeamsVal) { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && @@ -6189,22 +6019,22 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = 
Constant->getExtValue(); + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); return NumTeams; } - DefaultVal = 0; + MinTeamsVal = MaxTeamsVal = 0; return nullptr; } if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; } - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; } // A value of -1 is used to check if we need to emit no teams region - DefaultVal = -1; + MinTeamsVal = MaxTeamsVal = -1; return nullptr; } case OMPD_target_teams_loop: @@ -6218,10 +6048,10 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = Constant->getExtValue(); + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); return NumTeams; } - DefaultVal = 0; + MinTeamsVal = MaxTeamsVal = 0; return nullptr; } case OMPD_target_parallel: @@ -6229,7 +6059,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_target_parallel_for_simd: case OMPD_target_parallel_loop: case OMPD_target_simd: - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; case OMPD_parallel: case OMPD_for: @@ -6304,8 +6134,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( "Clauses associated with the teams directive expected to be emitted " "only for the host!"); CGBuilderTy &Bld = CGF.Builder; - int32_t DefaultNT = -1; - const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); + int32_t MinNT = -1, MaxNT = -1; + const Expr *NumTeams = + getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT); if (NumTeams != nullptr) { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); @@ -6335,239 +6166,142 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( } } - return 
llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT); + assert(MinNT == MaxNT && "Num threads ranges require handling here."); + return llvm::ConstantInt::get(CGF.Int32Ty, MinNT); } -static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, - llvm::Value *DefaultThreadLimitVal) { +/// Check for a num threads constant value (stored in \p DefaultVal), or +/// expression (stored in \p E). If the value is conditional (via an if-clause), +/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are +/// nullptr, no expression evaluation is perfomed. +static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, + const Expr **E, int32_t &UpperBound, + bool UpperBoundOnly, llvm::Value **CondVal) { const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); - if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { - if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { - llvm::Value *NumThreads = nullptr; - llvm::Value *CondVal = nullptr; - // Handle if clause. If if clause present, the number of threads is - // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. - if (Dir->hasClausesOfKind<OMPIfClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const OMPIfClause *IfClause = nullptr; - for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { - if (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_parallel) { - IfClause = C; - break; - } + const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); + if (!Dir) + return; + + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
+ if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const OMPIfClause *IfClause = nullptr; + for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; } - if (IfClause) { - const Expr *Cond = IfClause->getCondition(); - bool Result; - if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { - if (!Result) - return CGF.Builder.getInt32(1); - } else { - CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); - } + } + if (IfClause) { + const Expr *CondExpr = IfClause->getCondition(); + bool Result; + if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) { + UpperBound = 1; + return; + } + } else { + CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); } } - CondVal = CGF.EvaluateExprAsBool(Cond); + *CondVal = CGF.EvaluateExprAsBool(CondExpr); } } } - // Check the value of num_threads clause iff if clause was not specified - // or is not evaluated to false. 
- if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const auto *NumThreadsClause = - Dir->getSingleClause<OMPNumThreadsClause>(); - CodeGenFunction::LexicalScope Scope( - CGF, NumThreadsClause->getNumThreads()->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); - } + } + // Check the value of num_threads clause iff if clause was not specified + // or is not evaluated to false. + if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *NumThreadsClause = + Dir->getSingleClause<OMPNumThreadsClause>(); + const Expr *NTExpr = NumThreadsClause->getNumThreads(); + if (NTExpr->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext())) + UpperBound = + UpperBound + ? Constant->getZExtValue() + : std::min(UpperBound, + static_cast<int32_t>(Constant->getZExtValue())); + // If we haven't found a upper bound, remember we saw a thread limiting + // clause. 
+ if (UpperBound == -1) + UpperBound = 0; + if (!E) + return; + CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); } } - NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); - NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, - /*isSigned=*/false); - if (DefaultThreadLimitVal) - NumThreads = CGF.Builder.CreateSelect( - CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), - DefaultThreadLimitVal, NumThreads); - } else { - NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); - } - // Process condition of the if clause. - if (CondVal) { - NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, - CGF.Builder.getInt32(1)); } - return NumThreads; + *E = NTExpr; } - if (isOpenMPSimdDirective(Dir->getDirectiveKind())) - return CGF.Builder.getInt32(1); + return; } - return DefaultThreadLimitVal; + if (isOpenMPSimdDirective(Dir->getDirectiveKind())) + UpperBound = 1; } const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, + bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) { + assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - switch (DirectiveKind) { - case 
OMPD_target: - // Teams have no clause thread_limit - return nullptr; - case OMPD_target_teams: - case OMPD_target_teams_distribute: - if (D.hasClausesOfKind<OMPThreadLimitClause>()) { - const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); - if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) - if (auto Constant = - ThreadLimit->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = Constant->getExtValue(); - return ThreadLimit; - } - return nullptr; - case OMPD_target_teams_loop: - case OMPD_target_parallel_loop: - case OMPD_target_parallel: - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: { - Expr *ThreadLimit = nullptr; - Expr *NumThreads = nullptr; - if (D.hasClausesOfKind<OMPThreadLimitClause>()) { - const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - ThreadLimit = ThreadLimitClause->getThreadLimit(); - if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) - if (auto Constant = - ThreadLimit->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = Constant->getExtValue(); - } - if (D.hasClausesOfKind<OMPNumThreadsClause>()) { - const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); - NumThreads = NumThreadsClause->getNumThreads(); - if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { - if (auto Constant = - NumThreads->getIntegerConstantExpr(CGF.getContext())) { - if (Constant->getExtValue() < DefaultVal) { - DefaultVal = Constant->getExtValue(); - ThreadLimit = NumThreads; - } - } - } + const Expr *NT = nullptr; + const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT; + + auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) { + if (E->isIntegerConstantExpr(CGF.getContext())) { + if (auto Constant = E->getIntegerConstantExpr(CGF.getContext())) + UpperBound = UpperBound ? 
Constant->getZExtValue() + : std::min(UpperBound, + int32_t(Constant->getZExtValue())); } - return ThreadLimit; - } - case OMPD_target_teams_distribute_simd: - case OMPD_target_simd: - DefaultVal = 1; - return nullptr; - case OMPD_parallel: - case OMPD_for: - case OMPD_parallel_for: - case OMPD_parallel_master: - case OMPD_parallel_sections: - case OMPD_for_simd: - case OMPD_parallel_for_simd: - case OMPD_cancel: - case OMPD_cancellation_point: - case OMPD_ordered: - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_task: - case OMPD_simd: - case OMPD_tile: - case OMPD_unroll: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_critical: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_taskwait: - case OMPD_taskgroup: - case OMPD_atomic: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_teams: - case OMPD_target_data: - case OMPD_target_exit_data: - case OMPD_target_enter_data: - case OMPD_distribute: - case OMPD_distribute_simd: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: - case OMPD_target_update: - case OMPD_declare_simd: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: - case OMPD_requires: - case OMPD_unknown: - break; - default: - break; - } - llvm_unreachable("Unsupported directive kind."); -} + // If we haven't found a upper bound, remember we saw a thread limiting + // clause. 
+ if (UpperBound == -1) + UpperBound = 0; + if (EPtr) + *EPtr = E; + }; + + auto ReturnSequential = [&]() { + UpperBound = 1; + return NT; + }; -llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsTargetDevice && - "Clauses associated with the teams directive expected to be emitted " - "only for the host!"); - OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); - assert(isOpenMPTargetExecutionDirective(DirectiveKind) && - "Expected target-based executable directive."); - CGBuilderTy &Bld = CGF.Builder; - llvm::Value *ThreadLimitVal = nullptr; - llvm::Value *NumThreadsVal = nullptr; switch (DirectiveKind) { case OMPD_target: { const CapturedStmt *CS = D.getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); // TODO: The standard is not clear how to resolve two thread limit clauses, @@ -6576,30 +6310,28 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) { ThreadLimitClause = TLC; - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - CodeGenFunction::LexicalScope Scope( - CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); + if (ThreadLimitExpr) { + 
CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + CodeGenFunction::LexicalScope Scope( + CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } } } } } } - if (ThreadLimitClause) { - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); - } + if (ThreadLimitClause) + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { @@ -6608,53 +6340,41 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( CGF.getContext(), CS->getCapturedStmt()); Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); } - if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && - !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) { CS = Dir->getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; - } - if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) - return Bld.getInt32(1); + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); + } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) + return ReturnSequential(); } - return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); + return NT; } case OMPD_target_teams: { if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } const CapturedStmt *CS = D.getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (Dir->getDirectiveKind() == OMPD_distribute) { CS = Dir->getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); } } - return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); + return NT; } case OMPD_target_teams_distribute: if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } - if (llvm::Value *NumThreads = - getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) - return NumThreads; - return Bld.getInt32(0); + getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound, + UpperBoundOnly, CondVal); + return NT; case OMPD_target_teams_loop: case OMPD_target_parallel_loop: case OMPD_target_parallel: @@ -6662,10 +6382,7 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { - llvm::Value *CondVal = nullptr; - // Handle if clause. If if clause present, the number of threads is - // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
- if (D.hasClausesOfKind<OMPIfClause>()) { + if (CondVal && D.hasClausesOfKind<OMPIfClause>()) { const OMPIfClause *IfClause = nullptr; for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { if (C->getNameModifier() == OMPD_unknown || @@ -6679,109 +6396,92 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( bool Result; if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { if (!Result) - return Bld.getInt32(1); + return ReturnSequential(); } else { CodeGenFunction::RunCleanupsScope Scope(CGF); - CondVal = CGF.EvaluateExprAsBool(Cond); + *CondVal = CGF.EvaluateExprAsBool(Cond); } } } if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } if (D.hasClausesOfKind<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); - llvm::Value *NumThreads = CGF.EmitScalarExpr( - NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); - NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); - ThreadLimitVal = ThreadLimitVal - ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, - ThreadLimitVal), - NumThreadsVal, ThreadLimitVal) - : NumThreadsVal; + CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr); + return NumThreadsClause->getNumThreads(); } - if (!ThreadLimitVal) - ThreadLimitVal = Bld.getInt32(0); - if (CondVal) - return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); - return ThreadLimitVal; + return NT; } case OMPD_target_teams_distribute_simd: case OMPD_target_simd: - return Bld.getInt32(1); - case OMPD_parallel: - case OMPD_for: - case OMPD_parallel_for: - case OMPD_parallel_master: - case OMPD_parallel_sections: - case OMPD_for_simd: - case OMPD_parallel_for_simd: - case OMPD_cancel: - case OMPD_cancellation_point: - case OMPD_ordered: - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_task: - case OMPD_simd: - case OMPD_tile: - case OMPD_unroll: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_critical: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_taskwait: - case OMPD_taskgroup: - case OMPD_atomic: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_teams: - case OMPD_target_data: - case OMPD_target_exit_data: - case OMPD_target_enter_data: - case OMPD_distribute: - case OMPD_distribute_simd: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: - case OMPD_target_update: - case OMPD_declare_simd: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_parallel_master_taskloop: - case 
OMPD_parallel_master_taskloop_simd: - case OMPD_requires: - case OMPD_metadirective: - case OMPD_unknown: - break; + return ReturnSequential(); default: break; } llvm_unreachable("Unsupported directive kind."); } +llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D) { + llvm::Value *NumThreadsVal = nullptr; + llvm::Value *CondVal = nullptr; + llvm::Value *ThreadLimitVal = nullptr; + const Expr *ThreadLimitExpr = nullptr; + int32_t UpperBound = -1; + + const Expr *NT = getNumThreadsExprForTargetDirective( + CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal, + &ThreadLimitExpr); + + // Thread limit expressions are used below, emit them. + if (ThreadLimitExpr) { + ThreadLimitVal = + CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true); + ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty, + /*isSigned=*/false); + } + + // Generate the num teams expression. + if (UpperBound == 1) { + NumThreadsVal = CGF.Builder.getInt32(UpperBound); + } else if (NT) { + NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true); + NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty, + /*isSigned=*/false); + } else if (ThreadLimitVal) { + // If we do not have a num threads value but a thread limit, replace the + // former with the latter. We know handled the thread limit expression. + NumThreadsVal = ThreadLimitVal; + ThreadLimitVal = nullptr; + } else { + // Default to "0" which means runtime choice. + assert(!ThreadLimitVal && "Default not applicable with thread limit value"); + NumThreadsVal = CGF.Builder.getInt32(0); + } + + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
+ if (CondVal) { + CodeGenFunction::RunCleanupsScope Scope(CGF); + NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal, + CGF.Builder.getInt32(1)); + } + + // If the thread limit and num teams expression were present, take the + // minimum. + if (ThreadLimitVal) { + NumThreadsVal = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal), + ThreadLimitVal, NumThreadsVal); + } + + return NumThreadsVal; +} + namespace { LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); @@ -7689,7 +7389,14 @@ private: } else if (FieldIndex < PartialStruct.LowestElem.first) { PartialStruct.LowestElem = {FieldIndex, LowestElem}; } else if (FieldIndex > PartialStruct.HighestElem.first) { - PartialStruct.HighestElem = {FieldIndex, LowestElem}; + if (IsFinalArraySection) { + Address HB = + CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + .getAddress(CGF); + PartialStruct.HighestElem = {FieldIndex, HB}; + } else { + PartialStruct.HighestElem = {FieldIndex, LowestElem}; + } } } @@ -7930,30 +7637,6 @@ private: OpenMPOffloadMappingFlags::OMP_MAP_FROM; } - static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Rotate by getFlagMemberOffset() bits. - return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << getFlagMemberOffset()); - } - - static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, - OpenMPOffloadMappingFlags MemberOfFlag) { - // If the entry is PTR_AND_OBJ but has not been marked with the special - // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be - // marked as MEMBER_OF. 
- if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) && - static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != - OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF)) - return; - - // Reset the placeholder value to prepare the flag for the assignment of the - // proper MEMBER_OF value. - Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; - Flags |= MemberOfFlag; - } - void getPlainLayout(const CXXRecordDecl *RD, llvm::SmallVectorImpl<const FieldDecl *> &Layout, bool AsBase) const { @@ -8021,6 +7704,7 @@ private: /// the device pointers info array. void generateAllInfoForClauses( ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { // We have to process the component lists that relate with the same @@ -8355,7 +8039,7 @@ private: if (PartialStruct.Base.isValid()) { CurInfo.NonContigInfo.Dims.push_back(0); emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, - /*IsMapThis*/ !VD, VD); + /*IsMapThis*/ !VD, OMPBuilder, VD); } // We need to append the results of this capture to what we already @@ -8422,6 +8106,7 @@ public: void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, MapFlagsArrayTy &CurTypes, const StructRangeInfoTy &PartialStruct, bool IsMapThis, + llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD = nullptr, bool NotTargetParams = true) const { if (CurTypes.size() == 1 && @@ -8456,7 +8141,7 @@ public: // of tofrom. 
// Emit this[:1] CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); - QualType Ty = MD->getThisType()->getPointeeType(); + QualType Ty = MD->getFunctionObjectParameterType(); llvm::Value *Size = CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, /*isSigned=*/true); @@ -8509,9 +8194,9 @@ public: // (except for PTR_AND_OBJ entries which do not have a placeholder value // 0xFFFF in the MEMBER_OF field). OpenMPOffloadMappingFlags MemberOfFlag = - getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); + OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); for (auto &M : CurTypes) - setCorrectMemberOfFlag(M, MemberOfFlag); + OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag); } /// Generate all the base pointers, section pointers, sizes, map types, and @@ -8520,23 +8205,26 @@ public: /// pair of the relevant declaration and index where it occurs is appended to /// the device pointers info array. void generateAllInfo( - MapCombinedInfoTy &CombinedInfo, + MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { assert(CurDir.is<const OMPExecutableDirective *>() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); - generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); + generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder, + SkipVarSet); } /// Generate all the base pointers, section pointers, sizes, map types, and /// mappers for the extracted map clauses of user-defined mapper (all included /// in \a CombinedInfo). 
- void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { + void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder) const { assert(CurDir.is<const OMPDeclareMapperDecl *>() && "Expect a declare mapper directive"); const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); - generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); + generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo, + OMPBuilder); } /// Emit capture info for lambdas for variables captured by reference. @@ -8618,6 +8306,7 @@ public: /// Set correct indices for lambdas captures. void adjustMemberOfForLambdaCaptures( + llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapFlagsArrayTy &Types) const { @@ -8642,8 +8331,9 @@ public: // All other current entries will be MEMBER_OF the combined entry // (except for PTR_AND_OBJ entries which do not have a placeholder value // 0xFFFF in the MEMBER_OF field). - OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); - setCorrectMemberOfFlag(Types[I], MemberOfFlag); + OpenMPOffloadMappingFlags MemberOfFlag = + OMPBuilder.getMemberOfFlag(TgtIdx); + OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag); } } @@ -9242,17 +8932,17 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Prepare mapper function arguments and attributes. 
ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&HandleArg); Args.push_back(&BaseArg); @@ -9265,7 +8955,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); SmallString<64> TyStr; llvm::raw_svector_ostream Out(TyStr); - CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out); std::string Name = getName({"omp_mapper", TyStr, D->getName()}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -9337,7 +9027,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Get map clause information. Fill up the arrays with all mapped variables. MappableExprsHandler::MapCombinedInfoTy Info; MappableExprsHandler MEHandler(*D, MapperCGF); - MEHandler.generateAllInfoForMapper(Info); + MEHandler.generateAllInfoForMapper(Info, OMPBuilder); // Call the runtime API __tgt_mapper_num_components to get the number of // pre-existing components. 
@@ -9721,7 +9411,8 @@ static void emitTargetCallKernelLaunch( CombinedInfo.append(PartialStruct.PreliminaryMapData); MEHandler.emitCombinedEntry( CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), - nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty()); + OMPBuilder, nullptr, + !PartialStruct.PreliminaryMapData.BasePointers.empty()); } // We need to append the results of this capture to what we already have. @@ -9729,11 +9420,11 @@ static void emitTargetCallKernelLaunch( } // Adjust MEMBER_OF flags for the lambdas captures. MEHandler.adjustMemberOfForLambdaCaptures( - LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, - CombinedInfo.Types); + OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, + CombinedInfo.Pointers, CombinedInfo.Types); // Map any list items in a map clause that were not captures because they // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. 
@@ -9858,9 +9549,13 @@ void CGOpenMPRuntime::emitTargetCall( assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); - const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || - D.hasClausesOfKind<OMPNowaitClause>() || - D.hasClausesOfKind<OMPInReductionClause>(); + const bool RequiresOuterTask = + D.hasClausesOfKind<OMPDependClause>() || + D.hasClausesOfKind<OMPNowaitClause>() || + D.hasClausesOfKind<OMPInReductionClause>() || + (CGM.getLangOpts().OpenMP >= 51 && + needsTaskBasedThreadLimit(D.getDirectiveKind()) && + D.hasClausesOfKind<OMPThreadLimitClause>()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, @@ -10151,6 +9846,13 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + + // If this is an 'extern' declaration we defer to the canonical definition and + // do not emit an offloading entry. 
+ if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link && + VD->hasExternalStorage()) + return; + if (!Res) { if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Register non-target variables being emitted in device code (debug info @@ -10163,7 +9865,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; auto LinkageForVariable = [&VD, this]() { - return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGM.getLLVMLinkageVarDefinition(VD); }; std::vector<llvm::GlobalVariable *> GeneratedRefs; @@ -10181,8 +9883,6 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, for (auto *ref : GeneratedRefs) CGM.addCompilerUsedGlobal(ref); - - return; } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { @@ -10331,7 +10031,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { std::string ReqName = getName({"omp_offloading", "requires_reg"}); RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); - OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; // TODO: check for other requires clauses. // The requires directive takes effect only when a target region is // present in the compilation unit. 
Otherwise it is ignored and not @@ -10341,11 +10040,10 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); - if (HasRequiresUnifiedSharedMemory) - Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___tgt_register_requires), - llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + llvm::ConstantInt::get( + CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags())); CGF.FinishFunction(); } return RequiresRegFn; @@ -10405,6 +10103,24 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, PushNumTeamsArgs); } +void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF, + const Expr *ThreadLimit, + SourceLocation Loc) { + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadLimitVal = + ThreadLimit + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit) + llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc), + ThreadLimitVal}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_set_thread_limit), + ThreadLimitArgs); +} + void CGOpenMPRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, @@ -10417,11 +10133,6 @@ void CGOpenMPRuntime::emitTargetDataCalls( PrePostActionTy NoPrivAction; using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()); - InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), - CGF.Builder.GetInsertPoint()); - llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); llvm::Value *IfCondVal = 
nullptr; if (IfCond) @@ -10443,7 +10154,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( CGF.Builder.restoreIP(CodeGenIP); // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { return emitMappingInformation(CGF, OMPBuilder, MapExpr); @@ -10501,6 +10212,11 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); + llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); CGF.Builder.restoreIP(OMPBuilder.createTargetData( OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); @@ -10649,7 +10365,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. 
@@ -11327,8 +11043,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); - QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); + QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, + ArraySizeModifier::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -11380,7 +11096,7 @@ static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, nullptr, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 2ee2a39ba538..b01b39abd160 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -311,6 +311,14 @@ protected: /// An OpenMP-IR-Builder instance. llvm::OpenMPIRBuilder OMPBuilder; + /// Helper to determine the min/max number of threads/teams for \p D. + void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, + CodeGenFunction &CGF, + int32_t &MinThreadsVal, + int32_t &MaxThreadsVal, + int32_t &MinTeamsVal, + int32_t &MaxTeamsVal); + /// Helper to emit outlined function for 'target' directive. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. @@ -527,28 +535,6 @@ protected: /// Returns pointer to kmpc_micro type. 
llvm::Type *getKmpc_MicroPointerTy(); - /// Returns __kmpc_for_static_init_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. Will create a distribute call - /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set. - llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, - bool IVSigned, - bool IsGPUDistribute); - - /// Returns __kmpc_dispatch_init_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. - llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize, - bool IVSigned); - - /// Returns __kmpc_dispatch_next_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. - llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize, - bool IVSigned); - - /// Returns __kmpc_dispatch_fini_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. - llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize, - bool IVSigned); - /// If the specified mangled name is not in the module, create and /// return threadprivate cache object. This object is a pointer's worth of /// storage that's reserved for use by the OpenMP runtime. @@ -659,21 +645,23 @@ public: /// Otherwise, return nullptr. const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal); + int32_t &MinTeamsVal, + int32_t &MaxTeamsVal); llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D); - /// Emit the number of threads for a target directive. Inspect the - /// thread_limit clause associated with a teams construct combined or closely - /// nested with the target directive. - /// - /// Emit the num_threads clause for directives such as 'target parallel' that - /// have no associated teams construct. - /// - /// Otherwise, return nullptr. 
- const Expr * - getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - int32_t &DefaultVal); + + /// Check for a number of threads upper bound constant value (stored in \p + /// UpperBound), or expression (returned). If the value is conditional (via an + /// if-clause), store the condition in \p CondExpr. Similarly, a potential + /// thread limit expression is stored in \p ThreadLimitExpr. If \p + /// UpperBoundOnly is true, no expression evaluation is perfomed. + const Expr *getNumThreadsExprForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + int32_t &UpperBound, bool UpperBoundOnly, + llvm::Value **CondExpr = nullptr, const Expr **ThreadLimitExpr = nullptr); + + /// Emit an expression that denotes the number of threads a target region + /// shall use. Will generate "i32 0" to allow the runtime to choose. llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D); @@ -1101,13 +1089,12 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); - /// Emit a code for initialization of declare target variable. - /// \param VD Declare target variable. - /// \param Addr Address of the global variable \a VD. + /// Emit code for handling declare target functions in the runtime. + /// \param FD Declare target function. + /// \param Addr Address of the global \a FD. /// \param PerformInit true if initialization expression is not constant. - virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, - llvm::GlobalVariable *Addr, - bool PerformInit); + virtual void emitDeclareTargetFunction(const FunctionDecl *FD, + llvm::GlobalValue *GV); /// Creates artificial threadprivate variable with name \p Name and type \p /// VarType. 
@@ -1449,6 +1436,14 @@ public: virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc); + /// Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 thread_limit) to generate code for + /// thread_limit clause on target directive + /// \param ThreadLimit An integer expression of threads. + virtual void emitThreadLimitClause(CodeGenFunction &CGF, + const Expr *ThreadLimit, + SourceLocation Loc); + /// Struct that keeps all the relevant information that should be kept /// throughout a 'target data' region. class TargetDataInfo : public llvm::OpenMPIRBuilder::TargetDataInfo { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 62aacb9e24d6..293ccaa3413c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -85,18 +85,6 @@ public: ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; } }; -/// GPU Configuration: This information can be derived from cuda registers, -/// however, providing compile time constants helps generate more efficient -/// code. For all practical purposes this is fine because the configuration -/// is the same for all known NVPTX architectures. -enum MachineConfiguration : unsigned { - /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Slot_Size - - /// Global memory alignment for performance. 
- GlobalMemoryAlignment = 128, -}; - static const ValueDecl *getPrivateItem(const Expr *RefExpr) { RefExpr = RefExpr->IgnoreParens(); if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) { @@ -119,31 +107,23 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); } - static RecordDecl *buildRecordForGlobalizedVars( ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, ArrayRef<const ValueDecl *> EscapedDeclsForTeams, llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> - &MappedDeclsFields, int BufSize) { + &MappedDeclsFields, + int BufSize) { using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>; if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) return nullptr; SmallVector<VarsDataTy, 4> GlobalizedVars; for (const ValueDecl *D : EscapedDecls) - GlobalizedVars.emplace_back( - CharUnits::fromQuantity(std::max( - C.getDeclAlign(D).getQuantity(), - static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))), - D); + GlobalizedVars.emplace_back(C.getDeclAlign(D), D); for (const ValueDecl *D : EscapedDeclsForTeams) GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) { - return L.first > R.first; - }); // Build struct _globalized_locals_ty { - // /* globalized vars */[WarSize] align (max(decl_align, - // GlobalMemoryAlignment)) + // /* globalized vars */[WarSize] align (decl_align) // /* globalized vars */ for EscapedDeclsForTeams // }; RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); @@ -173,18 +153,18 @@ static RecordDecl *buildRecordForGlobalizedVars( Field->addAttr(*I); } } else { - llvm::APInt ArraySize(32, BufSize); - Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal, - 0); + if (BufSize > 1) { + llvm::APInt ArraySize(32, BufSize); + Type = C.getConstantArrayType(Type, ArraySize, nullptr, + ArraySizeModifier::Normal, 0); + } Field = 
FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, C.getTrivialTypeSourceInfo(Type, SourceLocation()), /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); Field->setAccess(AS_public); - llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(), - static_cast<CharUnits::QuantityType>( - GlobalMemoryAlignment))); + llvm::APInt Align(32, Pair.first.getQuantity()); Field->addAttr(AlignedAttr::CreateImplicit( C, /*IsAlignmentExpr=*/true, IntegerLiteral::Create(C, Align, @@ -551,10 +531,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const { return CurrentExecutionMode; } -static CGOpenMPRuntimeGPU::DataSharingMode -getDataSharingMode(CodeGenModule &CGM) { - return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA - : CGOpenMPRuntimeGPU::Generic; +CGOpenMPRuntimeGPU::DataSharingMode +CGOpenMPRuntimeGPU::getDataSharingMode() const { + return CurrentDataSharingMode; } /// Check for inner (nested) SPMD construct, if any @@ -752,27 +731,30 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, EntryFunctionState EST; WrapperFunctionsMap.clear(); + [[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>(); + assert(!IsBareKernel && "bare kernel should not be at generic mode"); + // Emit target region as a standalone region. 
class NVPTXPrePostActionTy : public PrePostActionTy { CGOpenMPRuntimeGPU::EntryFunctionState &EST; + const OMPExecutableDirective &D; public: - NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST) - : EST(EST) {} + NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST, + const OMPExecutableDirective &D) + : EST(EST), D(D) {} void Enter(CodeGenFunction &CGF) override { - auto &RT = - static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); - RT.emitKernelInit(CGF, EST, /* IsSPMD */ false); + auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); + RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ false); // Skip target region initialization. RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { - auto &RT = - static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); + auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); RT.clearLocThreadIdInsertPt(CGF); RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false); } - } Action(EST); + } Action(EST, D); CodeGen.setAction(Action); IsInTTDRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, @@ -780,10 +762,17 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, IsInTTDRegion = false; } -void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF, +void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, + CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { + int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1, + MaxTeamsVal = -1; + computeMinAndMaxThreadsAndTeams(D, CGF, MinThreadsVal, MaxThreadsVal, + MinTeamsVal, MaxTeamsVal); + CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD)); + Bld.restoreIP(OMPBuilder.createTargetInit( + Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal)); if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } @@ -794,8 +783,34 @@ void 
CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF, if (!IsSPMD) emitGenericVarsEpilog(CGF); + // This is temporary until we remove the fixed sized buffer. + ASTContext &C = CGM.getContext(); + RecordDecl *StaticRD = C.buildImplicitRecord( + "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union); + StaticRD->startDefinition(); + for (const RecordDecl *TeamReductionRec : TeamsReductions) { + QualType RecTy = C.getRecordType(TeamReductionRec); + auto *Field = FieldDecl::Create( + C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy, + C.getTrivialTypeSourceInfo(RecTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + StaticRD->addDecl(Field); + } + StaticRD->completeDefinition(); + QualType StaticTy = C.getRecordType(StaticRD); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + const auto &DL = CGM.getModule().getDataLayout(); + uint64_t ReductionDataSize = + TeamsReductions.empty() + ? 0 + : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue(); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD); + OMPBuilder.createTargetDeinit(Bld, ReductionDataSize, + C.getLangOpts().OpenMPCUDAReductionBufNum); + TeamsReductions.clear(); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -807,25 +822,40 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD); EntryFunctionState EST; + bool IsBareKernel = D.getSingleClause<OMPXBareClause>(); + // Emit target region as a standalone region. 
class NVPTXPrePostActionTy : public PrePostActionTy { CGOpenMPRuntimeGPU &RT; CGOpenMPRuntimeGPU::EntryFunctionState &EST; + bool IsBareKernel; + DataSharingMode Mode; + const OMPExecutableDirective &D; public: NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT, - CGOpenMPRuntimeGPU::EntryFunctionState &EST) - : RT(RT), EST(EST) {} + CGOpenMPRuntimeGPU::EntryFunctionState &EST, + bool IsBareKernel, const OMPExecutableDirective &D) + : RT(RT), EST(EST), IsBareKernel(IsBareKernel), + Mode(RT.CurrentDataSharingMode), D(D) {} void Enter(CodeGenFunction &CGF) override { - RT.emitKernelInit(CGF, EST, /* IsSPMD */ true); + if (IsBareKernel) { + RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA; + return; + } + RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ true); // Skip target region initialization. RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { + if (IsBareKernel) { + RT.CurrentDataSharingMode = Mode; + return; + } RT.clearLocThreadIdInsertPt(CGF); RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true); } - } Action(*this, EST); + } Action(*this, EST, IsBareKernel, D); CodeGen.setAction(Action); IsInTTDRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, @@ -833,24 +863,6 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, IsInTTDRegion = false; } -// Create a unique global variable to indicate the execution mode of this target -// region. The execution mode is either 'generic', or 'spmd' depending on the -// target directive. This variable is picked up by the offload library to setup -// the device appropriately before kernel launch. If the execution mode is -// 'generic', the runtime reserves one warp for the master, otherwise, all -// warps participate in parallel work. 
-static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, - bool Mode) { - auto *GVMode = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD - : OMP_TGT_EXEC_MODE_GENERIC), - Twine(Name, "_exec_mode")); - GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility); - CGM.addCompilerUsedGlobal(GVMode); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -861,26 +873,30 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); - if (Mode) + bool IsBareKernel = D.getSingleClause<OMPXBareClause>(); + if (Mode || IsBareKernel) emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - - setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) { - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, - isGPU(), hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(), + CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsTargetDevice) llvm_unreachable("OpenMP can only handle device code."); + if (CGM.getLangOpts().OpenMPCUDAMode) + CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA; + llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); if 
(CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty()) return; @@ -900,11 +916,7 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, ProcBindKind ProcBind, SourceLocation Loc) { - // Do nothing in case of SPMD mode and L0 parallel. - if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) - return; - - CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc); + // Nothing to do. } void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF, @@ -1046,10 +1058,8 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( } void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, - SourceLocation Loc, - bool WithSPMDCheck) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic && - getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) + SourceLocation Loc) { + if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic) return; CGBuilderTy &Bld = CGF.Builder; @@ -1158,10 +1168,8 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared( {AddrSizePair.first, AddrSizePair.second}); } -void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, - bool WithSPMDCheck) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic && - getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) +void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) { + if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic) return; const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); @@ -1196,11 +1204,18 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; + bool IsBareKernel = D.getSingleClause<OMPXBareClause>(); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); + // We don't emit any 
thread id function call in bare kernel, but because the + // outlined function has a pointer argument, we emit a nullptr here. + if (IsBareKernel) + OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); + else + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -1405,9 +1420,7 @@ static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val, return CGF.Builder.CreateIntCast(Val, LLVMCastTy, CastTy->hasSignedIntegerRepresentation()); Address CastItem = CGF.CreateMemTemp(CastTy); - Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()), - Val->getType()); + Address ValCastItem = CastItem.withElementType(Val->getType()); CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy, LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()); @@ -1543,11 +1556,6 @@ enum CopyAction : unsigned { RemoteLaneToThread, // ThreadCopy: Make a copy of a Reduce list on the thread's stack. ThreadCopy, - // ThreadToScratchpad: Copy a team-reduced array to the scratchpad. - ThreadToScratchpad, - // ScratchpadToThread: Copy from a scratchpad array in global memory - // containing team-reduced data to a thread's stack. - ScratchpadToThread, }; } // namespace @@ -1569,13 +1577,10 @@ static void emitReductionListCopy( CGBuilderTy &Bld = CGF.Builder; llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; - llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex; - llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth; // Iterates, element-by-element, through the source Reduce list and // make a copy. 
unsigned Idx = 0; - unsigned Size = Privates.size(); for (const Expr *Private : Privates) { Address SrcElementAddr = Address::invalid(); Address DestElementAddr = Address::invalid(); @@ -1585,10 +1590,6 @@ static void emitReductionListCopy( // Set to true to update the pointer in the dest Reduce list to a // newly created element. bool UpdateDestListPtr = false; - // Increment the src or dest pointer to the scratchpad, for each - // new element. - bool IncrScratchpadSrc = false; - bool IncrScratchpadDest = false; QualType PrivatePtrType = C.getPointerType(Private->getType()); llvm::Type *PrivateLlvmPtrType = CGF.ConvertType(PrivatePtrType); @@ -1624,49 +1625,6 @@ static void emitReductionListCopy( PrivatePtrType->castAs<PointerType>()); break; } - case ThreadToScratchpad: { - // Step 1.1: Get the address for the src element in the Reduce list. - Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = CGF.EmitLoadOfPointer( - SrcElementPtrAddr.withElementType(PrivateLlvmPtrType), - PrivatePtrType->castAs<PointerType>()); - - // Step 1.2: Get the address for dest element: - // address = base + index * ElementSizeInChars. - llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); - llvm::Value *CurrentOffset = - Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); - llvm::Value *ScratchPadElemAbsolutePtrVal = - Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset); - ScratchPadElemAbsolutePtrVal = - Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); - DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty, - C.getTypeAlignInChars(Private->getType())); - IncrScratchpadDest = true; - break; - } - case ScratchpadToThread: { - // Step 1.1: Get the address for the src element in the scratchpad. - // address = base + index * ElementSizeInChars. 
- llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); - llvm::Value *CurrentOffset = - Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); - llvm::Value *ScratchPadElemAbsolutePtrVal = - Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset); - ScratchPadElemAbsolutePtrVal = - Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); - SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty, - C.getTypeAlignInChars(Private->getType())); - IncrScratchpadSrc = true; - - // Step 1.2: Create a temporary to store the element in the destination - // Reduce list. - DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); - DestElementAddr = - CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); - UpdateDestListPtr = true; - break; - } } // Regardless of src and dest of copy, we emit the load of src @@ -1724,39 +1682,6 @@ static void emitReductionListCopy( C.VoidPtrTy); } - // Step 4.1: Increment SrcBase/DestBase so that it points to the starting - // address of the next element in scratchpad memory, unless we're currently - // processing the last one. Memory alignment is also taken care of here. - if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { - // FIXME: This code doesn't make any sense, it's trying to perform - // integer arithmetic on pointers. - llvm::Value *ScratchpadBasePtr = - IncrScratchpadDest ? 
DestBase.getPointer() : SrcBase.getPointer(); - llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); - ScratchpadBasePtr = Bld.CreateNUWAdd( - ScratchpadBasePtr, - Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars)); - - // Take care of global memory alignment for performance - ScratchpadBasePtr = Bld.CreateNUWSub( - ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateUDiv( - ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); - ScratchpadBasePtr = Bld.CreateNUWAdd( - ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateNUWMul( - ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); - - if (IncrScratchpadDest) - DestBase = - Address(ScratchpadBasePtr, CGF.VoidPtrTy, CGF.getPointerAlign()); - else /* IncrScratchpadSrc = true */ - SrcBase = - Address(ScratchpadBasePtr, CGF.VoidPtrTy, CGF.getPointerAlign()); - } - ++Idx; } } @@ -1784,12 +1709,12 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // At the stage of the computation when this function is called, partially // aggregated values reside in the first lane of every active warp. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // NumWarps: number of warps active in the parallel region. This could // be smaller than 32 (max warps in a CTA) for partial block reduction. ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getIntTypeForBitwidth(32, /* Signed */ true), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&NumWarpsArg); @@ -1914,12 +1839,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); // Casting to actual data type. 
// MediumPtr = (CopyType*)MediumPtrAddr; - Address MediumPtr( - Bld.CreateBitCast( - MediumPtrVal, - CopyType->getPointerTo( - MediumPtrVal->getType()->getPointerAddressSpace())), - CopyType, Align); + Address MediumPtr(MediumPtrVal, CopyType, Align); // elem = *elemptr //*MediumPtr = elem @@ -1966,12 +1886,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, TransferMedium->getValueType(), TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); // SrcMediumVal = *SrcMediumPtr; - Address SrcMediumPtr( - Bld.CreateBitCast( - SrcMediumPtrVal, - CopyType->getPointerTo( - SrcMediumPtrVal->getType()->getPointerAddressSpace())), - CopyType, Align); + Address SrcMediumPtr(SrcMediumPtrVal, CopyType, Align); // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); @@ -2082,16 +1997,16 @@ static llvm::Function *emitShuffleAndReduceFunction( // Thread local Reduce list used to host the values of data to be reduced. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // Current lane id; could be logical. ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // Offset of the remote source lane relative to the current lane. ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.ShortTy, ImplicitParamDecl::Other); + C.ShortTy, ImplicitParamKind::Other); // Algorithm version. This is expected to be known at compile time. ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.ShortTy, ImplicitParamDecl::Other); + C.ShortTy, ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&LaneIDArg); @@ -2243,13 +2158,13 @@ static llvm::Value *emitListToGlobalCopyFunction( // Buffer: global reduction buffer. 
ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // Idx: index of the buffer. ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // ReduceList: thread local Reduce list. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&BufferArg); Args.push_back(&IdxArg); @@ -2282,8 +2197,7 @@ static llvm::Value *emitListToGlobalCopyFunction( llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), LLVMReductionsBufferTy->getPointerTo()); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2301,12 +2215,12 @@ static llvm::Value *emitListToGlobalCopyFunction( const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); // Global = Buffer.VD[Idx]; const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(), - GlobAddr.getPointer(), Idxs); - GlobLVal.setAddress(Address(BufferPtr, + GlobLVal.setAddress(Address(GlobAddr.getPointer(), CGF.ConvertTypeForMem(Private->getType()), GlobAddr.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { @@ -2356,13 +2270,13 @@ static llvm::Value 
*emitListToGlobalReduceFunction( // Buffer: global reduction buffer. ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // Idx: index of the buffer. ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // ReduceList: thread local Reduce list. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&BufferArg); Args.push_back(&IdxArg); @@ -2393,8 +2307,7 @@ static llvm::Value *emitListToGlobalReduceFunction( Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2403,12 +2316,13 @@ static llvm::Value *emitListToGlobalReduceFunction( // Global = Buffer.VD[Idx]; const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( - GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false, + C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
++Idx; @@ -2450,13 +2364,13 @@ static llvm::Value *emitGlobalToListCopyFunction( // Buffer: global reduction buffer. ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // Idx: index of the buffer. ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // ReduceList: thread local Reduce list. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&BufferArg); Args.push_back(&IdxArg); @@ -2490,8 +2404,7 @@ static llvm::Value *emitGlobalToListCopyFunction( CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), LLVMReductionsBufferTy->getPointerTo()); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2509,12 +2422,12 @@ static llvm::Value *emitGlobalToListCopyFunction( const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); // Global = Buffer.VD[Idx]; const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(), - GlobAddr.getPointer(), Idxs); - GlobLVal.setAddress(Address(BufferPtr, + GlobLVal.setAddress(Address(GlobAddr.getPointer(), CGF.ConvertTypeForMem(Private->getType()), GlobAddr.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { @@ -2564,13 
+2477,13 @@ static llvm::Value *emitGlobalToListReduceFunction( // Buffer: global reduction buffer. ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); // Idx: index of the buffer. ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // ReduceList: thread local Reduce list. ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&BufferArg); Args.push_back(&IdxArg); @@ -2601,8 +2514,7 @@ static llvm::Value *emitGlobalToListReduceFunction( Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); - llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), - CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), /*Volatile=*/false, C.IntTy, Loc)}; unsigned Idx = 0; @@ -2611,12 +2523,13 @@ static llvm::Value *emitGlobalToListReduceFunction( // Global = Buffer.VD[Idx]; const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); const FieldDecl *FD = VarFieldMap.lookup(VD); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs); LValue GlobLVal = CGF.EmitLValueForField( - CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD); Address GlobAddr = GlobLVal.getAddress(CGF); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( - GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false, + C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
++Idx; @@ -2907,15 +2820,25 @@ void CGOpenMPRuntimeGPU::emitReduction( assert((TeamsReduction || ParallelReduction) && "Invalid reduction selection in emitReduction."); + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap; + llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size()); + int Cnt = 0; + for (const Expr *DRE : Privates) { + PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl(); + ++Cnt; + } + + ASTContext &C = CGM.getContext(); + const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars( + CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, 1); + // Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), // RedList, shuffle_reduce_func, interwarp_copy_func); // or // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>); llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::Value *Res; - ASTContext &C = CGM.getContext(); // 1. Build a list of reduction variables. 
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); @@ -2925,9 +2848,9 @@ void CGOpenMPRuntimeGPU::emitReduction( ++Size; } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType ReductionArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); @@ -2957,19 +2880,17 @@ void CGOpenMPRuntimeGPU::emitReduction( llvm::Function *ReductionFn = emitReductionFunction( CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), Privates, LHSExprs, RHSExprs, ReductionOps); - llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *ReductionDataSize = + CGF.getTypeSize(C.getRecordType(ReductionRec)); + ReductionDataSize = + CGF.Builder.CreateSExtOrTrunc(ReductionDataSize, CGF.Int64Ty); llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); llvm::Value *InterWarpCopyFn = emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); if (ParallelReduction) { - llvm::Value *Args[] = {RTLoc, - ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, + llvm::Value *Args[] = {RTLoc, ReductionDataSize, RL, ShuffleAndReduceFn, InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( @@ -2978,42 +2899,27 @@ void CGOpenMPRuntimeGPU::emitReduction( Args); } else { assert(TeamsReduction && "expected teams reduction."); - llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap; - llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size()); - int Cnt = 0; - for (const Expr *DRE : Privates) { - PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl(); 
- ++Cnt; - } - const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( - CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, - C.getLangOpts().OpenMPCUDAReductionBufNum); - TeamsReductions.push_back(TeamReductionRec); - if (!KernelTeamsReductionPtr) { - KernelTeamsReductionPtr = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, nullptr, - "_openmp_teams_reductions_buffer_$_$ptr"); - } - llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar( - Address(KernelTeamsReductionPtr, CGF.VoidPtrTy, CGM.getPointerAlign()), - /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + TeamsReductions.push_back(ReductionRec); + auto *KernelTeamsReductionPtr = CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_reduction_get_fixed_buffer), + {}, "_openmp_teams_reductions_buffer_$_$ptr"); llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction( - CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap); llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction( - CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap, ReductionFn); llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction( - CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap); llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction( - CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap, ReductionFn); llvm::Value *Args[] = { RTLoc, - ThreadId, - GlobalBufferPtr, + KernelTeamsReductionPtr, CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum), + ReductionDataSize, RL, ShuffleAndReduceFn, InterWarpCopyFn, @@ 
-3055,14 +2961,7 @@ void CGOpenMPRuntimeGPU::emitReduction( ++IRHS; } }; - llvm::Value *EndArgs[] = {ThreadId}; RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, std::nullopt, - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), - EndArgs); - RCG.setAction(Action); RCG(CGF); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -3092,7 +2991,7 @@ CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, if (isa<ImplicitParamDecl>(NativeParam)) return ImplicitParamDecl::Create( CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), - NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); + NativeParam->getIdentifier(), ArgType, ImplicitParamKind::Other); return ParmVarDecl::Create( CGM.getContext(), const_cast<DeclContext *>(NativeParam->getDeclContext()), @@ -3118,11 +3017,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF, QualType TargetTy = TargetParam->getType(); llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); - // First cast to generic. - TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TargetAddr, - llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace=*/0)); - // Cast from generic to native address space. + // Cast to native address space. 
TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TargetAddr, llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace)); @@ -3149,11 +3044,8 @@ void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall( TargetArgs.emplace_back(NativeArg); continue; } - llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - NativeArg, - llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace*/ 0)); TargetArgs.emplace_back( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NativeArg, TargetType)); } CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs); } @@ -3175,10 +3067,10 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false); ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(), /*Id=*/nullptr, Int16QTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(), /*Id=*/nullptr, Int32QTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); WrapperArgs.emplace_back(&ParallelLevelArg); WrapperArgs.emplace_back(&WrapperArg); @@ -3291,7 +3183,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic) + if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic) return; assert(D && "Expected function or captured|block decl."); @@ -3343,13 +3235,13 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, Data.insert(std::make_pair(VD, MappedVarData())); } if (!NeedToDelayGlobalization) { - emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); + emitGenericVarsProlog(CGF, D->getBeginLoc()); struct GlobalizationScope final : EHScopeStack::Cleanup { GlobalizationScope() = default; void 
Emit(CodeGenFunction &CGF, Flags flags) override { static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true); + .emitGenericVarsEpilog(CGF); } }; CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup); @@ -3400,7 +3292,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF, VarTy, Align); } - if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic) + if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic) return Address::invalid(); VD = VD->getCanonicalDecl(); @@ -3633,6 +3525,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX1103: case CudaArch::GFX1150: case CudaArch::GFX1151: + case CudaArch::GFX1200: + case CudaArch::GFX1201: case CudaArch::Generic: case CudaArch::UNUSED: case CudaArch::UNKNOWN: @@ -3645,42 +3539,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( CGOpenMPRuntime::processRequiresDirective(D); } -void CGOpenMPRuntimeGPU::clear() { - - if (!TeamsReductions.empty()) { - ASTContext &C = CGM.getContext(); - RecordDecl *StaticRD = C.buildImplicitRecord( - "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union); - StaticRD->startDefinition(); - for (const RecordDecl *TeamReductionRec : TeamsReductions) { - QualType RecTy = C.getRecordType(TeamReductionRec); - auto *Field = FieldDecl::Create( - C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy, - C.getTrivialTypeSourceInfo(RecTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - StaticRD->addDecl(Field); - } - StaticRD->completeDefinition(); - QualType StaticTy = C.getRecordType(StaticRD); - llvm::Type *LLVMReductionsBufferTy = - CGM.getTypes().ConvertTypeForMem(StaticTy); - // FIXME: nvlink does not handle weak linkage correctly (object with the - // different size are reported as erroneous). - // Restore CommonLinkage as soon as nvlink is fixed. 
- auto *GV = new llvm::GlobalVariable( - CGM.getModule(), LLVMReductionsBufferTy, - /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, - llvm::Constant::getNullValue(LLVMReductionsBufferTy), - "_openmp_teams_reductions_buffer_$_"); - KernelTeamsReductionPtr->setInitializer( - llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, - CGM.VoidPtrTy)); - } - CGOpenMPRuntime::clear(); -} - llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index dddfe5a94dcc..141436f26230 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -32,6 +32,18 @@ public: /// Unknown execution mode (orphaned directive). EM_Unknown, }; + + /// Target codegen is specialized based on two data-sharing modes: CUDA, in + /// which the local variables are actually global threadlocal, and Generic, in + /// which the local variables are placed in global memory if they may escape + /// their declaration context. + enum DataSharingMode { + /// CUDA data sharing mode. + DS_CUDA, + /// Generic data-sharing mode. + DS_Generic, + }; + private: /// Parallel outlined function work for workers to execute. llvm::SmallVector<llvm::Function *, 16> Work; @@ -42,23 +54,24 @@ private: ExecutionMode getExecutionMode() const; + DataSharingMode getDataSharingMode() const; + /// Get barrier to synchronize all threads in a block. void syncCTAThreads(CodeGenFunction &CGF); /// Helper for target directive initialization. - void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST, - bool IsSPMD); + void emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, + EntryFunctionState &EST, bool IsSPMD); /// Helper for target directive finalization. 
void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD); /// Helper for generic variables globalization prolog. - void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc, - bool WithSPMDCheck = false); + void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc); /// Helper for generic variables globalization epilog. - void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false); + void emitGenericVarsEpilog(CodeGenFunction &CGF); // // Base class overrides. @@ -117,7 +130,6 @@ protected: public: explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); - void clear() override; bool isGPU() const override { return true; }; @@ -297,17 +309,6 @@ public: Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override; - /// Target codegen is specialized based on two data-sharing modes: CUDA, in - /// which the local variables are actually global threadlocal, and Generic, in - /// which the local variables are placed in global memory if they may escape - /// their declaration context. - enum DataSharingMode { - /// CUDA data sharing mode. - CUDA, - /// Generic data-sharing mode. - Generic, - }; - /// Cleans up references to the objects in finished function. /// void functionFinished(CodeGenFunction &CGF) override; @@ -343,6 +344,10 @@ private: /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; + /// Track the data sharing mode when codegening directives within a target + /// region. + DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic; + /// true if currently emitting code for target/teams/distribute region, false /// - otherwise. bool IsInTTDRegion = false; @@ -380,7 +385,6 @@ private: /// Maps the function to the list of the globalized variables with their /// addresses. 
llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; - llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr; /// List of the records with the list of fields for the reductions across the /// teams. Used to build the intermediate buffer for the fast teams /// reductions. diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 888b7ddcccd3..cbfa79e10bfe 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -658,12 +658,13 @@ void CGRecordLowering::computeVolatileBitfields() { void CGRecordLowering::accumulateVPtrs() { if (Layout.hasOwnVFPtr()) - Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, - llvm::FunctionType::get(getIntNType(32), /*isVarArg=*/true)-> - getPointerTo()->getPointerTo())); + Members.push_back( + MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, + llvm::PointerType::getUnqual(Types.getLLVMContext()))); if (Layout.hasOwnVBPtr()) - Members.push_back(MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr, - llvm::Type::getInt32PtrTy(Types.getLLVMContext()))); + Members.push_back( + MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr, + llvm::PointerType::getUnqual(Types.getLLVMContext()))); } void CGRecordLowering::accumulateVBases() { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 2184b8600d76..a5cb80640641 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -407,8 +407,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { EmitOMPInteropDirective(cast<OMPInteropDirective>(*S)); break; case Stmt::OMPDispatchDirectiveClass: - llvm_unreachable("Dispatch directive not supported yet."); + CGM.ErrorUnsupported(S, "OpenMP dispatch directive"); break; + case Stmt::OMPScopeDirectiveClass: + llvm_unreachable("scope not supported with FE outlining"); case Stmt::OMPMaskedDirectiveClass: 
EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S)); break; @@ -1297,8 +1299,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr); assert(ReturnLocation.isValid() && "No valid return location"); - Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy), - ReturnLocation); + Builder.CreateStore(SLocPtr, ReturnLocation); } // Returning from an outlined SEH helper is UB, and we already warn on it. @@ -2418,6 +2419,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, } } +static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF, + const AsmStmt &S) { + constexpr auto Name = "__ASM__hipstdpar_unsupported"; + + StringRef Asm; + if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S)) + Asm = GCCAsm->getAsmString()->getString(); + + auto &Ctx = CGF->CGM.getLLVMContext(); + + auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm); + auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), + {StrTy->getType()}, false); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + CGF->Builder.CreateCall(UBF, {StrTy}); +} + void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Pop all cleanup blocks at the end of the asm statement. 
CodeGenFunction::RunCleanupsScope Cleanups(*this); @@ -2429,27 +2448,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos; SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos; - for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) { + bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; + bool IsValidTargetAsm = true; + for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S)) Name = GAS->getOutputName(i); TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name); bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid; - assert(IsValid && "Failed to parse output constraint"); + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse output constraint"); OutputConstraintInfos.push_back(Info); } - for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) { + for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) { StringRef Name; if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S)) Name = GAS->getInputName(i); TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name); bool IsValid = getTarget().validateInputConstraint(OutputConstraintInfos, Info); - assert(IsValid && "Failed to parse input constraint"); (void)IsValid; + if (IsHipStdPar && !IsValid) + IsValidTargetAsm = false; + else + assert(IsValid && "Failed to parse input constraint"); InputConstraintInfos.push_back(Info); } + if (!IsValidTargetAsm) + return EmitHipStdParUnsupportedAsm(this, S); + std::string Constraints; std::vector<LValue> ResultRegDests; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4910ff6865e4..ed426098ac69 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -485,7 +485,7 @@ static llvm::Function 
*emitOutlinedFunctionPrologue( if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) { Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, ArgType, - ImplicitParamDecl::ThreadPrivateVar); + ImplicitParamKind::ThreadPrivateVar); } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) { Arg = ParmVarDecl::Create( Ctx, DebugFunctionDecl, @@ -494,7 +494,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); } else { Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), - II, ArgType, ImplicitParamDecl::Other); + II, ArgType, ImplicitParamKind::Other); } Args.emplace_back(Arg); // Do not cast arguments if we emit function with non-original types. @@ -667,11 +667,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, I->second.first ? I->second.first->getType() : Arg->getType(), AlignmentSource::Decl); if (LV.getType()->isAnyComplexType()) - LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - LV.getAddress(WrapperCGF), - PI->getType()->getPointerTo( - LV.getAddress(WrapperCGF).getAddressSpace()), - PI->getType())); + LV.setAddress(LV.getAddress(WrapperCGF).withElementType(PI->getType())); CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } else { auto EI = VLASizes.find(Arg); @@ -2562,9 +2558,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, (void)CGF.EmitOMPLinearClauseInit(S); { CodeGenFunction::OMPPrivateScope LoopScope(CGF); + CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPPrivateLoopCounters(S, LoopScope); CGF.EmitOMPLinearClause(S, LoopScope); - CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( CGF, S, CGF.EmitLValue(S.getIterationVariable())); @@ -4828,8 +4824,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } auto *CopyFnTy = 
llvm::FunctionType::get(CGF.Builder.getVoidTy(), ParamTypes, /*isVarArg=*/false); - CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CopyFn, CopyFnTy->getPointerTo()); CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { @@ -4991,18 +4985,18 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, QualType Ty, CapturedDecl *CD, SourceLocation Loc) { auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); auto *OrigRef = DeclRefExpr::Create( C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); auto *PrivateRef = DeclRefExpr::Create( C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); QualType ElemType = C.getBaseElementType(Ty); auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); auto *InitRef = DeclRefExpr::Create( C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); @@ -5062,7 +5056,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( - getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, + getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); BPVD = createImplicitFirstprivateForType( getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); @@ -5070,7 +5064,7 @@ 
void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); QualType SizesType = getContext().getConstantArrayType( getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), - ArrSize, nullptr, ArrayType::Normal, + ArrSize, nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, S.getBeginLoc()); @@ -5115,8 +5109,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), ParamTypes, /*isVarArg=*/false); - CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CopyFn, CopyFnTy->getPointerTo()); CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : PrivatePtrs) { @@ -5143,6 +5135,15 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( Action.Enter(CGF); OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); + auto *TL = S.getSingleClause<OMPThreadLimitClause>(); + if (CGF.CGM.getLangOpts().OpenMP >= 51 && + needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) { + // Emit __kmpc_set_thread_limit() to set the thread_limit for the task + // enclosing this target region. This will indirectly set the thread_limit + // for every applicable construct within target region. 
+ CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( + CGF, TL->getThreadLimit(), S.getBeginLoc()); + } BodyGen(CGF); }; llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( @@ -6205,7 +6206,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, X.getAddress(CGF).getElementType()); } llvm::Value *Res = - CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO); + CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } @@ -6507,6 +6508,10 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, IsPostfixUpdate, IsFailOnly, Loc); break; } + case OMPC_fail: { + //TODO + break; + } default: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } @@ -8055,7 +8060,8 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( D.getDirectiveKind() == OMPD_critical || D.getDirectiveKind() == OMPD_section || D.getDirectiveKind() == OMPD_master || - D.getDirectiveKind() == OMPD_masked) { + D.getDirectiveKind() == OMPD_masked || + D.getDirectiveKind() == OMPD_unroll) { EmitStmt(D.getAssociatedStmt()); } else { auto LPCRegion = diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp index 22790147c6f5..1d3f14f1c534 100644 --- a/clang/lib/CodeGen/CGVTT.cpp +++ b/clang/lib/CodeGen/CGVTT.cpp @@ -93,6 +93,11 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, if (CGM.supportsCOMDAT() && VTT->isWeakForLinker()) VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName())); + + // Set the visibility. This will already have been set on the VTT declaration. + // Set it again, now that we have a definition, as the implicit visibility can + // apply differently to definitions. 
+ CGM.setGVProperties(VTT, RD); } llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 91dd7a8e046b..27a2cab4f753 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> #include <cstdio> +#include <utility> using namespace clang; using namespace CodeGen; @@ -201,7 +202,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, // Find the first store of "this", which will be to the alloca associated // with "this". Address ThisPtr = - Address(&*AI, ConvertTypeForMem(MD->getThisType()->getPointeeType()), + Address(&*AI, ConvertTypeForMem(MD->getFunctionObjectParameterType()), CGM.getClassPointerAlignment(MD->getParent())); llvm::BasicBlock *EntryBB = &Fn->front(); llvm::BasicBlock::iterator ThisStore = @@ -464,10 +465,6 @@ void CodeGenFunction::generateThunk(llvm::Function *Fn, llvm::Constant *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); - // Fix up the function type for an unprototyped musttail call. - if (IsUnprototyped) - Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType()); - // Make the call and return the result. EmitCallAndReturnForThunk(llvm::FunctionCallee(Fn->getFunctionType(), Callee), &Thunk, IsUnprototyped); @@ -536,11 +533,8 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD, Name.str(), &CGM.getModule()); CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn, /*IsThunk=*/false); - // If needed, replace the old thunk with a bitcast. if (!OldThunkFn->use_empty()) { - llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(ThunkFn, OldThunkFn->getType()); - OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl); + OldThunkFn->replaceAllUsesWith(ThunkFn); } // Remove the old thunk. 
@@ -639,8 +633,16 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder, // want the stub/proxy to be emitted for properly calculating the offset. // Examples where there would be no symbol emitted are available_externally // and private linkages. - auto stubLinkage = vtableHasLocalLinkage ? llvm::GlobalValue::InternalLinkage - : llvm::GlobalValue::ExternalLinkage; + // + // `internal` linkage results in STB_LOCAL Elf binding while still manifesting a + // local symbol. + // + // `linkonce_odr` linkage results in a STB_DEFAULT Elf binding but also allows for + // the rtti_proxy to be transparently replaced with a GOTPCREL reloc by a + // target that supports this replacement. + auto stubLinkage = vtableHasLocalLinkage + ? llvm::GlobalValue::InternalLinkage + : llvm::GlobalValue::LinkOnceODRLinkage; llvm::Constant *target; if (auto *func = dyn_cast<llvm::Function>(globalVal)) { @@ -1303,48 +1305,42 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - if (!getCodeGenOpts().LTOUnit) + // Emit type metadata on vtables with LTO or IR instrumentation. + // In IR instrumentation, the type metadata is used to find out vtable + // definitions (for type profiling) among all global variables. 
+ if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); - typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint; + struct AddressPoint { + const CXXRecordDecl *Base; + size_t Offset; + std::string TypeName; + bool operator<(const AddressPoint &RHS) const { + int D = TypeName.compare(RHS.TypeName); + return D < 0 || (D == 0 && Offset < RHS.Offset); + } + }; std::vector<AddressPoint> AddressPoints; - for (auto &&AP : VTLayout.getAddressPoints()) - AddressPoints.push_back(std::make_pair( - AP.first.getBase(), VTLayout.getVTableOffset(AP.second.VTableIndex) + - AP.second.AddressPointIndex)); + for (auto &&AP : VTLayout.getAddressPoints()) { + AddressPoint N{AP.first.getBase(), + VTLayout.getVTableOffset(AP.second.VTableIndex) + + AP.second.AddressPointIndex, + {}}; + llvm::raw_string_ostream Stream(N.TypeName); + getCXXABI().getMangleContext().mangleCanonicalTypeName( + QualType(N.Base->getTypeForDecl(), 0), Stream); + AddressPoints.push_back(std::move(N)); + } // Sort the address points for determinism. - llvm::sort(AddressPoints, [this](const AddressPoint &AP1, - const AddressPoint &AP2) { - if (&AP1 == &AP2) - return false; - - std::string S1; - llvm::raw_string_ostream O1(S1); - getCXXABI().getMangleContext().mangleTypeName( - QualType(AP1.first->getTypeForDecl(), 0), O1); - O1.flush(); - - std::string S2; - llvm::raw_string_ostream O2(S2); - getCXXABI().getMangleContext().mangleTypeName( - QualType(AP2.first->getTypeForDecl(), 0), O2); - O2.flush(); - - if (S1 < S2) - return true; - if (S1 != S2) - return false; - - return AP1.second < AP2.second; - }); + llvm::sort(AddressPoints); ArrayRef<VTableComponent> Comps = VTLayout.vtable_components(); for (auto AP : AddressPoints) { // Create type metadata for the address point. 
- AddVTableTypeMetadata(VTable, ComponentWidth * AP.second, AP.first); + AddVTableTypeMetadata(VTable, ComponentWidth * AP.Offset, AP.Base); // The class associated with each address point could also potentially be // used for indirect calls via a member function pointer, so we need to @@ -1356,7 +1352,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::Metadata *MD = CreateMetadataIdentifierForVirtualMemPtrType( Context.getMemberPointerType( Comps[I].getFunctionDecl()->getType(), - Context.getRecordType(AP.first).getTypePtr())); + Context.getRecordType(AP.Base).getTypePtr())); VTable->addTypeMetadata((ComponentWidth * I).getQuantity(), MD); } } diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp index d3a16a1d5acc..a6073e1188d6 100644 --- a/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -65,9 +65,8 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, RequiredArgs args) { - return CGM.getTypes().arrangeLLVMFunctionInfo( - returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes, - info, {}, args); + return CGM.getTypes().arrangeLLVMFunctionInfo(returnType, FnInfoOpts::None, + argTypes, info, {}, args); } ImplicitCXXConstructorArgs diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index a3b72381d73f..bb6b1a3bc228 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenAction.h" +#include "BackendConsumer.h" #include "CGCall.h" #include "CodeGenModule.h" #include "CoverageMappingGen.h" @@ -48,428 +49,374 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Utils/Cloning.h" -#include <memory> 
#include <optional> using namespace clang; using namespace llvm; #define DEBUG_TYPE "codegenaction" -namespace clang { - class BackendConsumer; - class ClangDiagnosticHandler final : public DiagnosticHandler { - public: - ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon) - : CodeGenOpts(CGOpts), BackendCon(BCon) {} - - bool handleDiagnostics(const DiagnosticInfo &DI) override; - - bool isAnalysisRemarkEnabled(StringRef PassName) const override { - return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName); - } - bool isMissedOptRemarkEnabled(StringRef PassName) const override { - return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName); - } - bool isPassedOptRemarkEnabled(StringRef PassName) const override { - return CodeGenOpts.OptimizationRemark.patternMatches(PassName); - } - - bool isAnyRemarkEnabled() const override { - return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() || - CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() || - CodeGenOpts.OptimizationRemark.hasValidPattern(); - } +namespace llvm { +extern cl::opt<bool> ClRelinkBuiltinBitcodePostop; +} - private: - const CodeGenOptions &CodeGenOpts; - BackendConsumer *BackendCon; - }; +namespace clang { +class BackendConsumer; +class ClangDiagnosticHandler final : public DiagnosticHandler { +public: + ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon) + : CodeGenOpts(CGOpts), BackendCon(BCon) {} - static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, - const CodeGenOptions &CodeGenOpts) { - handleAllErrors( - std::move(E), - [&](const LLVMRemarkSetupFileError &E) { - Diags.Report(diag::err_cannot_open_file) - << CodeGenOpts.OptRecordFile << E.message(); - }, - [&](const LLVMRemarkSetupPatternError &E) { - Diags.Report(diag::err_drv_optimization_remark_pattern) - << E.message() << CodeGenOpts.OptRecordPasses; - }, - [&](const LLVMRemarkSetupFormatError &E) { - 
Diags.Report(diag::err_drv_optimization_remark_format) - << CodeGenOpts.OptRecordFormat; - }); - } + bool handleDiagnostics(const DiagnosticInfo &DI) override; - class BackendConsumer : public ASTConsumer { - using LinkModule = CodeGenAction::LinkModule; + bool isAnalysisRemarkEnabled(StringRef PassName) const override { + return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName); + } + bool isMissedOptRemarkEnabled(StringRef PassName) const override { + return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName); + } + bool isPassedOptRemarkEnabled(StringRef PassName) const override { + return CodeGenOpts.OptimizationRemark.patternMatches(PassName); + } - virtual void anchor(); - DiagnosticsEngine &Diags; - BackendAction Action; - const HeaderSearchOptions &HeaderSearchOpts; - const CodeGenOptions &CodeGenOpts; - const TargetOptions &TargetOpts; - const LangOptions &LangOpts; - std::unique_ptr<raw_pwrite_stream> AsmOutStream; - ASTContext *Context; - IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; + bool isAnyRemarkEnabled() const override { + return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() || + CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() || + CodeGenOpts.OptimizationRemark.hasValidPattern(); + } - Timer LLVMIRGeneration; - unsigned LLVMIRGenerationRefCount; +private: + const CodeGenOptions &CodeGenOpts; + BackendConsumer *BackendCon; +}; - /// True if we've finished generating IR. This prevents us from generating - /// additional LLVM IR after emitting output in HandleTranslationUnit. This - /// can happen when Clang plugins trigger additional AST deserialization. 
- bool IRGenFinished = false; +static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, + const CodeGenOptions &CodeGenOpts) { + handleAllErrors( + std::move(E), + [&](const LLVMRemarkSetupFileError &E) { + Diags.Report(diag::err_cannot_open_file) + << CodeGenOpts.OptRecordFile << E.message(); + }, + [&](const LLVMRemarkSetupPatternError &E) { + Diags.Report(diag::err_drv_optimization_remark_pattern) + << E.message() << CodeGenOpts.OptRecordPasses; + }, + [&](const LLVMRemarkSetupFormatError &E) { + Diags.Report(diag::err_drv_optimization_remark_format) + << CodeGenOpts.OptRecordFormat; + }); +} - bool TimerIsEnabled = false; +BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, + const std::string &InFile, + SmallVector<LinkModule, 4> LinkModules, + std::unique_ptr<raw_pwrite_stream> OS, + LLVMContext &C, + CoverageSourceInfo *CoverageInfo) + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), + AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), + LLVMIRGenerationRefCount(0), + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, + PPOpts, CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { + TimerIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; +} - std::unique_ptr<CodeGenerator> Gen; +// This constructor is used in installing an empty BackendConsumer +// to use the clang diagnostic handler for IR input files. It avoids +// initializing the OS field. 
+BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, + const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, + const LangOptions &LangOpts, + llvm::Module *Module, + SmallVector<LinkModule, 4> LinkModules, + LLVMContext &C, + CoverageSourceInfo *CoverageInfo) + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), + Context(nullptr), FS(VFS), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), + LLVMIRGenerationRefCount(0), + Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, + PPOpts, CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)), CurLinkModule(Module) { + TimerIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; +} - SmallVector<LinkModule, 4> LinkModules; +llvm::Module* BackendConsumer::getModule() const { + return Gen->GetModule(); +} - // A map from mangled names to their function's source location, used for - // backend diagnostics as the Clang AST may be unavailable. We actually use - // the mangled name's hash as the key because mangled names can be very - // long and take up lots of space. Using a hash can cause name collision, - // but that is rare and the consequences are pointing to a wrong source - // location which is not severe. This is a vector instead of an actual map - // because we optimize for time building this map rather than time - // retrieving an entry, as backend diagnostics are uncommon. 
- std::vector<std::pair<llvm::hash_code, FullSourceLoc>> - ManglingFullSourceLocs; +std::unique_ptr<llvm::Module> BackendConsumer::takeModule() { + return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); +} - // This is here so that the diagnostic printer knows the module a diagnostic - // refers to. - llvm::Module *CurLinkModule = nullptr; +CodeGenerator* BackendConsumer::getCodeGenerator() { + return Gen.get(); +} - public: - BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, const std::string &InFile, - SmallVector<LinkModule, 4> LinkModules, - std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, - PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)) { - TimerIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; - } +void BackendConsumer::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + Gen->HandleCXXStaticMemberVarInstantiation(VD); +} - // This constructor is used in installing an empty BackendConsumer - // to use the clang diagnostic handler for IR input files. It avoids - // initializing the OS field. 
- BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, llvm::Module *Module, - SmallVector<LinkModule, 4> LinkModules, LLVMContext &C, - CoverageSourceInfo *CoverageInfo = nullptr) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, - PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)), CurLinkModule(Module) { - TimerIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; - } - llvm::Module *getModule() const { return Gen->GetModule(); } - std::unique_ptr<llvm::Module> takeModule() { - return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); - } +void BackendConsumer::Initialize(ASTContext &Ctx) { + assert(!Context && "initialized multiple times"); - CodeGenerator *getCodeGenerator() { return Gen.get(); } + Context = &Ctx; - void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { - Gen->HandleCXXStaticMemberVarInstantiation(VD); - } + if (TimerIsEnabled) + LLVMIRGeneration.startTimer(); - void Initialize(ASTContext &Ctx) override { - assert(!Context && "initialized multiple times"); + Gen->Initialize(Ctx); - Context = &Ctx; + if (TimerIsEnabled) + LLVMIRGeneration.stopTimer(); +} - if (TimerIsEnabled) - LLVMIRGeneration.startTimer(); +bool BackendConsumer::HandleTopLevelDecl(DeclGroupRef D) { + PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(), + Context->getSourceManager(), + "LLVM IR generation of declaration"); 
- Gen->Initialize(Ctx); + // Recurse. + if (TimerIsEnabled) { + LLVMIRGenerationRefCount += 1; + if (LLVMIRGenerationRefCount == 1) + LLVMIRGeneration.startTimer(); + } - if (TimerIsEnabled) - LLVMIRGeneration.stopTimer(); - } + Gen->HandleTopLevelDecl(D); - bool HandleTopLevelDecl(DeclGroupRef D) override { - PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(), - Context->getSourceManager(), - "LLVM IR generation of declaration"); + if (TimerIsEnabled) { + LLVMIRGenerationRefCount -= 1; + if (LLVMIRGenerationRefCount == 0) + LLVMIRGeneration.stopTimer(); + } - // Recurse. - if (TimerIsEnabled) { - LLVMIRGenerationRefCount += 1; - if (LLVMIRGenerationRefCount == 1) - LLVMIRGeneration.startTimer(); - } + return true; +} - Gen->HandleTopLevelDecl(D); +void BackendConsumer::HandleInlineFunctionDefinition(FunctionDecl *D) { + PrettyStackTraceDecl CrashInfo(D, SourceLocation(), + Context->getSourceManager(), + "LLVM IR generation of inline function"); + if (TimerIsEnabled) + LLVMIRGeneration.startTimer(); - if (TimerIsEnabled) { - LLVMIRGenerationRefCount -= 1; - if (LLVMIRGenerationRefCount == 0) - LLVMIRGeneration.stopTimer(); - } + Gen->HandleInlineFunctionDefinition(D); - return true; - } + if (TimerIsEnabled) + LLVMIRGeneration.stopTimer(); +} - void HandleInlineFunctionDefinition(FunctionDecl *D) override { - PrettyStackTraceDecl CrashInfo(D, SourceLocation(), - Context->getSourceManager(), - "LLVM IR generation of inline function"); - if (TimerIsEnabled) - LLVMIRGeneration.startTimer(); +void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) { + // Ignore interesting decls from the AST reader after IRGen is finished. + if (!IRGenFinished) + HandleTopLevelDecl(D); +} - Gen->HandleInlineFunctionDefinition(D); +// Links each entry in LinkModules into our module. Returns true on error. 
+bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) { - if (TimerIsEnabled) - LLVMIRGeneration.stopTimer(); - } + for (auto &LM : LinkModules) { + assert(LM.Module && "LinkModule does not actually have a module"); - void HandleInterestingDecl(DeclGroupRef D) override { - // Ignore interesting decls from the AST reader after IRGen is finished. - if (!IRGenFinished) - HandleTopLevelDecl(D); - } + // If ShouldLinkFiles is not set, skip files added via the + // -mlink-bitcode-files, only linking -mlink-builtin-bitcode + if (!LM.Internalize && !ShouldLinkFiles) + continue; - // Links each entry in LinkModules into our module. Returns true on error. - bool LinkInModules(llvm::Module *M) { - for (auto &LM : LinkModules) { - assert(LM.Module && "LinkModule does not actually have a module"); - if (LM.PropagateAttrs) - for (Function &F : *LM.Module) { - // Skip intrinsics. Keep consistent with how intrinsics are created - // in LLVM IR. - if (F.isIntrinsic()) - continue; - CodeGen::mergeDefaultFunctionDefinitionAttributes( - F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize); - } + if (LM.PropagateAttrs) + for (Function &F : *LM.Module) { + // Skip intrinsics. Keep consistent with how intrinsics are created + // in LLVM IR. 
+ if (F.isIntrinsic()) + continue; + CodeGen::mergeDefaultFunctionDefinitionAttributes( + F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize); + } - CurLinkModule = LM.Module.get(); + CurLinkModule = LM.Module.get(); + bool Err; - bool Err; - if (LM.Internalize) { - Err = Linker::linkModules( - *M, std::move(LM.Module), LM.LinkFlags, - [](llvm::Module &M, const llvm::StringSet<> &GVS) { - internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { - return !GV.hasName() || (GVS.count(GV.getName()) == 0); - }); + auto DoLink = [&](auto &Mod) { + if (LM.Internalize) { + Err = Linker::linkModules( + *M, std::move(Mod), LM.LinkFlags, + [](llvm::Module &M, const llvm::StringSet<> &GVS) { + internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { + return !GV.hasName() || (GVS.count(GV.getName()) == 0); }); - } else { - Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags); - } - - if (Err) - return true; - } - LinkModules.clear(); - return false; // success - } + }); + } else + Err = Linker::linkModules(*M, std::move(Mod), LM.LinkFlags); + }; - void HandleTranslationUnit(ASTContext &C) override { - { - llvm::TimeTraceScope TimeScope("Frontend"); - PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); - if (TimerIsEnabled) { - LLVMIRGenerationRefCount += 1; - if (LLVMIRGenerationRefCount == 1) - LLVMIRGeneration.startTimer(); - } + // Create a Clone to move to the linker, which preserves the original + // linking modules, allowing them to be linked again in the future + if (ClRelinkBuiltinBitcodePostop) { + // TODO: If CloneModule() is updated to support cloning of unmaterialized + // modules, we can remove this + if (Error E = CurLinkModule->materializeAll()) + return false; - Gen->HandleTranslationUnit(C); + std::unique_ptr<llvm::Module> Clone = llvm::CloneModule(*LM.Module); - if (TimerIsEnabled) { - LLVMIRGenerationRefCount -= 1; - if (LLVMIRGenerationRefCount == 0) - LLVMIRGeneration.stopTimer(); - } - - IRGenFinished = true; - } + 
DoLink(Clone); + } + // Otherwise we can link (and clean up) the original modules + else { + DoLink(LM.Module); + } + } - // Silently ignore if we weren't initialized for some reason. - if (!getModule()) - return; + return false; // success +} - LLVMContext &Ctx = getModule()->getContext(); - std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = - Ctx.getDiagnosticHandler(); - Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>( - CodeGenOpts, this)); +void BackendConsumer::HandleTranslationUnit(ASTContext &C) { + { + llvm::TimeTraceScope TimeScope("Frontend"); + PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); + if (TimerIsEnabled) { + LLVMIRGenerationRefCount += 1; + if (LLVMIRGenerationRefCount == 1) + LLVMIRGeneration.startTimer(); + } - Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = - setupLLVMOptimizationRemarks( - Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses, - CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness, - CodeGenOpts.DiagnosticsHotnessThreshold); + Gen->HandleTranslationUnit(C); - if (Error E = OptRecordFileOrErr.takeError()) { - reportOptRecordError(std::move(E), Diags, CodeGenOpts); - return; - } + if (TimerIsEnabled) { + LLVMIRGenerationRefCount -= 1; + if (LLVMIRGenerationRefCount == 0) + LLVMIRGeneration.stopTimer(); + } - std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = - std::move(*OptRecordFileOrErr); + IRGenFinished = true; + } - if (OptRecordFile && - CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) - Ctx.setDiagnosticsHotnessRequested(true); + // Silently ignore if we weren't initialized for some reason. 
+ if (!getModule()) + return; - if (CodeGenOpts.MisExpect) { - Ctx.setMisExpectWarningRequested(true); - } + LLVMContext &Ctx = getModule()->getContext(); + std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = + Ctx.getDiagnosticHandler(); + Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>( + CodeGenOpts, this)); - if (CodeGenOpts.DiagnosticsMisExpectTolerance) { - Ctx.setDiagnosticsMisExpectTolerance( - CodeGenOpts.DiagnosticsMisExpectTolerance); - } + Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = + setupLLVMOptimizationRemarks( + Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses, + CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness, + CodeGenOpts.DiagnosticsHotnessThreshold); - // Link each LinkModule into our module. - if (LinkInModules(getModule())) - return; + if (Error E = OptRecordFileOrErr.takeError()) { + reportOptRecordError(std::move(E), Diags, CodeGenOpts); + return; + } - for (auto &F : getModule()->functions()) { - if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) { - auto Loc = FD->getASTContext().getFullLoc(FD->getLocation()); - // TODO: use a fast content hash when available. - auto NameHash = llvm::hash_value(F.getName()); - ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc)); - } - } + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = + std::move(*OptRecordFileOrErr); - if (CodeGenOpts.ClearASTBeforeBackend) { - LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n"); - // Access to the AST is no longer available after this. - // Other things that the ASTContext manages are still available, e.g. - // the SourceManager. It'd be nice if we could separate out all the - // things in ASTContext used after this point and null out the - // ASTContext, but too many various parts of the ASTContext are still - // used in various parts. 
- C.cleanup(); - C.getAllocator().Reset(); - } + if (OptRecordFile && + CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) + Ctx.setDiagnosticsHotnessRequested(true); - EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); + if (CodeGenOpts.MisExpect) { + Ctx.setMisExpectWarningRequested(true); + } - EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, - LangOpts, C.getTargetInfo().getDataLayoutString(), - getModule(), Action, FS, std::move(AsmOutStream)); + if (CodeGenOpts.DiagnosticsMisExpectTolerance) { + Ctx.setDiagnosticsMisExpectTolerance( + CodeGenOpts.DiagnosticsMisExpectTolerance); + } - Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); + // Link each LinkModule into our module. + if (LinkInModules(getModule())) + return; - if (OptRecordFile) - OptRecordFile->keep(); + for (auto &F : getModule()->functions()) { + if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) { + auto Loc = FD->getASTContext().getFullLoc(FD->getLocation()); + // TODO: use a fast content hash when available. + auto NameHash = llvm::hash_value(F.getName()); + ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc)); } + } - void HandleTagDeclDefinition(TagDecl *D) override { - PrettyStackTraceDecl CrashInfo(D, SourceLocation(), - Context->getSourceManager(), - "LLVM IR generation of declaration"); - Gen->HandleTagDeclDefinition(D); - } + if (CodeGenOpts.ClearASTBeforeBackend) { + LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n"); + // Access to the AST is no longer available after this. + // Other things that the ASTContext manages are still available, e.g. + // the SourceManager. It'd be nice if we could separate out all the + // things in ASTContext used after this point and null out the + // ASTContext, but too many various parts of the ASTContext are still + // used in various parts. 
+ C.cleanup(); + C.getAllocator().Reset(); + } - void HandleTagDeclRequiredDefinition(const TagDecl *D) override { - Gen->HandleTagDeclRequiredDefinition(D); - } + EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); - void CompleteTentativeDefinition(VarDecl *D) override { - Gen->CompleteTentativeDefinition(D); - } + EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, + C.getTargetInfo().getDataLayoutString(), getModule(), + Action, FS, std::move(AsmOutStream), this); - void CompleteExternalDeclaration(VarDecl *D) override { - Gen->CompleteExternalDeclaration(D); - } + Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); - void AssignInheritanceModel(CXXRecordDecl *RD) override { - Gen->AssignInheritanceModel(RD); - } + if (OptRecordFile) + OptRecordFile->keep(); +} - void HandleVTable(CXXRecordDecl *RD) override { - Gen->HandleVTable(RD); - } +void BackendConsumer::HandleTagDeclDefinition(TagDecl *D) { + PrettyStackTraceDecl CrashInfo(D, SourceLocation(), + Context->getSourceManager(), + "LLVM IR generation of declaration"); + Gen->HandleTagDeclDefinition(D); +} - /// Get the best possible source location to represent a diagnostic that - /// may have associated debug info. - const FullSourceLoc - getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D, - bool &BadDebugInfo, StringRef &Filename, - unsigned &Line, unsigned &Column) const; +void BackendConsumer::HandleTagDeclRequiredDefinition(const TagDecl *D) { + Gen->HandleTagDeclRequiredDefinition(D); +} - std::optional<FullSourceLoc> - getFunctionSourceLocation(const Function &F) const; +void BackendConsumer::CompleteTentativeDefinition(VarDecl *D) { + Gen->CompleteTentativeDefinition(D); +} - void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); - /// Specialized handler for InlineAsm diagnostic. - /// \return True if the diagnostic has been successfully reported, false - /// otherwise. 
- bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); - /// Specialized handler for diagnostics reported using SMDiagnostic. - void SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &D); - /// Specialized handler for StackSize diagnostic. - /// \return True if the diagnostic has been successfully reported, false - /// otherwise. - bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); - /// Specialized handler for ResourceLimit diagnostic. - /// \return True if the diagnostic has been successfully reported, false - /// otherwise. - bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D); +void BackendConsumer::CompleteExternalDeclaration(VarDecl *D) { + Gen->CompleteExternalDeclaration(D); +} - /// Specialized handler for unsupported backend feature diagnostic. - void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); - /// Specialized handlers for optimization remarks. - /// Note that these handlers only accept remarks and they always handle - /// them. - void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, - unsigned DiagID); - void - OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D); - void OptimizationRemarkHandler( - const llvm::OptimizationRemarkAnalysisFPCommute &D); - void OptimizationRemarkHandler( - const llvm::OptimizationRemarkAnalysisAliasing &D); - void OptimizationFailureHandler( - const llvm::DiagnosticInfoOptimizationFailure &D); - void DontCallDiagHandler(const DiagnosticInfoDontCall &D); - /// Specialized handler for misexpect warnings. 
- /// Note that misexpect remarks are emitted through ORE - void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D); - }; +void BackendConsumer::AssignInheritanceModel(CXXRecordDecl *RD) { + Gen->AssignInheritanceModel(RD); +} - void BackendConsumer::anchor() {} +void BackendConsumer::HandleVTable(CXXRecordDecl *RD) { + Gen->HandleVTable(RD); } +void BackendConsumer::anchor() { } + +} // namespace clang + bool ClangDiagnosticHandler::handleDiagnostics(const DiagnosticInfo &DI) { BackendCon->DiagnosticHandlerImpl(DI); return true; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index fab70b66d1d9..2199d7b58fb9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -495,12 +495,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { if (CurFnInfo->getMaxVectorWidth() > LargestVectorWidth) LargestVectorWidth = CurFnInfo->getMaxVectorWidth(); - // Add the required-vector-width attribute. This contains the max width from: + // Add the min-legal-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. // 2. Any builtins used that have a vector width specified. // 3. Values passed in and out of inline assembly. // 4. Width of vector arguments and return types for this function. - // 5. Width of vector aguments and return types for functions called by this + // 5. Width of vector arguments and return types for functions called by this // function. if (getContext().getTargetInfo().getTriple().isX86()) CurFn->addFnAttr("min-legal-vector-width", @@ -572,11 +572,11 @@ llvm::ConstantInt * CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { // Remove any (C++17) exception specifications, to allow calling e.g. a // noexcept function through a non-noexcept pointer. 
- if (!isa<FunctionNoProtoType>(Ty)) + if (!Ty->isFunctionNoProtoType()) Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None); std::string Mangled; llvm::raw_string_ostream Out(Mangled); - CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out, false); + CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out, false); return llvm::ConstantInt::get( CGM.Int32Ty, static_cast<uint32_t>(llvm::xxh3_64bits(Mangled))); } @@ -683,6 +683,19 @@ static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { return true; } +bool CodeGenFunction::isInAllocaArgument(CGCXXABI &ABI, QualType Ty) { + const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory; +} + +bool CodeGenFunction::hasInAllocaArg(const CXXMethodDecl *MD) { + return getTarget().getTriple().getArch() == llvm::Triple::x86 && + getTarget().getCXXABI().isMicrosoft() && + llvm::any_of(MD->parameters(), [&](ParmVarDecl *P) { + return isInAllocaArgument(CGM.getCXXABI(), P->getType()); + }); +} + /// Return the UBSan prologue signature for \p FD if one is available. 
static llvm::Constant *getPrologueSignature(CodeGenModule &CGM, const FunctionDecl *FD) { @@ -1108,11 +1121,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Address(&*AI, ConvertType(RetTy), CurFnInfo->getReturnInfo().getIndirectAlign(), KnownNonNull); if (!CurFnInfo->getReturnInfo().getIndirectByVal()) { - ReturnValuePointer = - CreateDefaultAlignTempAlloca(Int8PtrTy, "result.ptr"); - Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast( - ReturnValue.getPointer(), Int8PtrTy), - ReturnValuePointer); + ReturnValuePointer = CreateDefaultAlignTempAlloca( + ReturnValue.getPointer()->getType(), "result.ptr"); + Builder.CreateStore(ReturnValue.getPointer(), ReturnValuePointer); } } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca && !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { @@ -1154,12 +1165,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, EmitFunctionProlog(*CurFnInfo, CurFn, Args); - if (isa_and_nonnull<CXXMethodDecl>(D) && - cast<CXXMethodDecl>(D)->isInstance()) { - CGM.getCXXABI().EmitInstanceFunctionProlog(*this); - const CXXMethodDecl *MD = cast<CXXMethodDecl>(D); - if (MD->getParent()->isLambda() && - MD->getOverloadedOperator() == OO_Call) { + if (const CXXMethodDecl *MD = dyn_cast_if_present<CXXMethodDecl>(D); + MD && !MD->isStatic()) { + bool IsInLambda = + MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call; + if (MD->isImplicitObjectMemberFunction()) + CGM.getCXXABI().EmitInstanceFunctionProlog(*this); + if (IsInLambda) { // We're in a lambda; figure out the captures. MD->getParent()->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField); @@ -1189,7 +1201,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, VLASizeMap[VAT->getSizeExpr()] = ExprArg; } } - } else { + } else if (MD->isImplicitObjectMemberFunction()) { // Not in a lambda; just use 'this' from the method. 
// FIXME: Should we generate a new load for each use of 'this'? The // fast register allocator would be happier... @@ -1202,11 +1214,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, SkippedChecks.set(SanitizerKind::ObjectSize, true); QualType ThisTy = MD->getThisType(); - // If this is the call operator of a lambda with no capture-default, it + // If this is the call operator of a lambda with no captures, it // may have a static invoker function, which may call this operator with // a null 'this' pointer. - if (isLambdaCallOperator(MD) && - MD->getParent()->getLambdaCaptureDefault() == LCD_None) + if (isLambdaCallOperator(MD) && MD->getParent()->isCapturelessLambda()) SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck( @@ -1249,11 +1260,6 @@ void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { EmitCompoundStmtWithoutScope(*S); else EmitStmt(Body); - - // This is checked after emitting the function body so we know if there - // are any permitted infinite loops. 
- if (checkIfFunctionMustProgress()) - CurFn->addFnAttr(llvm::Attribute::MustProgress); } /// When instrumenting to collect profile data, the counts for some blocks @@ -1300,7 +1306,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, QualType ResTy = FD->getReturnType(); const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); - if (MD && MD->isInstance()) { + if (MD && MD->isImplicitObjectMemberFunction()) { if (CGM.getCXXABI().HasThisReturn(GD)) ResTy = MD->getThisType(); else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) @@ -1325,7 +1331,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, auto *Implicit = ImplicitParamDecl::Create( getContext(), Param->getDeclContext(), Param->getLocation(), - /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other); + /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamKind::Other); SizeArguments[Param] = Implicit; Args.push_back(Implicit); } @@ -1432,6 +1438,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, if (Body && isa_and_nonnull<CoroutineBodyStmt>(Body)) llvm::append_range(FnArgs, FD->parameters()); + // Ensure that the function adheres to the forward progress guarantee, which + // is required by certain optimizations. + if (checkIfFunctionMustProgress()) + CurFn->addFnAttr(llvm::Attribute::MustProgress); + // Generate the body of the function. PGO.assignRegionCounters(GD, CurFn); if (isa<CXXDestructorDecl>(FD)) @@ -1447,6 +1458,17 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // The lambda static invoker function is special, because it forwards or // clones the body of the function call operator (but is actually static). 
EmitLambdaStaticInvokeBody(cast<CXXMethodDecl>(FD)); + } else if (isa<CXXMethodDecl>(FD) && + isLambdaCallOperator(cast<CXXMethodDecl>(FD)) && + !FnInfo.isDelegateCall() && + cast<CXXMethodDecl>(FD)->getParent()->getLambdaStaticInvoker() && + hasInAllocaArg(cast<CXXMethodDecl>(FD))) { + // If emitting a lambda with static invoker on X86 Windows, change + // the call operator body. + // Make sure that this is a call operator with an inalloca arg and check + // for delegate call to make sure this is the original call op and not the + // new forwarding function for the static invoker. + EmitLambdaInAllocaCallOpBody(cast<CXXMethodDecl>(FD)); } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) && (cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator() || cast<CXXMethodDecl>(FD)->isMoveAssignmentOperator())) { @@ -2025,8 +2047,7 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { NullConstant, Twine()); CharUnits NullAlign = DestPtr.getAlignment(); NullVariable->setAlignment(NullAlign.getAsAlign()); - Address SrcPtr(Builder.CreateBitCast(NullVariable, Builder.getInt8PtrTy()), - Builder.getInt8Ty(), NullAlign); + Address SrcPtr(NullVariable, Builder.getInt8Ty(), NullAlign); if (vla) return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal); @@ -2465,10 +2486,8 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn, const AnnotateAttr *Attr) { SmallVector<llvm::Value *, 5> Args = { AnnotatedVal, - Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr), - ConstGlobalsPtrTy), - Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location), - ConstGlobalsPtrTy), + CGM.EmitAnnotationString(AnnotationStr), + CGM.EmitAnnotationUnit(Location), CGM.EmitAnnotationLineNo(Location), }; if (Attr) @@ -2478,15 +2497,10 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn, void CodeGenFunction::EmitVarAnnotations(const VarDecl *D, llvm::Value *V) { assert(D->hasAttr<AnnotateAttr>() && "no annotate 
attribute"); - // FIXME We create a new bitcast for every annotation because that's what - // llvm-gcc was doing. - unsigned AS = V->getType()->getPointerAddressSpace(); - llvm::Type *I8PtrTy = Builder.getInt8PtrTy(AS); for (const auto *I : D->specific_attrs<AnnotateAttr>()) EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation, - {I8PtrTy, CGM.ConstGlobalsPtrTy}), - Builder.CreateBitCast(V, I8PtrTy, V->getName()), - I->getAnnotation(), D->getLocation(), I); + {V->getType(), CGM.ConstGlobalsPtrTy}), + V, I->getAnnotation(), D->getLocation(), I); } Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, @@ -2571,10 +2585,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, std::string MissingFeature; llvm::StringMap<bool> CallerFeatureMap; CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. 
+ bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice; if (BuiltinID) { StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures( - FeatureList, CallerFeatureMap)) { + FeatureList, CallerFeatureMap) && !IsHipStdPar) { CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << FeatureList; @@ -2607,7 +2626,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, return false; } return true; - })) + }) && !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) { @@ -2616,7 +2635,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, for (const auto &F : CalleeFeatureMap) { if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) || - !CallerFeatureMap.find(F.getKey())->getValue())) + !CallerFeatureMap.find(F.getKey())->getValue()) && + !IsHipStdPar) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey(); } @@ -2658,8 +2678,15 @@ llvm::Value *CodeGenFunction::FormX86ResolverCondition( const MultiVersionResolverOption &RO) { llvm::Value *Condition = nullptr; - if (!RO.Conditions.Architecture.empty()) - Condition = EmitX86CpuIs(RO.Conditions.Architecture); + if (!RO.Conditions.Architecture.empty()) { + StringRef Arch = RO.Conditions.Architecture; + // If arch= specifies an x86-64 micro-architecture level, test the feature + // with __builtin_cpu_supports, otherwise use __builtin_cpu_is. 
+ if (Arch.starts_with("x86-64")) + Condition = EmitX86CpuSupports({Arch}); + else + Condition = EmitX86CpuIs(Arch); + } if (!RO.Conditions.Features.empty()) { llvm::Value *FeatureCond = EmitX86CpuSupports(RO.Conditions.Features); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 409f48a04906..618e78809db4 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1250,11 +1250,11 @@ public: /// destroyed by aggressive peephole optimizations that assume that /// all uses of a value have been realized in the IR. class PeepholeProtection { - llvm::Instruction *Inst; + llvm::Instruction *Inst = nullptr; friend class CodeGenFunction; public: - PeepholeProtection() : Inst(nullptr) {} + PeepholeProtection() = default; }; /// A non-RAII class containing all the information about a bound @@ -1963,6 +1963,9 @@ private: /// Check if the return value of this function requires sanitization. bool requiresReturnValueCheck() const; + bool isInAllocaArgument(CGCXXABI &ABI, QualType Ty); + bool hasInAllocaArg(const CXXMethodDecl *MD); + llvm::BasicBlock *TerminateLandingPad = nullptr; llvm::BasicBlock *TerminateHandler = nullptr; llvm::SmallVector<llvm::BasicBlock *, 2> TrapBBs; @@ -2227,10 +2230,17 @@ public: void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, - CallArgList &CallArgs); + CallArgList &CallArgs, + const CGFunctionInfo *CallOpFnInfo = nullptr, + llvm::Constant *CallOpFn = nullptr); void EmitLambdaBlockInvokeBody(); - void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); + void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD, + CallArgList &CallArgs); + void EmitLambdaInAllocaImplFn(const CXXMethodDecl *CallOp, + const CGFunctionInfo **ImplFnInfo, + llvm::Function **ImplFn); + void EmitLambdaInAllocaCallOpBody(const CXXMethodDecl 
*MD); void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) { EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); } @@ -3012,6 +3022,19 @@ public: void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed); + // Find a struct's flexible array member. It may be embedded inside multiple + // sub-structs, but must still be the last field. + const ValueDecl *FindFlexibleArrayMemberField(ASTContext &Ctx, + const RecordDecl *RD); + + /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns + /// \p nullptr if either the attribute or the field doesn't exist. + const ValueDecl *FindCountedByField(const Expr *Base); + + /// Build an expression accessing the "counted_by" field. + const Expr *BuildCountedByFieldExpr(const Expr *Base, + const ValueDecl *CountedByVD); + llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, @@ -4007,6 +4030,8 @@ public: const ObjCIvarDecl *Ivar); LValue EmitLValueForField(LValue Base, const FieldDecl* Field); LValue EmitLValueForLambdaField(const FieldDecl *Field); + LValue EmitLValueForLambdaField(const FieldDecl *Field, + llvm::Value *ThisValue); /// EmitLValueForFieldInitialization - Like EmitLValueForField, except that /// if the Field is a reference, this will return the address of the reference @@ -4262,7 +4287,6 @@ public: llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); - llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); @@ -4275,20 +4299,31 @@ public: llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); + /// FormSVEBuiltinResult - Returns the struct of scalable 
vectors as a wider + /// vector. It extracts the scalable vector from the struct and inserts into + /// the wider vector. This avoids the error when allocating space in llvm + /// for struct of scalable vectors if a function returns struct. + llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); + llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); - llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, + llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags, + llvm::Value *EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags, + llvm::Value *EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); - llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags, + llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); + + void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, + SmallVectorImpl<llvm::Value *> &Ops, + SVETypeFlags TypeFlags); + llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, @@ -4299,6 +4334,8 @@ public: llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, + const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, @@ -4306,7 +4343,6 @@ public: llvm::Value 
*EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); - llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID); @@ -4500,6 +4536,11 @@ public: void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn, llvm::Constant *addr); + /// Registers the dtor using 'llvm.global_dtors' for platforms that do not + /// support an 'atexit()' function. + void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn, + llvm::Constant *addr); + /// Call atexit() with function dtorStub. void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub); @@ -4789,6 +4830,9 @@ private: llvm::Value *EmittedE, bool IsDynamic); + llvm::Value *emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType); + void emitZeroOrPatternForAutoVarInit(QualType type, const VarDecl &D, Address Loc); @@ -4888,7 +4932,7 @@ private: llvm::Value *EmitX86CpuIs(StringRef CPUStr); llvm::Value *EmitX86CpuSupports(const CallExpr *E); llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); - llvm::Value *EmitX86CpuSupports(uint64_t Mask); + llvm::Value *EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask); llvm::Value *EmitX86CpuInit(); llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO); llvm::Value *EmitAArch64CpuInit(); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 07a9dec12f6f..b931a81bc008 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -28,6 +28,7 @@ #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ 
-360,13 +361,14 @@ CodeGenModule::CodeGenModule(ASTContext &C, IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); - Int8PtrTy = Int8Ty->getPointerTo(0); - Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + Int8PtrTy = llvm::PointerType::get(LLVMContext, 0); const llvm::DataLayout &DL = M.getDataLayout(); - AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace()); - GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace()); - ConstGlobalsPtrTy = Int8Ty->getPointerTo( - C.getTargetAddressSpace(GetGlobalConstantAddressSpace())); + AllocaInt8PtrTy = + llvm::PointerType::get(LLVMContext, DL.getAllocaAddrSpace()); + GlobalsInt8PtrTy = + llvm::PointerType::get(LLVMContext, DL.getDefaultGlobalsAddressSpace()); + ConstGlobalsPtrTy = llvm::PointerType::get( + LLVMContext, C.getTargetAddressSpace(GetGlobalConstantAddressSpace())); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); // Build C++20 Module initializers. 
@@ -563,8 +565,8 @@ static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) { } static bool checkAliasedGlobal( - DiagnosticsEngine &Diags, SourceLocation Location, bool IsIFunc, - const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV, + const ASTContext &Context, DiagnosticsEngine &Diags, SourceLocation Location, + bool IsIFunc, const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV, const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames, SourceRange AliasRange) { GV = getAliasedGlobal(Alias); @@ -573,6 +575,14 @@ static bool checkAliasedGlobal( return false; } + if (GV->hasCommonLinkage()) { + const llvm::Triple &Triple = Context.getTargetInfo().getTriple(); + if (Triple.getObjectFormat() == llvm::Triple::XCOFF) { + Diags.Report(Location, diag::err_alias_to_common); + return false; + } + } + if (GV->isDeclaration()) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; Diags.Report(Location, diag::note_alias_requires_mangled_name) @@ -633,7 +643,7 @@ void CodeGenModule::checkAliases() { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); const llvm::GlobalValue *GV = nullptr; - if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV, + if (!checkAliasedGlobal(getContext(), Diags, Location, IsIFunc, Alias, GV, MangledDeclNames, Range)) { Error = true; continue; @@ -689,6 +699,7 @@ void CodeGenModule::checkAliases() { void CodeGenModule::clear() { DeferredDeclsToEmit.clear(); EmittedDeferredDecls.clear(); + DeferredAnnotations.clear(); if (OpenMPRuntime) OpenMPRuntime->clear(); } @@ -752,6 +763,14 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, } } +static bool isStackProtectorOn(const LangOptions &LangOpts, + const llvm::Triple &Triple, + clang::LangOptions::StackProtectorMode Mode) { + if (Triple.isAMDGPU() || Triple.isNVPTX()) + return false; + return LangOpts.getStackProtector() == Mode; +} + void 
CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) @@ -829,7 +848,7 @@ void CodeGenModule::Release() { // Emit amdgpu_code_object_version module flag, which is code object version // times 100. if (getTarget().getTargetOpts().CodeObjectVersion != - TargetOptions::COV_None) { + llvm::CodeObjectVersionKind::COV_None) { getModule().addModuleFlag(llvm::Module::Error, "amdgpu_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); @@ -967,6 +986,41 @@ void CodeGenModule::Release() { Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); + if (getTriple().isOSzOS()) { + getModule().addModuleFlag(llvm::Module::Warning, + "zos_product_major_version", + uint32_t(CLANG_VERSION_MAJOR)); + getModule().addModuleFlag(llvm::Module::Warning, + "zos_product_minor_version", + uint32_t(CLANG_VERSION_MINOR)); + getModule().addModuleFlag(llvm::Module::Warning, "zos_product_patchlevel", + uint32_t(CLANG_VERSION_PATCHLEVEL)); + std::string ProductId; +#ifdef CLANG_VENDOR + ProductId = #CLANG_VENDOR; +#else + ProductId = "clang"; +#endif + getModule().addModuleFlag(llvm::Module::Error, "zos_product_id", + llvm::MDString::get(VMContext, ProductId)); + + // Record the language because we need it for the PPA2. + StringRef lang_str = languageToString( + LangStandard::getLangStandardForKind(LangOpts.LangStd).Language); + getModule().addModuleFlag(llvm::Module::Error, "zos_cu_language", + llvm::MDString::get(VMContext, lang_str)); + + time_t TT = PreprocessorOpts.SourceDateEpoch + ? *PreprocessorOpts.SourceDateEpoch + : std::time(nullptr); + getModule().addModuleFlag(llvm::Module::Max, "zos_translation_time", + static_cast<uint64_t>(TT)); + + // Multiple modes will be supported here. 
+ getModule().addModuleFlag(llvm::Module::Error, "zos_le_char_mode", + llvm::MDString::get(VMContext, "ascii")); + } + llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch(); if ( Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb @@ -1067,6 +1121,15 @@ void CodeGenModule::Release() { "sign-return-address-with-bkey", 1); } + if (CodeGenOpts.StackClashProtector) + getModule().addModuleFlag( + llvm::Module::Override, "probe-stack", + llvm::MDString::get(TheModule.getContext(), "inline-asm")); + + if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) + getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", + CodeGenOpts.StackProbeSize); + if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( @@ -1137,6 +1200,12 @@ void CodeGenModule::Release() { if (CM != ~0u) { llvm::CodeModel::Model codeModel = static_cast<llvm::CodeModel::Model>(CM); getModule().setCodeModel(codeModel); + + if (CM == llvm::CodeModel::Medium && + Context.getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) { + getModule().setLargeDataThreshold(getCodeGenOpts().LargeDataThreshold); + } } } @@ -1196,11 +1265,15 @@ void CodeGenModule::Release() { getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); + if (getLangOpts().RegCall4) + getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1); if (getContext().getTargetInfo().getMaxTLSAlign()) getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", getContext().getTargetInfo().getMaxTLSAlign()); + getTargetCodeGenInfo().emitTargetGlobals(*this); + getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); @@ -1371,9 +1444,24 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, } if (!D) return; + // Set visibility for 
definitions, and for declarations if requested globally // or set explicitly. LinkageInfo LV = D->getLinkageAndVisibility(); + + // OpenMP declare target variables must be visible to the host so they can + // be registered. We require protected visibility unless the variable has + // the DT_nohost modifier and does not need to be registered. + if (Context.getLangOpts().OpenMP && + Context.getLangOpts().OpenMPIsTargetDevice && isa<VarDecl>(D) && + D->hasAttr<OMPDeclareTargetDeclAttr>() && + D->getAttr<OMPDeclareTargetDeclAttr>()->getDevType() != + OMPDeclareTargetDeclAttr::DT_NoHost && + LV.getVisibility() == HiddenVisibility) { + GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); + return; + } + if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) { // Reject incompatible dlllstorage and visibility annotations. if (!LV.isVisibilityExplicit()) @@ -1407,6 +1495,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, return false; const llvm::Triple &TT = CGM.getTriple(); + const auto &CGOpts = CGM.getCodeGenOpts(); if (TT.isWindowsGNUEnvironment()) { // In MinGW, variables without DLLImport can still be automatically // imported from a DLL by the linker; don't mark variables that @@ -1417,7 +1506,8 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, // such variables can't be marked as DSO local. (Native TLS variables // can't be dllimported at all, though.) if (GV->isDeclarationForLinker() && isa<llvm::GlobalVariable>(GV) && - (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS)) + (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS) && + CGOpts.AutoImport) return false; } @@ -1440,7 +1530,6 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, return false; // If this is not an executable, don't assume anything is local. 
- const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); if (RM != llvm::Reloc::Static && !LOpts.PIE) { @@ -1707,7 +1796,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, if (FD && FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) { - Out << "__regcall3__" << II->getName(); + if (CGM.getLangOpts().RegCall4) + Out << "__regcall4__" << II->getName(); + else + Out << "__regcall3__" << II->getName(); } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); @@ -1936,9 +2028,9 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { for (const auto &I : Fns) { auto ctor = ctors.beginStruct(CtorStructTy); ctor.addInt(Int32Ty, I.Priority); - ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy)); + ctor.add(I.Initializer); if (I.AssociatedData) - ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)); + ctor.add(I.AssociatedData); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); @@ -1965,16 +2057,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(D)) return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); - if (isa<CXXConstructorDecl>(D) && - cast<CXXConstructorDecl>(D)->isInheritingConstructor() && - Context.getTargetInfo().getCXXABI().isMicrosoft()) { - // Our approach to inheriting constructors is fundamentally different from - // that used by the MS ABI, so keep our inheriting constructor thunks - // internal rather than trying to pick an unambiguous mangling for them. 
- return llvm::GlobalValue::InternalLinkage; - } - - return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); + return getLLVMLinkageForDeclarator(D, Linkage); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { @@ -1992,7 +2075,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) { std::string OutName; llvm::raw_string_ostream Out(OutName); - getCXXABI().getMangleContext().mangleTypeName( + getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) @@ -2232,11 +2315,11 @@ static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM, // Only functions whose address can be taken with a member function pointer // need this sort of type metadata. - return !MD->isStatic() && !MD->isVirtual() && !isa<CXXConstructorDecl>(MD) && - !isa<CXXDestructorDecl>(MD); + return MD->isImplicitObjectMemberFunction() && !MD->isVirtual() && + !isa<CXXConstructorDecl, CXXDestructorDecl>(MD); } -std::vector<const CXXRecordDecl *> +SmallVector<const CXXRecordDecl *, 0> CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { llvm::SetVector<const CXXRecordDecl *> MostBases; @@ -2261,19 +2344,23 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (CodeGenOpts.StackClashProtector) B.addAttribute("probe-stack", "inline-asm"); + if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) + B.addAttribute("stack-probe-size", + std::to_string(CodeGenOpts.StackProbeSize)); + if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); if (D && D->hasAttr<NoStackProtectorAttr>()) ; // Do nothing. 
else if (D && D->hasAttr<StrictGuardStackCheckAttr>() && - LangOpts.getStackProtector() == LangOptions::SSPOn) + isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtectStrong); - else if (LangOpts.getStackProtector() == LangOptions::SSPOn) + else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtect); - else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) + else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPStrong)) B.addAttribute(llvm::Attribute::StackProtectStrong); - else if (LangOpts.getStackProtector() == LangOptions::SSPReq) + else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPReq)) B.addAttribute(llvm::Attribute::StackProtectReq); if (!D) { @@ -2288,6 +2375,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, return; } + // Handle SME attributes that apply to function definitions, + // rather than to function prototypes. + if (D->hasAttr<ArmLocallyStreamingAttr>()) + B.addAttribute("aarch64_pstate_sm_body"); + + if (D->hasAttr<ArmNewZAAttr>()) + B.addAttribute("aarch64_pstate_za_new"); + // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = @@ -2386,7 +2481,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. 
if (getTarget().getCXXABI().areMemberFunctionsAligned()) { - if (F->getPointerAlignment(getDataLayout()) < 2 && isa<CXXMethodDecl>(D)) + if (isa<CXXMethodDecl>(D) && F->getPointerAlignment(getDataLayout()) < 2) F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } @@ -2893,6 +2988,9 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) { + assert(Primary->isNamedModuleUnit() && + "We should only emit module initializers for named modules."); + // Emit the initializers in the order that sub-modules appear in the // source, first Global Module Fragments, if present. if (auto GMF = Primary->getGlobalModuleFragment()) { @@ -2913,6 +3011,9 @@ void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) { // Third any associated with the Privat eMOdule Fragment, if present. if (auto PMF = Primary->getPrivateModuleFragment()) { for (Decl *D : getContext().getModuleInitializers(PMF)) { + // Skip import decls, the inits for those are called explicitly. 
+ if (isa<ImportDecl>(D)) + continue; assert(isa<VarDecl>(D) && "PMF initializer decl is not a var?"); EmitTopLevelDecl(D); } @@ -3078,6 +3179,13 @@ void CodeGenModule::EmitVTablesOpportunistically() { } void CodeGenModule::EmitGlobalAnnotations() { + for (const auto& [MangledName, VD] : DeferredAnnotations) { + llvm::GlobalValue *GV = GetGlobalValue(MangledName); + if (GV) + AddGlobalAnnotations(VD, GV); + } + DeferredAnnotations.clear(); + if (Annotations.empty()) return; @@ -3150,10 +3258,9 @@ llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ".args"); GV->setSection(AnnotationSection); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, GlobalsInt8PtrTy); - Lookup = Bitcasted; - return Bitcasted; + Lookup = GV; + return GV; } llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, @@ -3169,17 +3276,14 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, if (GV->getAddressSpace() != getDataLayout().getDefaultGlobalsAddressSpace()) { GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast( - GV, GV->getValueType()->getPointerTo( - getDataLayout().getDefaultGlobalsAddressSpace())); + GV, + llvm::PointerType::get( + GV->getContext(), getDataLayout().getDefaultGlobalsAddressSpace())); } // Create the ConstantStruct for the global annotation. llvm::Constant *Fields[] = { - llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, ConstGlobalsPtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, ConstGlobalsPtrTy), - LineNoCst, - Args, + GVInGlobalsAS, AnnoGV, UnitGV, LineNoCst, Args, }; return llvm::ConstantStruct::getAnon(Fields); } @@ -3200,7 +3304,7 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn, return true; // NoSanitize by location. Check "mainfile" prefix. 
auto &SM = Context.getSourceManager(); - const FileEntry &MainFile = *SM.getFileEntryForID(SM.getMainFileID()); + FileEntryRef MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); if (NoSanitizeL.containsMainFile(Kind, MainFile.getName())) return true; @@ -3221,7 +3325,8 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, return true; auto &SM = Context.getSourceManager(); if (NoSanitizeL.containsMainFile( - Kind, SM.getFileEntryForID(SM.getMainFileID())->getName(), Category)) + Kind, SM.getFileEntryRefForID(SM.getMainFileID())->getName(), + Category)) return true; if (NoSanitizeL.containsLocation(Kind, Loc, Category)) return true; @@ -3287,7 +3392,7 @@ CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. auto &SM = Context.getSourceManager(); - if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) + if (auto MainFile = SM.getFileEntryRefForID(SM.getMainFileID())) if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind)) return *V; return ProfileList.getDefault(Kind); @@ -3364,7 +3469,7 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // codegen for global variables, because they may be marked as threadprivate. 
if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) && - !isTypeConstant(Global->getType(), false, false) && + !Global->getType().isConstantStorage(getContext(), false, false) && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; @@ -3419,9 +3524,7 @@ ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { } llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType()); - llvm::Constant *Addr = llvm::ConstantExpr::getBitCast( - GV, Ty->getPointerTo(GV->getAddressSpace())); - return ConstantAddress(Addr, Ty, Alignment); + return ConstantAddress(GV, Ty, Alignment); } ConstantAddress CodeGenModule::GetAddrOfUnnamedGlobalConstantDecl( @@ -3483,7 +3586,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); - return ConstantAddress(GV, GV->getValueType(), Alignment); + return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { @@ -3495,11 +3598,8 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { // See if there is already something with the target's name in the module. 
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); - if (Entry) { - unsigned AS = getTypes().getTargetAddressSpace(VD->getType()); - auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); - return ConstantAddress(Ptr, DeclTy, Alignment); - } + if (Entry) + return ConstantAddress(Entry, DeclTy, Alignment); llvm::Constant *Aliasee; if (isa<llvm::FunctionType>(DeclTy)) @@ -3517,6 +3617,14 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { return ConstantAddress(Aliasee, DeclTy, Alignment); } +template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) { + if (!D) + return false; + if (auto *A = D->getAttr<AttrT>()) + return A->isImplicit(); + return D->isImplicit(); +} + void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast<ValueDecl>(GD.getDecl()); @@ -3538,14 +3646,24 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { return emitCPUDispatchDefinition(GD); // If this is CUDA, be selective about which declarations we emit. + // Non-constexpr non-lambda implicit host device functions are not emitted + // unless they are used on device side. 
if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { - if (!Global->hasAttr<CUDADeviceAttr>() && + const auto *FD = dyn_cast<FunctionDecl>(Global); + if ((!Global->hasAttr<CUDADeviceAttr>() || + (LangOpts.OffloadImplicitHostDeviceTemplates && FD && + hasImplicitAttr<CUDAHostAttr>(FD) && + hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() && + !isLambdaCallOperator(FD) && + !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) && !Global->hasAttr<CUDAGlobalAttr>() && !Global->hasAttr<CUDAConstantAttr>() && !Global->hasAttr<CUDASharedAttr>() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType()) + !Global->getType()->isCUDADeviceBuiltinTextureType() && + !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) && + !Global->hasAttr<CUDAHostAttr>())) return; } else { // We need to emit host-side 'shadows' for all global @@ -3581,6 +3699,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { + // Update deferred annotations with the latest declaration if the function + // function was already used or defined. + if (FD->hasAttr<AnnotateAttr>()) { + StringRef MangledName = getMangledName(GD); + if (GetGlobalValue(MangledName)) + DeferredAnnotations[MangledName] = FD; + } + // Forward declarations are emitted lazily on first use. if (!FD->doesThisDeclarationHaveABody()) { if (!FD->doesDeclarationForceExternallyVisibleDefinition()) @@ -3605,6 +3731,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Emit declaration of the must-be-emitted declare target variable. if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + + // If this variable has external storage and doesn't require special + // link handling we defer to its canonical definition. 
+ if (VD->hasExternalStorage() && + Res != OMPDeclareTargetDeclAttr::MT_Link) + return; + bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); if ((*Res == OMPDeclareTargetDeclAttr::MT_To || @@ -3638,6 +3771,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { // Emit the definition if it can't be deferred. EmitGlobalDefinition(GD); + addEmittedDeferredDecl(GD); return; } @@ -3657,7 +3791,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); - EmittedDeferredDecls[MangledName] = GD; } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -3798,10 +3931,22 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; + const auto *F = cast<FunctionDecl>(GD.getDecl()); if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>()) return false; + // We don't import function bodies from other named module units since that + // behavior may break ABI compatibility of the current unit. + if (const Module *M = F->getOwningModule(); + M && M->getTopLevelModule()->isNamedModule() && + getContext().getCurrentNamedModule() != M->getTopLevelModule() && + !F->hasAttr<AlwaysInlineAttr>()) + return false; + + if (F->hasAttr<NoInlineAttr>()) + return false; + if (F->hasAttr<DLLImportAttr>() && !F->hasAttr<AlwaysInlineAttr>()) { // Check whether it would be safe to inline this dllimport function. 
DLLImportFunctionVisitor Visitor; @@ -3936,7 +4081,7 @@ TargetMVPriority(const TargetInfo &TI, llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (FD->getFormalLinkage() == InternalLinkage) + if (FD->getFormalLinkage() == Linkage::Internal) return llvm::GlobalValue::InternalLinkage; return llvm::GlobalValue::WeakODRLinkage; } @@ -4033,13 +4178,34 @@ void CodeGenModule::emitMultiVersionFunctions() { } llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD); - if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) + if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) { ResolverConstant = IFunc->getResolver(); + // In Aarch64, default versions of multiversioned functions are mangled to + // their 'normal' assembly name. This deviates from other targets which + // append a '.default' string. As a result we need to continue appending + // .ifunc in Aarch64. + // FIXME: Should Aarch64 mangling for 'default' multiversion function and + // in turn ifunc function match that of other targets? + if (FD->isTargetClonesMultiVersion() && + !getTarget().getTriple().isAArch64()) { + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); + std::string MangledName = getMangledNameImpl( + *this, GD, FD, /*OmitMultiVersionMangling=*/true); + // In prior versions of Clang, the mangling for ifuncs incorrectly + // included an .ifunc suffix. This alias is generated for backward + // compatibility. It is deprecated, and may be removed in the future. 
+ auto *Alias = llvm::GlobalAlias::create( + DeclTy, 0, getMultiversionLinkage(*this, GD), + MangledName + ".ifunc", IFunc, &getModule()); + SetCommonAttributes(FD, Alias); + } + } llvm::Function *ResolverFunc = cast<llvm::Function>(ResolverConstant); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); - if (supportsCOMDAT()) + if (!ResolverFunc->hasLocalLinkage() && supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); @@ -4148,8 +4314,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { // always run on at least a 'pentium'). We do this by deleting the 'least // advanced' (read, lowest mangling letter). while (Options.size() > 1 && - llvm::X86::getCpuSupportsMask( - (Options.end() - 2)->Conditions.Features) == 0) { + llvm::all_of(llvm::X86::getCpuSupportsMask( + (Options.end() - 2)->Conditions.Features), + [](auto X) { return X == 0; })) { StringRef LHSName = (Options.end() - 2)->Function->getName(); StringRef RHSName = (Options.end() - 1)->Function->getName(); if (LHSName.compare(RHSName) < 0) @@ -4200,10 +4367,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { // Holds the name of the resolver, in ifunc mode this is the ifunc (which has // a separate resolver). std::string ResolverName = MangledName; - if (getTarget().supportsIFunc()) - ResolverName += ".ifunc"; - else if (FD->isTargetMultiVersion()) + if (getTarget().supportsIFunc()) { + // In Aarch64, default versions of multiversioned functions are mangled to + // their 'normal' assembly name. This deviates from other targets which + // append a '.default' string. As a result we need to continue appending + // .ifunc in Aarch64. + // FIXME: Should Aarch64 mangling for 'default' multiversion function and + // in turn ifunc function match that of other targets? 
+ if (!FD->isTargetClonesMultiVersion() || + getTarget().getTriple().isAArch64()) + ResolverName += ".ifunc"; + } else if (FD->isTargetMultiVersion()) { ResolverName += ".resolver"; + } // If the resolver has already been created, just return it. if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) @@ -4325,8 +4501,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast( - Entry, Ty->getPointerTo(Entry->getAddressSpace())); + return Entry; } // This function doesn't have a complete type (for example, the return @@ -4346,6 +4521,11 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( llvm::Function::Create(FTy, llvm::Function::ExternalLinkage, Entry ? StringRef() : MangledName, &getModule()); + // Store the declaration associated with this function so it is potentially + // updated by further declarations or definitions and emitted at the end. + if (D && D->hasAttr<AnnotateAttr>()) + DeferredAnnotations[MangledName] = cast<ValueDecl>(D); + // If we already created a function with the same mangled name (but different // type) before, take its name and add it to the list of functions to be // replaced with F at the end of CodeGen. @@ -4366,9 +4546,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( Entry->removeDeadConstantUsers(); } - llvm::Constant *BC = llvm::ConstantExpr::getBitCast( - F, Entry->getValueType()->getPointerTo(Entry->getAddressSpace())); - addGlobalValReplacement(Entry, BC); + addGlobalValReplacement(Entry, F); } assert(F->getName() == MangledName && "name was uniqued!"); @@ -4397,7 +4575,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). 
addDeferredDeclToEmit(DDI->second); - EmittedDeferredDecls[DDI->first] = DDI->second; DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're @@ -4431,8 +4608,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( return F; } - return llvm::ConstantExpr::getBitCast(F, - Ty->getPointerTo(F->getAddressSpace())); + return F; } /// GetAddrOfFunction - Return the address of the given function. If Ty is @@ -4469,7 +4645,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, cast<llvm::Function>(F->stripPointerCasts()), GD); if (IsForDefinition) return F; - return llvm::ConstantExpr::getBitCast(Handle, Ty->getPointerTo()); + return Handle; } return F; } @@ -4478,9 +4654,7 @@ llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) { llvm::GlobalValue *F = cast<llvm::GlobalValue>(GetAddrOfFunction(Decl)->stripPointerCasts()); - return llvm::ConstantExpr::getBitCast( - llvm::NoCFIValue::get(F), - llvm::Type::getInt8PtrTy(VMContext, F->getAddressSpace())); + return llvm::NoCFIValue::get(F); } static const FunctionDecl * @@ -4561,27 +4735,6 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, return {FTy, C}; } -/// isTypeConstant - Determine whether an object of this type can be emitted -/// as a constant. -/// -/// If ExcludeCtor is true, the duration when the object's constructor runs -/// will not be considered. The caller will need to verify that the object is -/// not written to during its construction. ExcludeDtor works similarly. 
-bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor, - bool ExcludeDtor) { - if (!Ty.isConstant(Context) && !Ty->isReferenceType()) - return false; - - if (Context.getLangOpts().CPlusPlus) { - if (const CXXRecordDecl *Record - = Context.getBaseElementType(Ty)->getAsCXXRecordDecl()) - return ExcludeCtor && !Record->hasMutableFields() && - (Record->hasTrivialDestructor() || ExcludeDtor); - } - - return true; -} - /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module, /// create and return an llvm GlobalVariable with the specified type and address /// space. If there is something in the module with the specified name, return @@ -4638,15 +4791,14 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, } // Make sure the result is of the correct type. - if (Entry->getType()->getAddressSpace() != TargetAS) { - return llvm::ConstantExpr::getAddrSpaceCast(Entry, - Ty->getPointerTo(TargetAS)); - } + if (Entry->getType()->getAddressSpace() != TargetAS) + return llvm::ConstantExpr::getAddrSpaceCast( + Entry, llvm::PointerType::get(Ty->getContext(), TargetAS)); // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) 
if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(TargetAS)); + return Entry; } auto DAddrSpace = GetGlobalVarAddressSpace(D); @@ -4662,9 +4814,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, GV->takeName(Entry); if (!Entry->use_empty()) { - llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, Entry->getType()); - Entry->replaceAllUsesWith(NewPtrForOldDecl); + Entry->replaceAllUsesWith(GV); } Entry->eraseFromParent(); @@ -4678,7 +4828,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). addDeferredDeclToEmit(DDI->second); - EmittedDeferredDecls[DDI->first] = DDI->second; DeferredDecls.erase(DDI); } @@ -4689,7 +4838,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // FIXME: This code is overly simple and should be merged with other global // handling. 
- GV->setConstant(isTypeConstant(D->getType(), false, false)); + GV->setConstant(D->getType().isConstantStorage(getContext(), false, false)); GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); @@ -4785,7 +4934,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS); if (DAddrSpace != ExpectedAS) { return getTargetCodeGenInfo().performAddrSpaceCast( - *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(TargetAS)); + *this, GV, DAddrSpace, ExpectedAS, + llvm::PointerType::get(getLLVMContext(), TargetAS)); } return GV; @@ -4843,9 +4993,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( GV->takeName(OldGV); if (!OldGV->use_empty()) { - llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); - OldGV->replaceAllUsesWith(NewPtrForOldDecl); + OldGV->replaceAllUsesWith(GV); } OldGV->eraseFromParent(); @@ -4997,7 +5145,8 @@ castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, if (AS != LangAS::Default) Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( CGM, GV, AS, LangAS::Default, - GV->getValueType()->getPointerTo( + llvm::PointerType::get( + CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::Default))); } return Cast; @@ -5015,7 +5164,7 @@ void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, return; // Must have internal linkage and an ordinary name. - if (!D->getIdentifier() || D->getFormalLinkage() != InternalLinkage) + if (!D->getIdentifier() || D->getFormalLinkage() != Linkage::Internal) return; // Must be in an extern "C" context. Entities declared directly within @@ -5221,8 +5370,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, AddGlobalAnnotations(D, GV); // Set the llvm linkage type as appropriate. 
- llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(D, GV->isConstant()); + llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" @@ -5250,7 +5398,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && - isTypeConstant(D->getType(), true, true)); + D->getType().isConstantStorage(getContext(), true, true)); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *SA = D->getAttr<SectionAttr>()) { @@ -5415,8 +5563,9 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, return false; } -llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( - const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) { +llvm::GlobalValue::LinkageTypes +CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D, + GVALinkage Linkage) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; @@ -5486,10 +5635,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( return llvm::GlobalVariable::ExternalLinkage; } -llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition( - const VarDecl *VD, bool IsConstant) { +llvm::GlobalValue::LinkageTypes +CodeGenModule::getLLVMLinkageVarDefinition(const VarDecl *VD) { GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); - return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant); + return getLLVMLinkageForDeclarator(VD, Linkage); } /// Replace the uses of a function that was declared with a non-proto type. 
@@ -5663,8 +5812,8 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, AddGlobalCtor(Fn, CA->getPriority()); if (const DestructorAttr *DA = D->getAttr<DestructorAttr>()) AddGlobalDtor(Fn, DA->getPriority(), true); - if (D->hasAttr<AnnotateAttr>()) - AddGlobalAnnotations(D, Fn); + if (getLangOpts().OpenMP && D->hasAttr<OMPDeclareTargetDeclAttr>()) + getOpenMPRuntime().emitDeclareTargetFunction(D, GV); } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { @@ -5701,7 +5850,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, /*D=*/nullptr); if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl())) - LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified()); + LT = getLLVMLinkageVarDefinition(VD); else LT = getFunctionLinkage(GD); } @@ -5728,8 +5877,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { // Remove it and replace uses of it with the alias. GA->takeName(Entry); - Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA, - Entry->getType())); + Entry->replaceAllUsesWith(GA); Entry->eraseFromParent(); } else { GA->setName(MangledName); @@ -5807,12 +5955,13 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { // Remove it and replace uses of it with the ifunc. GIF->takeName(Entry); - Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF, - Entry->getType())); + Entry->replaceAllUsesWith(GIF); Entry->eraseFromParent(); } else GIF->setName(MangledName); - + if (auto *F = dyn_cast<llvm::Function>(Resolver)) { + F->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation); + } SetCommonAttributes(GD, GIF); } @@ -5976,7 +6125,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { } // Note: -fwritable-strings doesn't make the backing store strings of - // CFStrings writable. (See <rdar://problem/10657500>) + // CFStrings writable. 
auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); @@ -6002,9 +6151,6 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *Str = llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); - if (isUTF16) - // Cast the UTF16 string to the correct type. - Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy); Fields.add(Str); // String length. @@ -6062,12 +6208,10 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { D->startDefinition(); QualType FieldTypes[] = { - Context.UnsignedLongTy, - Context.getPointerType(Context.getObjCIdType()), - Context.getPointerType(Context.UnsignedLongTy), - Context.getConstantArrayType(Context.UnsignedLongTy, - llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0) - }; + Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()), + Context.getPointerType(Context.UnsignedLongTy), + Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5), + nullptr, ArraySizeModifier::Normal, 0)}; for (size_t i = 0; i < 4; ++i) { FieldDecl *Field = FieldDecl::Create(Context, @@ -6295,7 +6439,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; - if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) { + if (E->getStorageDuration() == SD_Static && VD->evaluateValue()) { // If the initializer of the extending declaration is a constant // initializer, we should have a cached constant initializer for this // temporary. Note that this might have a different value from the value @@ -6310,8 +6454,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( !EvalResult.hasSideEffects()) Value = &EvalResult.Val; - LangAS AddrSpace = - VD ? 
GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); + LangAS AddrSpace = GetGlobalVarAddressSpace(VD); std::optional<ConstantEmitter> emitter; llvm::Constant *InitialValue = nullptr; @@ -6322,8 +6465,9 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( emitter.emplace(*this); InitialValue = emitter->emitForInitializer(*Value, AddrSpace, MaterializedType); - Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/ Value, - /*ExcludeDtor*/ false); + Constant = + MaterializedType.isConstantStorage(getContext(), /*ExcludeCtor*/ Value, + /*ExcludeDtor*/ false); Type = InitialValue->getType(); } else { // No initializer, the initialization will be provided when we @@ -6332,8 +6476,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( } // Create a global variable for this lifetime-extended temporary. - llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(VD, Constant); + llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD); if (Linkage == llvm::GlobalVariable::ExternalLinkage) { const VarDecl *InitVD; if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && @@ -6368,15 +6511,15 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( if (AddrSpace != LangAS::Default) CV = getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, AddrSpace, LangAS::Default, - Type->getPointerTo( + llvm::PointerType::get( + getLLVMContext(), getContext().getTargetAddressSpace(LangAS::Default))); // Update the map with the new temporary. If we created a placeholder above, // replace it with the new global now. 
llvm::Constant *&Entry = MaterializedGlobalTemporaryMap[E]; if (Entry) { - Entry->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(CV, Entry->getType())); + Entry->replaceAllUsesWith(CV); llvm::cast<llvm::GlobalVariable>(Entry)->eraseFromParent(); } Entry = CV; @@ -6446,7 +6589,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, - /*isDefined=*/false, ObjCMethodDecl::Required); + /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(DTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false); D->setHasDestructors(true); @@ -6467,7 +6610,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, - /*isDefined=*/false, ObjCMethodDecl::Required); + /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(CTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true); D->setHasNonZeroConstructors(true); @@ -6475,8 +6618,8 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // EmitLinkageSpec - Emit all declarations in a linkage spec. 
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { - if (LSD->getLanguage() != LinkageSpecDecl::lang_c && - LSD->getLanguage() != LinkageSpecDecl::lang_cxx) { + if (LSD->getLanguage() != LinkageSpecLanguageIDs::C && + LSD->getLanguage() != LinkageSpecLanguageIDs::CXX) { ErrorUnsupported(LSD, "linkage spec"); return; } @@ -6856,9 +6999,7 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { SourceManager &SM = getContext().getSourceManager(); if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc())) break; - auto I = DeferredEmptyCoverageMappingDecls.find(D); - if (I == DeferredEmptyCoverageMappingDecls.end()) - DeferredEmptyCoverageMappingDecls[D] = true; + DeferredEmptyCoverageMappingDecls.try_emplace(D, true); break; } default: @@ -6874,11 +7015,7 @@ void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) { if (Fn->isTemplateInstantiation()) ClearUnusedCoverageMapping(Fn->getTemplateInstantiationPattern()); } - auto I = DeferredEmptyCoverageMappingDecls.find(D); - if (I == DeferredEmptyCoverageMappingDecls.end()) - DeferredEmptyCoverageMappingDecls[D] = false; - else - I->second = false; + DeferredEmptyCoverageMappingDecls.insert_or_assign(D, false); } void CodeGenModule::EmitDeferredUnusedCoverageMappings() { @@ -7194,7 +7331,7 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, if (isExternallyVisible(T->getLinkage())) { std::string OutName; llvm::raw_string_ostream Out(OutName); - getCXXABI().getMangleContext().mangleTypeName( + getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) @@ -7428,7 +7565,7 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, // Get the UniqueID for the file containing the decl. 
llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false); assert(PLoc.isValid() && "Source location is expected to be valid."); if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) @@ -7448,6 +7585,8 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { assert(NewBuilder->DeferredDecls.empty() && "Newly created module should not have deferred decls"); NewBuilder->DeferredDecls = std::move(DeferredDecls); + assert(EmittedDeferredDecls.empty() && + "Still have (unmerged) EmittedDeferredDecls deferred decls"); assert(NewBuilder->DeferredVTables.empty() && "Newly created module should not have deferred vtables"); @@ -7463,10 +7602,5 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { NewBuilder->TBAA = std::move(TBAA); - assert(NewBuilder->EmittedDeferredDecls.empty() && - "Still have (unmerged) EmittedDeferredDecls deferred decls"); - - NewBuilder->EmittedDeferredDecls = std::move(EmittedDeferredDecls); - NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx); } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 05cb217e2bee..ec34680fd3f7 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -26,6 +26,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/NoSanitizeList.h" +#include "clang/Basic/ProfileList.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/XRayLists.h" #include "clang/Lex/PreprocessorOptions.h" @@ -214,16 +215,14 @@ struct ObjCEntrypoints { /// This class records statistics on instrumentation based profiling. 
class InstrProfStats { - uint32_t VisitedInMainFile; - uint32_t MissingInMainFile; - uint32_t Visited; - uint32_t Missing; - uint32_t Mismatched; + uint32_t VisitedInMainFile = 0; + uint32_t MissingInMainFile = 0; + uint32_t Visited = 0; + uint32_t Missing = 0; + uint32_t Mismatched = 0; public: - InstrProfStats() - : VisitedInMainFile(0), MissingInMainFile(0), Visited(0), Missing(0), - Mismatched(0) {} + InstrProfStats() = default; /// Record that we've visited a function and whether or not that function was /// in the main source file. void addVisited(bool MainFile) { @@ -361,10 +360,19 @@ private: llvm::DenseMap<llvm::StringRef, GlobalDecl> EmittedDeferredDecls; void addEmittedDeferredDecl(GlobalDecl GD) { - if (!llvm::isa<FunctionDecl>(GD.getDecl())) + // Reemission is only needed in incremental mode. + if (!Context.getLangOpts().IncrementalExtensions) return; - llvm::GlobalVariable::LinkageTypes L = getFunctionLinkage(GD); - if (llvm::GlobalValue::isLinkOnceLinkage(L) || + + // Assume a linkage by default that does not need reemission. + auto L = llvm::GlobalValue::ExternalLinkage; + if (llvm::isa<FunctionDecl>(GD.getDecl())) + L = getFunctionLinkage(GD); + else if (auto *VD = llvm::dyn_cast<VarDecl>(GD.getDecl())) + L = getLLVMLinkageVarDefinition(VD); + + if (llvm::GlobalValue::isInternalLinkage(L) || + llvm::GlobalValue::isLinkOnceLinkage(L) || llvm::GlobalValue::isWeakLinkage(L)) { EmittedDeferredDecls[getMangledName(GD)] = GD; } @@ -423,6 +431,10 @@ private: /// Global annotations. std::vector<llvm::Constant*> Annotations; + // Store deferred function annotations so they can be emitted at the end with + // most up to date ValueDecl that will have all the inherited annotations. + llvm::DenseMap<StringRef, const ValueDecl *> DeferredAnnotations; + /// Map used to get unique annotation strings. 
llvm::StringMap<llvm::Constant*> AnnotationStrings; @@ -814,8 +826,6 @@ public: return getTBAAAccessInfo(AccessType); } - bool isTypeConstant(QualType QTy, bool ExcludeCtor, bool ExcludeDtor); - bool isPaddedAtomicType(QualType type); bool isPaddedAtomicType(const AtomicType *type); @@ -1019,11 +1029,6 @@ public: /// Return a pointer to a constant CFString object for the given string. ConstantAddress GetAddrOfConstantCFString(const StringLiteral *Literal); - /// Return a pointer to a constant NSString object for the given string. Or a - /// user defined String object as defined via - /// -fconstant-string-class=class_name option. - ConstantAddress GetAddrOfConstantString(const StringLiteral *Literal); - /// Return a constant array for the given string. llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E); @@ -1259,26 +1264,11 @@ public: llvm::AttributeList &Attrs, unsigned &CallingConv, bool AttrOnCallSite, bool IsThunk); - /// Adds attributes to F according to our CodeGenOptions and LangOptions, as - /// though we had emitted it ourselves. We remove any attributes on F that - /// conflict with the attributes we add here. - /// - /// This is useful for adding attrs to bitcode modules that you want to link - /// with but don't control, such as CUDA's libdevice. When linking with such - /// a bitcode library, you might want to set e.g. its functions' - /// "unsafe-fp-math" attribute to match the attr of the functions you're - /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of - /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM - /// will propagate unsafe-fp-math=false up to every transitive caller of a - /// function in the bitcode library! - /// - /// With the exception of fast-math attrs, this will only make the attributes - /// on the function more conservative. But it's unsafe to call this on a - /// function which relies on particular fast-math attributes for correctness. 
- /// It's up to you to ensure that this is safe. - void addDefaultFunctionDefinitionAttributes(llvm::Function &F); - void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, - bool WillInternalize); + /// Adjust Memory attribute to ensure that the BE gets the right attribute + // in order to generate the library call or the intrinsic for the function + // name 'Name'. + void AdjustMemoryAttribute(StringRef Name, CGCalleeInfo CalleeInfo, + llvm::AttributeList &Attrs); /// Like the overload taking a `Function &`, but intended specifically /// for frontends that want to build on Clang's target-configuration logic. @@ -1321,12 +1311,11 @@ public: /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes - getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage, - bool IsConstantVariable); + getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage); /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes - getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant); + getLLVMLinkageVarDefinition(const VarDecl *VD); /// Emit all the global annotations. void EmitGlobalAnnotations(); @@ -1505,7 +1494,7 @@ public: /// /// A most-base class of a class C is defined as a recursive base class of C, /// including C itself, that does not have any bases. - std::vector<const CXXRecordDecl *> + SmallVector<const CXXRecordDecl *, 0> getMostBaseClasses(const CXXRecordDecl *RD); /// Get the declaration of std::terminate for the platform. @@ -1557,6 +1546,41 @@ public: /// because we'll lose all important information after each repl. void moveLazyEmissionStates(CodeGenModule *NewBuilder); + /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F. + /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it, + /// if a valid one was found. 
+ void handleCUDALaunchBoundsAttr(llvm::Function *F, + const CUDALaunchBoundsAttr *A, + int32_t *MaxThreadsVal = nullptr, + int32_t *MinBlocksVal = nullptr, + int32_t *MaxClusterRankVal = nullptr); + + /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute + /// to \p F. Alternatively, the work group size can be taken from a \p + /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is + /// stored in it, if a valid one was found. If \p MaxThreadsVal is not + /// nullptr, the max threads value is stored in it, if a valid one was found. + void handleAMDGPUFlatWorkGroupSizeAttr( + llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, + const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr, + int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr); + + /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F. + void handleAMDGPUWavesPerEUAttr(llvm::Function *F, + const AMDGPUWavesPerEUAttr *A); + + llvm::Constant * + GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace, + const VarDecl *D, + ForDefinition_t IsForDefinition = NotForDefinition); + + // FIXME: Hardcoding priority here is gross. 
+ void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, + unsigned LexOrder = ~0U, + llvm::Constant *AssociatedData = nullptr); + void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, + bool IsDtorAttrFunc = false); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, @@ -1579,11 +1603,6 @@ private: void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD, StringRef &CurName); - llvm::Constant * - GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace, - const VarDecl *D, - ForDefinition_t IsForDefinition = NotForDefinition); - bool GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &AttrBuilder, bool SetTargetFeatures = true); @@ -1633,13 +1652,6 @@ private: void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr, llvm::Function *InitFunc, InitSegAttr *ISA); - // FIXME: Hardcoding priority here is gross. - void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, - unsigned LexOrder = ~0U, - llvm::Constant *AssociatedData = nullptr); - void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, - bool IsDtorAttrFunc = false); - /// EmitCtorList - Generates a global array of functions and priorities using /// the given list and name. This array will have appending linkage and is /// suitable for use as a LLVM constructor or destructor array. Clears Fns. diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index b80317529b72..81bf8ea696b1 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -376,9 +376,9 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { /// BreakContinueStack - Keep counts of breaks and continues inside loops. 
struct BreakContinue { - uint64_t BreakCount; - uint64_t ContinueCount; - BreakContinue() : BreakCount(0), ContinueCount(0) {} + uint64_t BreakCount = 0; + uint64_t ContinueCount = 0; + BreakContinue() = default; }; SmallVector<BreakContinue, 8> BreakContinueStack; @@ -755,7 +755,8 @@ void PGOHash::combine(HashType Type) { // Pass through MD5 if enough work has built up. if (Count && Count % NumTypesPerWord == 0) { using namespace llvm::support; - uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); + uint64_t Swapped = + endian::byte_swap<uint64_t, llvm::endianness::little>(Working); MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); Working = 0; } @@ -781,7 +782,8 @@ uint64_t PGOHash::finalize() { MD5.update({(uint8_t)Working}); } else { using namespace llvm::support; - uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); + uint64_t Swapped = + endian::byte_swap<uint64_t, llvm::endianness::little>(Working); MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); } } @@ -952,15 +954,12 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, llvm::Value *StepV) { - if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) - return; - if (!Builder.GetInsertBlock()) + if (!RegionCounterMap || !Builder.GetInsertBlock()) return; unsigned Counter = (*RegionCounterMap)[S]; - auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); - llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + llvm::Value *Args[] = {FuncNameVar, Builder.getInt64(FunctionHash), Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV}; @@ -998,7 +997,7 @@ void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, auto BuilderInsertPoint = Builder.saveIP(); Builder.SetInsertPoint(ValueSite); llvm::Value *Args[5] = { - llvm::ConstantExpr::getBitCast(FuncNameVar, 
Builder.getInt8PtrTy()), + FuncNameVar, Builder.getInt64(FunctionHash), Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()), Builder.getInt32(ValueKind), diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 395ed7b1d703..dc288bc3f615 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -196,16 +196,19 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // Enum types are distinct types. In C++ they have "underlying types", // however they aren't related for TBAA. if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) { + if (!Features.CPlusPlus) + return getTypeInfo(ETy->getDecl()->getIntegerType()); + // In C++ mode, types have linkage, so we can rely on the ODR and // on their mangled names, if they're external. // TODO: Is there a way to get a program-wide unique name for a // decl with local linkage or no linkage? - if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible()) + if (!ETy->getDecl()->isExternallyVisible()) return getChar(); SmallString<256> OutName; llvm::raw_svector_ostream Out(OutName); - MContext.mangleTypeName(QualType(ETy, 0), Out); + MContext.mangleCanonicalTypeName(QualType(ETy, 0), Out); return createScalarTypeNode(OutName, getChar(), Size); } @@ -342,7 +345,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { // field. Virtual bases are more complex and omitted, but avoid an // incomplete view for NewStructPathTBAA. if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) - return BaseTypeMetadataCache[Ty] = nullptr; + return nullptr; for (const CXXBaseSpecifier &B : CXXRD->bases()) { if (B.isVirtual()) continue; @@ -354,7 +357,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { ? 
getBaseTypeInfo(BaseQTy) : getTypeInfo(BaseQTy); if (!TypeNode) - return BaseTypeMetadataCache[Ty] = nullptr; + return nullptr; uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity(); uint64_t Size = Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity(); @@ -378,7 +381,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ? getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); if (!TypeNode) - return BaseTypeMetadataCache[Ty] = nullptr; + return nullptr; uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex()); uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity(); @@ -391,7 +394,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { if (Features.CPlusPlus) { // Don't use the mangler for C code. llvm::raw_svector_ostream Out(OutName); - MContext.mangleTypeName(QualType(Ty, 0), Out); + MContext.mangleCanonicalTypeName(QualType(Ty, 0), Out); } else { OutName = RD->getName(); } @@ -418,14 +421,20 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { return nullptr; const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); - if (llvm::MDNode *N = BaseTypeMetadataCache[Ty]) - return N; - // Note that the following helper call is allowed to add new nodes to the - // cache, which invalidates all its previously obtained iterators. So we - // first generate the node for the type and then add that node to the cache. + // nullptr is a valid value in the cache, so use find rather than [] + auto I = BaseTypeMetadataCache.find(Ty); + if (I != BaseTypeMetadataCache.end()) + return I->second; + + // First calculate the metadata, before recomputing the insertion point, as + // the helper can recursively call us. 
llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); - return BaseTypeMetadataCache[Ty] = TypeNode; + LLVM_ATTRIBUTE_UNUSED auto inserted = + BaseTypeMetadataCache.insert({Ty, TypeNode}); + assert(inserted.second && "BaseType metadata was already inserted"); + + return TypeNode; } llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h index e848dc3b449c..083d69214fb3 100644 --- a/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/clang/lib/CodeGen/CodeGenTypeCache.h @@ -51,14 +51,11 @@ struct CodeGenTypeCache { llvm::IntegerType *PtrDiffTy; }; - /// void* in address space 0 + /// void*, void** in address space 0 union { + llvm::PointerType *UnqualPtrTy; llvm::PointerType *VoidPtrTy; llvm::PointerType *Int8PtrTy; - }; - - /// void** in address space 0 - union { llvm::PointerType *VoidPtrPtrTy; llvm::PointerType *Int8PtrPtrTy; }; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 30021794a0bb..a6b51bfef876 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -34,6 +34,7 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm) Target(cgm.getTarget()), TheCXXABI(cgm.getCXXABI()), TheABIInfo(cgm.getTargetCodeGenInfo().getABIInfo()) { SkippedLayout = false; + LongDoubleReferenced = false; } CodeGenTypes::~CodeGenTypes() { @@ -406,10 +407,12 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { Context.getLangOpts().NativeHalfType || !Context.getTargetInfo().useFP16ConversionIntrinsics()); break; + case BuiltinType::LongDouble: + LongDoubleReferenced = true; + LLVM_FALLTHROUGH; case BuiltinType::BFloat16: case BuiltinType::Float: case BuiltinType::Double: - case BuiltinType::LongDouble: case BuiltinType::Float128: case BuiltinType::Ibm128: ResultType = getTypeForFormat(getLLVMContext(), @@ -419,7 +422,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::NullPtr: // Model std::nullptr_t as 
i8* - ResultType = llvm::Type::getInt8PtrTy(getLLVMContext()); + ResultType = llvm::PointerType::getUnqual(getLLVMContext()); break; case BuiltinType::UInt128: diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 9088f77b95c3..01c0c673795c 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -84,6 +84,9 @@ class CodeGenTypes { /// a recursive struct conversion, set this to true. bool SkippedLayout; + /// True if any instance of long double types are used. + bool LongDoubleReferenced; + /// This map keeps cache of llvm::Types and maps clang::Type to /// corresponding llvm::Type. llvm::DenseMap<const Type *, llvm::Type *> TypeCache; @@ -252,13 +255,11 @@ public: /// this. /// /// \param argTypes - must all actually be canonical as params - const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType, - bool instanceMethod, - bool chainCall, - ArrayRef<CanQualType> argTypes, - FunctionType::ExtInfo info, - ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, - RequiredArgs args); + const CGFunctionInfo &arrangeLLVMFunctionInfo( + CanQualType returnType, FnInfoOpts opts, ArrayRef<CanQualType> argTypes, + FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, + RequiredArgs args); /// Compute a new LLVM record layout object for the given record. std::unique_ptr<CGRecordLayout> ComputeRecordLayout(const RecordDecl *D, @@ -291,6 +292,7 @@ public: // These are internal details of CGT that shouldn't be used externally. /// zero-initialized (in the C++ sense) with an LLVM zeroinitializer. 
bool isZeroInitializable(const RecordDecl *RD); + bool isLongDoubleReferenced() const { return LongDoubleReferenced; } bool isRecordLayoutComplete(const Type *Ty) const; unsigned getTargetAddressSpace(QualType T) const; }; diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index bb4c6f5e0cde..b16358ee117a 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -322,12 +322,12 @@ public: for (const auto &FL : FileLocs) { SourceLocation Loc = FL.first; FileID SpellingFile = SM.getDecomposedSpellingLoc(Loc).first; - auto Entry = SM.getFileEntryForID(SpellingFile); + auto Entry = SM.getFileEntryRefForID(SpellingFile); if (!Entry) continue; FileIDMapping[SM.getFileID(Loc)] = std::make_pair(Mapping.size(), Loc); - Mapping.push_back(CVM.getFileID(Entry)); + Mapping.push_back(CVM.getFileID(*Entry)); } } @@ -1032,11 +1032,21 @@ struct CounterCoverageMappingBuilder // lexer may not be able to report back precise token end locations for // these children nodes (llvm.org/PR39822), and moreover users will not be // able to see coverage for them. 
+ Counter BodyCounter = getRegionCounter(Body); bool Defaulted = false; if (auto *Method = dyn_cast<CXXMethodDecl>(D)) Defaulted = Method->isDefaulted(); + if (auto *Ctor = dyn_cast<CXXConstructorDecl>(D)) { + for (auto *Initializer : Ctor->inits()) { + if (Initializer->isWritten()) { + auto *Init = Initializer->getInit(); + if (getStart(Init).isValid() && getEnd(Init).isValid()) + propagateCounts(BodyCounter, Init); + } + } + } - propagateCounts(getRegionCounter(Body), Body, + propagateCounts(BodyCounter, Body, /*VisitChildren=*/!Defaulted); assert(RegionStack.empty() && "Regions entered but never exited"); } @@ -1718,13 +1728,11 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord( void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, const std::string &CoverageMapping, bool IsUsed) { - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue); FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed}); if (!IsUsed) - FunctionNames.push_back( - llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); + FunctionNames.push_back(NamePtr); if (CGM.getCodeGenOpts().DumpCoverageMapping) { // Dump the coverage mapping data for this function by decoding the @@ -1740,7 +1748,7 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( FilenameStrs[0] = normalizeFilename(getCurrentDirname()); for (const auto &Entry : FileEntries) { auto I = Entry.second; - FilenameStrs[I] = normalizeFilename(Entry.first->getName()); + FilenameStrs[I] = normalizeFilename(Entry.first.getName()); } ArrayRef<std::string> FilenameRefs = llvm::ArrayRef(FilenameStrs); RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames, @@ -1764,7 +1772,7 @@ void CoverageMappingModuleGen::emit() { FilenameStrs[0] = normalizeFilename(getCurrentDirname()); for (const auto &Entry : FileEntries) { auto I = Entry.second; - 
FilenameStrs[I] = normalizeFilename(Entry.first->getName()); + FilenameStrs[I] = normalizeFilename(Entry.first.getName()); } std::string Filenames; @@ -1812,7 +1820,7 @@ void CoverageMappingModuleGen::emit() { CGM.addUsedGlobal(CovData); // Create the deferred function records array if (!FunctionNames.empty()) { - auto NamesArrTy = llvm::ArrayType::get(llvm::Type::getInt8PtrTy(Ctx), + auto NamesArrTy = llvm::ArrayType::get(llvm::PointerType::getUnqual(Ctx), FunctionNames.size()); auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames); // This variable will *NOT* be emitted to the object file. It is used @@ -1823,7 +1831,7 @@ void CoverageMappingModuleGen::emit() { } } -unsigned CoverageMappingModuleGen::getFileID(const FileEntry *File) { +unsigned CoverageMappingModuleGen::getFileID(FileEntryRef File) { auto It = FileEntries.find(File); if (It != FileEntries.end()) return It->second; diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index eca68d9abd79..77d7c6cd87cf 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -104,7 +104,7 @@ class CoverageMappingModuleGen { CodeGenModule &CGM; CoverageSourceInfo &SourceInfo; - llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries; + llvm::SmallDenseMap<FileEntryRef, unsigned, 8> FileEntries; std::vector<llvm::Constant *> FunctionNames; std::vector<FunctionInfo> FunctionRecords; @@ -137,7 +137,7 @@ public: /// Return the coverage mapping translation unit file id /// for the given file. - unsigned getFileID(const FileEntry *File); + unsigned getFileID(FileEntryRef File); /// Return an interface into CodeGenModule. 
CodeGenModule &getCodeGenModule() { return CGM; } diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h index 3c8a51590d1b..0c667e80bb6d 100644 --- a/clang/lib/CodeGen/EHScopeStack.h +++ b/clang/lib/CodeGen/EHScopeStack.h @@ -166,10 +166,10 @@ public: F_IsEHCleanupKind = 0x4, F_HasExitSwitch = 0x8, }; - unsigned flags; + unsigned flags = 0; public: - Flags() : flags(0) {} + Flags() = default; /// isForEH - true if the current emission is for an EH cleanup. bool isForEHCleanup() const { return flags & F_IsForEH; } diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 79a926cb9edd..d173806ec8ce 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -647,9 +647,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // Apply the adjustment and cast back to the original struct type // for consistency. llvm::Value *This = ThisAddr.getPointer(); - llvm::Value *Ptr = Builder.CreateBitCast(This, Builder.getInt8PtrTy()); - Ptr = Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Ptr, Adj); - This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted"); + This = Builder.CreateInBoundsGEP(Builder.getInt8Ty(), This, Adj); ThisPtrForCall = This; // Load the function pointer. @@ -740,9 +738,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( ? 
llvm::Intrinsic::type_test : llvm::Intrinsic::public_type_test; - CheckResult = Builder.CreateCall( - CGM.getIntrinsic(IID), - {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); + CheckResult = + Builder.CreateCall(CGM.getIntrinsic(IID), {VFPAddr, TypeId}); } if (CGM.getItaniumVTableContext().isRelativeLayout()) { @@ -753,9 +750,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( } else { llvm::Value *VFPAddr = CGF.Builder.CreateGEP(CGF.Int8Ty, VTable, VTableOffset); - VirtualFn = CGF.Builder.CreateAlignedLoad( - llvm::PointerType::getUnqual(CGF.getLLVMContext()), VFPAddr, - CGF.getPointerAlign(), "memptr.virtualfn"); + VirtualFn = CGF.Builder.CreateAlignedLoad(CGF.UnqualPtrTy, VFPAddr, + CGF.getPointerAlign(), + "memptr.virtualfn"); } } assert(VirtualFn && "Virtual fuction pointer not created!"); @@ -795,9 +792,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // In the non-virtual path, the function pointer is actually a // function pointer. CGF.EmitBlock(FnNonVirtual); - llvm::Value *NonVirtualFn = Builder.CreateIntToPtr( - FnAsInt, llvm::PointerType::getUnqual(CGF.getLLVMContext()), - "memptr.nonvirtualfn"); + llvm::Value *NonVirtualFn = + Builder.CreateIntToPtr(FnAsInt, CGF.UnqualPtrTy, "memptr.nonvirtualfn"); // Check the function pointer if CFI on member function pointers is enabled. 
if (ShouldEmitCFICheck) { @@ -812,8 +808,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( }; llvm::Value *Bit = Builder.getFalse(); - llvm::Value *CastedNonVirtualFn = - Builder.CreateBitCast(NonVirtualFn, CGF.Int8PtrTy); for (const CXXRecordDecl *Base : CGM.getMostBaseClasses(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType( getContext().getMemberPointerType( @@ -824,13 +818,13 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Value *TypeTest = Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), - {CastedNonVirtualFn, TypeId}); + {NonVirtualFn, TypeId}); Bit = Builder.CreateOr(Bit, TypeTest); } CGF.EmitCheck(std::make_pair(Bit, SanitizerKind::CFIMFCall), SanitizerHandler::CFICheckFail, StaticData, - {CastedNonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)}); + {NonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)}); FnNonVirtual = Builder.GetInsertBlock(); } @@ -838,8 +832,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // We're done. CGF.EmitBlock(FnEnd); - llvm::PHINode *CalleePtr = - Builder.CreatePHI(llvm::PointerType::getUnqual(CGF.getLLVMContext()), 2); + llvm::PHINode *CalleePtr = Builder.CreatePHI(CGF.UnqualPtrTy, 2); CalleePtr->addIncoming(VirtualFn, FnVirtual); CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual); @@ -1243,8 +1236,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // Grab the vtable pointer as an intptr_t*. auto *ClassDecl = cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl()); - llvm::Value *VTable = CGF.GetVTablePtr( - Ptr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl); + llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.UnqualPtrTy, ClassDecl); // Track back to entry -2 and pull out the offset there. llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64( @@ -1253,8 +1245,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, CGF.getPointerAlign()); // Apply the offset. 
- llvm::Value *CompletePtr = - CGF.Builder.CreateBitCast(Ptr.getPointer(), CGF.Int8PtrTy); + llvm::Value *CompletePtr = Ptr.getPointer(); CompletePtr = CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, CompletePtr, Offset); @@ -1333,7 +1324,6 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)); - Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); } } if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy); @@ -1344,15 +1334,16 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { // void *__dynamic_cast(const void *sub, - // const abi::__class_type_info *src, - // const abi::__class_type_info *dst, + // global_as const abi::__class_type_info *src, + // global_as const abi::__class_type_info *dst, // std::ptrdiff_t src2dst_offset); llvm::Type *Int8PtrTy = CGF.Int8PtrTy; + llvm::Type *GlobInt8PtrTy = CGF.GlobalsInt8PtrTy; llvm::Type *PtrDiffTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); - llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy }; + llvm::Type *Args[4] = { Int8PtrTy, GlobInt8PtrTy, GlobInt8PtrTy, PtrDiffTy }; llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); @@ -1449,12 +1440,11 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, llvm::Type *StdTypeInfoPtrTy) { auto *ClassDecl = cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); - llvm::Value *Value = CGF.GetVTablePtr( - ThisPtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl); + llvm::Value *Value = CGF.GetVTablePtr(ThisPtr, CGM.GlobalsInt8PtrTy, + ClassDecl); if (CGM.getItaniumVTableContext().isRelativeLayout()) { // Load the type info. 
- Value = CGF.Builder.CreateBitCast(Value, CGM.Int8PtrTy); Value = CGF.Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), {Value, llvm::ConstantInt::get(CGM.Int32Ty, -4)}); @@ -1597,9 +1587,8 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Value *OffsetToTop; if (CGM.getItaniumVTableContext().isRelativeLayout()) { // Get the vtable pointer. - llvm::Value *VTable = CGF.GetVTablePtr( - ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), - ClassDecl); + llvm::Value *VTable = + CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -1611,9 +1600,8 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, CGF.ConvertType(CGF.getContext().getPointerDiffType()); // Get the vtable pointer. - llvm::Value *VTable = CGF.GetVTablePtr( - ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), - ClassDecl); + llvm::Value *VTable = + CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -1729,7 +1717,7 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, QualType T = Context.getPointerType(Q); auto *VTTDecl = ImplicitParamDecl::Create( Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"), - T, ImplicitParamDecl::CXXVTT); + T, ImplicitParamKind::CXXVTT); Params.insert(Params.begin() + 1, VTTDecl); getStructorImplicitParamDecl(CGF) = VTTDecl; } @@ -2211,8 +2199,7 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, NonVirtualAdjustment); } - // Cast back to the original type. - return CGF.Builder.CreateBitCast(ResultPtr, InitialPtr.getType()); + return ResultPtr; } llvm::Value *ItaniumCXXABI::performThisAdjustment(CodeGenFunction &CGF, @@ -2315,8 +2302,8 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, // cookie, otherwise return 0 to avoid an infinite loop calling DTORs. 
// We can't simply ignore this load using nosanitize metadata because // the metadata may be lost. - llvm::FunctionType *FTy = llvm::FunctionType::get( - CGF.SizeTy, llvm::PointerType::getUnqual(CGF.getLLVMContext()), false); + llvm::FunctionType *FTy = + llvm::FunctionType::get(CGF.SizeTy, CGF.UnqualPtrTy, false); llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie"); return CGF.Builder.CreateCall(F, numElementsPtr.getPointer()); @@ -2659,7 +2646,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // We're assuming that the destructor function is something we can // reasonably call with the default CC. - llvm::Type *dtorTy = llvm::PointerType::getUnqual(CGF.getLLVMContext()); + llvm::Type *dtorTy = CGF.UnqualPtrTy; // Preserve address space of addr. auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0; @@ -2807,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, if (D.isNoDestroy(CGM.getContext())) return; + // OpenMP offloading supports C++ constructors and destructors but we do not + // always have 'atexit' available. Instead lower these to use the LLVM global + // destructors which we can handle directly in the runtime. Note that this is + // not strictly 1-to-1 with using `atexit` because we no longer tear down + // globals in reverse order of when they were constructed. + if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal()) + return CGF.registerGlobalDtorWithLLVM(D, dtor, addr); + // emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit // or __cxa_atexit depending on whether this VarDecl is a thread-local storage // or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled. 
@@ -2839,7 +2834,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD, static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) { llvm::GlobalValue::LinkageTypes VarLinkage = - CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + CGM.getLLVMLinkageVarDefinition(VD); // For internal linkage variables, we don't need an external or weak wrapper. if (llvm::GlobalValue::isLocalLinkage(VarLinkage)) @@ -3100,9 +3095,6 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CharUnits Align = CGM.getContext().getDeclAlign(VD); Val = Builder.CreateAlignedLoad(Var->getValueType(), Val, Align); } - if (Val->getType() != Wrapper->getReturnType()) - Val = Builder.CreatePointerBitCastOrAddrSpaceCast( - Val, Wrapper->getReturnType(), ""); Builder.CreateRet(Val); } @@ -3676,9 +3668,10 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { // Check if the alias exists. If it doesn't, then get or create the global. if (CGM.getItaniumVTableContext().isRelativeLayout()) VTable = CGM.getModule().getNamedAlias(VTableName); - if (!VTable) - VTable = - CGM.getModule().getOrInsertGlobal(VTableName, CGM.GlobalsInt8PtrTy); + if (!VTable) { + llvm::Type *Ty = llvm::ArrayType::get(CGM.GlobalsInt8PtrTy, 0); + VTable = CGM.getModule().getOrInsertGlobal(VTableName, Ty); + } CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts())); @@ -3718,14 +3711,17 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, return llvm::GlobalValue::InternalLinkage; switch (Ty->getLinkage()) { - case NoLinkage: - case InternalLinkage: - case UniqueExternalLinkage: + case Linkage::Invalid: + llvm_unreachable("Linkage hasn't been computed!"); + + case Linkage::None: + case Linkage::Internal: + case Linkage::UniqueExternal: return llvm::GlobalValue::InternalLinkage; - case VisibleNoLinkage: - case ModuleLinkage: - case ExternalLinkage: + case Linkage::VisibleNone: + case Linkage::Module: + case 
Linkage::External: // RTTI is not enabled, which means that this type info struct is going // to be used for exception handling. Give it linkonce_odr linkage. if (!CGM.getLangOpts().RTTI) @@ -3950,9 +3946,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( // If there's already an old global variable, replace it with the new one. if (OldGV) { GV->takeName(OldGV); - llvm::Constant *NewPtr = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); - OldGV->replaceAllUsesWith(NewPtr); + OldGV->replaceAllUsesWith(GV); OldGV->eraseFromParent(); } @@ -4519,7 +4513,9 @@ namespace { } /// Emits a call to __cxa_begin_catch and enters a cleanup to call -/// __cxa_end_catch. +/// __cxa_end_catch. If -fassume-nothrow-exception-dtor is specified, we assume +/// that the exception object's dtor is nothrow, therefore the __cxa_end_catch +/// call can be marked as nounwind even if EndMightThrow is true. /// /// \param EndMightThrow - true if __cxa_end_catch might throw static llvm::Value *CallBeginCatch(CodeGenFunction &CGF, @@ -4528,7 +4524,9 @@ static llvm::Value *CallBeginCatch(CodeGenFunction &CGF, llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn); - CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow); + CGF.EHStack.pushCleanup<CallEndCatch>( + NormalAndEHCleanup, + EndMightThrow && !CGF.CGM.getLangOpts().AssumeNothrowExceptionDtor); return call; } @@ -4660,8 +4658,7 @@ static void InitCatchParam(CodeGenFunction &CGF, auto catchRD = CatchType->getAsCXXRecordDecl(); CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD); - llvm::Type *PtrTy = - llvm::PointerType::getUnqual(CGF.getLLVMContext()); // addrspace 0 ok + llvm::Type *PtrTy = CGF.UnqualPtrTy; // addrspace 0 ok // Check for a copy expression. If we don't have a copy expression, // that means a trivial copy is okay. 
@@ -4849,8 +4846,7 @@ void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Dtor, llvm::Constant *Addr) { if (D.getTLSKind() != VarDecl::TLS_None) { - llvm::PointerType *PtrTy = - llvm::PointerType::getUnqual(CGF.getLLVMContext()); + llvm::PointerType *PtrTy = CGF.UnqualPtrTy; // extern "C" int __pt_atexit_np(int flags, int(*)(int,...), ...); llvm::FunctionType *AtExitTy = diff --git a/clang/lib/CodeGen/LinkInModulesPass.cpp b/clang/lib/CodeGen/LinkInModulesPass.cpp new file mode 100644 index 000000000000..6ce2b94c1db8 --- /dev/null +++ b/clang/lib/CodeGen/LinkInModulesPass.cpp @@ -0,0 +1,29 @@ +//===-- LinkInModulesPass.cpp - Module Linking pass --------------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// LinkInModulesPass implementation. 
+/// +//===----------------------------------------------------------------------===// + +#include "LinkInModulesPass.h" +#include "BackendConsumer.h" + +using namespace llvm; + +LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC, + bool ShouldLinkFiles) + : BC(BC), ShouldLinkFiles(ShouldLinkFiles) {} + +PreservedAnalyses LinkInModulesPass::run(Module &M, ModuleAnalysisManager &AM) { + + if (BC && BC->LinkInModules(&M, ShouldLinkFiles)) + report_fatal_error("Bitcode module linking failed, compilation aborted!"); + + return PreservedAnalyses::all(); +} diff --git a/clang/lib/CodeGen/LinkInModulesPass.h b/clang/lib/CodeGen/LinkInModulesPass.h new file mode 100644 index 000000000000..7fe94d625058 --- /dev/null +++ b/clang/lib/CodeGen/LinkInModulesPass.h @@ -0,0 +1,42 @@ +//===-- LinkInModulesPass.h - Module Linking pass ----------------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides a pass to link in Modules from a provided +/// BackendConsumer. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_LINKINMODULESPASS_H +#define LLVM_BITCODE_LINKINMODULESPASS_H + +#include "BackendConsumer.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Module; +class ModulePass; +class Pass; + +/// Create and return a pass that links in Moduels from a provided +/// BackendConsumer to a given primary Module. Note that this pass is designed +/// for use with the legacy pass manager. 
+class LinkInModulesPass : public PassInfoMixin<LinkInModulesPass> { + clang::BackendConsumer *BC; + bool ShouldLinkFiles; + +public: + LinkInModulesPass(clang::BackendConsumer *BC, bool ShouldLinkFiles = true); + + PreservedAnalyses run(Module &M, AnalysisManager<Module> &); + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index a692abaf3b75..172c4c937b97 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -828,9 +828,9 @@ private: /// Info on the global variable used to guard initialization of static locals. /// The BitIndex field is only used for externally invisible declarations. struct GuardInfo { - GuardInfo() : Guard(nullptr), BitIndex(0) {} - llvm::GlobalVariable *Guard; - unsigned BitIndex; + GuardInfo() = default; + llvm::GlobalVariable *Guard = nullptr; + unsigned BitIndex = 0; }; /// Map from DeclContext to the current guard variable. We assume that the @@ -1235,7 +1235,6 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( const VBOffsets &VBaseMap = Layout.getVBaseOffsetsMap(); CGBuilderTy &Builder = CGF.Builder; - unsigned AS = getThisAddress(CGF).getAddressSpace(); llvm::Value *Int8This = nullptr; // Initialize lazily. for (const CXXBaseSpecifier &S : RD->vbases()) { @@ -1256,14 +1255,12 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty); if (!Int8This) - Int8This = Builder.CreateBitCast(getThisValue(CGF), - CGF.Int8Ty->getPointerTo(AS)); + Int8This = getThisValue(CGF); + llvm::Value *VtorDispPtr = Builder.CreateInBoundsGEP(CGF.Int8Ty, Int8This, VBaseOffset); // vtorDisp is always the 32-bits before the vbase in the class layout. 
VtorDispPtr = Builder.CreateConstGEP1_32(CGF.Int8Ty, VtorDispPtr, -4); - VtorDispPtr = Builder.CreateBitCast( - VtorDispPtr, CGF.Int32Ty->getPointerTo(AS), "vtordisp.ptr"); Builder.CreateAlignedStore(VtorDispValue, VtorDispPtr, CharUnits::fromQuantity(4)); @@ -1379,8 +1376,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( case Dtor_Base: // The base destructor most closely tracks the user-declared constructor, so // we delegate back to the normal declarator case. - return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*IsConstantVariable=*/false); + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage); case Dtor_Complete: // The complete destructor is like an inline function, but it may be // imported and therefore must be exported as well. This requires changing @@ -1528,7 +1524,7 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, auto *IsMostDerived = ImplicitParamDecl::Create( Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), &Context.Idents.get("is_most_derived"), Context.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); // The 'most_derived' parameter goes second if the ctor is variadic and last // if it's not. Dtors can't be variadic. 
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); @@ -1541,7 +1537,7 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, auto *ShouldDelete = ImplicitParamDecl::Create( Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), &Context.Idents.get("should_call_delete"), Context.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Params.push_back(ShouldDelete); getStructorImplicitParamDecl(CGF) = ShouldDelete; } @@ -1569,14 +1565,9 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { if (!CGF.CurFuncIsThunk && MD->isVirtual()) { CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(CGF.CurGD); if (!Adjustment.isZero()) { - unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace(); - llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS), - *thisTy = This->getType(); - This = CGF.Builder.CreateBitCast(This, charPtrTy); assert(Adjustment.isPositive()); This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This, -Adjustment.getQuantity()); - This = CGF.Builder.CreateBitCast(This, thisTy, "this.adjusted"); } } setCXXABIThisValue(CGF, This); @@ -1682,7 +1673,11 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD, llvm::GlobalVariable *VTable) { - if (!CGM.getCodeGenOpts().LTOUnit) + // Emit type metadata on vtables with LTO or IR instrumentation. + // In IR instrumentation, the type metadata could be used to find out vtable + // definitions (for type profiling) among all global variables. + if (!CGM.getCodeGenOpts().LTOUnit && + !CGM.getCodeGenOpts().hasProfileIRInstr()) return; // TODO: Should VirtualFunctionElimination also be supported here? 
@@ -1897,9 +1892,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, llvm::Comdat *C = nullptr; if (!VFTableComesFromAnotherTU && - (llvm::GlobalValue::isWeakForLinker(VFTableLinkage) || - (llvm::GlobalValue::isLocalLinkage(VFTableLinkage) && - VTableAliasIsRequred))) + llvm::GlobalValue::isWeakForLinker(VFTableLinkage)) C = CGM.getModule().getOrInsertComdat(VFTableName.str()); // Only insert a pointer into the VFTable for RTTI data if we are not @@ -2273,7 +2266,6 @@ MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret, if (RA.isEmpty()) return Ret.getPointer(); - auto OrigTy = Ret.getType(); Ret = Ret.withElementType(CGF.Int8Ty); llvm::Value *V = Ret.getPointer(); @@ -2290,8 +2282,7 @@ MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret, if (RA.NonVirtual) V = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, V, RA.NonVirtual); - // Cast back to the original type. - return CGF.Builder.CreateBitCast(V, OrigTy); + return V; } bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr, @@ -2512,9 +2503,6 @@ LValue MicrosoftCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); - unsigned AS = cast<llvm::PointerType>(V->getType())->getAddressSpace(); - V = CGF.Builder.CreateBitCast(V, RealVarTy->getPointerTo(AS)); - CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Address Addr(V, RealVarTy, Alignment); @@ -2998,7 +2986,6 @@ MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) { NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD); // The rest of the fields are common with data member pointers. 
- FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy); return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD, NonVirtualBaseAdjustment, VBTableIndex); } @@ -3227,9 +3214,6 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress( CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr, const MemberPointerType *MPT) { assert(MPT->isMemberDataPointer()); - unsigned AS = Base.getAddressSpace(); - llvm::Type *PType = - CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS); CGBuilderTy &Builder = CGF.Builder; const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); @@ -3257,16 +3241,9 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress( Addr = Base.getPointer(); } - // Cast to char*. - Addr = Builder.CreateBitCast(Addr, CGF.Int8Ty->getPointerTo(AS)); - // Apply the offset, which we assume is non-null. - Addr = Builder.CreateInBoundsGEP(CGF.Int8Ty, Addr, FieldOffset, + return Builder.CreateInBoundsGEP(CGF.Int8Ty, Addr, FieldOffset, "memptr.offset"); - - // Cast the address to the appropriate pointer type, adopting the address - // space of the base pointer. - return Builder.CreateBitCast(Addr, PType); } llvm::Value * @@ -3523,8 +3500,6 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer( const FunctionProtoType *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>(); const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( - CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); CGBuilderTy &Builder = CGF.Builder; MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); @@ -3554,16 +3529,10 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer( ThisPtrForCall = This.getPointer(); } - if (NonVirtualBaseAdjustment) { - // Apply the adjustment and cast back to the original struct type. 
- llvm::Value *Ptr = Builder.CreateBitCast(ThisPtrForCall, CGF.Int8PtrTy); - Ptr = Builder.CreateInBoundsGEP(CGF.Int8Ty, Ptr, NonVirtualBaseAdjustment); - ThisPtrForCall = Builder.CreateBitCast(Ptr, ThisPtrForCall->getType(), - "this.adjusted"); - } + if (NonVirtualBaseAdjustment) + ThisPtrForCall = Builder.CreateInBoundsGEP(CGF.Int8Ty, ThisPtrForCall, + NonVirtualBaseAdjustment); - FunctionPointer = - Builder.CreateBitCast(FunctionPointer, FTy->getPointerTo()); CGCallee Callee(FPT, FunctionPointer); return Callee; } @@ -3674,14 +3643,17 @@ uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent, static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { switch (Ty->getLinkage()) { - case NoLinkage: - case InternalLinkage: - case UniqueExternalLinkage: + case Linkage::Invalid: + llvm_unreachable("Linkage hasn't been computed!"); + + case Linkage::None: + case Linkage::Internal: + case Linkage::UniqueExternal: return llvm::GlobalValue::InternalLinkage; - case VisibleNoLinkage: - case ModuleLinkage: - case ExternalLinkage: + case Linkage::VisibleNone: + case Linkage::Module: + case Linkage::External: return llvm::GlobalValue::LinkOnceODRLinkage; } llvm_unreachable("Invalid linkage!"); @@ -4008,7 +3980,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { // Check to see if we've already declared this TypeDescriptor. if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName)) - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return GV; // Note for the future: If we would ever like to do deferred emission of // RTTI, check if emitting vtables opportunistically need any adjustment. 
@@ -4034,7 +4006,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { MangledName); if (Var->isWeakForLinker()) Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName())); - return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy); + return Var; } /// Gets or a creates a Microsoft CompleteObjectLocator. @@ -4118,7 +4090,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, &getContext().Idents.get("src"), getContext().getLValueReferenceType(RecordTy, /*SpelledAsLValue=*/true), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); if (IsCopy) FunctionArgs.push_back(&SrcParam); @@ -4128,7 +4100,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr, SourceLocation(), &getContext().Idents.get("is_most_derived"), - getContext().IntTy, ImplicitParamDecl::Other); + getContext().IntTy, ImplicitParamKind::Other); // Only add the parameter to the list if the class has virtual bases. if (RD->getNumVBases() > 0) FunctionArgs.push_back(&IsMostDerived); @@ -4227,8 +4199,6 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T, CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure); else CopyCtor = CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete)); - - CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy); } else { CopyCtor = llvm::Constant::getNullValue(CGM.Int8PtrTy); } @@ -4438,14 +4408,11 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) if (CXXDestructorDecl *DtorD = RD->getDestructor()) if (!DtorD->isTrivial()) - CleanupFn = llvm::ConstantExpr::getBitCast( - CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)), - CGM.Int8PtrTy); + CleanupFn = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)); // This is unused as far as we can tell, initialize it to null. 
llvm::Constant *ForwardCompat = getImageRelativeConstant(llvm::Constant::getNullValue(CGM.Int8PtrTy)); - llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant( - llvm::ConstantExpr::getBitCast(CTA, CGM.Int8PtrTy)); + llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(CTA); llvm::StructType *TIType = getThrowInfoType(); llvm::Constant *Fields[] = { llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags @@ -4479,7 +4446,7 @@ void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { // Call into the runtime to throw the exception. llvm::Value *Args[] = { - CGF.Builder.CreateBitCast(AI.getPointer(), CGM.Int8PtrTy), + AI.getPointer(), TI }; CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args); diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 114a9c1e2eac..ee543e40b460 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -160,6 +160,9 @@ public: LangOpts.CurrentModule.empty() ? 
MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(llvm::codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); + CodeGenOpts.DwarfVersion = CI.getCodeGenOpts().DwarfVersion; + CodeGenOpts.DebugCompilationDir = + CI.getInvocation().getCodeGenOpts().DebugCompilationDir; CodeGenOpts.DebugPrefixMap = CI.getInvocation().getCodeGenOpts().DebugPrefixMap; CodeGenOpts.DebugStrictDwarf = CI.getCodeGenOpts().DebugStrictDwarf; diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index 055dd3704386..16fbf52a517d 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -409,9 +409,10 @@ void SwiftAggLowering::splitVectorEntry(unsigned index) { CharUnits begin = Entries[index].Begin; for (unsigned i = 0; i != numElts; ++i) { - Entries[index].Type = eltTy; - Entries[index].Begin = begin; - Entries[index].End = begin + eltSize; + unsigned idx = index + i; + Entries[idx].Type = eltTy; + Entries[idx].Begin = begin; + Entries[idx].End = begin + eltSize; begin += eltSize; } } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 3d79f92137ab..60224d458f6a 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -137,7 +137,7 @@ llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( if (auto *C = dyn_cast<llvm::Constant>(Src)) return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); // Try to preserve the source's name to make IR more readable. - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + return CGF.Builder.CreateAddrSpaceCast( Src, DestTy, Src->hasName() ? 
Src->getName() + ".ascast" : ""); } diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 14ed5e5d2d2c..0c0781a2d5ab 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -81,6 +81,9 @@ public: CodeGen::CodeGenModule &CGM, const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {} + /// Provides a convenient hook to handle extra target-specific globals. + virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {} + /// Any further codegen related checks that need to be done on a function call /// in a target specific manner. virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 561110ff8c0d..be5145daa00b 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -185,7 +185,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { assert(Ty->isVectorType() && "expected vector type!"); const auto *VT = Ty->castAs<VectorType>(); - if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { + if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); assert(VT->getElementType()->castAs<BuiltinType>()->getKind() == BuiltinType::UChar && @@ -194,7 +194,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { llvm::Type::getInt1Ty(getVMContext()), 16)); } - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) { + if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); const auto *BT = VT->getElementType()->castAs<BuiltinType>(); @@ -323,12 +323,11 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, return ABIArgInfo::getDirect( llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); - // 
For alignment adjusted HFAs, cap the argument alignment to 16, leave it - // default otherwise. + // For HFAs/HVAs, cap the argument alignment to 16, otherwise + // set it to 8 according to the AAPCS64 document. unsigned Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); - unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); - Align = (Align > BaseAlign && Align >= 16) ? 16 : 0; + Align = (Align >= 16) ? 16 : 8; return ABIArgInfo::getDirect( llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0, nullptr, true, Align); @@ -369,8 +368,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, return ABIArgInfo::getIgnore(); if (const auto *VT = RetTy->getAs<VectorType>()) { - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || - VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + if (VT->getVectorKind() == VectorKind::SveFixedLengthData || + VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) return coerceIllegalVector(RetTy); } @@ -444,8 +443,8 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { // Check whether VT is a fixed-length SVE vector. These types are // represented as scalable vectors in function args/return and must be // coerced from fixed vectors. - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || - VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + if (VT->getVectorKind() == VectorKind::SveFixedLengthData || + VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) return true; // Check whether VT is legal. 
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 796a2be81a09..03ac6b78598f 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "clang/Basic/TargetOptions.h" using namespace clang; using namespace clang::CodeGen; @@ -248,6 +249,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(); } } + + // Use pass-by-reference in stead of pass-by-value for struct arguments in + // function ABI. + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_private)); } // Otherwise just do the default thing. @@ -268,6 +275,8 @@ public: void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, CodeGenModule &CGM) const; + void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -299,12 +308,13 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) return false; - return D->hasAttr<OpenCLKernelAttr>() || - (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || - (isa<VarDecl>(D) && - (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || - cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || - cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())); + return !D->hasAttr<OMPDeclareTargetDeclAttr>() && + (D->hasAttr<OpenCLKernelAttr>() || + (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || + (isa<VarDecl>(D) && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || + 
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()))); } void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( @@ -317,26 +327,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); if (ReqdWGS || FlatWGS) { - unsigned Min = 0; - unsigned Max = 0; - if (FlatWGS) { - Min = FlatWGS->getMin() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - Max = FlatWGS->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - } - if (ReqdWGS && Min == 0 && Max == 0) - Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); - - if (Min != 0) { - assert(Min <= Max && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); + M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value for HIP. @@ -349,24 +340,8 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } - if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { - unsigned Min = - Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); - unsigned Max = Attr->getMax() ? 
Attr->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue() - : 0; - - if (Min != 0) { - assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min); - if (Max != 0) - AttrVal = AttrVal + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-waves-per-eu", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); - } + if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) + M.handleAMDGPUWavesPerEUAttr(F, Attr); if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { unsigned NumSGPR = Attr->getNumSGPR(); @@ -383,6 +358,40 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( } } +/// Emits control constants used to change per-architecture behaviour in the +/// AMDGPU ROCm device libraries. +void AMDGPUTargetCodeGenInfo::emitTargetGlobals( + CodeGen::CodeGenModule &CGM) const { + StringRef Name = "__oclc_ABI_version"; + llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name); + if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage())) + return; + + if (CGM.getTarget().getTargetOpts().CodeObjectVersion == + llvm::CodeObjectVersionKind::COV_None) + return; + + auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32); + llvm::Constant *COV = llvm::ConstantInt::get( + Type, CGM.getTarget().getTargetOpts().CodeObjectVersion); + + // It needs to be constant weak_odr without externally_initialized so that + // the load instuction can be eliminated by the IPSCCP. + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name, + nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); + + // Replace any external references to this variable with the new global. 
+ if (OriginalGV) { + OriginalGV->replaceAllUsesWith(GV); + GV->takeName(OriginalGV); + OriginalGV->eraseFromParent(); + } +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { @@ -401,13 +410,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (FD) setFunctionDeclAttributes(FD, F, M); - const bool IsHIPKernel = - M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>(); - - // TODO: This should be moved to language specific attributes instead. - if (IsHIPKernel) - F->addFnAttr("uniform-work-group-size", "true"); - if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); @@ -449,12 +451,11 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, return DefaultGlobalAS; LangAS AddrSpace = D->getType().getAddressSpace(); - assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); if (AddrSpace != LangAS::Default) return AddrSpace; // Only promote to address space 4 if VarDecl has constant initialization. 
- if (CGM.isTypeConstant(D->getType(), false, false) && + if (D->getType().isConstantStorage(CGM.getContext(), false, false) && D->hasConstantInitialization()) { if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) return *ConstAS; @@ -470,20 +471,25 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, std::string Name; switch (Scope) { case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: Name = "singlethread"; break; case SyncScope::HIPWavefront: case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: Name = "wavefront"; break; case SyncScope::HIPWorkgroup: case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: Name = "workgroup"; break; case SyncScope::HIPAgent: case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: Name = "agent"; break; + case SyncScope::SystemScope: case SyncScope::HIPSystem: case SyncScope::OpenCLAllSVMDevices: Name = ""; @@ -595,6 +601,53 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( return F; } +void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr( + llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS, + const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal, + int32_t *MaxThreadsVal) { + unsigned Min = 0; + unsigned Max = 0; + if (FlatWGS) { + Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); + Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue(); + } + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); + + if (Min != 0) { + assert(Min <= Max && "Min must be less than or equal Max"); + + if (MinThreadsVal) + *MinThreadsVal = Min; + if (MaxThreadsVal) + *MaxThreadsVal = Max; + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + if (F) + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); +} + +void CodeGenModule::handleAMDGPUWavesPerEUAttr( + 
llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) { + unsigned Min = + Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); + unsigned Max = + Attr->getMax() + ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() + : 0; + + if (Min != 0) { + assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min); + if (Max != 0) + AttrVal = AttrVal + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); +} + std::unique_ptr<TargetCodeGenInfo> CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) { return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes()); diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp index 6391a8aeaa67..7b2c31139b0b 100644 --- a/clang/lib/CodeGen/Targets/LoongArch.cpp +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -148,6 +148,13 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { uint64_t ArraySize = ATy->getSize().getZExtValue(); QualType EltTy = ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible to be + // passed via FARs in C++. + if (const auto *RTy = EltTy->getAs<RecordType>()) { + if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); for (uint64_t i = 0; i < ArraySize; ++i) { if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, @@ -163,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( // copy constructor are not eligible for the FP calling convention. 
if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; - if (isEmptyRecord(getContext(), Ty, true)) - return true; const RecordDecl *RD = RTy->getDecl(); - // Unions aren't eligible unless they're empty (which is caught above). + if (isEmptyRecord(getContext(), Ty, true, true) && + (!RD->isUnion() || !isa<CXXRecordDecl>(RD))) + return true; + // Unions aren't eligible unless they're empty in C (which is caught above). if (RD->isUnion()) return false; const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); @@ -222,6 +230,8 @@ bool LoongArchABIInfo::detectFARsEligibleStruct( if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off)) return false; + if (!Field1Ty) + return false; // Not really a candidate if we have a single int but no float. if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) return false; @@ -299,12 +309,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, CGCXXABI::RAA_DirectInMemory); } - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - uint64_t Size = getContext().getTypeSize(Ty); + // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or + // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but it's size + // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. + if (isEmptyRecord(getContext(), Ty, true) && Size == 0) + return ABIArgInfo::getIgnore(); + // Pass floating point values via FARs if possible. if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && FRLen >= Size && FARsLeft) { @@ -312,6 +324,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, return ABIArgInfo::getDirect(); } + // Pass 128-bit/256-bit vector values via vector registers directly. 
+ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && + (getTarget().hasFeature("lsx"))) || + ((getContext().getTypeSize(Ty) == 256) && + getTarget().hasFeature("lasx")))) + return ABIArgInfo::getDirect(); + // Complex types for the *f or *d ABI must be passed directly rather than // using CoerceAndExpand. if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 1ca0192333a0..d0dc7c258a03 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -71,12 +71,12 @@ public: return true; } -private: // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, int Operand); +private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) { llvm::Value *Handle = nullptr; @@ -256,24 +256,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // Create !{<func-ref>, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } - if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) { - // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node - llvm::APSInt MaxThreads(32); - MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); - if (MaxThreads > 0) - addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); - - // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was - // not specified in __launch_bounds__ or if the user specified a 0 value, - // we don't have to add a PTX directive. 
- if (Attr->getMinBlocks()) { - llvm::APSInt MinBlocks(32); - MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); - if (MinBlocks > 0) - // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node - addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); - } - } + if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) + M.handleCUDALaunchBoundsAttr(F, Attr); } // Attach kernel metadata directly if compiling for NVPTX. @@ -303,6 +287,55 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { } } +void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, + const CUDALaunchBoundsAttr *Attr, + int32_t *MaxThreadsVal, + int32_t *MinBlocksVal, + int32_t *MaxClusterRankVal) { + // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node + llvm::APSInt MaxThreads(32); + MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext()); + if (MaxThreads > 0) { + if (MaxThreadsVal) + *MaxThreadsVal = MaxThreads.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", + MaxThreads.getExtValue()); + } + } + + // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it + // was not specified in __launch_bounds__ or if the user specified a 0 value, + // we don't have to add a PTX directive. 
+ if (Attr->getMinBlocks()) { + llvm::APSInt MinBlocks(32); + MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext()); + if (MinBlocks > 0) { + if (MinBlocksVal) + *MinBlocksVal = MinBlocks.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", + MinBlocks.getExtValue()); + } + } + } + if (Attr->getMaxBlocks()) { + llvm::APSInt MaxBlocks(32); + MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext()); + if (MaxBlocks > 0) { + if (MaxClusterRankVal) + *MaxClusterRankVal = MaxBlocks.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", + MaxBlocks.getExtValue()); + } + } + } +} + std::unique_ptr<TargetCodeGenInfo> CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) { return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes()); diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 9cdd2aa07791..40dddde508c1 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -431,7 +431,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy; if (isIndirect) - DirectTy = llvm::PointerType::getUnqual(CGF.getLLVMContext()); + DirectTy = CGF.UnqualPtrTy; // Case 1: consume registers. 
Address RegAddr = Address::invalid(); @@ -620,6 +620,9 @@ public: bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; + void emitTargetMetadata(CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> + &MangledDeclNames) const override; }; class PPC64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -940,6 +943,24 @@ PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( /*IsAIX*/ false); } +void PPC64_SVR4_TargetCodeGenInfo::emitTargetMetadata( + CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const { + if (CGM.getTypes().isLongDoubleReferenced()) { + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + const auto *flt = &CGM.getTarget().getLongDoubleFormat(); + if (flt == &llvm::APFloat::PPCDoubleDouble()) + CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi", + llvm::MDString::get(Ctx, "doubledouble")); + else if (flt == &llvm::APFloat::IEEEquad()) + CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi", + llvm::MDString::get(Ctx, "ieeequad")); + else if (flt == &llvm::APFloat::IEEEdouble()) + CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi", + llvm::MDString::get(Ctx, "ieeedouble")); + } +} + bool PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index b6d8ae462675..1e1d249b37ac 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -8,7 +8,6 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" -#include "llvm/TargetParser/RISCVTargetParser.h" using namespace clang; using namespace clang::CodeGen; @@ -152,6 +151,13 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { uint64_t ArraySize = ATy->getSize().getZExtValue(); QualType EltTy 
= ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible for + // the FP calling convention in C++. + if (const auto *RTy = EltTy->getAs<RecordType>()) { + if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); for (uint64_t i = 0; i < ArraySize; ++i) { bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, @@ -168,7 +174,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, // copy constructor are not eligible for the FP calling convention. if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; - if (isEmptyRecord(getContext(), Ty, true)) + if (isEmptyRecord(getContext(), Ty, true, true)) return true; const RecordDecl *RD = RTy->getDecl(); // Unions aren't eligible unless they're empty (which is caught above). @@ -238,6 +244,8 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, NeededArgFPRs = 0; bool IsCandidate = detectFPCCEligibleStructHelper( Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Field1Ty) + return false; // Not really a candidate if we have a single int but no float. 
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) return false; @@ -310,16 +318,20 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { assert(Ty->isVectorType() && "expected vector type!"); const auto *VT = Ty->castAs<VectorType>(); - assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector && + assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData && "Unexpected vector kind"); assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - const auto *BT = VT->getElementType()->castAs<BuiltinType>(); - unsigned EltSize = getContext().getTypeSize(BT); + auto VScale = + getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts()); + // The MinNumElts is simplified from equation: + // NumElts / VScale = + // (EltSize * NumElts / (VScale * RVVBitsPerBlock)) + // * (RVVBitsPerBlock / EltSize) llvm::ScalableVectorType *ResType = - llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), - llvm::RISCV::RVVBitsPerBlock / EltSize); + llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), + VT->getNumElements() / VScale->first); return ABIArgInfo::getDirect(ResType); } @@ -419,7 +431,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, } if (const VectorType *VT = Ty->getAs<VectorType>()) - if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector) + if (VT->getVectorKind() == VectorKind::RVVFixedLengthData) return coerceVLSVector(Ty); // Aggregates which are <= 2*XLen will be passed in registers if possible, diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp index f5cafaa97315..a337a52a94ec 100644 --- a/clang/lib/CodeGen/Targets/Sparc.cpp +++ b/clang/lib/CodeGen/Targets/Sparc.cpp @@ -286,7 +286,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGBuilderTy &Builder = CGF.Builder; Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"), getVAListElementType(CGF), SlotSize); - 
llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); + llvm::Type *ArgPtrTy = CGF.UnqualPtrTy; auto TypeInfo = getContext().getTypeInfoInChars(Ty); diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 31679d899a44..2af240350438 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -87,12 +87,15 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { /// Similar to llvm::CCState, but for Clang. struct CCState { CCState(CGFunctionInfo &FI) - : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {} + : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()), + Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {} llvm::SmallBitVector IsPreassigned; unsigned CC = CallingConv::CC_C; unsigned FreeRegs = 0; unsigned FreeSSERegs = 0; + RequiredArgs Required; + bool IsDelegateCall = false; }; /// X86_32ABIInfo - The X86-32 ABI information. @@ -140,7 +143,8 @@ class X86_32ABIInfo : public ABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; - ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State, + unsigned ArgIndex) const; /// Updates the number of available free registers, returns /// true if any registers were allocated. @@ -737,8 +741,8 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c } } -ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, - CCState &State) const { +ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State, + unsigned ArgIndex) const { // FIXME: Set alignment on indirect arguments. 
bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; @@ -753,6 +757,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, false, State); + } else if (State.IsDelegateCall) { + // Avoid having different alignments on delegate call args by always + // setting the alignment to 4, which is what we do for inallocas. + ABIArgInfo Res = getIndirectResult(Ty, false, State); + Res.setIndirectAlign(CharUnits::fromQuantity(4)); + return Res; } else if (RAA == CGCXXABI::RAA_DirectInMemory) { // The field index doesn't matter, we'll fix it up later. return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); @@ -805,11 +815,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; - // Pass over-aligned aggregates on Windows indirectly. This behavior was - // added in MSVC 2015. Use the required alignment from the record layout, - // since that may be less than the regular type alignment, and types with - // required alignment of less than 4 bytes are not passed indirectly. - if (IsWin32StructABI) { + // Pass over-aligned aggregates to non-variadic functions on Windows + // indirectly. This behavior was added in MSVC 2015. Use the required + // alignment from the record layout, since that may be less than the + // regular type alignment, and types with required alignment of less than 4 + // bytes are not passed indirectly. 
+ if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) { unsigned AlignInBits = 0; if (RT) { const ASTRecordLayout &Layout = @@ -935,12 +946,13 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool UsedInAlloca = false; MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); - for (int I = 0, E = Args.size(); I < E; ++I) { + for (unsigned I = 0, E = Args.size(); I < E; ++I) { // Skip arguments that have already been assigned. if (State.IsPreassigned.test(I)) continue; - Args[I].info = classifyArgumentType(Args[I].type, State); + Args[I].info = + classifyArgumentType(Args[I].type, State, I); UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); } @@ -1500,6 +1512,24 @@ static bool checkAVXParamFeature(DiagnosticsEngine &Diag, return false; } +static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag, + SourceLocation CallLoc, + const llvm::StringMap<bool> &CallerMap, + const llvm::StringMap<bool> &CalleeMap, + QualType Ty, bool IsArgument) { + bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512"); + bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512"); + + // Forbid 512-bit or larger vector pass or return when we disabled ZMM + // instructions. 
+ if (Caller256 || Callee256) + return Diag.Report(CallLoc, diag::err_avx_calling_convention) + << IsArgument << Ty << "evex512"; + + return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + "avx512f", IsArgument); +} + static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, SourceLocation CallLoc, const llvm::StringMap<bool> &CallerMap, @@ -1507,8 +1537,8 @@ static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, bool IsArgument) { uint64_t Size = Ctx.getTypeSize(Ty); if (Size > 256) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, - "avx512f", IsArgument); + return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + IsArgument); if (Size > 128) return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx", @@ -2948,9 +2978,7 @@ static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); - llvm::Value *Res = - CGF.Builder.CreateBitCast(overflow_arg_area, - llvm::PointerType::getUnqual(LTy)); + llvm::Value *Res = overflow_arg_area; // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: // l->overflow_arg_area + sizeof(type). @@ -3053,8 +3081,6 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Type *TyHi = ST->getElementType(1); assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && "Unexpected ABI info for mixed regs"); - llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); - llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset); llvm::Value *FPAddr = @@ -3065,13 +3091,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Copy the first element. // FIXME: Our choice of alignment here and below is probably pessimistic. 
llvm::Value *V = CGF.Builder.CreateAlignedLoad( - TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), + TyLo, RegLoAddr, CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); // Copy the second element. V = CGF.Builder.CreateAlignedLoad( - TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), + TyHi, RegHiAddr, CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); diff --git a/clang/lib/CodeGen/Targets/XCore.cpp b/clang/lib/CodeGen/Targets/XCore.cpp index 8be240c018d0..aeb48f851e16 100644 --- a/clang/lib/CodeGen/Targets/XCore.cpp +++ b/clang/lib/CodeGen/Targets/XCore.cpp @@ -543,7 +543,7 @@ static bool appendArrayType(SmallStringEnc &Enc, QualType QT, const ArrayType *AT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, StringRef NoSizeEnc) { - if (AT->getSizeModifier() != ArrayType::Normal) + if (AT->getSizeModifier() != ArraySizeModifier::Normal) return false; Enc += "a("; if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) |
